Python3 文件处理相关脚本

Posted 2020-09-20

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Python3 文件处理相关脚本相关的知识，希望对你有一定的参考价值。

文件处理是脚本中最常见的任务之一，下面列举几个实用的案例：

批量删除：

(1)删除指定目录，指定后缀文件

例：删除目录 J:/StartUML 下以 .log 与 .tmp 结尾的文件

def del_files(path, filters):
    """Recursively delete files under *path* whose name ends with a given suffix.

    Args:
        path: Root directory to scan. Nothing happens when it is not an
            existing directory.
        filters: Iterable of file-name suffixes, e.g. ``[".log", ".tmp"]``.

    Each deleted file is reported on stdout as ``Delete File: <path>``.
    """
    if not os.path.isdir(path):
        return
    suffixes = tuple(filters)
    for root, _dirs, files in os.walk(path):
        for name in files:
            # A single endswith() call with a tuple tests every suffix at
            # once, so a file that matches several suffixes is removed (and
            # reported) exactly once instead of failing on a second remove.
            if name.endswith(suffixes):
                victim = os.path.join(root, name)
                os.remove(victim)
                print("Delete File: " + victim)


def test_del_files():
    """Demo call: purge ``.log`` and ``.tmp`` files below ``J:/StartUML``."""
    del_files("J:/StartUML", [".log", ".tmp"])

(2)只保留特定文件

def del_all(dir, retain_file):
    """Delete every entry directly inside *dir* except the one named *retain_file*.

    Args:
        dir: Directory to clean. Nothing happens when it is not an existing
            directory.
        retain_file: Name (not path) of the single entry to keep.

    Sub-directories are removed recursively; everything else is unlinked.
    """
    if not os.path.isdir(dir):
        return
    for entry in os.listdir(dir):
        if entry == retain_file:
            continue
        fpath = os.path.join(dir, entry)
        # os.path.isdir() follows symlinks, but shutil.rmtree() refuses to
        # operate on a symlink. A link to a directory must therefore be
        # unlinked like a file, which also leaves the link target untouched.
        if os.path.isdir(fpath) and not os.path.islink(fpath):
            shutil.rmtree(fpath)
        else:
            os.remove(fpath)

 # Example call (destructive): removes everything in J:/StartUML except "11.txt".
 del_all("J:/StartUML","11.txt")

批量复制与移动：

(1)复制处理：

#方案一
def copy_all(sourceDir, targetDir):
    """Recursively copy *sourceDir* into *targetDir*, skipping up-to-date files.

    A file is copied when it is missing in the target or when the existing
    target file has a different size (treated as an incomplete copy).
    Files that already exist with the same size are left untouched.

    Args:
        sourceDir: Directory to copy from. Nothing happens when it is not an
            existing directory.
        targetDir: Directory to copy into; created (with parents) if needed.
    """
    if not os.path.isdir(sourceDir):
        return
    # Snapshot the listing before creating the target, so a target located
    # inside the source is not picked up as one of its own entries.
    entries = os.listdir(sourceDir)
    # Create the target up front: a source that holds only sub-directories
    # (or a target whose parent is missing) must not break the recursion.
    os.makedirs(targetDir, exist_ok=True)
    for entry in entries:
        sourceFile = os.path.join(sourceDir, entry)
        targetFile = os.path.join(targetDir, entry)
        if os.path.isdir(sourceFile):
            copy_all(sourceFile, targetFile)
        elif os.path.isfile(sourceFile):
            needs_copy = (
                not os.path.exists(targetFile)
                or os.path.getsize(targetFile) != os.path.getsize(sourceFile)
            )
            if needs_copy:
                shutil.copy(sourceFile, targetFile)

#方案二
def copy_files(src_dir, dest_dir, isonelevel):
    """Copy the tree below *src_dir* into *dest_dir* using ``os.walk``.

    Args:
        src_dir: Directory to copy from. Nothing happens when it is not an
            existing directory.
        dest_dir: Destination directory; created (with parents) if needed.
        isonelevel: When true, every file is copied directly into *dest_dir*
            regardless of its depth (sub-directory names are still created
            as directories directly under *dest_dir*). When false, the
            source hierarchy is mirrored.

    Existing destination files are overwritten; existing destination
    directories are reused, so the function can be run repeatedly.
    """
    if not os.path.isdir(src_dir):
        return
    os.makedirs(dest_dir, exist_ok=True)
    for parent, dirnames, filenames in os.walk(src_dir):
        if isonelevel:
            dstdir = dest_dir
        else:
            # Map via the relative path rather than str.replace(): replace()
            # glues names together when src_dir ends with a path separator.
            relative = os.path.relpath(parent, src_dir)
            dstdir = os.path.normpath(os.path.join(dest_dir, relative))
        for dirname in dirnames:
            os.makedirs(os.path.join(dstdir, dirname), exist_ok=True)
        for fname in filenames:
            shutil.copy(os.path.join(parent, fname), os.path.join(dstdir, fname))


# copy_all("J:/ZIMU","J:/StartUML")

(2)移动处理

#walk遍历处理实现
def move_files(src_dir, dst_dir):
    """Move the whole tree below *src_dir* into *dst_dir*, then delete *src_dir*.

    Args:
        src_dir: Directory to move from. Nothing happens when it is not an
            existing directory.
        dst_dir: Destination directory; created (with parents) if needed.
            It must not live inside *src_dir*, because *src_dir* is removed
            at the end.
    """
    if not os.path.isdir(src_dir):
        return
    os.makedirs(dst_dir, exist_ok=True)
    for pdir, dirs, fnames in os.walk(src_dir, topdown=True):
        # Map via the relative path rather than str.replace(): replace()
        # glues names together when src_dir ends with a path separator.
        relative = os.path.relpath(pdir, src_dir)
        newdstdir = os.path.normpath(os.path.join(dst_dir, relative))
        os.makedirs(newdstdir, exist_ok=True)
        for fn in fnames:
            # shutil.move falls back to copy+delete when source and
            # destination are on different filesystems (os.rename fails there).
            shutil.move(os.path.join(pdir, fn), os.path.join(newdstdir, fn))
        for subdir in dirs:
            os.makedirs(os.path.join(newdstdir, subdir), exist_ok=True)
    shutil.rmtree(src_dir)

#递归实现
def move_recursive(sourceDir, targetDir):
    """Recursively move every file below *sourceDir* into *targetDir*.

    The directory structure is recreated under *targetDir*. The (now
    file-less) source directories themselves are left in place.

    Args:
        sourceDir: Directory to move from. Nothing happens when it is not an
            existing directory.
        targetDir: Destination directory; created (with parents) if needed.
    """
    if not os.path.isdir(sourceDir):
        return
    # Snapshot the listing before creating the target, so a target located
    # inside the source is not picked up as one of its own entries.
    entries = os.listdir(sourceDir)
    # Create the target once, before the loop, so empty source directories
    # are reproduced too and a missing parent directory is not an error.
    os.makedirs(targetDir, exist_ok=True)
    for entry in entries:
        sourceFile = os.path.join(sourceDir, entry)
        targetFile = os.path.join(targetDir, entry)
        if os.path.isfile(sourceFile):
            # shutil.move also works across filesystems, unlike os.rename.
            shutil.move(sourceFile, targetFile)
        elif os.path.isdir(sourceFile):
            move_recursive(sourceFile, targetFile)


def move_all(sourceDir, targetDir):
    """Move the contents of *sourceDir* into *targetDir* and delete *sourceDir*.

    Args:
        sourceDir: Directory whose contents are moved; removed afterwards.
        targetDir: Destination directory.
    """
    # The original called move_recursive() without arguments, which always
    # raised TypeError before anything was moved.
    move_recursive(sourceDir, targetDir)
    shutil.rmtree(sourceDir)

# move_all("J:/StartUML/AGirls","J:/StartUML/ABoys")

搜索与查找：

（1）查找指定文件名称文件

# 指定目录及其子目录中查找文件名含有关键字的文件
def search_file_pattern_name1(path, word):
    """Print the path of every file under *path* whose name contains *word*.

    Sub-directories are searched recursively, depth-first, in the order
    returned by ``os.listdir``.
    """
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if os.path.isdir(candidate):
            search_file_pattern_name1(candidate, word)
            continue
        if word in entry and os.path.isfile(candidate):
            print(candidate)

# search_file_pattern_name1("J:/AndroidSrc4.2/packages", ".png")

def search_file_pattern_name2(dirname, keyworld):
    """Print, relative to *dirname*, every file below it whose name contains *keyworld*.

    All matches are collected during an ``os.walk`` traversal first and
    printed afterwards, one path per line.
    """
    matches = []
    for root, _subdirs, files in os.walk(dirname):
        for fname in files:
            if keyworld in fname:
                full_path = os.path.join(root, fname)
                matches.append(os.path.relpath(full_path, start=dirname))
    for match in matches:
        print(match)

（2）查找文本内容包含指定关键词的所有文件，输出该文件的文件名与所在行号

def search_file_txtcontent1(dir, word, isaccurate):
    """Recursively report the files and line numbers under *dir* that contain *word*.

    Args:
        dir: Directory to search. Nothing happens when it is not an existing
            directory.
        word: Text to look for.
        isaccurate: When true, only whitespace-separated tokens that equal
            *word* exactly are reported (once per matching token). When
            false, any line containing *word* as a substring is reported.

    Results are printed to stdout with the bare file name and the 1-based
    line number.
    """
    if not os.path.isdir(dir):
        return
    for filename in os.listdir(dir):
        fp = os.path.join(dir, filename)
        if os.path.isfile(fp):
            # errors="replace" keeps one binary or oddly encoded file from
            # aborting the whole search with UnicodeDecodeError.
            with open(fp, errors="replace") as f:
                for num, line in enumerate(f, start=1):
                    if word not in line:
                        continue
                    if isaccurate:
                        for search_word in line.split():
                            if search_word == word:
                                print ("accurate find word ", "fileneme=",filename, " line =" , num)
                    else:
                        print ("blur find word ", "fileneme=", filename, " line =", num)
        elif os.path.isdir(fp):
            search_file_txtcontent1(fp, word, isaccurate)


# search_file_txtcontent1("J:/AndroidSrc4.2/packages/apps/Launcher2" ,"onCreate",False)


# fileinput 模块可以逐行遍历文本文件。它的用法与 readlines 类似，不同点在于：
# 它不会把全部的行一次性读入列表，而是返回一个惰性迭代的 FileInput 对象，按需逐行读取。
def search_file_txtcontent2(dir_path, searchKey, isaccurate):
    """Search every ``.java`` file below *dir_path* for *searchKey* and print the hits.

    Args:
        dir_path: Root directory to walk. Nothing happens when it does not
            exist.
        searchKey: Text to look for. In accurate mode it is matched
            literally as a whole word; otherwise it is used as a regular
            expression, e.g. ``r"\\d{3}-\\d{3}-\\d{4}"``.
        isaccurate: Selects whole-word matching (one report per occurrence)
            instead of regex search (one report per matching line).

    Each hit prints a marker line followed by file name, line number and
    the matching line.
    """
    if not os.path.exists(dir_path):
        return
    if isaccurate:
        # Build the whole-word pattern from searchKey; the original
        # hard-coded "onCreate" here and ignored the argument.
        pattern = re.compile(r"\b" + re.escape(searchKey) + r"\b")
    else:
        pattern = re.compile(searchKey)
    for pdir, _subdirs, subfiles in os.walk(dir_path):
        for fname in subfiles:
            if os.path.splitext(fname)[1] != ".java":
                continue
            fn = os.path.join(pdir, fname)
            # The context manager closes the file even if printing fails.
            with fileinput.input(fn) as finput:
                for eachline in finput:
                    if isaccurate:
                        for _match in pattern.finditer(eachline):
                            print("accurate find ============")
                            print('filename:', finput.filename(), 'line:', finput.lineno(), eachline)
                    elif pattern.search(eachline):
                        print("============")
                        print('filename:', finput.filename(), 'line:', finput.lineno(), eachline)

# search_file_txtcontent2("J:/AndroidSrc4.2/packages/apps/Launcher2","onCreate",True)

（3）文本替换处理，将文本内指定原内容替换为新的内容

#方案一，单文件处理
def search_replace_content(src_file, oldWorld, newWorld):
    """Replace every regex match of *oldWorld* in *src_file* with *newWorld*, in place.

    Args:
        src_file: Path of the text file to rewrite. Nothing happens when it
            does not exist.
        oldWorld: Regular expression to search for (passed to ``re``).
        newWorld: Replacement string (``re.sub`` replacement syntax).

    The new content is written to a temporary file in the same directory,
    which then replaces the original, so a failure part-way through leaves
    the original file untouched.
    """
    if not os.path.exists(src_file):
        return
    pattern = re.compile(oldWorld)
    # mkstemp creates the file securely (mktemp only returns a guessable
    # name); using the same directory keeps the final replace on one
    # filesystem.
    fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(os.path.abspath(src_file)))
    try:
        # Text mode on both sides: the original opened the temp file as
        # "w+b" and then wrote str, which raises TypeError on Python 3.
        with os.fdopen(fd, "w") as file_dst, open(src_file, "r") as fopen:
            for line in fopen:
                file_dst.write(pattern.sub(newWorld, line))
        shutil.copymode(src_file, temp_path)
        os.replace(temp_path, src_file)
    except BaseException:
        # Do not leave the temporary file behind when anything goes wrong.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise

#方案二，多文件处理
def search_replace_bigtxt(dir_search, oldKey, newKey):
    """Replace the literal text *oldKey* with *newKey* in every file below *dir_search*.

    Files are read and written as UTF-8, line by line, so large files are
    never loaded into memory at once. Each file is rewritten through a
    ``<name>.new`` sibling that then replaces the original.

    Args:
        dir_search: Root directory to walk.
        oldKey: Literal text to search for.
        newKey: Replacement text.

    Raises:
        UnicodeDecodeError: If a file is not valid UTF-8. The partial
            ``.new`` file is removed and the original is left unchanged.
    """
    for parent_dir, _subdirs, files in os.walk(dir_search):
        for file in files:
            # Join with the directory currently being walked; the original
            # passed the builtin ``dir`` function here and raised TypeError.
            fname = os.path.join(parent_dir, file)
            tmp_name = fname + ".new"
            try:
                # newline="" keeps the original line endings untouched.
                with open(fname, "r", encoding="utf-8", newline="") as inFile:
                    with open(tmp_name, "w", encoding="utf-8", newline="") as outFile:
                        for line in inFile:
                            outFile.write(line.replace(oldKey, newKey))
            except BaseException:
                try:
                    os.remove(tmp_name)
                except OSError:
                    pass
                raise
            # os.replace overwrites an existing target on every platform.
            os.replace(tmp_name, fname)

多组词替换处理

def easy2_replace_txt(input_path='path/to/input/file',
                      output_path='path/to/output/file',
                      replacements=None):
    """Copy *input_path* to *output_path*, applying several text replacements.

    Args:
        input_path: File to read.
        output_path: File to write (overwritten).
        replacements: Mapping of ``old -> new`` literal strings. Defaults to
            ``{'zero': '0', 'temp': 'bob', 'garbage': 'nothing'}``.

    Replacements are applied to each line one after another in mapping
    order, so the output of an earlier replacement can be changed again by
    a later one.
    """
    if replacements is None:
        replacements = {'zero': '0', 'temp': 'bob', 'garbage': 'nothing'}
    with open(input_path) as infile, open(output_path, 'w') as outfile:
        for line in infile:
            for src, target in replacements.items():
                line = line.replace(src, target)
            outfile.write(line)

以上是关于Python3 文件处理相关脚本的主要内容，如果未能解决你的问题，请参考以下文章

常用python日期日志获取内容循环的代码片段

Python2 和 Python3 中默认编码的差异

学习笔记：python3，代码片段（2017）

selenium10-python3部分代码复习

html 将以编程方式附加外部脚本文件的javascript代码片段，并按顺序排列。用于响应式网站，其中ma

scrapy主动退出爬虫的代码片段(python3)