Python文件操作

Posted 数据攻城小狮子

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python文件操作相关的知识,希望对你有一定的参考价值。

文章目录

txt文件操作

def mergeTxt(txtFiles):
    """Interleave the lines of two UTF-8 text files into ``result.txt``.

    Lines are taken alternately: one from the first file, one from the
    second, and so on.  As soon as either file is exhausted, the remaining
    lines of the other file are appended in order.  ``result.txt`` is
    created (or overwritten) in the current working directory.

    Every line written is newline-terminated, so a final line that lacks a
    trailing newline in its source file is not glued to the line that
    follows it in the merged output.

    Args:
        txtFiles: Sequence whose first two items are the paths of the two
            input files.  Only indexes 0 and 1 are read.
    """
    def _terminated(line):
        # Only add a newline when the source line does not already end
        # with one (i.e. the last line of a file without a final newline).
        return line if line.endswith('\n') else line + '\n'

    with open('result.txt', 'w', encoding='utf8') as fp:
        with open(txtFiles[0], encoding='utf8') as fp1, \
                open(txtFiles[1], encoding='utf8') as fp2:
            while True:
                # Alternate: one line from file 1, then one from file 2.
                line1 = fp1.readline()
                if not line1:
                    # File 1 is exhausted; file 2 may still have lines.
                    remaining = fp2
                    break
                fp.write(_terminated(line1))

                line2 = fp2.readline()
                if not line2:
                    # File 2 is exhausted; file 1 may still have lines.
                    remaining = fp1
                    break
                fp.write(_terminated(line2))

            # Copy whatever is left of the file that has not ended yet.
            fp.writelines(_terminated(line) for line in remaining)
txtFiles=['文本1.txt','文本2.txt']
# Show the contents of both input files, each under its own heading.
for heading, name in zip(("文本1:", "文本2:"), txtFiles):
    print(heading)
    with open(name, encoding='utf8') as source:
        for text in source:
            print(text)
# Merge the two files into result.txt, then show the merged contents.
mergeTxt(txtFiles)
print('合并后:')
with open('result.txt', encoding='utf8') as merged:
    for text in merged:
        print(text)

JSON文件操作

import json
# Sample records: one dict per residential community.  Each item must be
# wrapped in braces to be a dict; without them the list literal is a
# syntax error.
information=[
    {'小区名称':'小区A','均价':8000,'月交易量':20},
    {'小区名称':'小区B','均价':8500,'月交易量':35},
    {'小区名称':'小区C','均价':7800,'月交易量':50},
    {'小区名称':'小区D','均价':12000,'月交易量':18}
]
# Write the records to a JSON file (4-space indent, compact separators).
with open('房屋信息.json','w',encoding='utf8') as fp:
    json.dump(information,fp,indent=4,separators=[',',':'])
# Read the file back and print one record per line.
with open('房屋信息.json',encoding='utf8') as fp:
    information=json.load(fp)
    for info in information:
        print(info)

CSV文件操作

from csv import reader,writer
from random import randrange
from datetime import date,timedelta

fn='data.csv'
# newline='' hands line-ending control to the csv module.  Without it,
# platforms that translate '\n' on write end up with a blank row after
# every record.
with open(fn,'w',encoding='utf8',newline='') as fp:
    # Create the csv writer object.
    wr=writer(fp)
    # Header row.
    wr.writerow(['日期','销量'])
    # First day of the series: 1 January 2020.
    startDate=date(2020,1,1)
    # Generate 100 simulated daily records.
    for i in range(100):
        # A rising trend (5 per day) plus random noise in [5, 50).
        amount=500+i*5+randrange(5,50)
        wr.writerow([str(startDate),amount])
        # Advance to the next day.
        startDate=startDate+timedelta(days=1)
# Read back and display the csv file generated above.
with open(fn,encoding='utf8',newline='') as fp:
    for line in reader(fp):
        # Skip empty rows, e.g. in a file written without newline=''.
        if line:
            print(*line)

Word、Excel、PowerPoint文件操作

检查并输出当前文件夹及其子文件夹中包含指定字符串的Word、Excel、PowerPoint文件名称

from sys import argv
from os import listdir
from os.path import join,isfile,isdir
from docx import Document
from openpyxl import load_workbook
from pptx import Presentation
def checkdocx(dstStr, fn):
    """Report whether a .docx file contains the given text.

    The paragraphs listed in ``document.paragraphs`` are searched first,
    then the text of every cell of every table in ``document.tables``.

    Args:
        dstStr: Text to look for.
        fn: Path of the .docx file to open.

    Returns:
        True as soon as one paragraph or table cell contains ``dstStr``,
        otherwise False.
    """
    doc = Document(fn)
    # Paragraph text first.
    if any(dstStr in para.text for para in doc.paragraphs):
        return True
    # Then every cell of every table, row by row.
    return any(
        dstStr in cell.text
        for tbl in doc.tables
        for tbl_row in tbl.rows
        for cell in tbl_row.cells
    )


def checkxlsx(dstStr, fn):
    """Report whether any text cell of an .xlsx workbook contains dstStr.

    Every cell of every worksheet is inspected.  Only cells whose value is
    a string are tested; empty cells and other non-string values (numbers,
    dates, ...) cannot contain the text and are skipped explicitly rather
    than by catching and discarding every exception.

    Args:
        dstStr: Text to look for.
        fn: Path of the .xlsx file to open.

    Returns:
        True as soon as one string cell contains ``dstStr``, otherwise
        False.
    """
    # Open the .xlsx workbook.
    wb = load_workbook(fn)
    # Walk every cell of every worksheet.
    for ws in wb.worksheets:
        for row in ws.rows:
            for cell in row:
                value = cell.value
                # `in` against None or a number would raise TypeError.
                if isinstance(value, str) and dstStr in value:
                    return True
    return False


def checkpptx(dstStr, fn):
    """Report whether a .pptx presentation contains the given text.

    On every slide, the cells of table shapes and the text of placeholder
    shapes are searched.

    Args:
        dstStr: Text to look for.
        fn: Path of the .pptx file to open.

    Returns:
        True as soon as one table cell or placeholder contains ``dstStr``,
        otherwise False.
    """
    # Numeric values of the python-pptx MSO_SHAPE_TYPE members compared
    # against below.
    TABLE = 19
    PLACEHOLDER = 14
    # NOTE(review): 14 is PLACEHOLDER; free-standing text boxes are
    # TEXT_BOX (17) and are not searched here.  Confirm whether that is
    # intended before widening the check.

    # Open the .pptx presentation.
    presentation = Presentation(fn)
    # Walk every shape of every slide.
    for slide in presentation.slides:
        for shape in slide.shapes:
            if shape.shape_type == TABLE:
                # Text of each table cell.
                for row in shape.table.rows:
                    for cell in row.cells:
                        if dstStr in cell.text_frame.text:
                            return True
            elif shape.shape_type == PLACEHOLDER:
                # Picture or chart placeholders have no text frame; skip
                # them explicitly instead of swallowing every exception.
                if shape.has_text_frame and dstStr in shape.text:
                    return True
    return False


def main(dstStr, flag):
    """Print the paths of Office files under '.' that contain dstStr.

    The current directory is scanned for .docx, .xlsx and .pptx files;
    each one is passed to the matching checker and its path is printed
    when the checker reports a match.

    Args:
        dstStr: Text to look for.
        flag: When true, sub-directories are scanned as well, breadth
            first; when false, only the current directory is scanned.
    """
    # Imported here so the block is self-contained.
    from collections import deque

    # Directories still to be visited; a deque gives O(1) removal from the
    # front, unlike list.pop(0).  '.' is the current directory.
    pending = deque(['.'])
    while pending:
        # Take the oldest directory not yet scanned (breadth-first order).
        currentDir = pending.popleft()
        for fn in listdir(currentDir):
            path = join(currentDir, fn)
            if isfile(path):
                # Choose the checker from the file extension.
                if path.endswith('.docx'):
                    matched = checkdocx(dstStr, path)
                elif path.endswith('.xlsx'):
                    matched = checkxlsx(dstStr, path)
                elif path.endswith('.pptx'):
                    matched = checkpptx(dstStr, path)
                else:
                    matched = False
                if matched:
                    print(path)
            # Queue sub-directories only when recursion was requested.
            elif flag and isdir(path):
                pending.append(path)


if __name__ == '__main__':
    # Command line:  <script> [/s] <text>
    #   /s    also scan sub-directories
    #   text  the string to search for
    if len(argv) >= 3 and argv[1] == '/s':
        dstStr = argv[2]
        flag = True
    elif len(argv) >= 2 and argv[1] != '/s':
        dstStr = argv[1]
        flag = False
    else:
        # Missing arguments used to surface as a bare IndexError.
        raise SystemExit('usage: ' + argv[0] + ' [/s] <text>')
    # Search for the string given on the command line; it used to be
    # parsed and then ignored in favour of a hard-coded value.
    main(dstStr, flag)

以上是关于Python文件操作的主要内容,如果未能解决你的问题,请参考以下文章

ArcPy开发教程1-面向ArcGIS的Python语言基础

初学者python怎么学

怎么把python程序打包成操作系统?

python开发之路

Python之简介

图灵社区 阅读 为啥要选择Python语言实现机器学习算法