Python文件操作
Posted 数据攻城小狮子
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 Python 文件操作相关的知识(txt、JSON、CSV 以及 Word、Excel、PowerPoint 文件),希望对你有一定的参考价值。
文章目录
txt文件操作
def mergeTxt(txtFiles):
    """Interleave the lines of two text files into ``result.txt``.

    Lines are taken alternately from ``txtFiles[0]`` and ``txtFiles[1]``,
    starting with the first file. As soon as one file runs out, the unread
    remainder of the other file is appended unchanged. All files are handled
    as UTF-8 and ``result.txt`` is created (or overwritten) in the current
    working directory.

    Args:
        txtFiles: Sequence whose first two items are the paths of the files
            to merge.
    """

    def _terminated(line):
        # The last line of a file may lack its newline; add one so the next
        # line taken from the other file is not glued onto it.
        return line if line.endswith('\n') else line + '\n'

    with open('result.txt', 'w', encoding='utf8') as fp:
        with open(txtFiles[0], encoding='utf8') as fp1, \
                open(txtFiles[1], encoding='utf8') as fp2:
            while True:
                # Alternate: one line from file 1, then one line from file 2.
                line1 = fp1.readline()
                if not line1:
                    # File 1 is exhausted; file 2 may still have lines left.
                    rest = fp2
                    break
                fp.write(_terminated(line1))
                line2 = fp2.readline()
                if not line2:
                    # File 2 is exhausted; file 1 may still have lines left.
                    rest = fp1
                    break
                fp.write(_terminated(line2))
            # Copy whatever is left of the file that has not ended yet.
            for line in rest:
                fp.write(line)
txtFiles = ['文本1.txt', '文本2.txt']


def _show(title, path):
    """Print *title*, then every line of the UTF-8 text file at *path*."""
    print(title)
    with open(path, encoding='utf8') as fp:
        for line in fp:
            # Each line read from the file still carries its own newline;
            # strip it so print() does not add a blank line after every row.
            print(line.rstrip('\n'))


# Show both inputs, merge them, then show the merged result.
_show("文本1:", txtFiles[0])
_show("文本2:", txtFiles[1])
mergeTxt(txtFiles)
_show('合并后:', 'result.txt')
JSON文件操作
import json
# Sample records: one dict per residential community.
information = [
    {'小区名称': '小区A', '均价': 8000, '月交易量': 20},
    {'小区名称': '小区B', '均价': 8500, '月交易量': 35},
    {'小区名称': '小区C', '均价': 7800, '月交易量': 50},
    {'小区名称': '小区D', '均价': 12000, '月交易量': 18},
]
# Write the records to a JSON file.
with open('房屋信息.json', 'w', encoding='utf8') as fp:
    # ensure_ascii=False stores the Chinese text as UTF-8 characters instead
    # of \uXXXX escapes; json.load() reads either form back identically.
    json.dump(information, fp, ensure_ascii=False, indent=4,
              separators=(',', ':'))
# Read the records back from the file.
with open('房屋信息.json', encoding='utf8') as fp:
    information = json.load(fp)
for info in information:
    print(info)
CSV文件操作
from csv import reader,writer
from random import randrange
from datetime import date,timedelta
fn = 'data.csv'
# newline='' hands line-ending control to the csv module; without it every
# row is followed by an empty row on platforms that translate '\n' to '\r\n'.
with open(fn, 'w', encoding='utf8', newline='') as fp:
    # Create the csv writer object.
    wr = writer(fp)
    # Write the header row.
    wr.writerow(['日期', '销量'])
    # Date of the first day: 2020-01-01.
    startDate = date(2020, 1, 1)
    # Generate 100 simulated records, one per consecutive day.
    for i in range(100):
        # Rising baseline (500 + 5 per day) plus a random offset in [5, 50).
        amount = 500 + i * 5 + randrange(5, 50)
        wr.writerow([str(startDate), amount])
        # Move on to the next day.
        startDate = startDate + timedelta(days=1)
# Read back and display the csv file generated above.
with open(fn, encoding='utf8', newline='') as fp:
    for line in reader(fp):
        # Skip empty rows (e.g. from files written without newline='').
        if line:
            print(*line)
Word、Excel、PowerPoint文件操作
检查当前文件夹(使用 /s 参数时还包括其所有子文件夹)中的 Word(.docx)、Excel(.xlsx)、PowerPoint(.pptx)文件,并输出内容中包含指定字符串的文件路径。
from sys import argv
from os import listdir
from os.path import join,isfile,isdir
from docx import Document
from openpyxl import load_workbook
from pptx import Presentation
def checkdocx(dstStr, fn):
    """Return True if the .docx file *fn* contains *dstStr*.

    The text of every body paragraph is searched first, then the text of
    every cell of every table in the document.
    """
    # Open the .docx document.
    doc = Document(fn)
    # Paragraph text first; stop at the first hit.
    if any(dstStr in para.text for para in doc.paragraphs):
        return True
    # Then the text of each cell of each table.
    return any(
        dstStr in cell.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )
def checkxlsx(dstStr, fn):
    """Return True if any text cell of the .xlsx file *fn* contains *dstStr*.

    Every cell of every worksheet is inspected. Only cells whose value is a
    string can match; empty cells and non-text values are skipped.
    """
    # Open the .xlsx workbook.
    wb = load_workbook(fn)
    # Walk the cells of every worksheet.
    for ws in wb.worksheets:
        for row in ws.rows:
            for cell in row:
                value = cell.value
                # Test the type explicitly instead of relying on a bare
                # ``except`` to swallow the TypeError raised by non-text cells.
                if isinstance(value, str) and dstStr in value:
                    return True
    return False
def checkpptx(dstStr, fn):
    """Return True if the .pptx file *fn* contains *dstStr*.

    On every slide, the cells of each table and the text of each shape that
    has a text frame are searched.
    """
    # Open the .pptx presentation.
    presentation = Presentation(fn)
    # Walk every shape of every slide.
    for slide in presentation.slides:
        for shape in slide.shapes:
            # Use the capability flags rather than numeric shape-type codes:
            # code 14 is PLACEHOLDER, so ordinary text boxes and auto shapes
            # were never searched. getattr() tolerates shape classes that do
            # not define a flag.
            if getattr(shape, 'has_table', False):
                # Text of the table cells.
                for row in shape.table.rows:
                    for cell in row.cells:
                        if dstStr in cell.text_frame.text:
                            return True
            elif getattr(shape, 'has_text_frame', False):
                # Text of placeholders, text boxes and auto shapes.
                if dstStr in shape.text_frame.text:
                    return True
    return False
def main(dstStr, flag):
    """Print the path of each Office file whose content contains *dstStr*.

    The search starts in the current directory. ``.docx``, ``.xlsx`` and
    ``.pptx`` files are checked with ``checkdocx``, ``checkxlsx`` and
    ``checkpptx`` respectively.

    Args:
        dstStr: Text to look for.
        flag: When true, subdirectories are searched as well, breadth-first.
    """
    from collections import deque

    # File suffix -> function that searches a file of that type.
    checkers = (
        ('.docx', checkdocx),
        ('.xlsx', checkxlsx),
        ('.pptx', checkpptx),
    )
    # Queue of directories still to visit; '.' is the current directory.
    # A deque gives O(1) removal from the front, unlike list.pop(0).
    dirs = deque(['.'])
    while dirs:
        # Take the oldest directory that has not been visited yet.
        currentDir = dirs.popleft()
        for fn in listdir(currentDir):
            path = join(currentDir, fn)
            if isfile(path):
                # '~$name.docx' is the lock file Office keeps next to an open
                # document; it is not a real document and cannot be parsed.
                if fn.startswith('~$'):
                    continue
                lowered = fn.lower()
                for suffix, check in checkers:
                    if lowered.endswith(suffix):
                        if check(dstStr, path):
                            print(path)
                        break
            # Queue subdirectories for the breadth-first traversal.
            elif flag and isdir(path):
                dirs.append(path)
if __name__ == '__main__':
    # Command line: <script> [/s] <text>
    #   /s      also search all subdirectories
    #   <text>  the string to look for
    args = argv[1:]
    flag = bool(args) and args[0] == '/s'
    if flag:
        args = args[1:]
    if not args:
        # Exit with a usage hint instead of an IndexError traceback.
        raise SystemExit('usage: {} [/s] <text>'.format(argv[0]))
    dstStr = args[0]
    # Search for the string given on the command line; the original parsed
    # it and then ignored it in favour of a hard-coded value.
    main(dstStr, flag)
以上是关于 Python 文件操作的主要内容。如果未能解决你的问题,请参考以下文章: