python 3 文本处理例子求代码

Posted 2023-04-13

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python 3 文本处理例子求代码相关的知识，希望对你有一定的参考价值。

2016-05-23
文本1
文本2
2016-05-24
文本1
文本2
2016-05-25
文本1
文本2
.....
以上文本已经读入到列表 F 中。
现在想把它重新组织成另一个序列：每 3 行为一个项目，最好是二维序列，可以通过索引（如 F[1][1]）取出 "2016-05-23" 等内容……

#coding:utf-8
#file: FileSplit.py

import os,os.path,time

def FileSplit(sourceFile, targetFolder, number=1000):
    """Split a text file into numbered chunk files of at most ``number`` lines.

    Chunk files are written to ``targetFolder`` (created if it does not
    exist) and are named ``<basename of sourceFile><N>.txt`` with N counting
    up from 1. A line reporting the file name and the current time is
    printed for every chunk written.

    Args:
        sourceFile: Path of the text file to split.
        targetFolder: Directory that receives the chunk files.
        number: Maximum number of lines per chunk file (default 1000).

    Raises:
        ValueError: If ``number`` is smaller than 1.
        OSError: If the source file cannot be read or a chunk cannot be
            written.
    """
    from itertools import islice

    if number < 1:
        raise ValueError("number must be at least 1")

    baseName = os.path.split(sourceFile)[1]
    # The context manager closes the source even if writing a chunk fails.
    with open(sourceFile, 'r') as sFile:
        if not os.path.isdir(targetFolder):  # create the target folder on demand
            os.mkdir(targetFolder)
        fileNum = 1
        while True:
            # Pull the next batch of up to `number` lines from the source.
            tempData = list(islice(sFile, number))
            if not tempData:
                break
            tFilename = os.path.join(targetFolder, baseName + str(fileNum) + ".txt")
            # 'w' rather than 'a+': re-running the split must not append a
            # second copy of the data to chunk files left by an earlier run.
            with open(tFilename, 'w') as tFile:
                tFile.writelines(tempData)
            print(tFilename + " 创建于: " + str(time.ctime()))
            fileNum += 1

if __name__ == "__main__":
    # Split access.log into chunk files inside the "access" folder.
    FileSplit("access.log", "access")
#coding:utf-8
#file: Map.py

import os,os.path,re

def Map(sourceFile, targetFolder):
    """Count requested URLs in one access-log file and write the tally.

    Each line is searched for a ``GET``/``POST`` request of the form
    ``<method> <url> HTTP/1.0|1.1`` (case-insensitive). Only the first
    request found on a line is counted. The result is written to
    ``<targetFolder>/<basename of sourceFile>_map.txt`` as ``<url> <count>``
    lines, highest count first.

    Args:
        sourceFile: Path of the log file to analyse.
        targetFolder: Directory for the result file (created if missing).

    Raises:
        OSError: If the source cannot be read or the result cannot be written.
    """
    # Compiled once, outside the loop. The dot in "HTTP/1." is escaped so it
    # only matches a literal dot.
    p_re = re.compile(r'(GET|POST)\s(.*?)\sHTTP/1\.[01]', re.IGNORECASE)
    tempData = {}  # url -> number of requests seen

    with open(sourceFile, 'r') as sFile:
        if not os.path.isdir(targetFolder):  # create the target folder on demand
            os.mkdir(targetFolder)
        for dataLine in sFile:
            match = p_re.search(dataLine)
            if match:
                visitUrl = match.group(2)
                tempData[visitUrl] = tempData.get(visitUrl, 0) + 1

    # Stable sort: URLs with equal counts keep first-seen order.
    tList = [
        key + " " + str(value) + '\n'
        for key, value in sorted(tempData.items(), key=lambda k: k[1], reverse=True)
    ]

    tFilename = os.path.join(targetFolder, os.path.split(sourceFile)[1] + "_map.txt")
    # 'w' rather than 'a+': appending on a re-run would leave duplicate
    # entries that a later reduce step would count twice.
    with open(tFilename, 'w') as tFile:
        tFile.writelines(tList)

if __name__ == "__main__":
    # Run the map step over each chunk file in the "access" folder.
    # os.path.join replaces the hard-coded Windows "\\" separators so the
    # script also runs on other platforms.
    for chunkName in ("access.log1.txt", "access.log2.txt", "access.log3.txt"):
        Map(os.path.join("access", chunkName), "access")
#coding:utf-8
#file: Reduce.py

import os,os.path,re

def Reduce(sourceFolder, targetFile):
    """Merge every ``*_map.txt`` tally under ``sourceFolder`` into one file.

    Each input line is expected to look like ``<url> <count>``. Counts for
    the same URL are summed across all files, and the totals are written to
    ``<sourceFolder>/<targetFile>_reduce.txt`` as ``<url> <count>`` lines,
    highest total first. Lines that do not end in a space followed by an
    integer (for example blank lines) are skipped.

    Args:
        sourceFolder: Directory searched recursively for ``*_map.txt`` files;
            the result file is written here too.
        targetFile: Name prefix of the result file.

    Raises:
        OSError: If an input cannot be read or the result cannot be written.
    """
    tempData = {}  # url -> summed count
    # Greedy first group, so only the final space-separated integer is taken
    # as the count and the URL carries no trailing space.
    p_re = re.compile(r'^(.*) (\d+)$')

    for root, dirs, files in os.walk(sourceFolder):
        for fil in files:
            if not fil.endswith('_map.txt'):  # only map-step output is merged
                continue
            with open(os.path.join(root, fil), 'r') as sFile:
                for dataLine in sFile:
                    subdata = p_re.match(dataLine.rstrip('\r\n'))
                    if subdata is None:
                        continue
                    url = subdata.group(1)
                    tempData[url] = tempData.get(url, 0) + int(subdata.group(2))

    # Stable sort: URLs with equal totals keep first-seen order.
    tList = [
        key + " " + str(value) + '\n'
        for key, value in sorted(tempData.items(), key=lambda k: k[1], reverse=True)
    ]

    tFilename = os.path.join(sourceFolder, targetFile + "_reduce.txt")
    # 'w' rather than 'a+': a re-run replaces the previous result.
    with open(tFilename, 'w') as tFile:
        tFile.writelines(tList)

if __name__ == "__main__" :
    Reduce("access","access")

参考技术A

# -*- coding:utf-8 -*-
# Group the lines of test.txt into records of three lines each, so that
# F[record][field] addresses a single stripped line.
F = []
with open('test.txt', 'r') as f:
    lines = f.readlines()

# Slicing (rather than indexing i+1 / i+2) keeps a trailing incomplete
# record as a shorter list instead of raising IndexError.
for i in range(0, len(lines), 3):
    F.append([line.strip() for line in lines[i:i + 3]])

# Indices start at 0. Print the fields of the first two records using the
# Python 3 print() function.
for record in F[:2]:
    for field in record:
        print(field)

>>>

2016-05-23
文本1
文本2
2016-05-24
文本1
文本2

本回答被提问者和网友采纳

参考技术B

看不懂问题

06 文件处理

'''
File handling:
    open()

    Writing a file
        wt: write text

    Reading a file
        rt: read text

    Appending to a file
        at: append text

Note: always specify the character encoding; a file must be opened
      with the same encoding it was written with.

How Python code is executed:
    1. The Python interpreter is started
    2. The Python file is loaded into the interpreter
    3. The syntax is checked and the code is executed

Opening a file involves two kinds of resources:
    1. The Python program
    2. The file opened by the operating system
'''


# Argument 1: path to the file
# Argument 2: mode the file is opened in
# Argument 3: encoding - the character encoding to use

# Write a text file
f = open('file.txt', mode='wt', encoding='utf-8')
f.write('tank')  # creates file.txt with the content "tank"
# Release the operating-system file resource
f.close()


# Read a text file  (r == rt)
f = open('file.txt', 'r', encoding='utf-8')
print(f.read())
f.close()


# Append to a text file  (a == at)
f = open('file.txt', 'a', encoding='utf-8')
f.write('\n 合肥学院')
f.close()


'''
File handling with a context manager
    with open() as f    (f is the file "handle")
'''
# Write a text file
with open('file.txt', 'w', encoding='utf-8') as f:
    f.write('墨菲定律')

# Read a text file
with open('file.txt', 'r', encoding='utf-8') as f:
    res = f.read()
    print(res)

# Append to a text file
with open('file.txt', 'a', encoding='utf-8') as f:
    f.write('\n 围城')


'''
Reading and writing images, audio and video
    rb mode reads binary data; no character encoding is specified
'''
# Read the photo xb.jpg
with open('xb.jpg', 'rb') as f:
    res = f.read()
    print(res)

jpg = res

# Write the binary content of xb.jpg into xb_copy.jpg
with open('xb_copy.jpg', 'wb') as f_w:
    f_w.write(jpg)

'''
Managing several files with one with-statement
'''
# A single with-statement manages both handles, f_r and f_w
with open('xb.jpg', 'rb') as f_r, open('xb_copy.jpg', 'wb') as f_w:
    # Read the image's binary content through f_r
    res = f_r.read()
    # Write that binary content into xb_copy.jpg through f_w
    f_w.write(res)

以上是关于python 3 文本处理例子求代码的主要内容，如果未能解决你的问题，请参考以下文章