如何在 Python 中删除文件的前四行和后 12 行?
Posted
技术标签:
【中文标题】如何在 Python 中删除文件的前四行和后 12 行?【英文标题】:How to remove the first four lines and the last 12 lines in to a file in Python? 【发布时间】:2016-05-11 15:13:42 【问题描述】: h = httplib.HTTPSConnection(host, port)
# Fragment from the question. `h` is the HTTPSConnection created on the
# preceding line; `boundary`, `uri` and `body` come from code that is not shown.
h.set_debuglevel(0)

# The surrounding braces of this dict literal were lost in the paste; without
# them the assignment is not valid Python.
headers = {
    "Content-Type": "multipart/form-data; boundary=%s" % (boundary,),
    "Connection": "Keep-Alive",
}

h.request('POST', uri, body, headers)
res = h.getresponse()
#print res.read()

# Prefix the raw response with a MIME header so that the email package can
# parse it as a multipart message.
data = """MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=--Nuance_NMSP_vutc5w1XobDdefsYG3wq
""" + res.read()

msg = email.message_from_string(data)
#print msg

for index, part in enumerate(msg.walk(), start=1):
    content_type = part.get_content_type()
    #print content_type
    payload = part.get_payload()
    # Single-argument print(...) is valid in both Python 2 and Python 3.
    print(res.getheaders())

    if content_type == "audio/x-wav" and len(payload):
        # NOTE(review): the file name has no "{}" placeholder, so `index` is
        # ignored and every audio part overwrites output.pcm -- presumably
        # 'output{}.pcm' was intended; confirm against the original post.
        with open('output.pcm'.format(index), 'wb') as f_pcm:
            print(f_pcm.write(payload))
我向服务器发送请求,服务器以 .txt 文件的形式把响应发回客户端(如上所示)。该 .txt 文件的顶部和底部是文本格式的信息头,其余部分是二进制数据。
如何解析其中的文本并将其写入单独的 .txt 文件,同时把二进制部分写入 .pcm 文件?
【问题讨论】:
请您编辑问题以包含您用于获取此问题的代码。 我添加了我的代码,但不是完整的代码。 使用python的email
包解析MIME。
你能告诉我怎么做吗?我正在尝试但没有得到。
@sam 你能附上你的文件Output.txt
例如。使用 Skydrive 或 Dropbox,这样我们就有了一个可以使用的示例。使用您发布的文件内容对我不起作用。
【参考方案1】:
以下示例应该适合您。
# Split Output.txt purely by line position: the first 4 and the last 12 lines
# go to AsciiOut.txt, everything in between goes to BinOut.pcm.
# The file is opened in binary mode, so a "line" is whatever is delimited by
# b"\n" bytes -- including any such bytes inside the binary section.
with open("Output.txt", "rb") as inputfile:
    filecontent = inputfile.readlines()

linecount = len(filecontent)

with open("AsciiOut.txt", "wb") as outputfile, open("BinOut.pcm", "wb") as binoutputfile:
    for linenr, line in enumerate(filecontent):
        if 4 <= linenr < linecount - 12:
            # Middle section: treated as the binary payload.
            binoutputfile.write(line)
        else:
            # Leading 4 lines and trailing 12 lines: textual headers.
            outputfile.write(line)
【讨论】:
你解决的是症状而不是实际问题;他真正想要的是提取多部分 mime 消息的各个部分。 “行计数”方法必然会在任何最小扰动下中断(包括将附件更改为本身恰好有换行符的文件)。【参考方案2】:建议使用 Python 的 email
库尝试解码 MIME 的方法如下:
import ssl
import os
import json
import email
import uuid
from io import BytesIO
import httplib
# Folder that contains this script, and the "output" folder directly beneath it.
input_folder = os.path.dirname(os.path.abspath(__file__))
output_folder = os.path.join(input_folder, 'output')


def get_filename(ext, base, sub_folder):
    """Return the path ``<output_folder>/<sub_folder>/<base>.<ext>``.

    Only the path is built; neither the file nor its parent folders are
    created here.

    Args:
        ext: File extension without the leading dot (e.g. ``'pcm'``).
        base: File name without extension.
        sub_folder: Folder name placed between ``output_folder`` and the file.
    """
    # The "{}" placeholders are required: a bare '.' would ignore both
    # arguments and always produce the file name ".".
    filename = '{}.{}'.format(base, ext)
    return os.path.join(output_folder, sub_folder, filename)
def compare_files(file1, file2):
    """Print whether two files have byte-identical contents.

    Both files are read completely in binary mode. The result is printed as
    ``Same:`` or ``Different:`` followed by the two paths, one per line.
    Nothing is returned.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
    """
    with open(file1, 'rb') as f_file1, open(file2, 'rb') as f_file2:
        identical = f_file1.read() == f_file2.read()

    verdict = 'Same' if identical else 'Different'
    # The "{}" placeholders put the two paths into the message. A single
    # parenthesised argument prints the same under Python 2 and Python 3.
    print('{}:\n {}\n {}'.format(verdict, file1, file2))
class Part(object):
    """Represent a part in a multipart message.

    Args:
        name: Value of the ``name`` field in the Content-Disposition header.
        contentType: Value of the part's Content-Type header.
        data: Body of the part (text or bytes).
        paramName: Optional value of the ``paramName`` field; omitted from
            the header when falsy.
    """

    def __init__(self, name, contentType, data, paramName=None):
        super(Part, self).__init__()
        self.name = name
        self.paramName = paramName
        self.contentType = contentType
        self.data = data

    @staticmethod
    def _to_bytes(value):
        """Return text UTF-8 encoded; return any other value unchanged."""
        # type(u'') is `unicode` on Python 2 and `str` on Python 3, so byte
        # strings pass through untouched on both.
        if isinstance(value, type(u'')):
            return value.encode('utf-8')
        return value

    def encode(self):
        """Return the part as bytes: two header lines, a blank line, the data.

        All line breaks written here are CRLF. No trailing line break is
        added after the data.
        """
        if self.paramName:
            disposition = 'Content-Disposition: form-data; name="%s"; paramName="%s"\r\n' % (self.name, self.paramName)
        else:
            disposition = 'Content-Disposition: form-data; name="%s"\r\n' % (self.name,)

        body = BytesIO()
        # BytesIO only accepts bytes, so text is encoded before writing.
        body.write(self._to_bytes(disposition))
        body.write(self._to_bytes("Content-Type: %s\r\n" % (self.contentType,)))
        body.write(b"\r\n")
        body.write(self._to_bytes(self.data))
        return body.getvalue()
class Request(object):
    """Collect parts and encode them as one multipart request body."""

    def __init__(self):
        super(Request, self).__init__()
        # Parts in the order they were added; each must provide encode().
        self.parameters = []

    def add_json_parameter(self, name, paramName, data):
        """Append a part with content type ``application/json; charset=utf-8``."""
        self.parameters.append(Part(name=name, paramName=paramName, contentType="application/json; charset=utf-8", data=data))

    def add_audio_parameter(self, name, paramName, data):
        """Append a part with content type ``audio/x-wav;codec=pcm;bit=16;rate=16000``."""
        self.parameters.append(Part(name=name, paramName=paramName, contentType="audio/x-wav;codec=pcm;bit=16;rate=16000", data=data))

    def encode(self):
        """Return ``(body, boundary)`` for the collected parts.

        ``body`` is the encoded byte string: every part is preceded by a
        ``--<boundary>`` line and followed by CRLF, and the body ends with a
        ``--<boundary>--`` line. ``boundary`` is a freshly generated random
        hex string, so two calls produce different bodies.
        """
        boundary = uuid.uuid4().hex
        # The boundary is hexadecimal, so ASCII encoding cannot fail; the
        # result is a byte string on both Python 2 and Python 3.
        opening = ("--%s\r\n" % (boundary,)).encode("ascii")
        closing = ("--%s--\r\n" % (boundary,)).encode("ascii")

        body = BytesIO()
        for parameter in self.parameters:
            body.write(opening)
            body.write(parameter.encode())
            body.write(b"\r\n")
        body.write(closing)
        return body.getvalue(), boundary
# Request settings for each supported language code.
# NOTE(review): 'eng_GBR' uses an underscore while 'deu-DEU' uses a hyphen;
# both are reproduced as given -- confirm which form the service expects.
_TTS_LANGUAGE_PARAMETERS = {
    "ENG": {'lang': 'eng_GBR', 'location': '47.4925, 19.0513'},
    "GED": {'lang': 'deu-DEU', 'location': '48.396231, 9.972909'},
}


def get_tts(required_text, LNG):
    """Request speech for *required_text* and save the parts of the reply.

    A multipart/form-data POST with two JSON parts ("RequestData" and
    "TtsParameter") is sent over HTTPS. The reply is parsed as a MIME
    multipart message and its parts are written via ``get_filename``:

    * ``audio/x-wav`` parts with a non-empty payload go to ``<name>.pcm``.
      The first time, the payload is also stored as ``<name>_ref.pcm``; the
      two files are then compared with ``compare_files``.
    * ``application/json`` parts go to ``<name>.json``.

    ``<name>`` is the first 80 characters of the stripped text with every
    non-alphanumeric character replaced by ``_``. The target folder must
    already exist, because ``open()`` does not create parent folders.

    Args:
        required_text: Text to be spoken.
        LNG: Language code, ``"ENG"`` or ``"GED"``; also used as the output
            sub-folder name.

    Raises:
        ValueError: If *LNG* is not a supported language code.
    """
    # Fail with a clear message instead of an unbound-variable error later on.
    if LNG not in _TTS_LANGUAGE_PARAMETERS:
        raise ValueError("Unsupported language code: %r" % (LNG,))
    parameters = _TTS_LANGUAGE_PARAMETERS[LNG]

    required_text = required_text.strip()
    output_filename = "".join([x if x.isalnum() else "_" for x in required_text[:80]])

    host = "mtldev08.nuance.com"
    port = 443
    uri = "/NmspServlet/"

    # Built with json.dumps rather than string interpolation so that the
    # payload is always well-formed JSON.
    # NOTE(review): appKey/appId are hard-coded credentials; consider loading
    # them from configuration instead.
    RequestData = json.dumps({
        "appKey": "9c9fa7201e90d3d96718bc3f36ce4cfe1781f2e82f4e5792996623b3b474fee2c77699eb5354f2136063e1ff19c378f0f6dd984471a38ca5c393801bffb062d6",
        "appId": "NMDPTRIAL_AutomotiveTesting_NCS61HTTP",
        "uId": "Alexander",
        "inCodec": "PCM_16_8K",
        "outCodec": "PCM_16_8K",
        "cmdName": "NVC_TTS_CMD",
        "appName": "Python",
        "appVersion": "1",
        "language": parameters['lang'],
        "carrier": "carrier",
        "deviceModel": "deviceModel",
        "cmdDict": {
            "tts_voice": "Serena",
            "tts_language": parameters['lang'],
            "locale": "canada",
            "application_name": "Testing Python Script",
            "organization_id": "NUANCE",
            "phone_OS": "4.0",
            "phone_network": "wifi",
            "audio_source": "SpeakerAndMicrophone",
            "location": parameters['location'],
            "application_session_id": "1234567890",
            "utterance_number": "5",
            "ui_langugage": "en",
            "phone_submodel": "nmPhone2,1",
            "application_state_id": "45",
        },
    })

    TEXT_TO_READ = json.dumps({"tts_type": "text", "tts_input": required_text})

    request = Request()
    request.add_json_parameter("RequestData", None, RequestData)
    request.add_json_parameter("TtsParameter", "TEXT_TO_READ", TEXT_TO_READ)

    #ssl._create_default_https_context = ssl._create_unverified_context
    body, boundary = request.encode()
    headers = {
        "Content-Type": "multipart/form-data; boundary=%s" % (boundary,),
        "Connection": "Keep-Alive",
    }

    h = httplib.HTTPSConnection(host, port)
    #h.set_debuglevel(1)
    try:
        h.request('POST', uri, body, headers)
        res = h.getresponse()
        raw_response = res.read()
    finally:
        # Release the socket even when the request or the read fails.
        h.close()

    # Prefix the raw response with a MIME header so that the email package
    # can parse it as a multipart message.
    # NOTE(review): a blank line normally separates MIME headers from the
    # body; none is visible in the source, so none is added -- confirm.
    data = (
        "MIME-Version: 1.0\n"
        "Content-Type: multipart/mixed; boundary=--Nuance_NMSP_vutc5w1XobDdefsYG3wq\n"
    ) + raw_response

    msg = email.message_from_string(data)

    for part in msg.walk():
        content_type = part.get_content_type()
        payload = part.get_payload()

        if content_type == "audio/x-wav" and len(payload):
            ref_filename = get_filename('pcm', output_filename + '_ref', LNG)

            # The first result for a text becomes the reference recording.
            if not os.path.exists(ref_filename):
                with open(ref_filename, 'wb') as f_pcm:
                    f_pcm.write(payload)

            cur_filename = get_filename('pcm', output_filename, LNG)

            with open(cur_filename, 'wb') as f_pcm:
                f_pcm.write(payload)

            compare_files(ref_filename, cur_filename)
        elif content_type == "application/json":
            with open(get_filename('json', output_filename, LNG), 'w') as f_json:
                f_json.write(payload)
# Driver: every non-empty line of input.txt has the form "<LNG>|<text>".
filename = r'input.txt'

with open(filename) as f_input:
    for line in f_input:
        line = line.strip()
        if not line:
            # A blank line has no "|" and would break the unpacking below.
            continue

        # Split on the first "|" only, so the text itself may contain "|".
        LNG, text = line.split('|', 1)
        # Single-argument print(...) is valid in both Python 2 and Python 3.
        print("Getting {}: {}".format(LNG, text))
        get_tts(text, LNG)
这假设您的 input.txt 文件具有以下格式:
ENG|I am tired
GED|Ich gehe nach hause
这将为每行文本生成一个输出 pcm 和 json 文件。它适用于多种文件/语言。
【讨论】:
它生成了 plain1.txt,并且读取了完整的 .txt 文件(即 res.read() 的内容),但它无法解析该内容。你能帮帮我吗? 为了进一步帮助(并且能够运行脚本),我需要知道调用 h.request 所需的参数。您复制粘贴在问题中的数据不适合用来测试。
你能看一次吗?
我添加了一种将原始 pcm 提取到文件中的简单方法,我能够听到返回的单词。
谢谢,但是如何读取 res 中的剩余文本并将其写入另一个文件?以上是关于如何在 Python 中删除文件的前四行和后 12 行?的主要内容,如果未能解决你的问题,请参考以下文章
如何在 Python 中读取和删除文件中的前 n 行 - 优雅的解决方案 [重复]