问题贴,今天遇见了个问题,解决了一天都没解决
Posted Jason_WangYing
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了问题贴,今天遇见了个问题,解决了一天都没解决相关的知识,希望对你有一定的参考价值。
今天用多线程写了个类,获取ts文件内容,并用另一个线程写入对应的文档,但是预计了个奇怪问题,文件写入,只能写入获取文件数据一半的数据量,到现在也没搞明白怎么回事。
直接上代码:
import threading
import requests
import time
from queue import Queue
import urllib3
urllib3.disable_warnings()
#这个就是ts文件的列表
ts_list=list()
# base_url = 'https://v4.cdtlas.com/20220223/MKPjICFW/1100kb/hls/index.m3u8'
# 页面地址:https://www.acfun.cn/v/ac33003549
base_url = 'https://ali-safety-video.acfun.cn/mediacloud/acfun/acfun_video/c185ef40f16fac2e-c4c2fc4890da062a1d1e0f2eb10f0634-hls_1080p_2.m3u8?pkey=ABASsT6ICcJOVagTydVvSQcR4002-YEqr_dEGDVGKunt4KjMuu-qcHnk69tpuMrirtXONFSMWIk9nA46NCwXkEPvd87umIrlEzuGoW7c-iQiFqoSzt9XAXqSdaWBbqFg02fP3iObVSJn9ZlyZtjCzg6PeSzaFhs8WpWSLGIviiL82UZjxgkdfD6hurOBhfeyfnrzCgZTh-3X5S6bTj6XACT1IUDz38_GDfMtonlVfZ0C5g&safety_id=AAIC1Add8kyLWL7-w9SQPeRw'
ii =0
ij = 0
class Producer(threading.Thread):
def __init__(self, url_queue, data_queue,name_queue, *args, **kwargs):
super(Producer, self).__init__(*args, **kwargs)
self.url_queue = url_queue
self.data_queue = data_queue
self.name_queue = name_queue
def run(self) -> None:
# 从url_page中拿url
while True:
if self.url_queue.empty():
print("Produceer 完毕 !!")
break
url = self.url_queue.get()
self.get_data(url)
def get_data(self,url):
print('*'*100+url +'*'*100)
global ii
# print("~~~~~~" + "拿到第个数据,******".format(ii) + "~~~~~~")
ii = ii + 1
res = requests.get(url,verify = False)
data = res.content
self.data_queue.put(data)
time.sleep(2)
# name = self.name_queue.get().split('.ts?')[0]
# with open(".ts".format(name), 'ab+') as f:
# f.write(data)
# f.flush()
# f.close()
class Consumer(threading.Thread):
def __init__(self, url_queue, data_queue, name_queue, *args, **kwargs):
super(Consumer, self).__init__(*args, **kwargs)
self.url_queue = url_queue
self.data_queue = data_queue
self.name_queue = name_queue
print(self.name_queue)
def run(self) -> None:
while True:
if self.data_queue.empty():
if self.url_queue.empty():
print('写入完毕'*300)
return
data = self.data_queue.get()
print(self.name_queue.get())
global ij
print("写入数据量:".format(ij))
ij = ij +1
name = self.name_queue.get().split('.ts?')[0]
with open(".ts".format(ij), 'ab+') as f:
f.write(data)
f.flush()
f.close()
def parse_ts(data):
try:
data = str(data)
finish_data = []
for line in data.split('\\n'):
if line.endswith('.ts'):
finish_data.append(line)
elif line.find('.ts?') >= 0:
finish_data.append(line)
else:
print('*'*20+'\\n'+'解析错误,未解析到需要的ts文件,具体内容如下:'+line+'\\n'+'*'*20)
except Exception as e:
import traceback
traceback.print_exc()
print('*'*20+"报错" * 50+'*'*20)
finally:
return finish_data
def main():
name_queue = Queue()
url_queue = Queue()
data_queue = Queue()
# 准备url
item = 0
for x in ts_list:
# 判断是否需要拼接,有的ts文件是直接自带全部链接的
if x.find('http://') >=0 or x.find('https://') >= 0:
url = x
else:
# 需要去除base_url最后一个字段,
url = '/.ts'.format(base_url.rsplit('/',1)[0],x)
# url_queue.put(url)
url_queue.put(url)
item = item+1
# name_queue.put(x)
name_queue.put(x)
print("总数据量是:".format(item))
# 创建生产者、消费者进程
for x in range(8):
t = Producer(url_queue, data_queue, name_queue)
t.start()
for x in range(8):
c = Consumer(url_queue, data_queue,name_queue)
c.start()
if __name__ == '__main__':
# 获取m3u8的url,根据url获取ts列表
res = requests.get(base_url,verify = False)
if res.status_code == 200:
ts_list = parse_ts(res.text)
print(ts_list)
main()
else:
print("请求m3u8文件错误")
先记录下,等过几天有空了,再详细解决下。
以上是关于问题贴,今天遇见了个问题,解决了一天都没解决的主要内容,如果未能解决你的问题,请参考以下文章
关于页面图表相应式的问题,问题遇见时间,9月1日,到今天9月2日,尚未解决