Python 小白学习记录:多线程爬取 ts 片段
Posted jswf
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python小白学习记录 多线程爬取ts片段相关的知识,希望对你有一定的参考价值。
"""Download the numbered ``.ts`` segments of an HLS stream using worker threads."""

import os
import re
import threading
import time
from multiprocessing import pool
from queue import Empty, Queue
from urllib import request

import requests
from lxml import etree

# %-style template; the placeholder receives the zero-padded segment number.
TS_URL_TEMPLATE = "https://zy.512wx.com/20171106/vM1OOVna/1200kb/hls/ppvod1983%s.ts"
SEGMENT_COUNT = 1342
VIDEO_DIR = "./video"
WORKER_COUNT = 16
REQUEST_TIMEOUT = 30  # seconds without network activity before a request gives up


def download(urls, count=SEGMENT_COUNT, url_template=TS_URL_TEMPLATE):
    """Fill *urls* with the URL of every segment, in ascending order.

    Args:
        urls: Queue-like object; each URL is handed to ``urls.put``.
        count: Number of segments; indices ``0 .. count - 1`` are generated.
        url_template: ``%``-style template with one ``%s`` placeholder for the
            segment number.

    Note:
        ``put`` blocks when a bounded queue is full, so the queue must be able
        to hold ``count`` items if no consumer is running yet.
    """
    for index in range(count):
        # zfill pads to at least three digits and leaves longer numbers as-is,
        # so no separate branch is needed for indices above 999.
        url = url_template % str(index).zfill(3)
        print("url", url)
        urls.put(url)


class Consumer(threading.Thread):
    """Worker thread that drains a queue of URLs and saves each one to disk."""

    def __init__(self, urls, *args, **kwargs):
        """Store the shared URL queue; extra arguments go to ``threading.Thread``."""
        super(Consumer, self).__init__(*args, **kwargs)
        self.urls = urls
        # Retained for backward compatibility only: it is a per-instance lock
        # and the queue is taken from without it (see run()).
        self.queueLock = threading.Lock()

    def run(self):
        """Download queued URLs until the queue is empty, skipping existing files."""
        os.makedirs(VIDEO_DIR, exist_ok=True)
        while True:
            # get_nowait() makes "check and take" a single atomic step. A
            # separate empty() check followed by a blocking get() can hang
            # forever when another worker takes the last item in between.
            try:
                url = self.urls.get_nowait()
            except Empty:
                break

            name = url.split('/')[-1]
            print("name", name)
            path = "{}/{}".format(VIDEO_DIR, name)

            # Test for an existing file BEFORE opening it: opening with "wb"
            # creates/truncates the file, which would make this check always
            # succeed and wipe segments that were already downloaded.
            if not os.access(path, os.F_OK):
                try:
                    # NOTE(review): TLS verification is disabled, as in the
                    # original script - confirm the host still requires this.
                    resp = requests.get(url, verify=False, timeout=REQUEST_TIMEOUT)
                    resp.raise_for_status()
                except requests.RequestException as exc:
                    # Leave no file behind so a later run retries this segment.
                    print(name + "下载失败", exc)
                    continue
                with open(path, "wb") as fp:
                    fp.write(resp.content)
            print(name + "下载完成")


def get_ts(urls):
    """Download every URL in *urls* sequentially into ``./video/kuiba.ts``.

    Args:
        urls: Iterable of segment URLs, in playback order.

    Raises:
        requests.RequestException: If a segment cannot be fetched; the merged
            file is then incomplete.
    """
    os.makedirs(VIDEO_DIR, exist_ok=True)
    with open("{}/kuiba.ts".format(VIDEO_DIR), "wb") as fp:
        for index, url in enumerate(urls):
            resp = requests.get(url, timeout=REQUEST_TIMEOUT)
            # An HTTP error page written into the stream would corrupt it.
            resp.raise_for_status()
            fp.write(resp.content)
            print(str(index) + "下载完成")


def main():
    """Queue all segment URLs, then download them with WORKER_COUNT threads."""
    urls = Queue(2000)
    download(urls)
    workers = [Consumer(urls) for _ in range(WORKER_COUNT)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()
此处开了 16 个子线程并发下载 ts 片段。
以上是关于python小白学习记录 多线程爬取ts片段的主要内容,如果未能解决你的问题,请参考以下文章