python爬取百思不得姐视频
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 Python 爬取百思不得姐视频的相关知识,希望对你有一定的参考价值。
声明:本文仅供学习 Python 使用,切勿用于非法用途。
#coding:utf-8
from Tkinter import *
from ScrolledText import ScrolledText ##scrollbar
import re
import threading
import requests
import sys
import urllib
# Python 2 only: reload(sys) makes sys.setdefaultencoding callable again so
# that implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')

# Shared queue of [title, mp4_url] pairs: get() appends to it, write() reads it.
url_name = []
# Page number appended to the site URL by get(); get() increments it per call.
a = 1
## get the url_html
# One post container; re.S lets '.' span the newlines inside the block.
_POST_RE = re.compile(r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)
# The mp4 address carried by a post's data-mp4 attribute.
_MP4_RE = re.compile(r'data-mp4="(.*?)">')
# The post title: the text of the link to its detail page.
_TITLE_RE = re.compile(r'<a href="/detail-.{8}.html">(.*?)</a>')


def get():
    """Fetch the next listing page and queue every video found on it.

    Requests ``http://www.budejie.com/<a>`` (``a`` is the module-level page
    counter), reports the page number through ``var1``, then advances ``a``.
    Every post block that carries a ``data-mp4`` attribute contributes
    ``[title, mp4_url]`` pairs, which are appended to the module-level
    ``url_name`` list and echoed to stdout.

    Returns:
        The module-level ``url_name`` list itself (not a copy), so it still
        contains any entries queued by earlier calls.
    """
    global a
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
    }
    url = 'http://www.budejie.com/' + str(a)
    var1.set('have scrapyed page %s film' % (a))
    html = requests.get(url, headers=headers).text  # .text is the decoded body
    a += 1

    # Fix: the original compiled the post pattern but never ran it, so the
    # loop iterated an undefined name (url_content) and raised NameError.
    for post in _POST_RE.findall(html):
        video_urls = _MP4_RE.findall(post)
        if not video_urls:
            continue  # post without a video (no data-mp4 attribute)
        titles = _TITLE_RE.findall(post)
        # zip pairs titles with addresses positionally and stops at the
        # shorter list, so an unmatched title or address is dropped.
        for title, video_url in zip(titles, video_urls):
            url_name.append([title, video_url])
            # Single pre-formatted argument: prints "title url" identically
            # whether print is a statement or a function.
            print('%s %s' % (title, video_url))
    return url_name
### how to download films
# Running number shown in front of each downloaded film. The name shadows the
# builtin id(); it is kept because it is a module-level name of this script.
id = 1


def write():
    """Download queued videos into ``video/`` and log each one in the GUI.

    Calls ``get()`` for a page of ``[title, mp4_url]`` pairs, saves each video
    as ``video/<title>.mp4``, appends a numbered entry to the ``text`` widget
    and bumps the module-level ``id`` counter. Pages are fetched until ``id``
    reaches 2, i.e. until at least one film has been downloaded; the page in
    progress is always finished. Finally ``var1`` is set to the completion
    message.

    NOTE(review): if get() keeps returning no videos, ``id`` never advances
    and this loop does not terminate - confirm whether a page limit is wanted.
    """
    global id
    import os  # local import: os is not among the file's top-level imports

    # urlretrieve does not create missing directories.
    if not os.path.isdir('video'):
        os.makedirs('video')

    while id < 2:
        pending = get()  ### url + name (the shared queue itself)
        # Fix: the original ran pop(0) inside "for i in url_name", mutating
        # the list being iterated, which skipped every other video and left
        # the skipped ones queued. Draining from the front handles each
        # entry exactly once and leaves the queue empty.
        while pending:
            title, video_url = pending.pop(0)
            # Decode only real byte strings; text that is already unicode
            # needs no decoding.
            if isinstance(title, bytes):
                title = title.decode('utf-8')
            urllib.urlretrieve(video_url, 'video/%s.mp4' % (title))
            text.insert(END, str(id) + '.' + video_url + '\n' + title + '\n')
            id += 1
    var1.set('scrapy over')
def start():
    """Launch write() on a new thread and return without waiting for it."""
    threading.Thread(target=write).start()
# Build the window: a scrolling log, a start button and a red status label,
# laid out top to bottom with the grid manager.
root = Tk()
root.title('comk专属')

# Log area; write() appends one numbered entry per downloaded film.
text = ScrolledText(root, font=('微软雅黑', 10))
text.grid()  # place the widget in the window

# Clicking the button calls start().
button = Button(root, text='开始爬取', font=('微软雅黑', 10), command=start)
button.grid()

# Status line: the label displays whatever var1 currently holds.
var1 = StringVar()
label = Label(root, font=('微软雅黑', 10), fg='red', textvariable=var1)
label.grid()
var1.set('comk来了...ready~~~')

# Enter the Tk event loop; returns when the window is closed.
root.mainloop()
本文出自 “净空蓝星” 博客,谢绝转载!
以上是关于 Python 爬取百思不得姐视频的主要内容,如果未能解决你的问题,请参考以下文章: