python爬煎蛋妹子图
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python爬煎蛋妹子图相关的知识,希望对你有一定的参考价值。
标签：
# python3
# jiandan meizi tu: download the .jpg images linked from the comment pages
# of http://jandan.net/ooxx/, walking backwards from the newest page.
import os
import random
import time
import urllib.request

# User-Agent values; one is picked at random for every request.
_USER_AGENTS = ("Mozilla/4.0", "Mozilla/4.1", "Mozilla/4.5", "Mozilla/5.1")

# Text that precedes the current page number in the page HTML.
_PAGE_MARKER = "current-comment-page"
# Distance from the start of the marker to the first digit of the page
# number: the marker itself plus the three characters '">['.
_PAGE_NUMBER_OFFSET = len(_PAGE_MARKER) + 3

# Start of an image tag whose source is an absolute http(s) address.
_IMG_MARKER = 'img src="http'
# Offset from the start of the tag marker to the first character of the address.
_ADDR_OFFSET = len('img src="')
# Maximum distance from the tag start within which ".jpg" is searched.
_MAX_ADDR_SPAN = 255


def _timestamp():
    """Return the local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def url_open(url, timeout=30):
    """Fetch *url* and return the raw response body as bytes.

    A User-Agent header is chosen at random from a small fixed set for each
    request.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on blocking network operations before
            giving up, so a stalled server cannot hang the script forever.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    request = urllib.request.Request(
        url, headers={"User-Agent": random.choice(_USER_AGENTS)}
    )
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def get_current_page(url):
    """Return the current comment page number found at *url*, as a string.

    The number is the text between ``current-comment-page">[`` and the next
    ``]`` in the page HTML.

    Raises:
        ValueError: If the marker or the closing bracket is not present,
            instead of returning an unrelated slice of the page.
    """
    html = url_open(url).decode("utf-8")
    marker_pos = html.find(_PAGE_MARKER)
    if marker_pos == -1:
        raise ValueError("page marker %r not found in %s" % (_PAGE_MARKER, url))
    start = marker_pos + _PAGE_NUMBER_OFFSET
    end = html.find("]", start)
    if end == -1:
        raise ValueError("page number after %r is not terminated by ']' in %s"
                         % (_PAGE_MARKER, url))
    return html[start:end]


def find_imgs(url):
    """Return the addresses of the ``.jpg`` images referenced at *url*.

    Every ``img src="http...`` tag is inspected. The address is kept when
    ``.jpg`` occurs within 255 characters of the tag start and before the
    closing quote of the ``src`` attribute; the returned address ends at
    ``.jpg``.

    Returns:
        A list of address strings in document order (possibly empty).
    """
    html = url_open(url).decode("utf-8")
    img_addrs = []
    start = html.find(_IMG_MARKER)
    while start != -1:
        limit = start + _MAX_ADDR_SPAN
        # Do not search past the end of this attribute; otherwise a non-jpg
        # image followed by a jpg one would yield a single bogus address
        # spanning both tags.
        closing_quote = html.find('"', start + _ADDR_OFFSET, limit)
        if closing_quote != -1:
            limit = closing_quote
        end = html.find(".jpg", start, limit)
        if end != -1:
            img_addrs.append(html[start + _ADDR_OFFSET:end + len(".jpg")])
            resume = end
        else:
            # No jpg here: continue right after this tag's marker.
            resume = start + len(_IMG_MARKER)
        start = html.find(_IMG_MARKER, resume)
    return img_addrs


def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into *folder*.

    Each file is named after the last ``/``-separated component of its
    address. *folder* is created if it does not exist.

    Args:
        folder: Destination directory.
        img_addrs: Iterable of image addresses.
    """
    os.makedirs(folder, exist_ok=True)
    for address in img_addrs:
        filename = address.split("/")[-1]
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        data = url_open(address)
        with open(os.path.join(folder, filename), "wb") as f:
            f.write(data)


def download_mm(folder="xx", pages=300):
    """Download images from up to *pages* comment pages into *folder*.

    Starts at the current (newest) page of http://jandan.net/ooxx/ and walks
    backwards one page at a time, stopping early once page 1 has been
    processed. Progress is printed, and the loop pauses for two seconds on
    every third page.

    Args:
        folder: Destination directory; created if missing. The process
            working directory is left unchanged.
        pages: Maximum number of pages to process.
    """
    os.makedirs(folder, exist_ok=True)
    url = "http://jandan.net/ooxx/"
    current_page_num = int(get_current_page(url))
    for i in range(pages):
        if current_page_num < 1:
            # There are no pages before page 1.
            break
        print(_timestamp(), "current_page_num", current_page_num)
        if i % 3 == 0:
            print(_timestamp(), "sleep 2 seconds...")
            time.sleep(2)
        page_url = url + "page-" + str(current_page_num) + "#comments"
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
        # Decrement after fetching so the page that was logged is the page
        # that was downloaded, and the newest page is not skipped.
        current_page_num -= 1


if __name__ == "__main__":
    download_mm()
以上是关于python爬煎蛋妹子图的主要内容,如果未能解决你的问题,请参考以下文章
python 爬取乌云所有厂商名字，url，漏洞总数 并存入数据库