python鐖厧铔嬪瀛愬浘

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python爬煎蛋妹子图相关的知识,希望对你有一定的参考价值。

鏍囩锛?/p>

 

# python3
# jiandan meizi tu
import urllib.request
import os
import time
import random


def url_open(url):
    """Fetch *url* and return the raw response body.

    A ``User-Agent`` header is picked at random from a small fixed pool on
    every call, so consecutive requests do not all carry the same value.

    Args:
        url: Address to fetch; anything ``urllib.request.Request`` accepts.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urllib.request.urlopen``
            when the request fails.
    """
    user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
    request = urllib.request.Request(
        url, headers={'User-Agent': random.choice(user_agents)})
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(request) as response:
        return response.read()

def get_current_page(url):
    """Return the current comment-page number found on the page at *url*.

    The value is the text between the ``current-comment-page`` marker
    (plus a fixed 23-character offset) and the next ``]`` in the HTML.

    Args:
        url: Address of the listing page to inspect.

    Returns:
        The page number as a string, exactly as it appears in the HTML.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present.
            Previously this case silently returned an unrelated slice.
    """
    marker = 'current-comment-page'
    html = url_open(url).decode('utf-8')

    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError('marker %r not found in %s' % (marker, url))

    # 23 = the 20-character marker plus 3 characters before the number
    # (presumably the `">[` that follows the class name - confirm against
    # the live markup).
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('closing "]" after %r not found in %s' % (marker, url))
    return html[start:end]

def find_imgs(url):
    """Collect the ``.jpg`` image addresses referenced on the page at *url*.

    The page is scanned for ``img src="http`` occurrences. For each one the
    address runs from the ``http`` up to and including the first ``.jpg``
    found within 255 characters of the tag start.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image URL strings in document order (possibly empty).
    """
    marker = 'img src="http'
    prefix_len = len('img src="')  # the address starts right after this
    html = url_open(url).decode('utf-8')

    img_addrs = []
    tag_pos = html.find(marker)
    while tag_pos != -1:
        addr_start = tag_pos + prefix_len
        search_end = tag_pos + 255
        # Stop at the closing quote of this src attribute so that a
        # non-JPEG address is not glued onto a later, unrelated ".jpg".
        closing_quote = html.find('"', addr_start, search_end)
        if closing_quote != -1:
            search_end = closing_quote

        ext_pos = html.find('.jpg', addr_start, search_end)
        if ext_pos != -1:
            img_addrs.append(html[addr_start:ext_pos + 4])
            resume_from = ext_pos
        else:
            # Not a JPEG: step past this marker and keep scanning.
            resume_from = tag_pos + len(marker)
        tag_pos = html.find(marker, resume_from)
    return img_addrs

def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    Each file is named after the last ``/``-separated segment of its URL.

    Args:
        folder: Not used here. Files are written relative to the current
            working directory, which ``download_mm`` sets with ``os.chdir``
            before calling this function.
        img_addrs: Iterable of image URL strings.
    """
    for addr in img_addrs:
        filename = addr.split('/')[-1]
        # Fetch before opening the file so that a failed download does not
        # leave an empty file behind.
        data = url_open(addr)
        with open(filename, 'wb') as f:
            f.write(data)

def download_mm(folder='xx', pages=300):
    """Download images from up to *pages* listing pages, newest first.

    The function changes the process working directory to *folder* and
    leaves it there; ``save_imgs`` writes files relative to that directory.

    Args:
        folder: Directory that receives the images. It is created if it
            does not exist yet.
        pages: Maximum number of listing pages to walk, counting down from
            the current page number reported by the site.
    """
    # Create the target directory on demand so chdir cannot fail on a
    # first run; exist_ok keeps repeated runs working.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    current_page_num = int(get_current_page(url))
    for i in range(pages):
        if current_page_num < 1:
            # Ran out of pages before reaching the requested count.
            break
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
              'current_page_num', current_page_num)
        if i % 3 == 0:
            # Pause on every third page to avoid hammering the server.
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                  "sleep 2 seconds...")
            time.sleep(2)
        page_url = url + 'page-' + str(current_page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
        # Decrement only after the page is processed, so the newest page is
        # included and the log line names the page actually fetched.
        current_page_num -= 1

if __name__ == '__main__':
    # Script entry point: run the downloader with its default arguments.
    download_mm()

 

以上是关于python爬煎蛋妹子图的主要内容,如果未能解决你的问题,请参考以下文章

Python鐖彇鍙ュ瓙杩?鑾庡+姣斾簹璇綍

python鐖创鍚ф暟鎹瓨mysql瀹屾暣浠g爜妗堜緥

python 鐖彇涔屼簯鎵€鏈夊巶鍟嗗悕瀛楋紝url锛屾紡娲炴€绘暟 骞跺瓨鍏ユ暟鎹簱

Python鐖櫕瀹炶返 鈥斺€?3.鍒╃敤鐖櫕鎻愬彇杩斿洖鍊硷紝妯℃嫙鏈夐亾璇嶅吀鎺ュ彛

銆岀71鏈熴€? 鐖櫕鎶€鏈?鎶撳寘

SQL 4