Python-爬虫-懒得写的部分
Posted hare1925
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python-爬虫-懒得写的部分相关的知识,希望对你有一定的参考价值。
requests
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import re
# Placeholder: set a real URL before running.
url = ""
# Send a desktop Firefox User-Agent header with the request.
hd = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0"}
# Local proxy mapping. It is not applied below; pass proxies=px to
# requests.get to route the request through it.
px = {"http": "http://127.0.0.1:8888"}
# A timeout keeps the script from blocking forever on an unresponsive server.
rst = requests.get(url, headers=hd, timeout=10)
# re.S lets "." match newlines; findall returns every <title> body as a list.
title = re.compile(r"<title>(.*?)</title>", re.S).findall(rst.text)
urllib
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import urllib
import urllib.request
import re
import random
# Browser disguise: install a global opener so that every later
# urllib.request.urlopen call sends a Chrome User-Agent header.
opener = urllib.request.build_opener()
UA = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36",
)
opener.addheaders = [UA]
urllib.request.install_opener(opener)
# Placeholder: set a real URL before running.
url = ""
# "ignore" drops undecodable bytes instead of raising UnicodeDecodeError.
data = urllib.request.urlopen(url).read().decode("utf-8", "ignore")
# User-Agent pool to pick from at random.
# Every entry needs its own comma: adjacent string literals without one are
# concatenated into a single (bogus) User-Agent string.
uapools = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
    # Add further User-Agent strings here.
]
def UA():
    """Install a global urllib opener with a randomly chosen User-Agent.

    One string is drawn from the module-level ``uapools`` list and attached
    to a fresh opener, which is then installed so that subsequent
    ``urllib.request.urlopen`` calls send it. Returns ``None``.
    """
    opener = urllib.request.build_opener()
    thisua = random.choice(uapools)
    # The opener reads its default headers from ``addheaders``; assigning to
    # any other attribute name leaves the stock User-Agent in place.
    opener.addheaders = [("User-Agent", thisua)]
    urllib.request.install_opener(opener)
# Fetch the page ten times, switching to a newly drawn User-Agent before each
# request.
for i in range(0, 10):
    UA()
    # "ignore" drops undecodable bytes instead of raising UnicodeDecodeError.
    data = urllib.request.urlopen(url).read().decode("utf-8", "ignore")
范例
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import urllib.request
import re
import random
import time
# Pool of browser User-Agent strings; one is drawn at random per request.
uapools = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36",
]
def UA():
    """Install a global urllib opener with a randomly chosen User-Agent.

    Draws one string from the module-level ``uapools`` list, installs an
    opener that sends it on subsequent ``urllib.request.urlopen`` calls, and
    prints which agent was selected. Returns ``None``.
    """
    opener = urllib.request.build_opener()
    thisua = random.choice(uapools)
    opener.addheaders = [("User-Agent", thisua)]
    urllib.request.install_opener(opener)
    print("当前使用UA: " + str(thisua))
# Crawl each page in turn: rotate the User-Agent, download the page, extract
# the matches and print them.
for i in range(0, 35):  # total number of pages
    UA()
    thisurl = ""  # placeholder: build the URL of page i here
    try:
        # "ignore" drops undecodable bytes instead of raising.
        data = urllib.request.urlopen(thisurl).read().decode("utf-8", "ignore")
        pat = ""  # placeholder: regular expression for the data to extract
        rst = re.compile(pat, re.S).findall(data)
        for item in rst:
            print(item)
            # NOTE(review): separator assumed to follow every match; the
            # flattened source does not show its original nesting.
            print("------")
    except Exception as err:
        # Best-effort crawl: report the failing page and carry on with the
        # next one instead of hiding the error.
        print("page " + str(i) + " failed: " + str(err))
以上是关于Python-爬虫-懒得写的部分的主要内容,如果未能解决你的问题,请参考以下文章