Python requests多页面爬取案例
Posted ggg566
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python requests多页面爬取案例相关的知识,希望对你有一定的参考价值。
原文: http://blog.gqylpy.com/gqy/321
import requests
from fake_useragent import UserAgent # 随机ua库
class Boring:
    """Crawl company records from the 125.35.6.84 licensing portal.

    On construction it fetches the company IDs for every page in
    ``page_scope``, then fetches each company's detail record (keeping only
    the person in charge and the enterprise name) and prints the result.
    """

    def __init__(self, page_scope=(4, 7)):
        """
        :param page_scope: inclusive (start_page, end_page) range of pages to crawl
        """
        self.page_scope = page_scope
        self.all_id = self.get_all_company_id()
        self.enterprise_info = self.get_all_company_info()
        self.show_enterprise_info()

    @property
    def firefox_ua(self):
        """Return request headers carrying a random Firefox User-Agent."""
        ua = UserAgent(use_cache_server=False)
        # ua.Firefox produces a freshly randomized Firefox UA string each access
        return {'User-Agent': ua.Firefox}

    def get_all_company_id(self):
        """Return a dict mapping each page number in page_scope to the list of
        company IDs found on that page."""
        all_id = {}
        url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'  # see figure 1 in the original post
        for page in range(self.page_scope[0], self.page_scope[1] + 1):
            json_text = requests.post(url, data=self.post_data(page), headers=self.firefox_ua).json()
            current_page_all_id = [item['ID'] for item in json_text['list']]
            all_id.setdefault(page, current_page_all_id)
        return all_id

    def get_all_company_info(self):
        """Fetch each company's detail record; return {businessPerson: epsName}."""
        url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById'  # see figure 3
        enterprise_info = {}
        for page in self.all_id:
            for company_id in self.all_id.get(page):
                # form payload {'id': company_id} — see figure 4
                response = requests.post(url, data={'id': company_id}, headers=self.firefox_ua)
                # skip non-JSON responses (e.g. error pages served as HTML)
                if response.headers['Content-Type'] == 'application/json;charset=UTF-8':
                    json_text = response.json()
                    # keep only the person in charge and the enterprise name
                    enterprise_info.setdefault(json_text.get('businessPerson'), json_text.get('epsName'))
        return enterprise_info

    def show_enterprise_info(self):
        """Print one (person in charge, enterprise name) pair per line."""
        for person, enterprise in self.enterprise_info.items():
            print(person, enterprise)

    def post_data(self, page):
        """Return the form payload submitted when requesting the company list
        for *page* (see figure 2)."""
        return {
            'on': 'true',
            'page': page,
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': '',
        }
# go
if __name__ == '__main__':
    # Guarded so importing this module does not immediately start crawling.
    Boring()
原文: http://blog.gqylpy.com/gqy/321
以上是关于Python requests多页面爬取案例的主要内容,如果未能解决你的问题,请参考以下文章
Python requests多页面爬取案例 -- 2019-08-07 10:34:05
Python requests多页面爬取案例 -- 2019-08-08 20:39:58
[实战演练]python3使用requests模块爬取页面内容