bevol APP 平台爬取
Posted by Rannie
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了bevol APP 平台爬取相关的知识,希望对你有一定的参考价值。
bevol 平台爬取
import requests
import time
import os
import openpyxl
class Base_params:
    """Request helpers shared by the bevol API crawlers."""

    @property
    def headers(self):
        """Return the HTTP headers sent with each API request.

        :return: dict containing only the ``User-Agent`` header
        """
        return {
            # Alternative value left by the original author: 'okhttp/3/10.0'
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def now_time_stamp(self):
        """Return the current Unix time in milliseconds as a digit string.

        The value is computed arithmetically instead of deleting the dot
        from ``str(round(time.time(), 3))``: ``str()`` drops trailing zeros
        of a float, so that approach yields a shorter (and therefore wrong)
        number whenever the millisecond part ends in ``0``.

        :return: current timestamp in milliseconds, as ``str``
        """
        return str(round(time.time() * 1000))
# 首页排行榜
class Spider_beovl(Base_params):
    """Crawl one page of a bevol ranking list and store it in an Excel sheet."""

    # Header texts for row 1, in column order A..H.
    _HEADERS = ('名称', '评分', '喜欢数', '不喜欢数', '收藏数', '评论数', '容量/价格', '图片链接')
    # JSON keys read from each list entry, in column order B..H.
    _FIELDS = ('grade', 'likeNum', 'notLikeNum', 'collectionNum', 'commentNum',
               'standardPriceCapacity', 'imageSrc')
    _COLUMNS = 'ABCDEFGH'
    # Page size requested from the API; also the most entries kept per call.
    _PAGE_SIZE = 20
    # Seconds to wait for the API before giving up instead of hanging forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, r, pager, dataCategoryListsId, sheet):
        """Store the crawl parameters.

        :param r: row counter; every saved item is written to row ``r + 1``,
            ``r + 2``, ... (the counter is advanced before each write)
        :param pager: page number sent as the ``pager`` form field
        :param dataCategoryListsId: value sent as ``dataCategoryListsId``
        :param sheet: name of the worksheet the rows are written to
        """
        self.pager = pager
        self.dataCategoryListsId = dataCategoryListsId
        self.sheet = sheet
        self.good_list = []
        self.excel_name = 'bevol.xlsx'
        self.r = r

    @property
    def payload(self):
        """Return the form fields posted to the ranking endpoint.

        :return: dict of request parameters
        """
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'pageSize': self._PAGE_SIZE,
            'req_timestamp': self.now_time_stamp,
            'pager': self.pager,
            'dataCategoryListsId': self.dataCategoryListsId
        }

    def synthesize_top_data(self):
        """Fetch one ranking page and append its items to ``good_list``.

        Each item is stored as ``{title: [grade, likeNum, notLikeNum,
        collectionNum, commentNum, standardPriceCapacity, imageSrc]}`` with
        every value converted to ``str``.  An entry lacking one of these keys
        is skipped on its own instead of aborting the rest of the page.
        ``已到底`` is printed when the page holds fewer entries than were
        requested.

        :return: ``self.good_list`` (accumulated over all calls)
        """
        url = 'https://api.bevol.com/data_category/list/'
        resp = requests.post(url, headers=self.headers, data=self.payload,
                             timeout=self._REQUEST_TIMEOUT).json()
        try:
            entries = resp['result']['list']
        except (KeyError, TypeError):
            entries = []
        if not isinstance(entries, list):
            entries = []
        for entry in entries[:self._PAGE_SIZE]:
            try:
                title = entry['title']
                values = [f'{entry[key]}' for key in self._FIELDS]
            except (KeyError, TypeError) as err:
                print(f'skipped malformed entry: {err!r}')
                continue
            self.good_list.append({f'{title}': values})
        if len(entries) < self._PAGE_SIZE:
            print('已到底')
        return self.good_list

    def save_excel(self):
        """Write the collected items to the worksheet and save the workbook.

        The workbook file is created when it does not exist, and the
        worksheet is created when the workbook does not contain it yet.
        ``self.r`` is advanced by one for every item written.

        :return: None
        """
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        ws = self._open_sheet(workbook)
        self._write_header(ws)
        for goods in self.good_list:
            self.r += 1
            for title, values in goods.items():
                for col, value in zip(self._COLUMNS, (title, *values)):
                    ws[f'{col}{self.r}'] = value
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with one header-only worksheet.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _open_sheet(self, workbook):
        """Return the worksheet named ``self.sheet``, creating it if absent."""
        if self.sheet in workbook.sheetnames:
            return workbook[self.sheet]
        return workbook.create_sheet(self.sheet)

    def _write_header(self, ws):
        """Write the column titles into row 1 of ``ws``."""
        for col, text in zip(self._COLUMNS, self._HEADERS):
            ws[f'{col}1'] = text
# 第几行 第几页 categoryId 什么榜
# run = Spider_beovl(41, 2, 26, ‘新品榜‘)
# run.synthesize_top_data()
# run.save_excel()
# 首页推荐
class Home_recommend():
    """Crawl the bevol home-page recommendation feed and store it in Excel."""

    # Header texts for row 1, in column order A..F.
    # NOTE(review): the F header repeats the D header and no value is
    # collected for column F -- confirm what the sixth column should hold.
    _HEADERS = ('名称', '文章链接', '皮肤特色', '图片链接', '评论内容', '图片链接')
    _COLUMNS = 'ABCDEF'
    # Upper bound on the number of articles read from one response.
    _MAX_ITEMS = 40
    # Seconds to wait for the API before giving up instead of hanging forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, sheet, excel_name):
        """Store the output location.

        :param sheet: name of the worksheet the rows are written to
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    @property
    def headers(self):
        """Return the HTTP headers sent with the API request.

        :return: dict containing only the ``User-Agent`` header
        """
        return {
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; Android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def payload(self):
        """Return the fixed form fields posted to the recommendation endpoint.

        :return: dict of request parameters
        """
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'imei': '1cae65c555f22ad73561b62b3451ede8',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'sys_v': '5.1.1',
            'v': '4.1.1',
            'channel': 'yingyongbao',
            'opentime': '1594864081',
            'req_timestamp': 1594864163324,
            'pager': '1',
            'pageNum': '1',
            'signature': '2f56f216eea8f12848d5373052737fb7'
        }

    def home_recommend_crawl(self):
        """Fetch the recommendation feed and append its articles to ``good_list``.

        :return: None
        """
        url = 'https://api.bevol.com/auto/data2'
        resp = requests.post(url, headers=self.headers, data=self.payload,
                             timeout=self._REQUEST_TIMEOUT).json()
        self._collect_articles(resp)

    def _collect_articles(self, resp):
        """Append one ``{title: [url, skin name, image, comment]}`` per article.

        The four values line up with columns B..E written by ``save_excel``.
        The comment text is taken from the ``comment`` list at the same index
        as the article; an article for which any field is missing is skipped
        and the error is printed.
        """
        try:
            entity_map = resp['result']['entityMap']
            articles = entity_map['article']
            comments = entity_map['comment']
        except (KeyError, TypeError) as err:
            print(err)
            return
        if not isinstance(articles, list):
            return
        for index, article in enumerate(articles[:self._MAX_ITEMS]):
            try:
                title = f"{article['title']}"
                row = [
                    f"{article['h5url']}",
                    f"{article['skinName']}",
                    f"{article['entityImage']}",
                    f"{comments[index]['article']}",
                ]
            except (KeyError, IndexError, TypeError) as err:
                print(err)
                continue
            self.good_list.append({title: row})

    def save_excel(self):
        """Write the collected articles to the worksheet and save the workbook.

        Rows start at row 2.  The workbook file is created when it does not
        exist, and the worksheet is created when the workbook does not
        contain it yet.  Only the target worksheet is touched.

        :return: None
        """
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        ws = self._open_sheet(workbook)
        self._write_header(ws)
        row_no = 1
        for goods in self.good_list:
            row_no += 1
            for title, values in goods.items():
                # zip stops at the shorter side, so a short value list
                # leaves the remaining cells empty instead of raising.
                for col, value in zip(self._COLUMNS, (title, *values)):
                    ws[f'{col}{row_no}'] = value
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with one header-only worksheet.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _open_sheet(self, workbook):
        """Return the worksheet named ``self.sheet``, creating it if absent."""
        if self.sheet in workbook.sheetnames:
            return workbook[self.sheet]
        return workbook.create_sheet(self.sheet)

    def _write_header(self, ws):
        """Write the column titles into row 1 of ``ws``."""
        for col, text in zip(self._COLUMNS, self._HEADERS):
            ws[f'{col}1'] = text
# home_rec = Home_recommend(‘主页文章‘, ‘主页推荐.xlsx‘)
# home_rec.home_recommend_crawl()
# home_rec.save_excel()
# 优惠低价中心
class Low_price_centre(Base_params):
    """Crawl the bevol discount-centre listing and store it in Excel."""

    # Header texts for row 1, in column order A..G.
    _HEADERS = ('名称', '评分', '预计价格', '打折价格', '平台链接', '商品体积', '商品介绍')
    # JSON keys read from each result entry, in column order B..G.
    _FIELDS = ('grade', 'reservePrice', 'discountPrice', 'channelLink',
               'goodsVolume', 'content')
    _COLUMNS = 'ABCDEFG'
    # Upper bound on the number of entries read from one response.
    _MAX_ITEMS = 20
    # Seconds to wait for the API before giving up instead of hanging forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, sheet, excel_name):
        """Store the output location.

        :param sheet: name of the worksheet the rows are written to
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    def low_price(self):
        """Fetch the discount listing and append its items to ``good_list``.

        Each item is stored as ``{title: [grade, reservePrice, discountPrice,
        channelLink, goodsVolume, content]}`` with every value converted to
        ``str``; title and grade are printed for each stored item.  An entry
        lacking one of these keys is skipped and reported instead of being
        dropped silently.

        :return: None
        """
        # All query parameters (including page number and signature) are
        # fixed in the URL.
        url = 'https://api.bevol.com/cps/cpsWelfareList?uid=10547754&uuid=866174733832030&imei=1cae65c555f22ad73561b62b3451ede8&model=OPPO%20R11%20Plus&sys_v=5.1.1&v=4.1.1&o=Android&channel=yingyongbao&opentime=1594893672&req_timestamp=1594896096406&pager=3&pageSize=20&signature=1e9f9733dd0f9e92a19404c07b5d5773'
        resp = requests.post(url, headers=self.headers,
                             timeout=self._REQUEST_TIMEOUT).json()
        results = resp.get('result') if isinstance(resp, dict) else None
        if not isinstance(results, list):
            return
        for item in results[:self._MAX_ITEMS]:
            try:
                title = item['title']
                values = [f'{item[key]}' for key in self._FIELDS]
            except (KeyError, TypeError) as err:
                print(f'skipped malformed entry: {err!r}')
                continue
            self.good_list.append({f'{title}': values})
            print(title, item['grade'])

    def save_excel(self):
        """Write the collected items to the worksheet and save the workbook.

        Rows start at row 2.  The workbook file is created when it does not
        exist, and the worksheet is created when the workbook does not
        contain it yet.  Only the target worksheet is touched.

        :return: None
        """
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        ws = self._open_sheet(workbook)
        self._write_header(ws)
        row_no = 1
        for goods in self.good_list:
            row_no += 1
            for title, values in goods.items():
                for col, value in zip(self._COLUMNS, (title, *values)):
                    ws[f'{col}{row_no}'] = value
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with one header-only worksheet.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _open_sheet(self, workbook):
        """Return the worksheet named ``self.sheet``, creating it if absent."""
        if self.sheet in workbook.sheetnames:
            return workbook[self.sheet]
        return workbook.create_sheet(self.sheet)

    def _write_header(self, ws):
        """Write the column titles into row 1 of ``ws``."""
        for col, text in zip(self._COLUMNS, self._HEADERS):
            ws[f'{col}1'] = text
# Script entry: crawl the discount-centre listing, then write the collected
# items to the '有' worksheet of 优惠中心.xlsx.
run = Low_price_centre(sheet='有', excel_name='优惠中心.xlsx')
run.low_price()
run.save_excel()
以上是关于bevol APP 平台爬取的主要内容,如果未能解决你的问题,请参考以下文章
如何用30行代码爬取Google Play 100万个App的数据