bevol APP 平台爬取

Posted Rannie

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 bevol APP 平台爬取相关的知识,希望对你有一定的参考价值。

bevol 平台爬取

import requests
import time
import os
import openpyxl

class Base_params:
    """Request parameters shared by the bevol API spiders."""

    @property
    def headers(self):
        """
        Build the HTTP headers sent with a request.

        :return: dict containing the ``User-Agent`` header
        """
        return {
            # "User-Agent": 'okhttp/3/10.0'
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def now_time_stamp(self):
        """
        Current Unix time in milliseconds.

        Computed as an integer so the digit count stays stable: formatting
        the rounded float and stripping the dot loses trailing zeros
        (``1594864163.3`` would become ``15948641633``).

        :return: the timestamp as a string of digits
        """
        return str(round(time.time() * 1000))


# 首页排行榜
class Spider_beovl(Base_params):
    """Crawl one page of a bevol ranking list and write it to an Excel sheet."""

    # Header titles of the sheet, in column order starting at column A.
    COLUMN_TITLES = ('名称', '评分', '喜欢数', '不喜欢数', '收藏数', '评论数', '容量/价格', '图片链接')
    # Keys read from every list entry, in the order of columns B..H.
    VALUE_KEYS = ('grade', 'likeNum', 'notLikeNum', 'collectionNum',
                  'commentNum', 'standardPriceCapacity', 'imageSrc')
    # Number of entries requested per page (sent as ``pageSize``).
    PAGE_SIZE = 20
    # Seconds to wait for the API so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, r, pager, dataCategoryListsId, sheet):
        """
        :param r: row counter; it is incremented before each write, so the
            first new row lands on ``r + 1``
        :param pager: page number sent in the request
        :param dataCategoryListsId: category id sent in the request
        :param sheet: name of the worksheet to write to
        """
        self.pager = pager
        self.dataCategoryListsId = dataCategoryListsId
        self.sheet = sheet
        self.good_list = []
        self.excel_name = 'bevol.xlsx'
        self.r = r

    @property
    def payload(self):
        """
        Form data posted to the list endpoint.

        :return: dict of request parameters
        """
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'pageSize': self.PAGE_SIZE,
            'req_timestamp': self.now_time_stamp,
            'pager': self.pager,
            'dataCategoryListsId': self.dataCategoryListsId,
        }

    def synthesize_top_data(self):
        """
        Fetch one page of the ranking list and collect its entries.

        Each entry is appended to ``self.good_list`` as
        ``{title: [grade, likeNum, notLikeNum, collectionNum, commentNum,
        standardPriceCapacity, imageSrc]}`` with every value converted to
        ``str``. '已到底' is printed when the page holds fewer than
        ``PAGE_SIZE`` entries.

        :return: ``self.good_list``
        """
        url = 'https://api.bevol.com/data_category/list/'
        resp = requests.post(
            url,
            headers=self.headers,
            data=self.payload,
            timeout=self.REQUEST_TIMEOUT,
        ).json()

        try:
            entries = list(resp['result']['list'])[:self.PAGE_SIZE]
        except (KeyError, TypeError):
            # Response without a usable list: treat it as an empty page.
            entries = []

        for entry in entries:
            try:
                title = entry['title']
                values = [f'{entry[key]}' for key in self.VALUE_KEYS]
            except (KeyError, TypeError) as exc:
                # Skip only the malformed entry instead of aborting the page.
                print(f'skipped malformed entry: {exc!r}')
                continue
            self.good_list.append({f'{title}': values})

        if len(entries) < self.PAGE_SIZE:
            print('已到底')
        return self.good_list

    def save_excel(self):
        """
        Write the collected entries to the worksheet and save the workbook.

        The workbook file is created when it does not exist, and the
        worksheet is added when the workbook does not contain it yet.
        ``self.r`` is advanced by one for every entry written.

        :return: None
        """
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            # An existing workbook may have been created for another sheet.
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for goods_info in self.good_list:
            self.r += 1
            for title, values in goods_info.items():
                ws.cell(row=self.r, column=1, value=title)
                for column, value in enumerate(values, start=2):
                    ws.cell(row=self.r, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """
        Create the workbook file with a single worksheet holding the header row.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _write_header(self, ws):
        """Write ``COLUMN_TITLES`` into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self.COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)


#                    row  page  categoryId  ranking (sheet) name
# run = Spider_beovl(41, 2, 26, '新品榜')
# run.synthesize_top_data()
# run.save_excel()


# 首页推荐
class Home_recommend:
    """Crawl the home-page recommended articles and write them to Excel."""

    # Header titles of the sheet, in column order starting at column A.
    COLUMN_TITLES = ('名称', '文章链接', '皮肤特色', '图片链接', '评论内容', '图片链接')
    # Upper bound on the number of articles read from one response.
    MAX_ARTICLES = 40
    # Seconds to wait for the API so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, sheet, excel_name):
        """
        :param sheet: name of the worksheet to write to
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    @property
    def headers(self):
        """
        Build the HTTP headers sent with a request.

        :return: dict containing the ``User-Agent`` header
        """
        return {
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; Android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def payload(self):
        """
        Form data posted to the recommendation endpoint.

        :return: dict of request parameters
        """
        # NOTE(review): the timestamp and signature are fixed values;
        # presumably the signature matches this exact parameter set, so they
        # are left untouched - confirm before changing either of them.
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'imei': '1cae65c555f22ad73561b62b3451ede8',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'sys_v': '5.1.1',
            'v': '4.1.1',
            'channel': 'yingyongbao',
            'opentime': '1594864081',
            'req_timestamp': 1594864163324,
            'pager': '1',
            'pageNum': '1',
            'signature': '2f56f216eea8f12848d5373052737fb7',
        }

    def home_recommend_crawl(self):
        """
        Fetch the recommendation feed and collect its articles.

        Every article is appended to ``self.good_list`` as
        ``{title: [h5url, skinName, entityImage, comment text]}``.

        :return: None
        """
        url = 'https://api.bevol.com/auto/data2'
        resp = requests.post(
            url,
            headers=self.headers,
            data=self.payload,
            timeout=self.REQUEST_TIMEOUT,
        ).json()
        self.good_list.extend(self._parse_articles(resp))

    @staticmethod
    def _parse_articles(resp):
        """
        Turn a decoded API response into row dicts.

        The values follow the order of sheet columns B..E. An article is
        skipped (and the error printed) when one of its fields, or the
        comment at the same index, is missing.

        :param resp: decoded JSON response
        :return: list of ``{title: [h5url, skinName, entityImage, comment]}``
        """
        try:
            entity_map = resp['result']['entityMap']
            articles = list(entity_map['article'])[:Home_recommend.MAX_ARTICLES]
            comments = entity_map['comment']
        except (KeyError, TypeError) as exc:
            print(exc)
            return []

        rows = []
        for index, article in enumerate(articles):
            try:
                title = article['title']
                values = [
                    article['h5url'],
                    article['skinName'],
                    article['entityImage'],
                    comments[index]['article'],
                ]
            except (KeyError, IndexError, TypeError) as exc:
                print(exc)
                continue
            rows.append({f'{title}': [f'{value}' for value in values]})
        return rows

    def save_excel(self):
        """
        Write the collected articles to the worksheet and save the workbook.

        The workbook file is created when it does not exist, and the
        worksheet is added when the workbook does not contain it yet.
        Rows are written starting at row 2.

        :return: None
        """
        r = 1
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for goods_info in self.good_list:
            r += 1
            for title, values in goods_info.items():
                ws.cell(row=r, column=1, value=title)
                # Write however many values the row holds; indexing a fixed
                # five values raised IndexError on shorter rows.
                for column, value in enumerate(values, start=2):
                    ws.cell(row=r, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """
        Create the workbook file with a single worksheet holding the header row.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _write_header(self, ws):
        """Write ``COLUMN_TITLES`` into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self.COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)


# home_rec = Home_recommend('主页文章', '主页推荐.xlsx')
# home_rec.home_recommend_crawl()
# home_rec.save_excel()


# 优惠低价中心
class Low_price_centre(Base_params):
    """Crawl the discount (low price) centre list and write it to Excel."""

    # Header titles of the sheet, in column order starting at column A.
    COLUMN_TITLES = ('名称', '评分', '预计价格', '打折价格', '平台链接', '商品体积', '商品介绍')
    # Keys read from every result entry, in the order of columns B..G.
    VALUE_KEYS = ('grade', 'reservePrice', 'discountPrice', 'channelLink',
                  'goodsVolume', 'content')
    # Upper bound on entries read from one response (the URL asks for pageSize=20).
    PAGE_SIZE = 20
    # Seconds to wait for the API so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, sheet, excel_name):
        """
        :param sheet: name of the worksheet to write to
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    def low_price(self):
        """
        Fetch the discount list and collect its entries.

        Every entry is appended to ``self.good_list`` as
        ``{title: [grade, reservePrice, discountPrice, channelLink,
        goodsVolume, content]}`` with every value converted to ``str``.
        Malformed entries are reported and skipped.

        :return: None
        """
        # NOTE(review): page number, timestamp and signature are baked into
        # the query string; presumably the signature covers those values -
        # confirm before parameterizing the URL.
        url = 'https://api.bevol.com/cps/cpsWelfareList?uid=10547754&uuid=866174733832030&imei=1cae65c555f22ad73561b62b3451ede8&model=OPPO%20R11%20Plus&sys_v=5.1.1&v=4.1.1&o=Android&channel=yingyongbao&opentime=1594893672&req_timestamp=1594896096406&pager=3&pageSize=20&signature=1e9f9733dd0f9e92a19404c07b5d5773'
        resp = requests.post(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT).json()

        try:
            entries = list(resp['result'])[:self.PAGE_SIZE]
        except (KeyError, TypeError) as exc:
            print(exc)
            return

        for entry in entries:
            try:
                title = entry['title']
                values = [f'{entry[key]}' for key in self.VALUE_KEYS]
            except (KeyError, TypeError) as exc:
                # Report the problem instead of silently dropping the entry.
                print(exc)
                continue
            self.good_list.append({f'{title}': values})
            print(title, values[0])

    def save_excel(self):
        """
        Write the collected entries to the worksheet and save the workbook.

        The workbook file is created when it does not exist, and the
        worksheet is added when the workbook does not contain it yet.
        Rows are written starting at row 2.

        :return: None
        """
        r = 1
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for goods_info in self.good_list:
            r += 1
            for title, values in goods_info.items():
                ws.cell(row=r, column=1, value=title)
                for column, value in enumerate(values, start=2):
                    ws.cell(row=r, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """
        Create the workbook file with a single worksheet holding the header row.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)

    def _write_header(self, ws):
        """Write ``COLUMN_TITLES`` into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self.COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)

# Crawl the discount centre and store the result in sheet '有' of '优惠中心.xlsx'.
run = Low_price_centre('有', '优惠中心.xlsx')
run.low_price()
run.save_excel()





以上是关于 bevol APP 平台爬取的主要内容,如果未能解决你的问题,请参考以下文章:

scrapy按顺序启动多个爬虫代码片段(python3)

爬虫学习 09.移动端数据爬取

scrapy主动退出爬虫的代码片段(python3)

如何用30行代码爬取Google Play 100万个App的数据

如何用30行代码爬取Google Play 100万个App的数据

python小白学习记录 多线程爬取ts片段