#yyds干货盘点#Python实战案例,requests模块,Python实现获取动态图表
Posted 逻辑教育
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了#yyds干货盘点#Python实战案例,requests模块,Python实现获取动态图表相关的知识,希望对你有一定的参考价值。
前言
利用Python实现获取动态图表,废话不多说~
让我们愉快地开始吧~
开发工具
Python版本: 3.6.4
相关模块:
re模块;
requests模块;
urllib模块;
pandas模块;
以及一些Python自带的模块。
环境搭建
安装Python并添加到环境变量,pip安装需要的相关模块即可。
看一下B站2019年「数据可视化」版块的情况,第一个视频超2百万的播放量,4万+的弹幕
百度指数
获取百度指数,首先需要登录你的百度账号
以关键词「王者荣耀」为例,时间自定义为2020-10-01~2020-10-10
通过开发者工具,我们就能看到曲线图的数据接口
然而一看请求得到的结果,发现并没有数据,原因是这里使用了JS加密
找到解决方法,成功实现爬取,代码实现
import time
import json
import execjs
import datetime
import requests
from urllib.parse import urlencode
def get_data(keywords, startDate, endDate, area):
    """Fetch the encrypted index series and its decryption key from Baidu Index.

    Args:
        keywords: Iterable of keyword strings; each one becomes an entry of
            the ``word`` query parameter.
        startDate: Start of the range, sent as the ``startDate`` parameter.
        endDate: End of the range, sent as the ``endDate`` parameter.
        area: Area code, sent as the ``area`` parameter.

    Returns:
        A dict with two keys: ``"data"`` (the ``all`` series of the first
        entry in ``userIndexes``) and ``"t"`` (the ``data`` field returned
        by the ``ptbk`` endpoint for this request's ``uniqid``).
    """
    # Example of the resulting URL:
    # http://index.baidu.com/api/SearchApi/index?area=0&word=[[%7B%22name%22:%22%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80%22,%22wordType%22:1%7D]]&startDate=2020-10-01&endDate=2020-10-10
    params = {
        "word": json.dumps(
            [[{"name": keyword, "wordType": 1}] for keyword in keywords]
        ),
        "startDate": startDate,
        "endDate": endDate,
        "area": area,
    }
    data_url = "http://index.baidu.com/api/SearchApi/index?" + urlencode(params)
    headers = {
        # Replace with the cookie copied from a logged-in browser session.
        "Cookie": "你的cookie",
        "Referer": "http://index.baidu.com/v2/main/index.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
    }
    # First request: the encrypted series plus the uniqid identifying it.
    res = requests.get(url=data_url, headers=headers, timeout=10).json()
    data = res["data"]["userIndexes"][0]["all"]["data"]
    uniqid = res["data"]["uniqid"]
    # Second request: the key string for the JS decrypt function,
    # e.g. t = "ev-fxk9T8V1lwAL6,51348+.9270-%".
    t_url = f"http://index.baidu.com/Interface/ptbk?uniqid={uniqid}"
    rep = requests.get(url=t_url, headers=headers, timeout=10).json()
    t = rep["data"]
    return {"data": data, "t": t}
def get_search_index(word, startDate, endDate, area):
    """Fetch the encrypted index data and decrypt it with the bundled JS helper.

    Args:
        word: Keywords to query; forwarded unchanged to ``get_data`` as its
            ``keywords`` argument.
        startDate: Start of the range, forwarded to ``get_data``.
        endDate: End of the range, forwarded to ``get_data``.
        area: Area code, forwarded to ``get_data``.

    Returns:
        Whatever the ``decrypt`` function in ``parsing_data_function.js``
        returns (callers in this file split it on ","; presumably a
        comma-separated string -- confirm against the JS file).
    """
    res = get_data(word, startDate, endDate, area)
    encrypted = res["data"]
    key = res["t"]
    # The decryption routine lives in a JS file next to this script.
    with open("parsing_data_function.js", encoding="utf-8") as f:
        js = f.read()
    # Compile the JS source into a callable context.
    docjs = execjs.compile(js)
    # decrypt(t, e): key first, encrypted payload second.
    return docjs.call("decrypt", key, encrypted)
def get_date_list(begin_date, end_date):
    """Return every day from begin_date to end_date, inclusive.

    Both bounds are parsed with the "%Y-%m-%d" format and compared as
    datetimes, so inputs without zero padding (e.g. "2020-10-3") are
    handled correctly. Comparing the raw strings would order them
    lexicographically and overshoot the end date.

    Args:
        begin_date: First day, as a "%Y-%m-%d" string.
        end_date: Last day, as a "%Y-%m-%d" string.

    Returns:
        A list of zero-padded "YYYY-MM-DD" strings; empty when begin_date
        is after end_date.

    Raises:
        ValueError: If either argument does not match "%Y-%m-%d".
    """
    fmt = "%Y-%m-%d"
    current = datetime.datetime.strptime(begin_date, fmt)
    last = datetime.datetime.strptime(end_date, fmt)
    one_day = datetime.timedelta(days=1)
    dates = []
    while current <= last:
        dates.append(current.strftime(fmt))
        current += one_day
    return dates
def get_area():
    """Scrape the per-province index of one keyword and append it to area.csv.

    For every area code in the table below, this calls ``get_search_index``
    for the keyword "王者荣耀" over 2020-10-01..2020-10-10, prints each
    (province, value, date) triple and appends it to ``area.csv`` as one
    ``province,value,date`` line. A province whose request or parsing fails
    is reported and skipped, so the remaining provinces are still collected.
    """
    areas = {
        "901": "山东", "902": "贵州", "903": "江西", "904": "重庆",
        "905": "内蒙古", "906": "湖北", "907": "辽宁", "908": "湖南",
        "909": "福建", "910": "上海", "911": "北京", "912": "广西",
        "913": "广东", "914": "四川", "915": "云南", "916": "江苏",
        "917": "浙江", "918": "青海", "919": "宁夏", "920": "河北",
        "921": "黑龙江", "922": "吉林", "923": "天津", "924": "陕西",
        "925": "甘肃", "926": "新疆", "927": "河南", "928": "安徽",
        "929": "山西", "930": "海南", "931": "台湾", "932": "西藏",
        "933": "香港", "934": "澳门",
    }
    word = ["王者荣耀"]
    startDate = "2020-10-01"
    endDate = "2020-10-10"
    # The date axis is the same for every province, so build it once.
    dates = get_date_list(startDate, endDate)
    for code, province in areas.items():
        try:
            # Pause between requests to avoid hammering the endpoint.
            time.sleep(1)
            res = get_search_index(word, startDate, endDate, code)
            result = res.split(",")
            with open("area.csv", "a", encoding="utf-8") as f:
                for num, date in zip(result, dates):
                    print(province, num, date)
                    f.write(f"{province},{num},{date}\n")
        except Exception as exc:
            # Best effort: report the failure and continue with the next province.
            print(f"{province}: skipped ({exc!r})")
def get_word():
    """Scrape the nationwide index of several keywords and append it to word.csv.

    For every keyword in the list below, this calls ``get_search_index``
    over 2020-10-01..2020-10-10 with area code 0, prints each
    (keyword, value, date) triple and appends it to ``word.csv`` as one
    ``keyword,value,date`` line. A keyword whose request or parsing fails
    is reported and skipped.
    """
    words = ["诸葛大力", "张伟", "胡一菲", "吕子乔", "陈美嘉", "赵海棠", "咖喱酱", "曾小贤", "秦羽墨"]
    startDate = "2020-10-01"
    endDate = "2020-10-10"
    area = 0
    # The date axis is the same for every keyword, so build it once.
    dates = get_date_list(startDate, endDate)
    for word in words:
        try:
            # Pause between requests to avoid hammering the endpoint.
            time.sleep(2)
            # get_data iterates over its keywords argument, so a bare string
            # would be split into single characters; wrap it in a list.
            res = get_search_index([word], startDate, endDate, area)
            result = res.split(",")
            with open("word.csv", "a", encoding="utf-8") as f:
                for num, date in zip(result, dates):
                    print(word, num, date)
                    f.write(f"{word},{num},{date}\n")
        except Exception as exc:
            # Best effort: report the failure and continue with the next keyword.
            print(f"{word}: skipped ({exc!r})")
# Run both scrapers: the per-province loop first, then the per-keyword loop.
get_area()
get_word()
得到的CSV文件结果如下,有两种形式的数据
一种是多个关键词每日指数数据,另一种是一个关键词各省市每日指数数据
有了数据就可以用Python制作动图
import pandas as pd
import bar_chart_race as bcr
# Load the scraped rows; the CSV has no header line, so name the columns here.
# To plot the keyword comparison instead, read word.csv:
# df = pd.read_csv("word.csv", encoding="utf-8", header=None, names=["name", "number", "day"])
df = pd.read_csv("area.csv", encoding="utf-8", header=None, names=["name", "number", "day"])
# Pivot to one row per day and one column per name; missing cells become 0.
df_result = pd.pivot_table(df, values="number", index=["day"], columns=["name"], fill_value=0)
# Render the animated bar chart race as a GIF.
# bcr.bar_chart_race(df_result, filename="word.gif", title="爱情公寓5演职人员热度排行")
bcr.bar_chart_race(df_result, filename="area.gif", title="国内各省市王者荣耀热度排行")
5行Python代码,看看实现的效果
微博指数
百度搜索新浪的微博指数,打开网站一看,发现网页版无法使用
我们只需打开开发者工具,将你的浏览器模拟为手机端,刷新网页即可
可以看到,微指数的界面出来了
添加关键词,查看指数的数据接口
请求是POST方法,并且不需要登录微博账号
import re
import time
import json
import requests
import datetime
# Request headers copied from the browser, one "name: value" pair per line.
# Replace the cookie placeholder with your own cookie before running.
raw_headers = """accept: application/json
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
content-length: 50
content-type: application/x-www-form-urlencoded
cookie: 你的cookie
origin: https://data.weibo.com
referer: https://data.weibo.com/index/newindex?visit_type=trend&wid=1011224685661
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1
x-requested-with: XMLHttpRequest"""
# Turn the header text into a dict. Split on real line breaks, and only on
# the first ": " of each line so values that contain ": " stay intact.
headers = dict(line.split(": ", 1) for line in raw_headers.splitlines())
print(headers)
# Endpoint that returns the chart data.
url = "https://data.weibo.com/index/ajax/newindex/getchartdata"
# 获取时间列表
def get_date_list(begin_date, end_date):
    """Return every day from begin_date to end_date, inclusive.

    Both bounds are parsed with the "%Y-%m-%d" format and compared as
    datetimes, so inputs without zero padding (e.g. "2020-10-3") are
    handled correctly. Comparing the raw strings would order them
    lexicographically and overshoot the end date.

    Args:
        begin_date: First day, as a "%Y-%m-%d" string.
        end_date: Last day, as a "%Y-%m-%d" string.

    Returns:
        A list of zero-padded "YYYY-MM-DD" strings; empty when begin_date
        is after end_date.

    Raises:
        ValueError: If either argument does not match "%Y-%m-%d".
    """
    fmt = "%Y-%m-%d"
    current = datetime.datetime.strptime(begin_date, fmt)
    last = datetime.datetime.strptime(end_date, fmt)
    one_day = datetime.timedelta(days=1)
    dates = []
    while current <= last:
        dates.append(current.strftime(fmt))
        current += one_day
    return dates
# Keywords whose Weibo index is collected.
names = ["汤唯", "朱亚文", "邓家佳", "乔振宇", "王学圻", "张艺兴", "俞灏明", "吴越", "梁冠华", "李昕亮", "苏可", "孙骁骁", "赵韩樱子", "孙耀琦", "魏巍"]
# Endpoint that resolves a keyword to its numeric id (wid).
url_id = "https://data.weibo.com/index/ajax/newindex/searchword"
# Matches: li wid=\"<id>\" word -- a literal backslash before each quote
# (presumably because the HTML arrives escaped inside a JSON string).
pattern = re.compile(r'li wid=\\\"(.*?)\\\" word')
# Collect the index series for every keyword and append it to weibo.csv.
for name in names:
    try:
        # Look up the keyword id.
        html_id = requests.post(url=url_id, data={"word": name}, headers=headers, timeout=10)
        matches = pattern.findall(html_id.text)
        if not matches:
            print(f"{name}: no keyword id found, skipped")
            continue
        wid = matches[0]
        # Parameters for the chart-data endpoint.
        data = {
            "wid": wid,
            "dateGroup": "1month",
        }
        # Pause between requests to avoid hammering the endpoint.
        time.sleep(2)
        html = requests.post(url=url, data=data, headers=headers, timeout=10)
        result = json.loads(html.text)
        if result["data"]:
            trend = result["data"][0]["trend"]
            # "s" holds the values and "x" the matching dates.
            with open("weibo.csv", "a", encoding="utf-8") as f:
                for value, date in zip(trend["s"], trend["x"]):
                    print(name, value, date)
                    f.write(f"{name},{value},{date}\n")
    except Exception as exc:
        # Best effort: report the failure and continue with the next keyword.
        print(f"{name}: skipped ({exc!r})")
获取到的信息
也来生成一个动态图表
import pandas as pd
import bar_chart_race as bcr
# Load the scraped rows; the CSV has no header line, so name the columns here.
df = pd.read_csv("weibo.csv", encoding="utf-8", header=None, names=["name", "number", "day"])
# Pivot to one row per day and one column per name; missing cells become 0.
df_result = pd.pivot_table(df, values="number", index=["day"], columns=["name"], fill_value=0)
# print(df_result[:10])
# Render the animated bar chart race as a GIF from the first 10 rows (days).
bcr.bar_chart_race(df_result[:10], filename="weibo.gif", title="大明风华演职人员热度排行")
本文源码详见个人主页简介获取
结果展示
以上是关于#yyds干货盘点#Python实战案例,requests模块,Python实现获取动态图表的主要内容,如果未能解决你的问题,请参考以下文章
#yyds干货盘点#Python实战案例,CV2模块,Python实现抖音字符视频
#yyds干货盘点#Python实战案例,PIL模块,Python实现自动化生成倒计时图片
快速入门vue,含实战案例,“建议收藏”,用到省的百度了#yyds干货盘点#
rsync企业真实项目备份案例实战(需求收集--服务器配置---客户端配置---报警机制---数据校验---邮件告警) #yyds干货盘点#