基于requests+pyecharts+MySQL的疫情可视化BI大屏
Posted Drunkpoem
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了基于requests+pyecharts+MySQL的疫情可视化BI大屏相关的知识,希望对你有一定的参考价值。
在此向全国抗疫的各行各业人士致敬!
什么是pyecharts?
pyecharts 是一个用于生成 Echarts 图表的类库。
echarts 是百度开源的一个数据可视化 JS 库,主要用于数据可视化。pyecharts 是一个用于生成 Echarts 图表的类库。实际上就是 Echarts 与 Python 的对接。
使用 pyecharts 可以生成独立的网页,也可以在 flask , Django 中集成使用。
pyecharts0.5和1.0在用法上有较大差异,本项目用的是0.5版本
先安装所需的库
pip install pyecharts==0.5.11
pip install echarts-countries-pypkg
pip install pyecharts-snapshot
pip install bs4
pip install pymysql
pip install lxml
(webbrowser 属于 Python 标准库,无需也无法通过 pip 安装)
pip install selenium
pip install jieba
pip install requests
项目组成
本项目由五个py文件组成,出问题时方便快速定位以及修改,最终效果如下:
1.国内疫情数据获取
国内疫情数据通过腾讯的实时疫情数据平台获取(https://xw.qq.com/act/qgfeiyan)
不难找到它的数据来源 JSON 接口 (https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5)
从这些看似杂乱无章、实则井然有序的数据中,我们不难发现它的排列规律
接下来就是获取数据并存入mysql
import json
import pymysql
import requests
# Connection settings for the local MySQL database that stores the scraped figures.
host = '127.0.0.1'
port = 3306
username = 'user'
password = 'password'
db = 'yq'
charset = 'utf8'
conn = pymysql.Connect(host=host, port=port, user=username, password=password, db=db, charset=charset)
cursor = conn.cursor()

# Endpoint that serves the domestic epidemic figures as JSON.
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
# The driver fills in the placeholders, so a name containing a quote can
# neither break nor inject into the statement.
add = 'insert into 中国疫情(省 , 新增, 总数,治愈, 死亡)values (%s,%s,%s,%s,%s)'

try:
    # Download and parse first, so a failed request never leaves the table emptied.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    result = json.loads(response.text)
    # The 'data' field is itself a JSON-encoded string and needs a second decode.
    areaTree = json.loads(result['data'])['areaTree']
    rows = [
        (
            area['name'],              # province name
            area['today']['confirm'],  # newly confirmed today
            area['total']['confirm'],  # total confirmed
            area['total']['heal'],     # recovered
            area['total']['dead'],     # deaths
        )
        for area in areaTree[0]['children']
    ]

    # Clearing the previous snapshot stays best-effort, but a failure is now
    # reported instead of being swallowed silently.
    sql = "delete from 中国疫情"
    try:
        cursor.execute(sql)
    except pymysql.MySQLError as exc:
        print('could not clear table 中国疫情: %s' % exc)

    cursor.executemany(add, rows)
    conn.commit()
    print('爬取成功')
finally:
    # Release the database resources whether or not the import succeeded.
    cursor.close()
    conn.close()
效果如下
2.国外疫情数据获取
国外疫情数据的获取方法与地址与国内相似,直接上代码
import json
import pymysql
import requests
# Connection settings for the local MySQL database that stores the scraped figures.
host = '127.0.0.1'
port = 3306
username = 'user'
password = 'password'
db = 'yq'
charset = 'utf8'
conn = pymysql.Connect(host=host, port=port, user=username, password=password, db=db, charset=charset)
cursor = conn.cursor()

# Endpoint that serves the per-country figures as JSON.
url = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_foreign"
# The driver fills in the placeholders, so a name containing a quote can
# neither break nor inject into the statement.
add = 'insert into 国外疫情(国家 , 累计确诊人数)values (%s,%s)'

try:
    # Download and parse first, so a failed request never leaves the table emptied.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    result = json.loads(response.text)
    # The 'data' field is itself a JSON-encoded string and needs a second decode.
    areaTree = json.loads(result['data'])['foreignList']
    # One (country name, total confirmed) pair per entry of the list.
    rows = [(country['name'], country['confirm']) for country in areaTree]

    # Clearing the previous snapshot stays best-effort, but a failure is now
    # reported instead of being swallowed silently.
    sql = "delete from 国外疫情"
    try:
        cursor.execute(sql)
    except pymysql.MySQLError as exc:
        print('could not clear table 国外疫情: %s' % exc)

    cursor.executemany(add, rows)
    conn.commit()
    print('爬取成功')
finally:
    # Release the database resources whether or not the import succeeded.
    cursor.close()
    conn.close()
效果如图
3.疫情新闻获取(用于制作词云图)
疫情新闻采用selenium从百度疫情实时大数据报告平台获取(https://voice.baidu.com/act/newpneumonia/newpneumonia?fraz=partner&paaz=gjyj#tab1&qq-pf-to=pcqq.c2c)
新闻数据在页面最底下,所以我们要先控制浏览器滑到合适的位置
# Load the report page in the Selenium-driven browser.
browser.get(url)
# Locate one element by its absolute XPath and click it. Per the surrounding
# article this is what brings the news list into view — the XPath is tied to
# the page layout, so verify it against the live page.
# NOTE(review): find_element_by_xpath was removed in Selenium 4 — confirm the
# installed Selenium version.
element = browser.find_element_by_xpath('//*[@id="ptab-1"]/div[3]/div[11]/span')
element.click()
滑到新闻页面后,根据新闻的 XPath 获取每一条数据,并用 jieba 库对其进行分词处理,然后将每个词及其出现次数存入字典中
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import jieba
jieba.setLogLevel(jieba.logging.INFO)
def is_number(s):
    """Return True if *s* represents a number, otherwise False.

    A value counts as a number when ``float()`` accepts it (for example
    ``"3.14"`` or ``"-2"``) or when it is a single Unicode character with a
    numeric value (for example ``"四"``). Anything else, including ``None``
    and the empty string, yields False.
    """
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric input such as None, which
        # previously escaped as an unhandled exception.
        pass
    try:
        # Only accepts a single character; longer strings raise TypeError.
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False
# By-based locators work on both Selenium 3 and Selenium 4; the
# find_element(s)_by_xpath helpers no longer exist in Selenium 4.
from selenium.webdriver.common.by import By

url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia?fraz=partner&paaz=gjyj'
# Run Chrome without a visible window.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
browser = webdriver.Chrome(options=chrome_options)
# For a visible browser window, build the driver with webdriver.Chrome() instead.

news_list = ''
try:
    browser.get(url)
    # Click the element at this XPath before reading the headlines (per the
    # article, this brings the news list into view).
    element = browser.find_element(By.XPATH, '//*[@id="ptab-1"]/div[3]/div[11]/span')
    element.click()
    news = browser.find_elements(By.XPATH, '//*[@id="ptab-1"]/div[3]/div/div[2]/a/div')
    # Concatenate every headline into one string for word segmentation.
    news_list = ''.join(str(item.text) for item in news)
except NoSuchElementException as exc:
    # Still best-effort: continue with whatever was collected, but say so.
    print('news element not found, word cloud data will be empty: %s' % exc)
finally:
    # Always release the browser process, on success and on failure alike.
    browser.quit()

# Stop words are read one per line; the set gives O(1) membership tests.
with open('StopWords.txt', encoding='utf-8') as stop_file:
    stopwords = [line.strip() for line in stop_file]
stopword_set = set(stopwords)

# d maps each segmented token that is not a stop word to its occurrence count.
d = {}
mytext = jieba.cut(news_list, cut_all=False)
for token in mytext:
    if token not in stopword_set:
        d[token] = d.get(token, 0) + 1

# word and value are parallel lists (token, count) with numeric tokens dropped.
word = [token for token in d if not is_number(token)]
value = [int(d[token]) for token in word]
4.疫情走势图数据
疫情走势数据是直接从我们已经有的CSV中获取,这里我就不多说了
import os
def _parse_count(cell):
    """Return the number stored in a CSV cell shaped like ``"123"``.

    Raises IndexError when the cell contains no double-quoted part and
    ValueError when the quoted text is not a number.
    """
    text = cell.split('"')[1]
    try:
        return int(text)
    except ValueError:
        return float(text)


path = 'F:疫情可视化\\covid19_china'
# os.listdir() returns names in arbitrary order; sorting keeps the files
# aligned with `date`.
# NOTE(review): assumes the file names sort chronologically — confirm.
path_list = sorted(os.listdir(path))
date = ['2.17', '2.18', '2.19', '2.20', '2.21', '2.22', '2.23', '2.24', '2.25']
total = []
cure = []
dead = []
for filename in path_list:
    tot = 0
    cur = 0
    dea = 0
    with open(os.path.join(path, filename), 'r', encoding='gbk') as f:
        # Plain comma split, as before; rstrip('\n') removes only the newline
        # (the old strip argument also removed backslashes and the letter n).
        ls = [line.rstrip('\n').split(',') for line in f]
    # Row 0 is skipped; columns 3, 5 and 6 are summed into the total, cured
    # and dead series respectively.
    for row in ls[1:]:
        try:
            counts = (_parse_count(row[3]), _parse_count(row[5]), _parse_count(row[6]))
        except (IndexError, ValueError):
            # Skip a malformed or short row instead of silently dropping every
            # remaining row of the file. int()/float() replace eval(), so file
            # content is never executed as code.
            continue
        tot += counts[0]
        cur += counts[1]
        dea += counts[2]
    total.append(tot)
    cure.append(cur)
    dead.append(dea)
5.数据处理与可视化
所有数据我们都已经有了,万事俱备只欠东风
从 MySQL 中读取我们已经获取到的国内外疫情数据,并从另外两个文件中将疫情新闻和疫情走势的数据以列表的形式导入进来
1.国内疫情热力图的制作
# Split the `sheng` mapping into two parallel lists: its keys and its values.
provice, values = list(sheng), list(sheng.values())

# China heat map; the visual scale runs from 0 to 10000.
map = Map(
    '中国疫情热力图',
    title_pos='center',
    width=1200,
    height=600,
)
map.add(
    "",
    provice,
    values,
    visual_range=[0, 10000],
    maptype='china',
    is_visualmap=True,
    is_label_show=True,
    visual_text_color='#000',
)
map.render("中国疫情.html")
2.非湖北地区确诊数前五的柱状图制作
# Keep the five entries with the most confirmed cases, Hubei excluded.
# Filtering first avoids the KeyError raised when Hubei is not among the first
# six entries, and the IndexError raised when fewer than six entries exist.
# NOTE(review): relies on `sheng1` being (province, count) pairs already
# sorted by count in descending order — confirm where it is built.
non_hubei = [(name, count) for name, count in sheng1 if name != '湖北']
for name, count in non_hubei[:5]:
    top5[name] = count

bar = Bar('非湖北地区确诊TOP5', title_pos='center', width=800, height=500)
bar.add('', list(top5.keys()), list(top5.values()), is_label_show=True, mark_point=['min', 'max'])
bar.render('确诊top5.html')
3.疫情新闻词云图的制作
# Word cloud built from the parallel `word` / `value` lists.
wc = WordCloud()
wc.add(
    '',
    word,
    value,
    shape='diamond',
    word_size_range=[20, 300],
)
wc.render('慈云.html')
4.疫情走势图的制作
# Trend chart: three series plotted over the shared `date` axis, each with its
# maximum and minimum marked. Only the last two series set a legend position.
line = Line('中国疫情走势')
for series_name, series_data, extra in (
    ("总确诊", total, {}),
    ("总治愈", cure, {"legend_pos": "20%"}),
    ("死亡", dead, {"legend_pos": "20%"}),
):
    line.add(series_name, date, series_data, mark_point=["max", "min"], **extra)
line.render('走势.html')
5.全球疫情玫瑰图的制作
# Rose-type pie chart built from the keys and values of `guo`.
pie = Pie("全球疫情玫瑰图", title_pos='center')
pie.add(
    "",
    guo.keys(),
    guo.values(),
    center=[60, 60],
    is_random=True,
    radius=[30, 75],
    rosetype='radius',
    is_legend_show=False,
    is_label_show=True,
    legend_orient='vertical',
)
pie.render('饼子图.html')
6.页面整合
将五个图按照合适的排版整合在一个页面上
# Put the five charts on one page and render it to page.html.
page = Page(page_title="COVID-19")
for chart in (line, pie, map, bar, wc):
    page.add(chart)
page.render("page.html")

# Absolute-position boxes for the first five <div> elements of the rendered
# page, as (width, height, top, left) in pixels.
# NOTE(review): assumes those five divs are the chart containers, in the order
# the charts were added — confirm against the rendered HTML.
layout = [
    (500, 350, 5, 0),
    (500, 350, 370, -50),
    (500, 350, 370, 961),
    (505, 350, 0, 961),
    (500, 800, 0, 450),
]
style_template = (
    "width:{}px;height:{}px;position:absolute;top:{}px;left:{}px;"
    "border-style:solid;border-color:#444444;border-width:0px;"
)

with open("page.html", "r", encoding='utf-8') as html:
    html_bf = BeautifulSoup(html, 'lxml')

divs = html_bf.select('div')
for index, box in enumerate(layout):
    # Indexing (not zip) keeps the IndexError if fewer than five divs exist.
    divs[index]['style'] = style_template.format(*box)

# Page heading. The literal is rebuilt with valid quoting; the span is now
# closed and the font is given as a real CSS property.
div_title = (
    '<div align="center" style="width:1500px;">\n'
    '<span style="font-size:32px;font-family:\'黑体\';color:#000000">'
    '<b>COVID-19 疫情数据统计与分析</b></span></div>'
)
body = html_bf.find("body")
body["style"] = "background-color:#ffffff;"
body.insert(0, BeautifulSoup(div_title, "lxml").div)

# Overwrite the rendered file with the restyled document; the with-statement
# closes the file, so no explicit close() is needed.
html_new = str(html_bf)
with open("page.html", "w", encoding='utf-8') as html:
    html.write(html_new)
至此,本项目已全部结束,附上数据处理全部代码
from pyecharts import Map,Bar,WordCloud,Line,Pie,Page
import pymysql
from yq_news import word,value
from csvdata import total,cure,dead,date
from bs4 import BeautifulSoup
import webbrowser
# Connection settings for the local MySQL database filled by the two scrapers.
host = '127.0.0.1'
port = 3306
username = 'root'
password = 'root'
db = 'yq'
charset = 'utf8'
conn = pymysql.Connect(host=host, port=port, user=username, password=password, db=db, charset=charset)
cursor = conn.cursor()

sheng = {}  # first column of 中国疫情 (省) -> second column (总数) as int
top5 = {}   # filled later with the entries shown in the bar chart
guo = {}    # country name -> confirmed count as int, large counts only

try:
    # Each query stays best-effort (the charts are still drawn from whatever
    # loaded), but failures are reported instead of being swallowed.
    sql = "select 省,总数 from 中国疫情"
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        for row in results:
            sheng[row[0]] = int(row[1])
    except (pymysql.MySQLError, TypeError, ValueError) as exc:
        print('could not load 中国疫情: %s' % exc)

    # The column names follow the INSERT statement used by the foreign-data
    # scraper (国家, 累计确诊人数). The previous query filtered on a column
    # called `num`, which that statement never writes.
    # NOTE(review): confirm against the real table schema.
    sql = "select 国家,累计确诊人数 from 国外疫情 where 累计确诊人数>20000"
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        for row in results:
            guo[row[0]] = int(row[1])
    except (pymysql.MySQLError, TypeError, ValueError) as exc:
        print('could not load 国外疫情: %s' % exc)
finally:
    # Close the connection even if something unexpected is raised above.
    conn.close()
# print(guo)
# print(sheng)
# Split the `sheng` mapping into two parallel lists: its keys and its values.
provice, values = list(sheng), list(sheng.values())

# China heat map; the visual scale runs from 0 to 10000.
map = Map(
    '中国疫情热力图',
    title_pos='center',
    width=1200,
    height=600,
)
map.add(
    "",
    provice,
    values,
    visual_range=[0, 10000],
    maptype='china',
    is_visualmap=True,
    is_label_show=True,
    visual_text_color='#000',
)
map.render("中国疫情.html")
# (province, count) pairs ordered by count, largest first.
sheng1 = sorted(sheng.items(), key=lambda x: x[1], reverse=True)

# Keep the five entries with the most confirmed cases, Hubei excluded.
# Filtering first avoids the KeyError raised when Hubei is not among the first
# six entries, and the IndexError raised when fewer than six entries exist.
non_hubei = [(name, count) for name, count in sheng1 if name != '湖北']
for name, count in non_hubei[:5]:
    top5[name] = count

bar = Bar('非湖北地区确诊TOP5', title_pos='center', width=800, height=500)
bar.add('', list(top5.keys()), list(top5.values()), is_label_show=True, mark_point=['min', 'max'])
bar.render('确诊top5.html')
# Word cloud built from the parallel `word` / `value` lists.
wc = WordCloud()
wc.add(
    '',
    word,
    value,
    shape='diamond',
    word_size_range=[20, 300],
)
wc.render('慈云.html')

# Trend chart: three series plotted over the shared `date` axis, each with its
# maximum and minimum marked. Only the last two series set a legend position.
line = Line('中国疫情走势')
for series_name, series_data, extra in (
    ("总确诊", total, {}),
    ("总治愈", cure, {"legend_pos": "20%"}),
    ("死亡", dead, {"legend_pos": "20%"}),
):
    line.add(series_name, date, series_data, mark_point=["max", "min"], **extra)
line.render('走势.html')

# Rose-type pie chart built from the keys and values of `guo`.
pie = Pie("全球疫情玫瑰图", title_pos='center')
pie.add(
    "",
    guo.keys(),
    guo.values(),
    center=[60, 60],
    is_random=True,
    radius=[30, 75],
    rosetype='radius',
    is_legend_show=False,
    is_label_show=True,
    legend_orient='vertical',
)
pie.render('饼子图.html')
# Put the five charts on one page and render it to page.html.
page = Page(page_title="COVID-19")
for chart in (line, pie, map, bar, wc):
    page.add(chart)
page.render("page.html")

# Absolute-position boxes for the first five <div> elements of the rendered
# page, as (width, height, top, left) in pixels.
# NOTE(review): assumes those five divs are the chart containers, in the order
# the charts were added — confirm against the rendered HTML.
layout = [
    (500, 350, 5, 0),
    (500, 350, 370, -50),
    (500, 350, 370, 961),
    (505, 350, 0, 961),
    (500, 800, 0, 450),
]
style_template = (
    "width:{}px;height:{}px;position:absolute;top:{}px;left:{}px;"
    "border-style:solid;border-color:#444444;border-width:0px;"
)

with open("page.html", "r", encoding='utf-8') as html:
    html_bf = BeautifulSoup(html, 'lxml')

divs = html_bf.select('div')
for index, box in enumerate(layout):
    # Indexing (not zip) keeps the IndexError if fewer than five divs exist.
    divs[index]['style'] = style_template.format(*box)

# Page heading. The literal is rebuilt with valid quoting; the span is now
# closed and the font is given as a real CSS property.
div_title = (
    '<div align="center" style="width:1500px;">\n'
    '<span style="font-size:32px;font-family:\'黑体\';color:#000000">'
    '<b>COVID-19 疫情数据统计与分析</b></span></div>'
)
body = html_bf.find("body")
body["style"] = "background-color:#ffffff;"
body.insert(0, BeautifulSoup(div_title, "lxml").div)

# Overwrite the rendered file with the restyled document; the with-statement
# closes the file, so no explicit close() is needed.
html_new = str(html_bf)
with open("page.html", "w", encoding='utf-8') as html:
    html.write(html_new)

# Show the finished dashboard in a new tab of the default browser.
webbrowser.open_new_tab('page.html')
以上是关于基于requests+pyecharts+MySQL的疫情可视化BI大屏的主要内容,如果未能解决你的问题,请参考以下文章