05-数据爬取
Posted --lzx1--
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了05-数据爬取相关的知识,希望对你有一定的参考价值。
数据爬取
代码:
Yiqing.py
from os import path
import requests
from bs4 import BeautifulSoup
import json
import pymysql
import time
from _ast import Try
# Scrape the per-province / per-city COVID-19 figures embedded in the dxy.cn
# pneumonia page and insert one timestamped snapshot into MySQL table
# `info_copy`.
#
# NOTE(review): the original script also parsed the world-wide data
# (<script id="getListByCountryTypeService2">) but never stored or used it,
# so that step is omitted here.

url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request address
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}  # request headers

# The snapshot time is bound as a regular parameter instead of being
# concatenated into the SQL text.
sql_city = "insert into info_copy (Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s,%s)"
sql_province = "insert into info_copy (Province,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s)"


def extract_json_array(script_text):
    """Return the JSON array embedded in the text of a data <script> tag.

    The page wraps its data as ``try { window.NAME = [...]}catch(e){}``.
    Everything from the first ``[`` to the last ``]`` is decoded, which
    replaces the fixed-offset slicing that broke on any markup change.

    Raises:
        ValueError: if no bracketed array is present or it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = script_text.find('[')
    end = script_text.rfind(']')
    if start == -1 or end < start:
        raise ValueError('no JSON array found in script text')
    return json.loads(script_text[start:end + 1])


def build_rows(provinces, timestamp):
    """Turn decoded province records into parameter tuples for the inserts.

    Args:
        provinces: list of dicts decoded from the ``getAreaStat`` script.
        timestamp: snapshot time string appended to every row.

    Returns:
        ``(province_rows, city_rows)`` whose tuple layouts match
        ``sql_province`` and ``sql_city`` respectively.
    """
    province_rows = []
    city_rows = []
    for province in provinces:
        province_name = province.get('provinceName')
        province_rows.append((
            province_name,
            province.get('confirmedCount'),
            province.get('curedCount'),
            province.get('deadCount'),
            province.get('locationId'),
            timestamp,
        ))
        # A missing or null 'cities' entry is treated as "no cities".
        for city in province.get('cities') or []:
            city_rows.append((
                province_name,
                city.get('cityName'),
                city.get('confirmedCount'),
                city.get('curedCount'),
                city.get('deadCount'),
                city.get('locationId'),
                timestamp,
            ))
    return province_rows, city_rows


def fetch_area_stats():
    """Download the page and return the decoded ``getAreaStat`` array."""
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    content = response.content.decode('utf-8')
    soup = BeautifulSoup(content, 'html.parser')
    script = soup.find(name='script', attrs={"id": "getAreaStat"})
    if script is None:
        raise RuntimeError('script#getAreaStat not found in page')
    return extract_json_array(str(script))


def store_rows(province_rows, city_rows):
    """Insert both row sets in one transaction; roll back on a database error."""
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment.
    # Keyword arguments are required by PyMySQL >= 1.0.
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         database="payiqing", charset='utf8')
    try:
        cursor = db.cursor()
        try:
            cursor.executemany(sql_province, province_rows)
            cursor.executemany(sql_city, city_rows)
            db.commit()
        except pymysql.MySQLError as exc:
            print('执行失败,进入回调4')
            print(exc)
            db.rollback()
    finally:
        db.close()  # always release the connection


def main():
    """Fetch the current figures and store one timestamped snapshot."""
    provinces = fetch_area_stats()
    L = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(L)
    province_rows, city_rows = build_rows(provinces, L)
    store_rows(province_rows, city_rows)


if __name__ == '__main__':
    main()
Main.jsp
<%-- Main.jsp: top-level frame layout with a banner row on top and a navigation/content split below. --%>
<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>疫情</title>
</head>
<%-- Top 15% of the window shows top.jsp; the rest is split 12% / remainder. --%>
<frameset rows="15%,*">
    <frame src="top.jsp">
    <frameset cols="12%,*">
        <frame src="main_left.jsp">
        <%-- Named so that links in other frames can target it. --%>
        <frame src="main_right.jsp" name="main_right">
    </frameset>
    <%-- A body element is only valid inside noframes when a frameset is used. --%>
    <noframes>
    <body>
    </body>
    </noframes>
</frameset>
</html>
Cha.jsp
<%-- Cha.jsp: shows a fixed bar chart plus the info_copy rows whose Date contains the submitted "Date" parameter. --%>
<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scripting, java.util imports, UTF-8 page encoding --%>
<%@ page import="java.sql.*"%> <%-- JDBC types --%>
<%@ page import="com.javao.msg.DBUtil"%>
<%!
    /** Escapes text for inclusion in HTML; a null value becomes an empty string. */
    private static String esc(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                case '\'': sb.append("&#39;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<%
    request.setCharacterEncoding("UTF-8");  // decode request parameters as UTF-8
    response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<%
    String path = request.getContextPath();
    // Absolute URL of the application root, used by the <base> tag below.
    String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>"> <%-- relative URLs resolve against the application root --%>
<title>疫情</title>
<script src="js/echarts.min.js"></script>
</head>
<body>
<div id="main" style="width: 600px;height:400px;"></div>
<script type="text/javascript">
// Initialise the ECharts instance on the prepared DOM node.
var myChart = echarts.init(document.getElementById('main'));
// Chart options and data (hard-coded figures, not read from the database).
var option = {
    title: {
        text: '疫情情况'
    },
    tooltip: {},
    legend: {
        data:['人数']
    },
    xAxis: {
        data: ["湖北省","广东省","浙江省","河南省","安徽省","江西省","江苏省","重庆市","山东省","四川省","北京市","黑龙江省","上海市","河北省","陕西省","广西壮族自治区","云南省","海南省","山西省","辽宁省","贵州省","天津市","甘肃省","吉林省","内蒙古自治区","宁夏回族自治区","新疆维吾尔自治区","青海省","西藏自治区"]
    },
    yAxis: {},
    series: [{
        name: '人数',
        type: 'bar',
        data: [67786,1356,1215,1273,990,935,631,576,760,539,536,482,346,318,245,252,174,168,133,125,146,136,127,93,75,75,76,18,1]
    }]
};
// Render the chart with the options above.
myChart.setOption(option);
</script>
<br>
<div align="center">
<h1 style="font-family:KaiTi;color:OrangeRed">信息如下</h1>
</div>
<br>
<table align="center" width="1000" border="100" cellSpacing=1 style="font-size:15pt;border:dashed 1pt"> <%-- table width 1000 --%>
<tr>
<td width="600">日期</td>
<td width="300">省份</td>
<td width="300">城市</td>
<td width="400">总确诊数</td>
<td width="400">治愈病例</td>
<td width="400">死亡病例</td>
</tr>
<%
    // A missing parameter is treated like an empty filter (matches every row).
    String dateFilter = request.getParameter("Date");
    if (dateFilter == null) {
        dateFilter = "";
    }
    // The user-supplied filter is bound as a parameter, never concatenated into the SQL.
    // try-with-resources (Java 7+) closes rs, stat and conn even when a call throws.
    // NOTE(review): assumes DBUtil.getConnection() returns a java.sql.Connection — confirm.
    try (Connection conn = DBUtil.getConnection();
         PreparedStatement stat = conn.prepareStatement("select * from info_copy where Date like ?")) {
        stat.setString(1, "%" + dateFilter + "%");
        try (ResultSet rs = stat.executeQuery()) {
            while (rs.next()) {
                out.print("<tr>");
                out.print("<td>" + esc(rs.getString("Date")) + "</td>");
                out.print("<td>" + esc(rs.getString("Province")) + "</td>");
                out.print("<td>" + esc(rs.getString("City")) + "</td>");
                out.print("<td>" + esc(rs.getString("Confirmed_num")) + "</td>");
                out.print("<td>" + esc(rs.getString("Cured_num")) + "</td>");
                out.print("<td>" + esc(rs.getString("Dead_num")) + "</td>");
                out.print("</tr>");
            }
        }
    }
%>
</table>
<br>
</body>
</html>
Cha1.jsp
<%-- Cha1.jsp: lists the rows of table info whose Date contains the submitted "Date" parameter. --%>
<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scripting, java.util imports, UTF-8 page encoding --%>
<%@ page import="java.sql.*"%> <%-- JDBC types --%>
<%@ page import="com.javao.msg.DBUtil"%>
<%!
    /** Escapes text for inclusion in HTML; a null value becomes an empty string. */
    private static String esc(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                case '\'': sb.append("&#39;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<%
    request.setCharacterEncoding("UTF-8");  // decode request parameters as UTF-8
    response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<%
    String path = request.getContextPath();
    // Absolute URL of the application root, used by the <base> tag below.
    String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>"> <%-- relative URLs resolve against the application root --%>
<title>按日期查询</title>
</head>
<body>
<br>
<div align="center">
<h1 style="font-family:KaiTi;color:OrangeRed">符合条件的信息</h1>
</div>
<hr noshade>
<br>
<table align="center" width="1000" border="100" cellSpacing=1 style="font-size:15pt;border:dashed 1pt"> <%-- table width 1000 --%>
<tr>
<td width="110">序号</td>
<td width="600">日期</td>
<td width="300">省份</td>
<td width="300">城市</td>
<td width="400">总确诊数</td>
<td width="211">疑似病例</td>
<td width="400">治愈病例</td>
<td width="400">死亡病例</td>
</tr>
<%
    // A missing parameter is treated like an empty filter (matches every row).
    String dateFilter = request.getParameter("Date");
    if (dateFilter == null) {
        dateFilter = "";
    }
    // The user-supplied filter is bound as a parameter, never concatenated into the SQL.
    // try-with-resources (Java 7+) closes rs, stat and conn even when a call throws.
    // NOTE(review): assumes DBUtil.getConnection() returns a java.sql.Connection — confirm.
    try (Connection conn = DBUtil.getConnection();
         PreparedStatement stat = conn.prepareStatement("select * from info where Date like ?")) {
        stat.setString(1, "%" + dateFilter + "%");
        try (ResultSet rs = stat.executeQuery()) {
            while (rs.next()) {
                out.print("<tr>");
                out.print("<td>" + esc(rs.getString("id")) + "</td>");
                out.print("<td>" + esc(rs.getString("Date")) + "</td>");
                out.print("<td>" + esc(rs.getString("Province")) + "</td>");
                out.print("<td>" + esc(rs.getString("City")) + "</td>");
                out.print("<td>" + esc(rs.getString("Confirmed_num")) + "</td>");
                out.print("<td>" + esc(rs.getString("Yisi_num")) + "</td>");
                out.print("<td>" + esc(rs.getString("Cured_num")) + "</td>");
                out.print("<td>" + esc(rs.getString("Dead_num")) + "</td>");
                out.print("</tr>");
            }
        }
    }
%>
</table>
<br>
</body>
</html>
Show.jsp
<%-- Show.jsp: search form that posts a date filter to cha.jsp. --%>
<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scripting, java.util imports, UTF-8 page encoding --%>
<%@ page import="java.sql.*"%> <%-- JDBC types (kept from the original page directives) --%>
<%@ page import="com.javao.msg.DBUtil"%>
<%
    request.setCharacterEncoding("UTF-8");  // decode request parameters as UTF-8
    response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<%--
    The original page opened a connection and ran "select * from info" on every
    request without ever reading the result. That query is removed: the page
    only renders the form below.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>疫情</title>
</head>
<body>
<br>
<form action="cha.jsp" method="post"> <%-- posts the Date field to cha.jsp --%>
<h2 align="center">按日期查询:
<input type="text" name="Date" value="" title="不能为空">
<input type="submit" value="查询"/>
<br>
</h2>
</form>
</body>
</html>
截图:
以上是关于05-数据爬取的主要内容,如果未能解决你的问题,请参考以下文章