05-数据爬取

Posted --lzx1--

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了05-数据爬取相关的知识,希望对你有一定的参考价值。

数据爬取

代码:

Yiqing.py

from os import path

import requests

from bs4 import BeautifulSoup

import json

import pymysql

 

import time

from _ast import Try

 

# Scrape the DXY COVID-19 page and store per-province and per-city case
# counts in the MySQL table `info_copy`, stamped with the local time of the run.

url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request URL

# Browser-like User-Agent header sent with the request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}


def _embedded_json(soup, script_id):
    """Return the JSON array embedded in the ``<script id=script_id>`` tag.

    The array is located by its outermost square brackets instead of by
    fixed character offsets, so small changes to the text around it do not
    break parsing.
    NOTE(review): presumably the page wraps the array in a JavaScript
    assignment; confirm against the live page source.

    Raises:
        ValueError: if the tag is missing or contains no ``[...]`` span.
    """
    tag = soup.find(name='script', attrs={'id': script_id})
    if tag is None:
        raise ValueError('script tag not found: ' + script_id)
    text = tag.string or ''
    start = text.find('[')
    end = text.rfind(']')
    if start == -1 or end < start:
        raise ValueError('no JSON array found in script tag: ' + script_id)
    return json.loads(text[start:end + 1])


# Fetch the page; fail fast on network stalls and on HTTP error statuses.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
content = response.content.decode('utf-8')
soup = BeautifulSoup(content, 'html.parser')

# Domestic (per-province) statistics.
messages_json = _embedded_json(soup, 'getAreaStat')
# Worldwide statistics: parsed here but not written to the database below.
world_messages_json = _embedded_json(soup, 'getListByCountryTypeService2')

# One timestamp shared by every row inserted in this run.
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print(timestamp)

valuesList = []  # province rows
cityList = []    # city rows
for province in messages_json:
    province_name = province.get('provinceName')
    valuesList.append((
        province_name,
        province.get('confirmedCount'),
        province.get('curedCount'),
        province.get('deadCount'),
        province.get('locationId'),
        timestamp,
    ))
    # Tolerate a province entry that has no `cities` key.
    for city in province.get('cities') or []:
        cityList.append((
            province_name,
            city.get('cityName'),
            city.get('confirmedCount'),
            city.get('curedCount'),
            city.get('deadCount'),
            city.get('locationId'),
            timestamp,
        ))

# The timestamp is bound as a parameter like every other value rather than
# being concatenated into the SQL text.
sql_city = "insert into info_copy (Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s,%s)"
sql_province = "insert into info_copy (Province,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s)"

# Keyword arguments: positional connection arguments are not accepted by
# current PyMySQL releases.
# NOTE(review): credentials are hard-coded; consider loading them from
# configuration or the environment.
db = pymysql.connect(host="localhost", user="root", password="123456",
                     database="payiqing", charset='utf8')
try:
    with db.cursor() as cursor:
        cursor.executemany(sql_province, valuesList)
        cursor.executemany(sql_city, cityList)
    db.commit()
except pymysql.MySQLError as exc:
    # Both inserts succeed or neither does.
    print('执行失败,进入回调4', exc)
    db.rollback()
finally:
    db.close()

Main.jsp

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Frame layout of the application:
    - top row (15% of the height) loads top.jsp;
    - the remaining row is split into a left column (12% of the width,
      main_left.jsp) and a right column (main_right.jsp).
  The right frame is named "main_right"; presumably links in the other
  frames target it - confirm in main_left.jsp.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>疫情</title>
</head>
<frameset rows="15%,*">
<frame src="top.jsp">
<frameset cols="12%,*">
<frame src="main_left.jsp">
<frame src="main_right.jsp" name="main_right">
</frameset>
</frameset>
<%-- A frameset document has no body of its own; fallback content goes in noframes. --%>
<noframes>
<body>

</body>
</noframes>
</html>

Cha.jsp

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scriptlets, java.util imports, page source encoded as UTF-8 --%>
<%@ page import="java.sql.*"%> <%-- JDBC classes --%>
<%@page import="com.javao.msg.DBUtil"%>
<%--
  Shows a fixed bar chart of case counts per province and, below it, every
  row of table info_copy whose Date column contains the "Date" request
  parameter.
--%>
<%!
    // Escape a value for inclusion in HTML text; null becomes an empty string.
    private static String esc(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                case '\'': sb.append("&#39;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<%
request.setCharacterEncoding("UTF-8"); // decode request parameters as UTF-8
response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<%
String path = request.getContextPath(); // context path of this web application
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; // absolute base URL: scheme://host:port/context/
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <%-- document type declaration --%>
<html>
  <head>
    <base href="<%=basePath%>"> <%-- relative URLs on this page resolve against basePath --%>
    <title>疫情</title> <%-- page title --%>
    <script src="js/echarts.min.js"></script>
  </head>
  <body>

   <div id="main" style="width: 600px;height:400px;"></div>
    <script type="text/javascript">
        // Initialise the ECharts instance on the prepared DOM node.
        var myChart = echarts.init(document.getElementById('main'));

        // Chart options and data (values are hard-coded, not read from the database).
        var option = {
            title: {
                text: '疫情情况'
            },
            tooltip: {},
            legend: {
                data:['人数']
            },
            xAxis: {
                data: ["湖北省","广东省","浙江省","河南省","安徽省","江西省","江苏省","重庆市","山东省","四川省","北京市","黑龙江省","上海市","河北省","陕西省","广西壮族自治区","云南省","海南省","山西省","辽宁省","贵州省","天津市","甘肃省","吉林省","内蒙古自治区","宁夏回族自治区","新疆维吾尔自治区","青海省","西藏自治区"]
            },
            yAxis: {},
            series: [{
                name: '人数',
                type: 'bar',
                data: [67786,1356,1215,1273,990,935,631,576,760,539,536,482,346,318,245,252,174,168,133,125,146,136,127,93,75,75,76,18,1]
            }]
        };

        // Render the chart with the options defined above.
        myChart.setOption(option);
    </script>
    <%
    // A missing parameter is treated as an empty filter instead of searching
    // for the literal text "null".
    String dateFilter = request.getParameter("Date");
    if (dateFilter == null) {
        dateFilter = "";
    }
    Connection conn = null;
    PreparedStatement stat = null;
    ResultSet rs = null;
    try {
        conn = DBUtil.getConnection();
        // The user-supplied filter is bound as a parameter, never concatenated
        // into the SQL text.
        stat = conn.prepareStatement("select * from info_copy where Date like ?");
        stat.setString(1, "%" + dateFilter + "%");
        rs = stat.executeQuery();
    %>
    <br>
    <div align="center">
<h1 style="font-family:KaiTi;color:OrangeRed">信息如下</h1>
</div>

    <br>
     <table align="center" width="1000" border="100" cellSpacing=1 style="font-size:15pt;border:dashed 1pt"> <%-- result table, 1000 wide --%>
    <tr>
    <td width="600">日期</td>
    <td width="300">省份</td>
     <td width="300">城市</td>
    <td width="400">总确诊数</td>

    <td width="400">治愈病例</td>
    <td width="400">死亡病例</td>

    </tr>
    <%
        // One table row per matching record; values are HTML-escaped.
        while (rs.next()) {
            out.print("<tr>");
            out.print("<td>" + esc(rs.getString("Date")) + "</td>");          // date
            out.print("<td>" + esc(rs.getString("Province")) + "</td>");      // province
            out.print("<td>" + esc(rs.getString("City")) + "</td>");          // city
            out.print("<td>" + esc(rs.getString("Confirmed_num")) + "</td>"); // confirmed cases
            out.print("<td>" + esc(rs.getString("Cured_num")) + "</td>");     // cured cases
            out.print("<td>" + esc(rs.getString("Dead_num")) + "</td>");      // deaths
            out.print("</tr>");
        }
    %>
    </table>
    <br>

    <%
    } finally {
        // Release JDBC resources even if the query or rendering failed. A
        // failure while closing is ignored so it cannot mask the original error.
        if (rs != null) {
            try { rs.close(); } catch (SQLException ignored) { }
        }
        if (stat != null) {
            try { stat.close(); } catch (SQLException ignored) { }
        }
        if (conn != null) {
            try { conn.close(); } catch (SQLException ignored) { }
        }
    }
    %>
  </body>
</html>

Cha1.jsp

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scriptlets, java.util imports, page source encoded as UTF-8 --%>
<%@ page import="java.sql.*"%> <%-- JDBC classes --%>
<%@page import="com.javao.msg.DBUtil"%>
<%--
  Lists every row of table info whose Date column contains the "Date"
  request parameter.
--%>
<%!
    // Escape a value for inclusion in HTML text; null becomes an empty string.
    private static String esc(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                case '\'': sb.append("&#39;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<%
request.setCharacterEncoding("UTF-8"); // decode request parameters as UTF-8
response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<%
String path = request.getContextPath(); // context path of this web application
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; // absolute base URL: scheme://host:port/context/
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <%-- document type declaration --%>
<html>
  <head>
    <base href="<%=basePath%>"> <%-- relative URLs on this page resolve against basePath --%>
    <title>按日期查询</title> <%-- page title --%>
  </head>
  <body>
    <%
    // A missing parameter is treated as an empty filter instead of searching
    // for the literal text "null".
    String dateFilter = request.getParameter("Date");
    if (dateFilter == null) {
        dateFilter = "";
    }
    Connection conn = null;
    PreparedStatement stat = null;
    ResultSet rs = null;
    try {
        conn = DBUtil.getConnection();
        // The user-supplied filter is bound as a parameter, never concatenated
        // into the SQL text.
        stat = conn.prepareStatement("select * from info where Date like ?");
        stat.setString(1, "%" + dateFilter + "%");
        rs = stat.executeQuery();
    %>
    <br>
    <div align="center">
<h1 style="font-family:KaiTi;color:OrangeRed">符合条件的信息</h1>
</div>
        <hr noshade>
    <br>
     <table align="center" width="1000" border="100" cellSpacing=1 style="font-size:15pt;border:dashed 1pt"> <%-- result table, 1000 wide --%>
    <tr>
    <td width="110">序号</td>
    <td width="600">日期</td>
    <td width="300">省份</td>
     <td width="300">城市</td>
    <td width="400">总确诊数</td>
    <td width="211">疑似病例</td>
    <td width="400">治愈病例</td>
    <td width="400">死亡病例</td>

    </tr>
    <%
        // One table row per matching record; values are HTML-escaped.
        while (rs.next()) {
            out.print("<tr>");
            out.print("<td>" + esc(rs.getString("id")) + "</td>");            // record id
            out.print("<td>" + esc(rs.getString("Date")) + "</td>");          // date
            out.print("<td>" + esc(rs.getString("Province")) + "</td>");      // province
            out.print("<td>" + esc(rs.getString("City")) + "</td>");          // city
            out.print("<td>" + esc(rs.getString("Confirmed_num")) + "</td>"); // confirmed cases
            out.print("<td>" + esc(rs.getString("Yisi_num")) + "</td>");      // suspected cases
            out.print("<td>" + esc(rs.getString("Cured_num")) + "</td>");     // cured cases
            out.print("<td>" + esc(rs.getString("Dead_num")) + "</td>");      // deaths
            out.print("</tr>");
        }
    %>
    </table>
    <br>

    <%
    } finally {
        // Release JDBC resources even if the query or rendering failed. A
        // failure while closing is ignored so it cannot mask the original error.
        if (rs != null) {
            try { rs.close(); } catch (SQLException ignored) { }
        }
        if (stat != null) {
            try { stat.close(); } catch (SQLException ignored) { }
        }
        if (conn != null) {
            try { conn.close(); } catch (SQLException ignored) { }
        }
    }
    %>
  </body>
</html>

Show.jsp

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <%-- Java scriptlets, java.util imports, page source encoded as UTF-8 --%>
<%@ page import="java.sql.*"%> <%-- JDBC classes --%>
<%@page import="com.javao.msg.DBUtil"%>
<%--
  Search form: posts the entered date text as parameter "Date" to cha.jsp.
  This page renders no database data, so it opens no connection and runs
  no query.
--%>
<%
request.setCharacterEncoding("UTF-8"); // decode request parameters as UTF-8
response.setCharacterEncoding("UTF-8"); // encode the response as UTF-8
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <%-- document type declaration --%>
<html>
  <head>
    <title>疫情</title> <%-- page title --%>

  </head>
  <body>

    <br>

   <%-- NOTE(review): the target is spelled "cha.jsp"; confirm it matches the deployed file name on case-sensitive file systems. --%>
   <form action="cha.jsp" method="post"> <%-- submit the form to cha.jsp via POST --%>
    <h2 align="center">按日期查询:
    <input type="text" name="Date"  value="" title="不能为空" >
    <input type="submit" value="查询"/>
    <br>
    </h2>
    </form>

  </body>
</html>

截图:

 

 

 

 

以上是关于05-数据爬取的主要内容,如果未能解决你的问题,请参考以下文章

Python 爬取 热词并进行分类数据分析-[简单准备] (2020年寒假小目标05)

2020学习05 爬虫,修改了一些bug

05 爬取华为官网VMALL的手机评论

05,Python网络爬虫之三种数据解析方式

通过websocket爬取数据的SSL验证问题

爬虫学习 ----- 第二章 爬取静态网站 ---------- 05. 防盗链,爬取梨视频之 referer XHR