数据爬取
Posted nyar
tags:
篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了数据爬取相关的知识，希望对你有一定的参考价值。
import json
import pymysql
import requests
from bs4 import BeautifulSoup
import datetime
# Crawl the per-country epidemic statistics embedded in the DXY page and
# store one row per country/region in the MySQL table ``world_map``.

# Page whose HTML embeds the statistics as inline JavaScript.
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'
# Browser-like User-Agent header sent with the request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (Khtml, like Gecko) '
        'Chrome/74.0.3729.131 Safari/537.36'
    ),
}
# id attribute of the <script> element that carries the worldwide data.
WORLD_SCRIPT_ID = 'getListByCountryTypeService2true'
# Keys read from every record, in the order of the INSERT placeholders that
# follow the leading crawl-date column.
WORLD_FIELDS = (
    'countryType', 'continents',
    'provinceId', 'provinceName',
    'provinceShortName', 'cityName',
    'currentConfirmedCount', 'confirmedCount',
    'suspectedCount', 'curedCount',
    'deadCount', 'locationId',
    'countryShortCode',
)
sql_world = "INSERT INTO world_map values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def extract_json_array(text):
    """Return the first JSON array embedded in *text*, as a string.

    The page wraps the data as ``try { window.<name> = [...]}catch(e){}``.
    Instead of slicing at fixed character offsets, the array is located by
    its opening bracket and its end is found by the JSON decoder, so the
    surrounding markup may change length without breaking extraction.

    Raises:
        ValueError: if *text* contains no ``[`` or the text starting there
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    start = text.find('[')
    if start == -1:
        raise ValueError('no JSON array found in script text')
    # raw_decode stops at the end of the first JSON document and reports
    # how many characters it consumed; trailing JavaScript is ignored.
    _, length = json.JSONDecoder().raw_decode(text[start:])
    return text[start:start + length]


def build_world_rows(records, crawl_date):
    """Turn the decoded records into a list of 14-column row tuples.

    Each row is ``crawl_date`` followed by the WORLD_FIELDS values of one
    record; a key missing from a record yields ``None`` for that column.
    """
    return [
        (crawl_date,) + tuple(record.get(field) for field in WORLD_FIELDS)
        for record in records
    ]


def fetch_world_rows():
    """Download the page and return the rows for today's crawl.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-success HTTP status.
        RuntimeError: if the expected <script> element is not in the page.
        ValueError: if the embedded payload cannot be extracted or parsed.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    content = response.content.decode('utf-8')
    soup = BeautifulSoup(content, 'html.parser')
    # Worldwide confirmed-case data lives in this inline script element.
    script_tag = soup.find(name='script', attrs={"id": WORLD_SCRIPT_ID})
    if script_tag is None:
        raise RuntimeError('script element %r not found in page' % WORLD_SCRIPT_ID)
    world_messages = extract_json_array(str(script_tag))
    print(world_messages)
    world_messages_json = json.loads(world_messages)
    now_time = datetime.datetime.now().strftime('%Y-%m-%d')
    return build_world_rows(world_messages_json, now_time)


def save_world_rows(rows):
    """Insert *rows* into ``world_map`` in one transaction.

    A database error is reported and rolled back rather than raised; the
    connection is closed in every case.
    """
    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(host="localhost", user="root", password="0000",
                         database="yiqing", charset='utf8')
    worldTuple = tuple(rows)
    print(worldTuple)
    try:
        with db.cursor() as cursor:
            cursor.executemany(sql_world, worldTuple)
        db.commit()
    except pymysql.MySQLError as exc:
        print('执行失败,进入回调2')
        print(exc)
        db.rollback()
    finally:
        db.close()


def main():
    """Fetch the current worldwide statistics and persist them."""
    save_world_rows(fetch_world_rows())


if __name__ == "__main__":
    main()
以上是关于数据爬取的主要内容，如果未能解决你的问题，请参考以下文章。