使用爬虫获取省份降雨情况,并生成json文件
Posted 及时行樂_
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用爬虫获取省份降雨情况,并生成json文件相关的知识,希望对你有一定的参考价值。
先上代码
import requests
import json
import re
# HTTP request headers: a desktop-browser User-Agent so weather.com.cn
# serves the normal page instead of rejecting the script as a bot.
# (The original post lost the dict braces during scraping; restored here.)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
}
def getCityInfo(url: str) -> list:
    """Fetch the hourly precipitation values from a weather.com.cn city page.

    Args:
        url: the city's forecast page URL
            (e.g. http://www.weather.com.cn/html/weather/101010100.shtml).

    Returns:
        A list of precipitation amounts as strings (the "od26" fields
        embedded in the page), reversed so they run from the nearest
        hour to the farthest — the page embeds them newest-last.
    """
    response = requests.get(url, headers=headers)
    # Use the detected encoding so the Chinese page body decodes correctly.
    response.encoding = response.apparent_encoding
    # NOTE(review): the original annotated the return as (float), but the
    # function returns a list of strings; callers convert with float().
    _precip = re.findall('"od26":"(.*?)"', response.text)
    _precip.reverse()
    return _precip
# Province names indexed by the 0-based loop id used in __main__ (0..33),
# in the exact order the original if/elif chain listed them.
_PROVINCE_BY_ID = dict(enumerate((
    '北京', '上海', '天津', '重庆', '黑龙江', '吉林', '辽宁', '内蒙古',
    '河北', '山西', '陕西', '山东', '新疆', '西藏', '青海', '甘肃',
    '宁夏', '河南', '江苏', '湖北', '浙江', '安徽', '福建', '江西',
    '湖南', '贵州', '四川', '广东', '云南', '广西', '海南', '香港',
    '澳门', '台湾',
)))


def getcityname(cityid):
    """Map a 0-based province index to its Chinese name.

    Args:
        cityid: integer index 0..33, matching the order in which the
            province capitals are crawled in __main__.

    Returns:
        The province name, or '未知' ("unknown") for any other id.
    """
    # dict.get mirrors the original `==` chain exactly (and no longer
    # shadows the builtin `str` as the original did).
    return _PROVINCE_BY_ID.get(cityid, '未知')
if __name__ == "__main__":
    # Accumulate one {"name": ..., "count": ...} record per province capital.
    city_dict = []
    for i in range(34):
        # City codes look like 101XX0101; consecutive province capitals
        # are 10000 apart.
        city_sign = 101010101 + i * 10000
        # The four municipalities (Beijing, Shanghai, Tianjin, Chongqing)
        # have codes ending in 00 instead of 01.
        if i < 4:
            city_sign -= 1
        url = 'http://www.weather.com.cn/html/weather/%s.shtml' % city_sign
        list_precip = getCityInfo(url)
        # Sum the 24 hourly precipitation values (strings -> floats).
        total = sum(float(s) for s in list_precip)
        # The original post lost the `{}` braces here during scraping.
        temp = {}
        # temp['cityid'] = city_sign
        temp['name'] = getcityname(i)
        temp['count'] = total
        city_dict.append(temp)
    print(city_dict)
    # Write the list as JSON: keep Chinese characters readable
    # (ensure_ascii=False) and indent for humans. Explicit utf-8 so the
    # non-ASCII names round-trip regardless of the platform's default
    # encoding (the original omitted it, which can fail on Windows).
    with open('./province.json', 'w', encoding='utf-8') as f:
        f.write(json.dumps(city_dict, ensure_ascii=False, indent=2))
# print(precip)
依次解读函数。第一个是getCityInfo函数,负责获取未来24小时的降雨信息;注意页面中嵌入的数据顺序是倒序的,需要先reverse一次,才能按“未来一小时、未来两小时……”的顺序依次查看。
第二个函数getcityname,负责通过省会的城市id找到对应的省份。
然后就主函数,就是循环34个省会,查找省会城市的降雨量。
注意,北京、天津、重庆、上海为直辖市,城市id结尾为00,其他省会的结尾为01
看看结果的json文件。
[
  {
    "name": "北京",
    "count": 0.0
  },
  {
    "name": "上海",
    "count": 0.0
  },
  {
    "name": "天津",
    "count": 0.0
  },
  {
    "name": "重庆",
    "count": 0.0
  },
  {
    "name": "黑龙江",
    "count": 0.0
  },
  {
    "name": "吉林",
    "count": 0.0
  },
  {
    "name": "辽宁",
    "count": 0.0
  },
  {
    "name": "内蒙古",
    "count": 0.0
  },
  {
    "name": "河北",
    "count": 0.0
  },
  {
    "name": "山西",
    "count": 0.0
  },
  {
    "name": "陕西",
    "count": 0.0
  },
  {
    "name": "山东",
    "count": 0.0
  },
  {
    "name": "新疆",
    "count": 0.0
  },
  {
    "name": "西藏",
    "count": 0.0
  },
  {
    "name": "青海",
    "count": 0.0
  },
  {
    "name": "甘肃",
    "count": 0.0
  },
  {
    "name": "宁夏",
    "count": 0.0
  },
  {
    "name": "河南",
    "count": 0.0
  },
  {
    "name": "江苏",
    "count": 0.0
  },
  {
    "name": "湖北",
    "count": 0.0
  },
  {
    "name": "浙江",
    "count": 0.0
  },
  {
    "name": "安徽",
    "count": 0.0
  },
  {
    "name": "福建",
    "count": 18.3
  },
  {
    "name": "江西",
    "count": 0.0
  },
  {
    "name": "湖南",
    "count": 0.0
  },
  {
    "name": "贵州",
    "count": 0.0
  },
  {
    "name": "四川",
    "count": 0.0
  },
  {
    "name": "广东",
    "count": 0.0
  },
  {
    "name": "云南",
    "count": 0.0
  },
  {
    "name": "广西",
    "count": 0.0
  },
  {
    "name": "海南",
    "count": 0.0
  },
  {
    "name": "香港",
    "count": 0.0
  },
  {
    "name": "澳门",
    "count": 0.0
  },
  {
    "name": "台湾",
    "count": 0.0
  }
]
以上是关于使用爬虫获取省份降雨情况,并生成json文件的主要内容,如果未能解决你的问题,请参考以下文章