# Requirement: look up the weather for a region.
# Approach: step one, scrape all of the weather information from the page.

import functools
import json
import pickle
import re
from html.parser import HTMLParser
from urllib import request

# Browser-like User-Agent sent with every request.
_USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
               "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
# Page holding the forecast of one city; %s is the city code.
_WEATHER_URL_TEMPLATE = "http://www.weather.com.cn/weather1d/%s.shtml"
# Page listing "code=city name" pairs.
_CITY_CODE_URL = "http://doc.orz520.com/a/doc/2014/0322/2100581.html"
# Seconds to wait for the network before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10
# Start of the JavaScript assignment that carries the forecast JSON.
_HOUR3DATA_RE = re.compile(r"var\s+hour3data\s*=\s*")


# Parses the HTML of the China weather site.
class WeatherHtmlParser(HTMLParser):
    """Extract the value assigned to ``var hour3data`` inside a <script> tag.

    After feeding a page, ``weather_data`` holds the decoded JSON value, or
    None when no script contained the assignment.
    """

    def __init__(self):
        super().__init__()
        self.flag = False          # True while inside a <script> element
        self.weather_data = None   # decoded hour3data value, once found

    def handle_starttag(self, tag, attr):
        if tag == "script":
            self.flag = True

    def handle_endtag(self, tag):
        if tag == "script":
            self.flag = False

    def handle_data(self, data):
        """Decode the JSON literal that follows ``var hour3data=``.

        Raises:
            json.JSONDecodeError: if the text after the assignment is not
                valid JSON.
        """
        if not self.flag:
            return
        match = _HOUR3DATA_RE.search(data)
        if match is None:
            return
        # raw_decode parses exactly one JSON value and ignores what follows
        # (a trailing ";", further statements, whitespace). str.strip() with
        # the marker text would remove a *set of characters*, not a prefix.
        self.weather_data, _ = json.JSONDecoder().raw_decode(data, match.end())


# Nationwide city weather-forecast codes.
class CityCodeHtmlParser(HTMLParser):
    """Collect ``code=name`` text found in <p> elements or after <br> tags.

    ``city_dict`` maps each city name to its code.
    """

    def __init__(self):
        super().__init__()
        self.flag = False     # True while text should be inspected
        self.city_dict = {}   # city name -> city code

    def handle_starttag(self, tag, attr):
        if tag == "p" or tag == "br":
            self.flag = True

    def handle_endtag(self, tag):
        # Only </p> ends a text region. <br> is a void element: a
        # self-closing <br/> triggers start and end back to back, and
        # clearing the flag there would drop every later entry in the same
        # paragraph.
        if tag == "p":
            self.flag = False

    def handle_data(self, data):
        if not self.flag or "=" not in data:
            return
        code, name = (part.strip() for part in data.split("=")[:2])
        # Text nodes carry surrounding whitespace/newlines; without strip()
        # the stored name would never equal what the user types.
        if code and name:
            self.city_dict[name] = code


def _print_forecast_entries(entries):
    """Print one line per comma-separated forecast entry.

    Fields 2-5 are printed under the labels weather, temperature, wind
    direction and wind force; field 0 is the leading label of the line.
    Entries with fewer than six fields are skipped instead of raising
    IndexError.
    """
    for entry in entries:
        fields = entry.split(",")
        if len(fields) < 6:
            continue
        print("%s::天气:%s; 温度:%s; 风向:%s; 风力:%s" % (fields[0], fields[2], fields[3], fields[4], fields[5]))


def printWeatherInfo(func):
    """Decorator: call *func* and print the forecast dictionary it returns.

    *func* must return None (nothing is printed) or a mapping with the keys
    "1d" (a list of entries) and "7d" (a list of per-day entry lists). The
    wrapper always returns None.
    """
    @functools.wraps(func)
    def call(*args, **kwargs):
        info = func(*args, **kwargs)
        if info is None:
            return None

        # Weather within one day.
        _print_forecast_entries(info["1d"])

        # Weather for the coming 7 days.
        flag = input("是否打印未来7天内的天气:")
        if flag != "是":
            return None
        # Index 0 is skipped as in the original loop (presumably it is the
        # current day, already covered by "1d"). Slicing tolerates a list
        # with fewer than seven days.
        for day_entries in info["7d"][1:7]:
            _print_forecast_entries(day_entries)
        return None

    return call


def _fetch_html(url_address):
    """Download *url_address* and return the body decoded as UTF-8."""
    req = request.Request(url_address)
    req.add_header("User-Agent", _USER_AGENT)
    with request.urlopen(req, timeout=_REQUEST_TIMEOUT) as html:
        return html.read().decode("utf-8")


# Scrape the weather information.
@printWeatherInfo
def getAllWeather():
    """Ask for a city name and return its parsed forecast data.

    Returns None when the city is unknown or the page holds no forecast data.
    """
    city = input("请输入你要查询的城市:")
    city = queryCityCode(city.strip())
    if city is None:
        return None
    html_parser = WeatherHtmlParser()
    html_parser.feed(_fetch_html(_WEATHER_URL_TEMPLATE % city))
    html_parser.close()
    return html_parser.weather_data


# Look up the code of a city.
def queryCityCode(city_name):
    """Return the weather code for *city_name*, or None if it is not listed.

    NOTE: the code table is scraped from the web on every call; the original
    author intended to ship it as a local file but could not upload one to
    the blog platform.
    """
    html_parser = CityCodeHtmlParser()
    html_parser.feed(_fetch_html(_CITY_CODE_URL))
    html_parser.close()
    return html_parser.city_dict.get(city_name)


if __name__ == "__main__":
    getAllWeather()
    input()  # wait for Enter before exiting (presumably keeps the console open)