用 Python 爬取学校所有人的四六级成绩时，发现爬下来的网页中文乱码
遂 Google
得到一解决方案
# -*- coding:utf8 -*- import urllib2 req = urllib2.Request("http://jwgl.hist.edu.cn/jwweb/jiaow/data46/search1.asp")
res = urllib2.urlopen(req)
html = res.read() res.close() html = unicode(html, "gb2312").encode("utf8") #gb2312--->utf-8
print html
但这并没有解决问题
开始继续试错
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2018-04-05 21:59 # @Author : [email protected] # @File : Test2.py # @Software: PyCharm import urllib2 import urllib import sys import chardet url = "http://jwgl.hist.edu.cn/jwweb/jiaow/data46/search1.asp " key = raw_input("请输入学号") formadate = { "ksh1":key, "Submit":"%C8%B7%B6%A8" } data = urllib.urlencode(formadate) request = urllib2.Request(url,data=data) RES = urllib2.urlopen(request).read() RES = RES.decode(‘gb2312‘).encode(‘utf-8‘) wfile=open(r‘./1.html‘,r‘wb‘) wfile.write(RES) wfile.close() print RES
成功