Downloading Flow Data with Python

Posted by 健康一贴灵


Preface: this article was compiled by the editors at 小常识网 (cha138.com). It walks through a Python 3 script that logs into an ASP.NET site, queries product flow data for a date range, and saves the result as an Excel file; hopefully it is of some reference value to you.

# -*- coding: utf-8 -*-
import re
import http.cookiejar
import urllib.request
import urllib.parse
import time

import xlwt


def saveexcel(flow, filename, coding='gbk'):
    # flow: the data to write to Excel, as a nested list (rows of cells)
    # coding: encoding of the Excel sheet
    # (Defined here but not called below; step 5 writes the sheet inline.)
    try:
        workbook = xlwt.Workbook(encoding=coding)
        sheet = workbook.add_sheet('Sheet1')
        for row, rowdata in enumerate(flow):
            for col, val in enumerate(rowdata):
                sheet.write(row, col, val.strip(), style=xlwt.Style.default_style)
        excelname = '%s.xls' % filename
        workbook.save(excelname)
        return excelname

    except Exception as e:
        if hasattr(e, "code"):
            print('Excel write failed; reason: ' + str(e.code))
        if hasattr(e, "reason"):
            print('Excel write failed; reason: ' + str(e.reason))
        return None


# Extract a hidden form field's value from the page HTML
def getParm(data, parm):
    cer = re.compile('name="' + parm + '".*? value="(.*?)"', flags=0)
    strlist = cer.findall(data)

    if strlist:
        return strlist[0]
    else:
        return None

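# Example (illustrative input, not from the original post): on a typical
# ASP.NET hidden field rendered as
#   <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="dDwt..." />
# getParm(page, '__VIEWSTATE') returns the 'dDwt...' token, and None when the
# field is absent. The lazy '.*?' keeps the match inside a single input tag.
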
def getOpener():
    # Install an opener that keeps cookies across requests
    print('Setting up cookies...')
    cj = http.cookiejar.CookieJar()
    pro = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(pro, urllib.request.HTTPHandler)
    urllib.request.install_opener(opener)
    print('Cookies set up.')
    return opener

header = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

url = 'http://115.231.58.130:8021/Default.aspx'
header['Referer'] = 'http://115.231.58.130:8021/'
# 1. Set up cookie handling
opener = getOpener()

## 2. Fetch the login page and pull out its form fields
request = urllib.request.Request(url)
try:
    html = urllib.request.urlopen(request).read()
    page = html.decode('gbk')
    # Extract the ASP.NET hidden form parameters
    EVENTVALIDATION = getParm(page, '__EVENTVALIDATION')
    VIEWSTATEGENERATOR = getParm(page, '__VIEWSTATEGENERATOR')
    VIEWSTATE = getParm(page, '__VIEWSTATE')
    btnsubmit = getParm(page, 'sbtnSubmit')

except urllib.request.URLError as e:
    if hasattr(e, "code"):
        print('Page request failed; check your network settings. Reason: ' + str(e.code))
    if hasattr(e, "reason"):
        print('Page request failed; check your network settings. Reason: ' + str(e.reason))
# Form parameter extraction done

user_id = '***'   # login name (placeholder)
password = '***'  # password (placeholder)
postDict = {
    'LoginID': user_id,
    'Pwd': password,
    '__EVENTVALIDATION': EVENTVALIDATION,
    '__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
    '__VIEWSTATE': VIEWSTATE,
    'btnSubmit': btnsubmit
}
postData = urllib.parse.urlencode(postDict).encode(encoding='UTF8')

## 3. Log in
request = urllib.request.Request(url, postData, headers=header)
try:
    response = urllib.request.urlopen(request)
    data = response.read()
except urllib.request.URLError as e:
    if hasattr(e, "code"):
        print('Page load failed; check your network and account settings. Reason: ' + str(e.code))
    if hasattr(e, "reason"):
        print('Page load failed; check your network and account settings. Reason: ' + str(e.reason))

# Login finished
print('login:', data.decode('gbk'))

## 4. Open the product search page and extract its form fields

posturl = 'http://115.231.58.130:8021/Search/ProductFlow.aspx'
request = urllib.request.Request(posturl)
try:
    html = urllib.request.urlopen(request).read()
    page = html.decode('gbk')
    # Extract the ASP.NET hidden form parameters again
    EVENTVALIDATION = getParm(page, '__EVENTVALIDATION')
    VIEWSTATEGENERATOR = getParm(page, '__VIEWSTATEGENERATOR')
    VIEWSTATE = getParm(page, '__VIEWSTATE')
    #btnsearch = getParm(page, 'btnSearcht')
except urllib.request.URLError as e:
    if hasattr(e, "code"):
        print('Page request failed; check your network settings. Reason: ' + str(e.code))
    if hasattr(e, "reason"):
        print('Page request failed; check your network settings. Reason: ' + str(e.reason))

# Reuse the login form dict, adding the search form's fields
postDict['__EVENTTARGET'] = ''
postDict['__EVENTARGUMENT'] = ''
postDict['__EVENTVALIDATION'] = EVENTVALIDATION
postDict['__VIEWSTATEGENERATOR'] = VIEWSTATEGENERATOR
postDict['__VIEWSTATE'] = VIEWSTATE
postDict['PName'] = ''
postDict['PID'] = ''
postDict['txtStartDate'] = '2016-01-01'
postDict['txtEndDate'] = '2016-01-31'
postDict['ConvertToExcel.x'] = 6
postDict['ConvertToExcel.y'] = 9
postDict['btnSearch'] = ''
postData = urllib.parse.urlencode(postDict).encode(encoding='UTF8')

print('Search page fetched; downloading flow data...')

### Submit the search
request = urllib.request.Request(posturl, postData, headers=header)
try:
    response = urllib.request.urlopen(request)
    data = response.readlines()
except urllib.request.URLError as e:
    if hasattr(e, "code"):
        print('Page load failed; check your network and account settings. Reason: ' + str(e.code))
    if hasattr(e, "reason"):
        print('Page load failed; check your network and account settings. Reason: ' + str(e.reason))
print('Flow data fetched; saving to Excel...')
print('search:', data)
## 5. Save to Excel

workbook = xlwt.Workbook(encoding='gbk')
sheet = workbook.add_sheet('Sheet1')
for row, rowdata in enumerate(data):
    rowdata_list = rowdata.decode('gbk').split('\t')
    for col, val in enumerate(rowdata_list):
        sheet.write(row, col, val, style=xlwt.Style.default_style)

ntime = time.strftime('%Y%m%d%H%M%S')
excelname = ntime + '%s.xls' % '宁波宝瑞达'
workbook.save(excelname)
print('Excel exported; see the %s file in the program directory.' % excelname)
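
For comparison, below is a minimal sketch of the same login-then-search round trip built on the third-party requests package and the standard library's html.parser instead of urllib plus a regex. The URLs, dates, and form field names are taken from the script above; everything else (the HiddenFieldParser helper, the hidden_fields function, the empty btnSubmit value) is illustrative and has not been tested against the actual site.

from html.parser import HTMLParser

import requests


class HiddenFieldParser(HTMLParser):
    # Collect the name/value pairs of <input type="hidden"> elements
    def __init__(self):
        super().__init__()
        self.fields = {}

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == 'input' and attrs.get('type') == 'hidden':
            self.fields[attrs.get('name')] = attrs.get('value', '')


def hidden_fields(html_text):
    parser = HiddenFieldParser()
    parser.feed(html_text)
    return parser.fields


base = 'http://115.231.58.130:8021'
with requests.Session() as s:  # a Session re-sends cookies automatically
    login_page = s.get(base + '/Default.aspx')
    login_page.encoding = 'gbk'
    form = hidden_fields(login_page.text)
    # '***' placeholders as in the original; the btnSubmit value is a guess
    form.update({'LoginID': '***', 'Pwd': '***', 'btnSubmit': ''})
    s.post(base + '/Default.aspx', data=form)

    search_page = s.get(base + '/Search/ProductFlow.aspx')
    search_page.encoding = 'gbk'
    form = hidden_fields(search_page.text)
    form.update({
        'txtStartDate': '2016-01-01',
        'txtEndDate': '2016-01-31',
        'ConvertToExcel.x': '6',
        'ConvertToExcel.y': '9',
        'btnSearch': '',
    })
    result = s.post(base + '/Search/ProductFlow.aspx', data=form)
    result.encoding = 'gbk'
    print(result.text[:200])  # first 200 chars of the tab-separated export

A requests.Session replaces getOpener() since it carries the session cookie across calls, and a real HTML parser is less brittle than a regular expression when several input tags share a line or attributes change order.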

