python实例:从excel读取股票代码,爬取股票信息写到代码后面的单元格中
Posted 小贝书屋
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python实例:从excel读取股票代码,爬取股票信息写到代码后面的单元格中相关的知识,希望对你有一定的参考价值。
关键词:爬虫、python、request、接口、excel处理、正则
思路:
1、首先准备好excel文档,把股票代码事先编辑进去。
2、脚本读取文档,依次读出股票代码到指定站点发起请求获取股票信息
3、将获取的股票信息简单处理,依次写入到指定的文档单元格中,完成整个实例过程
用到的python库:xlrd(读取excel)、requests(获取网页数据)、lxml(处理网页数据)、openpyyxl(对excel进行写入编辑)、re(正则)
具体步骤:
一,导入相关库
import xlrd #引入读取excel库 import requests #倒入requests库 from lxml import etree #倒入lxml 库(没有这个库,pip install lxml安装) import os import sys import openpyxl import re
二,读取excel内的股票代码,写入数组(共后面的函数调用)
#读取excel文档内的股票代码 def code(): wb = xlrd.open_workbook(path+\'\\\\stock.xlsx\')# 打开Excel文件 data = wb.sheet_by_name(\'Sheet1\')#通过excel表格名称(rank)获取工作表 b=data.col_values(0)#获取第一列数据(数组) list=[] for c in b[1:]:#for循环,排除第一行数据 d=int(c) s="%06d" % d#股票代码一共有6位,常规打印无法打印出首位带0的代码的0部分,补齐缺失的0 #print(s) list.append(s) return(list) code=code()
三、循环读取股票代码查询股票信息,写入同一类数据的数组内(共后面写入excel)
#code函数获取的代码,循环爬取代码对应的股票数据,将股票数据写入对应的数组(同一类)中 def get(code): list_name=[]#股票名称 list_score=[]#综合评分 list_Short=[]#短期趋势 list_Metaphase=[]#中期趋势 list_Long=[]#长期趋势 list_comprehensive=[]#综合评判 list_day=[]#5日涨幅 list_mouth=[]#3个月涨幅 list_year=[]#1年涨幅 for num in code: url=\'http://stockpage.10jqka.com.cn/\'+num+\'/\' headers = { \'Accept-Encoding\': \'gzip, deflate\', \'Accept-Language\': \'zh-CN,zh;q=0.9\', \'Upgrade-Insecure-Requests\': \'1\', \'User-Agent\': \'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\', \'Accept\': \'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\', \'Referer\': \'http://doctor.10jqka.com.cn/603160/\', \'Connection\': \'keep-alive\', \'Cache-Control\': \'max-age=0\', } response = requests.get(url, headers=headers).text html = etree.HTML(response) b = html.xpath(\'//h1[@class = "m_logo fl"]/a/strong/text()\') #print(b[0])#股票名称 c = html.xpath(\'//span[@class = "analyze-tips mt7"]/text()\') #print(c[0])#综合评分 d = html.xpath(\'//div[@class = "analyze-txt fr"]/div/div[2]/text()\') #print("短期趋势:",d[0])#短期趋势 e = html.xpath(\'//div[@class = "analyze-txt fr"]/div[2]/div[2]/text()\') #print("中期趋势:",e[0])#中期趋势 f = html.xpath(\'//div[@class = "analyze-txt fr"]/div[3]/div[2]/text()\') #print("远期趋势:",f[0])#远期趋势 g = html.xpath(\'//div[@class = "txt-phra"]/text()\') h = html.xpath(\'//div[@class = "txt-phra"]/strong/text()\') i = html.xpath(\'//div[@class = "txt-phra"]/text()[2]\') #print(g[0],h[0],i[0])#综合评判 m=g[0]+h[0]+i[0] #j = html.xpath(\'//tr[@class = "even hot_cont"]/td[2]/text()\') #k = html.xpath(\'//tr[@class = "even hot_cont"]/td[3]/text()\') #l = html.xpath(\'//tr[@class = "even hot_cont"]/td[4]/text()\') tr_content = re.findall(\'<tr class="even hot_cont">(.*?)</tr\', response, re.S)[0] td_content = re.findall(\'<td.*?>(.*?)</td\', tr_content, re.S)#正则获取周期涨幅 #print("5日涨幅:",j[0])#5日涨幅 #print("3个月涨幅:",k[0])#3个月涨幅 #print("1年涨幅:",l[0])#1年涨幅 list_name.append(b[0])#股票名称数组 list_score.append(c[0])#综合评分 list_Short.append(d[0])#短期趋势 list_Metaphase.append(e[0])#中期趋势 list_Long.append(f[0])#长期趋势 list_comprehensive.append(m)#综合评判 list_day.append(td_content[1])#5日涨幅 list_mouth.append(td_content[2])#3个月涨幅 list_year.append(td_content[3])#1年涨幅 return(list_name,list_score,list_Short,list_Metaphase, list_Long,list_comprehensive,list_day,list_mouth,list_year) get=get(code)
四、将写入数组的股票数据,依次写入到对应股票代码后的单元格中
#读取get函数生成的股票数据,依次写入到excel文档中 xfile = openpyxl.load_workbook(path+\'\\\\stock.xlsx\')#加载文件 sheet1 = xfile.worksheets[0] #excel中单元格为B2开始,即第2列,第2行 for i in range(len(get[0])):#股票名称 sheet1.cell(i+2, 2).value=get[0][i] for i in range(len(baidu[0])):#当前价格 sheet1.cell(i+2, 3).value=baidu[0][i] for i in range(len(baidu[1])):#当前市值 sheet1.cell(i+2, 4).value=baidu[1][i] for i in range(len(get[1])):#综合评分 sheet1.cell(i+2, 5).value=get[1][i] for i in range(len(get[2])):#短期趋势 sheet1.cell(i+2, 6).value=get[2][i] for i in range(len(get[3])):#中期趋势 sheet1.cell(i+2, 7).value=get[3][i] for i in range(len(get[4])):#长期趋势 sheet1.cell(i+2, 8).value=get[4][i] for i in range(len(get[5])):#综合评判 sheet1.cell(i+2, 9).value=get[5][i] for i in range(len(get[6])):#5日涨幅 sheet1.cell(i+2, 10).value=get[6][i] for i in range(len(get[7])):#3个月涨幅 sheet1.cell(i+2, 11).value=get[7][i] for i in range(len(get[8])):#1年涨幅 sheet1.cell(i+2, 12).value=get[8][i] xfile.save(path+\'\\\\stock.xlsx\')
直接后的文档内容
全部代码
#本脚本主要实现循环爬取数据后: # 1、同一类数据统一写入到同一个数组中, # 2、读取数组数据写入指定的excel列中,实现最终数据爬取 import xlrd #引入读取excel库 import requests #倒入requests库 from lxml import etree #倒入lxml 库(没有这个库,pip install lxml安装) import os import sys import openpyxl import re path = os.path.abspath(os.path.dirname(sys.argv[0])) #读取excel文档内的股票代码 def code(): wb = xlrd.open_workbook(path+\'\\\\stock.xlsx\')# 打开Excel文件 data = wb.sheet_by_name(\'Sheet1\')#通过excel表格名称(rank)获取工作表 b=data.col_values(0)#获取第一列数据(数组) list=[] for c in b[1:]:#for循环,排除第一行数据 d=int(c) s="%06d" % d#股票代码一共有6位,常规打印无法打印出首位带0的代码的0部分,补齐缺失的0 #print(s) list.append(s) return(list) code=code() #code函数获取的代码,循环爬取代码对应的股票数据,将股票数据写入对应的数组(同一类)中 def get(code): list_name=[]#股票名称 list_score=[]#综合评分 list_Short=[]#短期趋势 list_Metaphase=[]#中期趋势 list_Long=[]#长期趋势 list_comprehensive=[]#综合评判 list_day=[]#5日涨幅 list_mouth=[]#3个月涨幅 list_year=[]#1年涨幅 for num in code: url=\'http://stockpage.10jqka.com.cn/\'+num+\'/\' headers = { \'Accept-Encoding\': \'gzip, deflate\', \'Accept-Language\': \'zh-CN,zh;q=0.9\', \'Upgrade-Insecure-Requests\': \'1\', \'User-Agent\': \'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\', \'Accept\': \'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\', \'Referer\': \'http://doctor.10jqka.com.cn/603160/\', \'Connection\': \'keep-alive\', \'Cache-Control\': \'max-age=0\', } response = requests.get(url, headers=headers).text html = etree.HTML(response) b = html.xpath(\'//h1[@class = "m_logo fl"]/a/strong/text()\') #print(b[0])#股票名称 c = html.xpath(\'//span[@class = "analyze-tips mt7"]/text()\') #print(c[0])#综合评分 d = html.xpath(\'//div[@class = "analyze-txt fr"]/div/div[2]/text()\') #print("短期趋势:",d[0])#短期趋势 e = html.xpath(\'//div[@class = "analyze-txt fr"]/div[2]/div[2]/text()\') #print("中期趋势:",e[0])#中期趋势 f = html.xpath(\'//div[@class = "analyze-txt fr"]/div[3]/div[2]/text()\') #print("远期趋势:",f[0])#远期趋势 g = html.xpath(\'//div[@class = "txt-phra"]/text()\') h = html.xpath(\'//div[@class = "txt-phra"]/strong/text()\') i = html.xpath(\'//div[@class = "txt-phra"]/text()[2]\') #print(g[0],h[0],i[0])#综合评判 m=g[0]+h[0]+i[0] #j = html.xpath(\'//tr[@class = "even hot_cont"]/td[2]/text()\') #k = html.xpath(\'//tr[@class = "even hot_cont"]/td[3]/text()\') #l = html.xpath(\'//tr[@class = "even hot_cont"]/td[4]/text()\') tr_content = re.findall(\'<tr class="even hot_cont">(.*?)</tr\', response, re.S)[0] td_content = re.findall(\'<td.*?>(.*?)</td\', tr_content, re.S)#正则获取周期涨幅 #print("5日涨幅:",j[0])#5日涨幅 #print("3个月涨幅:",k[0])#3个月涨幅 #print("1年涨幅:",l[0])#1年涨幅 list_name.append(b[0])#股票名称数组 list_score.append(c[0])#综合评分 list_Short.append(d[0])#短期趋势 list_Metaphase.append(e[0])#中期趋势 list_Long.append(f[0])#长期趋势 list_comprehensive.append(m)#综合评判 list_day.append(td_content[1])#5日涨幅 list_mouth.append(td_content[2])#3个月涨幅 list_year.append(td_content[3])#1年涨幅 return(list_name,list_score,list_Short,list_Metaphase, list_Long,list_comprehensive,list_day,list_mouth,list_year) get=get(code) def baidu(code): list_Price=[] list_market=[] for num in code: cookies = { \'BIDUPSID\': \'90EF3BD78F53BC8C96DF84CD3854CA2D\', \'PSTM\': \'1578233930\', \'BD_UPN\': \'12314753\', \'BAIDUID\': \'885754C8E6BD7B1A771802631815CC6D:FG=1\', \'BDORZ\': \'B490B5EBF6F3CD402E515D22BCDA1598\', \'BDUSS\': \'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV\', \'COOKIE_SESSION\': \'7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6\', \'cflag\': \'13%3A3\', \'BD_HOME\': \'1\', \'BDRCVFR[feWj1Vr5u3D]\': \'I67x6TjHwwYf0\', \'delPer\': \'0\', \'BD_CK_SAM\': \'1\', \'PSINO\': \'3\', \'H_PS_PSSID\': \'1438_21104_26350\', \'H_PS_645EC\': \'29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8\', \'BDSVRTM\': \'121\', \'WWW_ST\': \'1580466352318\', } headers = { \'is_xhr\': \'1\', \'Accept-Encoding\': \'gzip, deflate, br\', \'Accept-Language\': \'zh-CN,zh;q=0.9\', \'User-Agent\': \'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\', \'is_pbs\': num, \'Accept\': \'*/*\', \'Referer\': \'https://www.baidu.com/s?wd=\'+num+\'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=\'+num+\'&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935\', \'X-Requested-With\': \'XMLHttpRequest\', \'Connection\': \'keep-alive\', \'is_referer\': \'https://www.baidu.com/s?wd=\'+num+\'&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359\', } params = ( (\'ie\', [\'utf-8\', \'utf-8\']), (\'newi\', \'1\'), (\'mod\', \'1\'), (\'isbd\', \'1\'), (\'isid\', \'b379448d00013935\'), (\'wd\', num), (\'rsv_spt\', \'1\'), (\'rsv_iqid\', \'0xa5a17c8700013159\'), (\'issp\', \'1\'), (\'f\', \'8\'), (\'rsv_bp\', \'1\'), (\'rsv_idx\', \'2\'), (\'rqlang\', \'cn\'), (\'tn\', \'baiduhome_pg\'), (\'rsv_enter\', \'0\'), (\'rsv_dl\', \'tb\'), (\'oq\', num), (\'rsv_t\', \'29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8\'), (\'rsv_pq\', \'b379448d00013935\'), (\'bs\', num), (\'rsv_sid\', \'1438_21104_26350\'), (\'_ss\', \'1\'), (\'clist\', \'\'), (\'hsug\', \'\'), (\'f4s\', \'1\'), (\'csor\', \'6\'), (\'_cr1\', \'29647\'), ) response = requests.get(\'https://www.baidu.com/s\', headers=headers, params=params, cookies=cookies).text html = etree.HTML(response) a = html.xpath(\'//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()\') #print(\'当前价格:\',a[0])#当前价格 b = html.xpath(\'//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()\') #print(\'当前市值:\',b[0])#当前市值 list_Price.append(a[0])#当前价格 list_market.append(b[0])#当前市值 return(list_Price,list_market) baidu=baidu(code) #读取get函数生成的股票数据,依次写入到excel文档中 xfile = openpyxl.load_workbook(path+\'\\\\stock.xlsx\')#加载文件 sheet1 = xfile.worksheets[0] #excel中单元格为B2开始,即第2列,第2行 for i in range(len(get[0])):#股票名称 sheet1.cell(i+2, 2).value=get[0][i] for i in range(len(baidu[0])):#当前价格 sheet1.cell(i+2, 3).value=baidu[0][i] for i in range(len(baidu[1])):#当前市值 sheet1.cell(i+2, 4).value=baidu[1][i] for i in range(len(get[1])):#综合评分 sheet1.cell(i+2, 5).value=get[1][i] for i in range(len(get[2])):#短期趋势 sheet1.cell(i+2, 6).value=get[2][i] for i in range(len(get[3])):#中期趋势 sheet1.cell(i+2, 7).value=get[3][i] for i in range(len(get[4])):#长期趋势 sheet1.cell(i+2, 8).value=get[4][i] for i in range(len(get[5])):#综合评判 sheet1.cell(i+2, 9).value=get[5][i] for i in range(len(get[6])):#5日涨幅 sheet1.cell(i+2, 10).value=get[6][i] for i in range(len(get[7])):#3个月涨幅 sheet1.cell(i+2, 11).value=get[7][i] for i in range(len(get[8])):#1年涨幅 sheet1.cell(i+2, 12).value=get[8][i] xfile.save(path+\'\\\\stock.xlsx\') print("爬取完成")
以上是关于python实例:从excel读取股票代码,爬取股票信息写到代码后面的单元格中的主要内容,如果未能解决你的问题,请参考以下文章