pppppp
Posted iceredtea
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了pppppp相关的知识,希望对你有一定的参考价值。
llll
# -*- coding: utf-8 -*-
"""Collect A-share reference data and store it in MySQL.

Pipeline (see the ``__main__`` section at the bottom):

1. ``Stockbasic``  - pull the listed-stock table from tushare pro and insert
   rows not seen before (de-duplicated through a Redis set).
2. ``Kplspider``   - fetch per-stock plate data from the Kaipanla PC API
   (currently disabled in ``__main__``).
3. ``Belongto``    - parse an iwencai CSV export into the industry table.
"""
import datetime
import hashlib
import json
import time

import pandas as pd
import pymysql
import requests
import tushare as ts
from redis import Redis
from sqlalchemy import create_engine

# Kaipanla PC endpoint and the browser-like header sent with every request.
KPL_URL = 'https://pchq.kaipanla.com/w1/api/index.php'
KPL_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'}
# Seconds to wait for Kaipanla before giving up; without a timeout a stalled
# connection would hang the whole crawl.
KPL_TIMEOUT = 10

# Default location of the iwencai export read by Belongto. Raw string: the
# backslashes are path separators, not escape sequences.
DEFAULT_IWENCAI_CSV = r'C:\Users\Red-tea-1919\Desktop\2019-12-26.csv'


# ==================== Stock list: stock_basic ====================
def Stockbasic(conn_mysql, conn_redis):
    """Fetch the listed-stock table and insert previously unseen stocks.

    Relies on the module-level ``pro`` tushare client created in the
    ``__main__`` section.

    Args:
        conn_mysql: open pymysql connection; rows go to ``stock_basic_data``.
        conn_redis: Redis client; the set ``stk_code`` holds the SHA-256 of
            every stock code that has already been handled.

    Returns:
        List of every ``ts_code`` returned by tushare (new or not).
    """
    # Counters for the summary printed at the end.
    success, fail, count = 0, 0, 0
    code_list = []

    # Placeholders let the driver escape the values (names containing quotes
    # used to break the hand-formatted statement).
    insert_sql = (
        'insert into stock_basic_data'
        '(stk_code,symbol,stk_name,city,exchange,list_date) '
        'values(%s,%s,%s,%s,%s,%s)'
    )

    stock_basic = pro.stock_basic(
        list_status='L',
        fields='ts_code,symbol,name,area,exchange,list_date',
    )

    # The context manager closes the cursor even if an iteration raises.
    with conn_mysql.cursor() as cursor:
        for _, row in stock_basic.iterrows():
            stk_code = row['ts_code']
            code_list.append(stk_code)
            count += 1

            # sadd returns 1 only when the hash was not in the set yet,
            # i.e. this stock has not been stored before.
            stk_code_id = hashlib.sha256(stk_code.encode()).hexdigest()
            if conn_redis.sadd('stk_code', stk_code_id) != 1:
                continue

            values = (
                stk_code,          # stock code with exchange suffix
                row['symbol'],     # stock code without suffix
                row['name'],       # stock name
                row['area'],       # city / region
                row['exchange'],   # exchange
                row['list_date'],  # listing date
            )
            # Missing values arrive as NaN, which MySQL cannot store; send NULL.
            params = tuple(None if pd.isna(v) else v for v in values)

            try:
                cursor.execute(insert_sql, params)
                conn_mysql.commit()
            except pymysql.MySQLError:
                conn_mysql.rollback()
                # Forget the hash so the row is retried on the next run
                # instead of being skipped forever.
                conn_redis.srem('stk_code', stk_code_id)
                fail += 1
            else:
                success += 1

    print('股票列表数据获取完毕,本次共请求数据:%d条' % count)
    print('股票列表数据获取完毕,新入库数据:%d条' % success)
    print('股票列表数据获取完毕,入库失败数据:%d条' % fail)
    return code_list


# ==================== Kaipanla crawler ====================
def Kplspider(code_list, cur_date, engine_mysql):
    """Fetch Kaipanla (PC) plate data for every stock and store its tag.

    Args:
        code_list: stock codes; only the first six characters are sent.
        cur_date: value sent as the ``Day`` form field.
        engine_mysql: SQLAlchemy engine; table ``stock_tag`` is replaced.

    Returns:
        List of raw response bodies, one per requested code.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError / ValueError: if a response is not JSON or lacks
            ``pankou.tag``.
    """
    html_list = []
    # One (stk_code, tag) pair per stock. Rows are accumulated here because
    # assigning to ``.loc[0]`` each time kept only the last stock.
    tag_rows = []

    for code in code_list:
        data = {
            'c': 'PCArrangeData',
            'a': 'GetHQPlate',
            'StockID': code[:6],
            'Day': cur_date,
            'SelType': '1, 2, 3, 8, 9, 5, 6, 7',
            'UserID': 399083,
            'Token': '71aef0e806e61ad3169ddc9473e37886',
        }
        html = requests.post(
            url=KPL_URL, data=data, headers=KPL_HEADERS, timeout=KPL_TIMEOUT
        ).text
        html_list.append(html)
        tag_rows.append((code, json.loads(html)["pankou"]["tag"]))

    tag_data = pd.DataFrame(tag_rows, columns=['stk_code', 'tag'])
    tag_data.to_sql('stock_tag', engine_mysql, if_exists='replace', index=False)
    print('开盘啦数据抓取完成,本次共抓取:%d条数据' % len(html_list))
    return html_list


# ==================== Industry / concept parsing ====================
def Belongto(engine_mysql, csv_path=DEFAULT_IWENCAI_CSV):
    """Parse the iwencai export and store each stock's industry levels.

    Args:
        engine_mysql: SQLAlchemy engine; table ``stock_bk`` is replaced.
        csv_path: GBK-encoded CSV with the columns ``股票代码``,
            ``所属同花顺行业`` (three levels joined by ``-``) and ``所属概念``
            (concepts joined by ``;``).

    Raises:
        ValueError: if an industry value has fewer than three levels.
    """
    stk_bel = pd.read_csv(csv_path, encoding='gbk')[
        ['股票代码', '所属同花顺行业', '所属概念']
    ]

    bk_rows = []
    concept_rows = []
    for _, row in stk_bel.iterrows():
        iwc_code = row['股票代码'][:6]
        # First, second and third industry level.
        yj_bk, ej_bk, sj_bk = row['所属同花顺行业'].split('-')[:3]
        bk_rows.append((iwc_code, yj_bk, ej_bk, sj_bk))
        concept_rows.extend(
            (iwc_code, concept) for concept in row['所属概念'].split(';')
        )

    bk_data = pd.DataFrame(
        bk_rows, columns=['stk_code', 'yj_bk', 'ej_bk', 'sj_bk']
    )
    # TODO: iwc_concept is parsed but not persisted. The original plan was to
    # merge it with the concepts from the Kaipanla responses
    # (json "stockplate" / "bid"."code"), drop duplicates and write the result
    # to table 'stock_concept'; that step stays disabled while Kplspider is
    # not being run.
    iwc_concept = pd.DataFrame(concept_rows, columns=['stk_code', 'concept'])

    bk_data.to_sql("stock_bk", engine_mysql, if_exists="replace", index=False)
    print(bk_data)
    print('股票所属概念、板块数据解析存储完成')


if __name__ == '__main__':
    # NOTE(review): the API token and database password below are hard-coded;
    # consider loading them from the environment or a config file.

    # tushare pro client; Stockbasic reads this module-level name.
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    # Today's date as YYYYMMDD.
    cur_date = time.strftime("%Y%m%d", time.localtime())
    conn_mysql = pymysql.connect(user='root', password='123456', database='stock', charset='utf8')
    try:
        conn_redis = Redis(host='127.0.0.1', port=6379)
        # Engine used by pandas to_sql. The driver is named explicitly so
        # SQLAlchemy uses PyMySQL, which this script already imports, instead
        # of defaulting to MySQLdb.
        engine_mysql = create_engine('mysql+pymysql://root:123456@127.0.0.1/stock?charset=utf8')

        # Refresh the stock list and get every code back.
        code_list = Stockbasic(conn_mysql, conn_redis)
        # Kaipanla crawl is currently disabled:
        # html_list = Kplspider(code_list, cur_date, engine_mysql)
        # Store the industry table parsed from the iwencai export.
        Belongto(engine_mysql)
    finally:
        conn_mysql.close()
以上是关于pppppp的主要内容,如果未能解决你的问题,请参考以下文章。