清华教授教你用Python获取全部基金前十大持仓股并进行选股分析
Posted 编程界的小胖子
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了清华教授教你用Python获取全部基金前十大持仓股并进行选股分析相关的知识,希望对你有一定的参考价值。
选股逻辑
股市有风险,投资需谨慎。
投资方向:跟随基金机构进行选股,简单来说,就是优先筛选那些基金公司重仓持有的股票。
目标设定
获取全部基金前十大持仓股的持股数、市值等信息,分析出排名比较靠前的基金重仓股。
爬取全部基金代码信息
注:使用 Python 标准库自带的 sqlite3 模块操作 SQLite 数据库
建表存储基金代码信息
-- All tracked targets. The updater script reads the rows whose
-- type = '基金' and uses `id` as the fund code.
--
-- NOTE: text columns are declared `text`, not `string`. SQLite gives a
-- column declared `string` NUMERIC affinity, so a code such as '000001'
-- would be silently stored as the integer 1 and lose its leading zeros.
-- `if not exists` means an already-created table keeps its old column types.
create table if not exists targets_all
(
    uid      text,
    name     text,
    id       text,
    category text,
    tag      text,
    type     text,
    -- Row creation time, filled in by SQLite using local time.
    version  datetime not null default (datetime('now', 'localtime'))
);
复制代码
根据基金代码获取相应持仓信息
建表存储持仓信息
-- Top-10 stock holdings of each fund, one row per (fund, stock, report date).
--
-- NOTE: code and name columns are declared `text`, not `string`. SQLite
-- gives a column declared `string` NUMERIC affinity, so a stock code such
-- as '000001' would be stored as the integer 1 and lose its leading zeros.
-- `if not exists` means an already-created table keeps its old column types.
create table if not exists top10_stocks_in_fund
(
    uid         text,
    fund_id     text,
    rank_num    integer,
    stock_id    text,
    stock_name  text,
    hold_rate   float,   -- stored as a fraction: the scraper divides the percentage by 100
    hold_count  float,
    hold_value  float,
    update_time text     -- report date label, e.g. '2021-06-30'
);
复制代码
爬取并存入数据库
# coding:utf-8
import logging
from collections import defaultdict
from bs4 import BeautifulSoup as BS
import requests
import uuid
from common.constant import CONN_KEY
from util import db_util
from common import global_var as gv
# Configure the root logger so that INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# Parse the raw text returned by the holdings endpoint.
def _parse_content(content):
    """Extract holdings rows, grouped by report date, from the raw payload.

    Args:
        content: Response text. The ``var apidata={`` prefix and the last two
            characters are dropped, and the HTML between ``content:"`` and
            ``",arryear:`` is parsed for table rows.

    Returns:
        A ``defaultdict(list)`` mapping the date text (read from the ``font``
        element that precedes each table) to a list of dicts with the keys
        ``rank_num``, ``stock_id``, ``stock_name`` (cell text, unchanged),
        ``hold_rate`` (percentage divided by 100), ``hold_count`` and
        ``hold_value`` (floats with thousands separators removed).

    Raises:
        ValueError: If a numeric cell does not contain a parsable number.
    """

    def _to_number(text):
        # Thousands separators and the percent sign are presentation only.
        # Plain float() replaces the former eval() call, which would have
        # executed arbitrary expressions found in the scraped page.
        return float(text.replace(",", "").replace("%", "").strip())

    # Cell indexes of (hold_rate, hold_count, hold_value), keyed by the number
    # of <td> cells in a row. Rows of any other width (e.g. header rows,
    # which have no <td> cells) are skipped.
    value_columns = {9: (6, 7, 8), 7: (4, 5, 6)}

    payload = content.replace("var apidata={", "")
    payload = payload[:-2]
    html = payload.split('",arryear:')[0].replace('content:"', "")
    # NOTE(review): no parser is named, so BeautifulSoup picks the best one
    # installed; left unchanged because the sibling navigation below depends
    # on the resulting tree.
    rows = BS(html).find_all("tr")

    holdings = defaultdict(list)
    for tr in rows:
        # The date label sits in a <font> element two siblings before the
        # element that wraps this row's table.
        date = tr.parent.parent.previous_sibling.previous_sibling.find("font").text
        cells = tr.find_all("td")
        columns = value_columns.get(len(cells))
        if columns is None:
            continue
        rate_idx, count_idx, value_idx = columns
        holdings[date].append({
            "rank_num": cells[0].text,
            "stock_id": cells[1].text,
            "stock_name": cells[2].text,
            "hold_rate": _to_number(cells[rate_idx].text) / 100,
            "hold_count": _to_number(cells[count_idx].text),
            "hold_value": _to_number(cells[value_idx].text),
        })
    return holdings
# Fetch a fund's top-10 stock holdings by fund code.
def get_top10_stocks_by_fund(fund_code, timeout=10):
    """Download and parse the top-10 holdings page of one fund.

    Args:
        fund_code: Fund code substituted into the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely, which would
            stall the whole update run on a single unresponsive request.

    Returns:
        Whatever ``_parse_content`` returns for the UTF-8 decoded response
        body.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    base_url = "https://fundf10.eastmoney.com/FundArchivesDatas.aspx?type=jjcc&code={fund_code}&topline=10"
    url = base_url.format(fund_code=fund_code)
    response = requests.get(url, timeout=timeout)
    return _parse_content(str(response.content, "utf-8"))
# Check whether a holding row for this fund/stock/date is already stored.
def _is_exist(conn, fund_id, stock_id, update_time):
    """Return True when ``top10_stocks_in_fund`` already has a matching row.

    Args:
        conn: Object exposing ``query(sql)`` whose result supports
            ``fetchone()``.
        fund_id: Fund code to match.
        stock_id: Stock code to match.
        update_time: Report date label to match.

    Returns:
        ``True`` if at least one row matches all three values, else ``False``.
    """

    def _quote(value):
        # The values come from scraped pages, so double any single quote to
        # keep them inside the SQL string literal.
        return "'" + str(value).replace("'", "''") + "'"

    query_sql = (
        "select 1 from top10_stocks_in_fund "
        f"where fund_id = {_quote(fund_id)} "
        f"and stock_id = {_quote(stock_id)} "
        f"and update_time = {_quote(update_time)}"
    )
    res = conn.query(query_sql)
    return bool(res.fetchone())
# Refresh the stored top-10 holdings for every fund.
def update_top10_stocks_by_fund():
    """Fetch each fund's top-10 holdings and insert the rows not yet stored.

    Reads every row of ``targets_all`` whose type is '基金', requests the
    holdings for its zero-padded six-digit code, and inserts each holding
    that ``_is_exist`` does not already find. Success and failure counts are
    logged at the end. A holding whose numeric fields cannot be parsed is
    logged and counted as a failed insert instead of producing broken SQL.
    """

    def _sql_text(value):
        # Scraped text may contain single quotes; double them so the value
        # stays inside its SQL string literal.
        return "'" + str(value).replace("'", "''") + "'"

    def _sql_number(value):
        # Accept floats as well as display text with thousands separators.
        return float(str(value).replace(",", ""))

    # Initialise the database connection pool.
    db_util.init_conn_pool()
    conn = gv.get_value(CONN_KEY)
    query_sql = "select * from targets_all where type = '基金'"
    targets = conn.query(query_sql)
    # Track how many rows were inserted and how many failed.
    success_count = 0
    failure_count = 0
    for (_uid, fund_name, fund_id, _category, _tag, _type, _version) in targets:
        # Restore leading zeros in case the code was stored as a number.
        fund_id = str(fund_id).zfill(6)
        logging.info(f"正在请求{fund_name}-{fund_id}的持仓股信息...")
        stocks_in_fund = get_top10_stocks_by_fund(fund_id)
        # One entry per report date.
        for update_time, stocks in stocks_in_fund.items():
            for stock in stocks:
                stock_id = stock["stock_id"]
                # Skip holdings that are already stored.
                if _is_exist(conn, fund_id, stock_id, update_time):
                    continue
                try:
                    rank_num = int(stock["rank_num"])
                    hold_rate = _sql_number(stock["hold_rate"])
                    hold_count = _sql_number(stock["hold_count"])
                    hold_value = _sql_number(stock["hold_value"])
                except (TypeError, ValueError):
                    logging.warning(f"插入失败:{stock}")
                    failure_count = failure_count + 1
                    continue
                # Insert the new holding.
                insert_sql = (
                    "insert into top10_stocks_in_fund"
                    "(uid,fund_id,rank_num,stock_id,stock_name,hold_rate,hold_count,hold_value,update_time) "
                    f"values({_sql_text(uuid.uuid4())},{_sql_text(fund_id)},{rank_num},"
                    f"{_sql_text(stock_id)},{_sql_text(stock['stock_name'])},{hold_rate},"
                    f"{hold_count},{hold_value},{_sql_text(update_time)})"
                )
                if conn.operate(insert_sql):
                    logging.info("插入成功!")
                    success_count = success_count + 1
                else:
                    logging.warning(f"插入失败:{insert_sql}")
                    failure_count = failure_count + 1
    logging.info(f"全部基金持仓信息更新完毕,成功:{success_count}条,失败:{failure_count}条。")
# Run the full holdings refresh when this file is executed as a script.
if __name__ == "__main__":
    update_top10_stocks_by_fund()
复制代码
分析第一/二季度基金排名靠前的重仓股(以下查询以第二季度末 2021-06-30 为例;分析第一季度时,将 update_time 改为对应的季度末日期即可,如 2021-03-31)
-- Rank stocks by the total market value that funds hold in them,
-- using the holdings stored for report date 2021-06-30.
SELECT
    stock_id                        "股票代码",
    stock_name                      "股票名称",
    CAST(SUM(hold_value) AS double) "基金持有总市值"
FROM top10_stocks_in_fund
WHERE update_time = '2021-06-30'
GROUP BY stock_id, stock_name
ORDER BY 3 DESC
复制代码
-- Rank stocks by the total number of shares that funds hold in them,
-- using the holdings stored for report date 2021-06-30.
SELECT
    stock_id                        "股票代码",
    stock_name                      "股票名称",
    CAST(SUM(hold_count) AS double) "基金持有数(万股)"
FROM top10_stocks_in_fund
WHERE update_time = '2021-06-30'
GROUP BY stock_id, stock_name
ORDER BY 3 DESC
复制代码
金融市场偏好能源消费、银行券商等行业的龙头股。深入研究持股数和市值的变化,可以为加仓、换股提供比较有意义的参考。(注:基金持仓信息大多在每季度末更新,距离更新时间越久,参考意义越小)
喜欢小胖子的赶紧关注小胖子哦~需要相关资料的可以扫一扫领取哦,相关数据分析大量资料分享
以上是关于清华教授教你用Python获取全部基金前十大持仓股并进行选股分析的主要内容,如果未能解决你的问题,请参考以下文章
今天清华大佬教你用Python爬虫,爬取腾讯视频评论,机会难得还不点击进来看看
# yyds干货盘点 # 手把手教你使用Python网络爬虫获取基金信息
python中使用squarify包可视化treemap图:使用treemap图可视化个人或者集体的股票基金的持仓结构(treemap with squarify package)