python爬虫实例王者英雄资料爬取
Posted 是璇子鸭
tags:
篇首语:本文由小常识网(cha138.com)小编整理,主要介绍 Python 爬虫实例——王者英雄资料爬取的相关知识,希望对你有一定的参考价值。
常规爬取
import json
import os
from urllib import request

import requests

# Sequential crawler: fetch the hero list, then download every skin image
# one after another into the local 'heroes' directory.
# (Author's note: js/png/css/gif/jpg requests are excluded when looking for
# the data endpoint.)
url = 'https://pvp.qq.com/web201605/js/herolist.json'  # lists every hero
res = requests.get(url, timeout=10)  # timeout so a stalled server cannot hang the script
res.raise_for_status()  # fail on an HTTP error instead of parsing an error page
# Equivalent to json.loads(res.text).
data_dict_list = res.json()  # decode the JSON payload into Python objects
print(data_dict_list)

# urlretrieve does not create missing directories, so make sure the target exists.
os.makedirs('heroes', exist_ok=True)

for data in data_dict_list:
    print('英雄:', data['cname'])  # hero name
    if 'skin_name' in data:
        skin_name = data['skin_name']
        print('皮肤名:', skin_name)
        # The number of skins is taken as the number of '|' separators plus one.
        skin_num = skin_name.count('|') + 1
        ename = data['ename']
        # Build one image URL per skin; skin indices start at 1.
        for x in range(1, skin_num + 1):
            href = f'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{ename}/{ename}-bigskin-{x}.jpg'
            print(href)
            request.urlretrieve(href, 'heroes/' + data['cname'] + '_' + str(x) + '.jpg')
    else:
        # The entry carries no 'skin_name' key, so no image URL can be built.
        print('json文件不完整')
多线程爬取
使用最多 10 个工作线程的线程池并发下载(相当于 10 位员工同时干活)
import requests
import json
from urllib import request
from concurrent.futures.thread import ThreadPoolExecutor
# url https://pvp.qq.com/web201605/js/herolist.json
# https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/变化/变化-bigskin-变化.jpg
# js png css gif jpg 排除掉
def download_image(url, name, index=None):
    """Download one skin image to ``heros/<name>_<index>.jpg``.

    Args:
        url: Address of the image to download.
        name: Hero name used as the file-name prefix.
        index: Skin number used as the file-name suffix. When omitted it is
            parsed from the ``-bigskin-<n>.jpg`` part of ``url``.

    Raises:
        ValueError: If ``index`` is omitted and ``url`` carries no skin number.
    """
    import os
    import re

    if index is None:
        # Take the index from the URL itself rather than from a loop variable
        # of the caller: this function runs later on a worker thread, by which
        # time such a variable may already hold a different value.
        match = re.search(r'-bigskin-(\d+)\.jpg', url)
        if match is None:
            raise ValueError('cannot determine skin index from url: ' + url)
        index = match.group(1)

    # urlretrieve does not create missing directories.
    os.makedirs('heros', exist_ok=True)
    request.urlretrieve(url, 'heros/' + name + '_' + str(index) + '.jpg')
# Concurrent crawler: fetch the hero list, then hand every skin download to a
# pool of at most 10 worker threads.
with ThreadPoolExecutor(max_workers=10) as pool:
    url = 'https://pvp.qq.com/web201605/js/herolist.json'  # lists every hero
    res = requests.get(url, timeout=10)  # timeout so a stalled server cannot hang the script
    res.raise_for_status()  # fail on an HTTP error instead of parsing an error page
    # Equivalent to json.loads(res.text).
    data_dict_list = res.json()  # decode the JSON payload into Python objects
    print(data_dict_list)

    futures = []
    for data in data_dict_list:
        print('英雄:', data['cname'])  # hero name
        if 'skin_name' in data:
            skin_name = data['skin_name']
            print('皮肤名:', skin_name)
            # The number of skins is taken as the number of '|' separators plus one.
            skin_num = skin_name.count('|') + 1
            ename = data['ename']
            # Build one image URL per skin; skin indices start at 1.
            for x in range(1, skin_num + 1):
                href = f'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{ename}/{ename}-bigskin-{x}.jpg'
                # Queue the download; a free worker thread picks it up.
                futures.append(pool.submit(download_image, href, data['cname']))
        else:
            # The entry carries no 'skin_name' key, so no image URL can be built.
            print('json文件不完整')

    # An exception raised inside a worker is stored on its future and would
    # otherwise vanish silently; report each failed download.
    for future in futures:
        error = future.exception()  # blocks until that task has finished
        if error is not None:
            print('下载失败:', error)
附:斗鱼小姐姐资料爬取
import requests
import threading
def get_douyu(url, save_dir=r'C:\\Users\\JSJSYS\\PycharmProjects\\untitled\\douyu'):
    """Download the image of every room listed on one directory page.

    The JSON returned by ``url`` is expected to hold a list under
    ``['data']['rl']``. For each entry, ``'nn'`` is used as the file name and
    ``'rs16'`` as the image address.

    Args:
        url: Address of the directory page (JSON API).
        save_dir: Directory the ``<name>.jpg`` files are written to; it is
            created when missing. The default keeps the original hard-coded
            location.

    Raises:
        requests.RequestException: If the directory page cannot be fetched.
    """
    import os

    res = requests.get(url, timeout=10)  # timeout so a stalled server cannot hang the thread
    res.raise_for_status()  # fail on an HTTP error instead of parsing an error page
    rl_list = res.json()['data']['rl']

    os.makedirs(save_dir, exist_ok=True)
    for zb in rl_list:
        name = zb.get('nn')
        imageUrl = zb.get('rs16')
        if not imageUrl:
            # Without an image address there is nothing to download.
            continue
        imagePath = os.path.join(save_dir, '%s.jpg' % name)
        try:
            # Request the image itself.
            image_res = requests.get(imageUrl, timeout=10)
            image_res.raise_for_status()
        except requests.RequestException as err:
            # One broken image must not abort the rest of the page.
            print('下载失败:', imageUrl, err)
            continue
        # The response body is the raw image bytes; leaving the with-block
        # flushes and closes the file.
        with open(imagePath, 'wb') as fp:
            fp.write(image_res.content)
# Crawl directory pages 0-5 concurrently, one thread per page.
t_list = []
for x in range(0, 6):
    url = 'https://www.douyu.com/gapi/rkc/directory/2_201/%d' % x
    # The page is processed only by its worker thread. Calling get_douyu(url)
    # directly here as well would download every page twice and make the loop
    # wait for each page before starting the next thread.
    t1 = threading.Thread(target=get_douyu, args=(url,))  # assign the task
    t1.start()  # start working
    t_list.append(t1)  # remember the thread so it can be joined below
# Wait until every worker thread has finished.
for t in t_list:
    t.join()
以上是关于python爬虫实例王者英雄资料爬取的主要内容,如果未能解决你的问题,请参考以下文章
python爬虫-20行代码爬取王者荣耀所有英雄图片,小白也轻轻松松
python爬虫-20行代码爬取王者荣耀所有英雄图片,小白也轻轻松松