用 Python3 Requests 单线程爬取英雄联盟皮肤
Posted wxzbk
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用 Python3 Requests 单线程爬取英雄联盟皮肤的相关知识,希望对你有一定的参考价值。
和王者荣耀相类似
链接:https://www.cnblogs.com/wxzbk/p/10981098.html
"""Hero_LOL: download League of Legends champion skin images.

The script fetches the champion index (``champion.js``), extracts the
``keys`` mapping of hero id -> hero name from it, and then downloads the
skin images of every hero into the local ``LOL/`` directory.
"""
import ast
import os
import re
import threading

import requests

# Common prefix shared by every skin image URL.
SKIN_URL_PREFIX = "https://ossweb-img.qq.com/images/lol/web201310/skin/big"
# Directory the downloaded images are written to.
OUTPUT_DIR = "LOL"
# Upper bound on the skin indexes probed per hero (0..19).
MAX_SKINS_PER_HERO = 20
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
# Captures the object literal that sits between `keys":` and `,"data`.
KEYS_PATTERN = re.compile(r'keys":([\s\S]*?),"data', re.IGNORECASE)


def hero(hero_name, hero_num):
    """Download the skin images of every hero.

    Args:
        hero_name: Sequence of hero names, used to build the file names.
        hero_num: Sequence of hero ids (strings), parallel to ``hero_name``.

    For each hero, skin indexes are probed in ascending order starting at 0
    and probing stops at the first response whose status code is not 200.
    Images are saved as ``LOL/<hero name><skin index>.jpg``.
    """
    print(len(hero_num))
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for name, hero_id in zip(hero_name, hero_num):
        for sk_num in range(MAX_SKINS_PER_HERO):
            # The skin index is zero-padded to three digits regardless of how
            # long the hero id is. The original padded based on the hero id
            # length, which produced malformed URLs for two-digit hero ids
            # and for skin indexes >= 10.
            # NOTE(review): three-digit padding matches the form produced by
            # the original's `"00" + digit` branch - confirm against the CDN.
            skin_url = SKIN_URL_PREFIX + str(hero_id) + str(sk_num).zfill(3) + ".jpg"
            reply = requests.get(skin_url, timeout=REQUEST_TIMEOUT)
            if reply.status_code != 200:
                # First missing skin index: move on to the next hero.
                break
            filename = OUTPUT_DIR + "/" + str(name) + str(sk_num) + ".jpg"
            print("此时正在下载:" + filename)
            with open(filename, "wb") as f:
                f.write(reply.content)


def main():
    """Fetch the hero id -> name mapping and download all skins.

    Raises:
        ValueError: If the ``keys`` mapping cannot be located in the
            downloaded champion index.
    """
    # URL of the JS file that holds the champion index.
    hero_url = "https://lol.qq.com/biz/hero/champion.js"
    # User-Agent header so the request looks like it comes from a browser.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"
    }
    response = requests.get(hero_url, headers=header, timeout=REQUEST_TIMEOUT)
    # Decode as GBK so the text can be searched with the regex below.
    source = response.content.decode("GBK")

    found = KEYS_PATTERN.search(source)
    if found is None:
        raise ValueError("could not find the hero 'keys' mapping in " + hero_url)

    # SECURITY: this text comes from the network. The original passed it to
    # eval(), which would execute arbitrary code; ast.literal_eval only
    # accepts literals and yields the same dict for the same input.
    res = ast.literal_eval(found.group(1))

    hero_num = list(res.keys())     # hero ids
    hero_name = list(res.values())  # hero names

    # Run the download exactly once. The original called hero() directly and
    # then again in a thread, fetching every image twice.
    t1 = threading.Thread(target=hero, args=(hero_name, hero_num))
    t1.start()
    t1.join()


if __name__ == "__main__":
    main()
以上是关于用 Python3 Requests 单线程爬取英雄联盟皮肤的主要内容,如果未能解决你的问题,请参考以下文章: