网页爬虫---音乐
Posted sheshidu
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了网页爬虫---音乐相关的知识,希望对你有一定的参考价值。
import requests
import time
import re
import os
"""歌手字典"""
# Shared registry filled in by the scraper functions: singer name -> singer page URL.
song_dict = dict()
def song_static():
    """Collect singer names and page URLs from the static singer index page.

    Requests ``http://www.9ku.com/music/T_Singer.htm``, extracts every
    ``<a href="..." class="t-t">`` link with a regular expression and stores
    ``singer name -> absolute URL`` in the module-level ``song_dict``.

    Timeouts and HTTP error statuses are printed instead of raised, so the
    entries already present in ``song_dict`` are kept.

    Returns:
        The shared ``song_dict`` mapping (the same object on every call).
    """
    link_pattern = r'<a href="(.*?)" class="t-t">(.*?)</a>'
    try:
        response = requests.get('http://www.9ku.com/music/T_Singer.htm', timeout=30)
        # requests only raises HTTPError when asked to; without this call the
        # HTTPError handler below is unreachable and an error page would be
        # parsed as if it were the singer index.
        response.raise_for_status()
        for href, singer in re.findall(link_pattern, response.text):
            # hrefs are joined onto the site root to form absolute URLs.
            song_dict[singer] = 'http://www.9ku.com' + href
    except requests.exceptions.Timeout as e:
        print(e)
    except requests.exceptions.HTTPError as e:
        print(e)
    return song_dict
# Singer URL collection from the paginated ("dynamic") listing
def song_List():
    """Collect singer names and page URLs from the paginated singer listing.

    Requests ``http://www.9ku.com/geshou/all-all-all/<page>.htm`` for
    page = 2, 3, 4, ... and stores ``singer name -> absolute URL`` in the
    module-level ``song_dict``. Paging stops at the first page that yields no
    singer links or that answers 404.

    Timeouts and HTTP error statuses are printed instead of raised, so the
    singers collected before the failure are kept.

    NOTE(review): page 1 is never requested here -- confirm that this is
    intentional (e.g. covered by another scraper).

    Returns:
        The shared ``song_dict`` mapping (the same object on every call).
    """
    link_pattern = r'<a href="(.*?)" class="t-t">(.*?)</a>'
    page = 2
    print('数据采集中......')
    try:
        while True:
            print('正在采集第{}页数据'.format(page))
            response = requests.get(
                "http://www.9ku.com/geshou/all-all-all/{}.htm".format(page),
                timeout=30,
            )
            # A missing page is treated as the normal end of the listing so
            # that the status check below does not report it as a failure.
            if response.status_code == 404:
                break
            # requests only raises HTTPError when asked to; without this call
            # the HTTPError handler below is unreachable.
            response.raise_for_status()
            links = re.findall(link_pattern, response.text)
            if not links:
                # A page without singer links marks the end of the listing.
                break
            for href, singer in links:
                song_dict[singer] = 'http://www.9ku.com' + href
            page += 1
    except requests.exceptions.Timeout as e:
        print(e)
    except requests.exceptions.HTTPError as e:
        print(e)
    print('数据采集完成')
    return song_dict
def _song_id_from_href(play_href):
    """Return the part of a ``/play/<id>`` href that follows ``/play/``."""
    prefix = '/play/'
    # str.strip('/play/') would strip a *set of characters* from both ends and
    # could eat leading characters of the id itself; drop the exact prefix only.
    if play_href.startswith(prefix):
        return play_href[len(prefix):]
    return play_href


def _song_file_name(title):
    """Turn a download-link title into a file-name stem safe to write to disk."""
    marker = 'Mp3下载'
    # Remove the exact marker text only; str.strip(marker) would also remove
    # any leading/trailing 'M', 'p', '3', ... that belong to the song title.
    if title.startswith(marker):
        title = title[len(marker):]
    if title.endswith(marker):
        title = title[:-len(marker)]
    # The title comes from a scraped page: neutralise path separators and
    # other characters that are invalid in file names.
    return re.sub(r'[\\/:*?"<>|]', '_', title.strip())


def song_search():
    """Prompt for singer names forever and download each singer's songs as MP3.

    For every name typed at the prompt that exists in the module-level
    ``song_dict``, the singer page is fetched, every ``songNameA`` link on it
    is mapped to ``http://www.9ku.com/down/<id>``, and every hidden
    ``<a href=... style="display:none">`` link on that download page is saved
    as ``<title>.mp3`` inside ``path``. An unknown name prints a notice and
    the prompt is shown again.

    Timeouts and HTTP error statuses are printed and the affected singer or
    song is skipped, matching the error handling of the collection functions.
    The loop has no exit condition of its own.
    """
    # Directory the MP3 files are saved to; empty means the current directory.
    path = ""
    song_link_pattern = r'<div class="songName"><a target="_1" href="(.*?)" class="songNameA">'
    download_link_pattern = r'<a href="(.*?)" style="display:none">(.*?)</a>'
    request_errors = (requests.exceptions.Timeout, requests.exceptions.HTTPError)

    while True:
        name = input("请输入歌手名称:")
        if name not in song_dict:
            print("未找到歌手")
            continue

        try:
            singer_page = requests.get(song_dict[name], timeout=30)
            singer_page.raise_for_status()
        except request_errors as e:
            print(e)
            continue

        for play_href in re.findall(song_link_pattern, singer_page.text):
            down_url = 'http://www.9ku.com/down/' + _song_id_from_href(play_href)
            try:
                down_page = requests.get(down_url, timeout=30)
                down_page.raise_for_status()
                for src, title in re.findall(download_link_pattern, down_page.text):
                    song_name = _song_file_name(title)
                    audio = requests.get(src, timeout=30)
                    # Do not save an HTML error page under an .mp3 name.
                    audio.raise_for_status()
                    # Pause between downloads to avoid hammering the server.
                    time.sleep(1)
                    # os.path.join keeps an empty ``path`` relative; plain
                    # '%s/%s.mp3' formatting would target the filesystem root.
                    target = os.path.join(path, '%s.mp3' % song_name)
                    with open(target, 'wb') as f:
                        f.write(audio.content)
                    print('{}:下载成功'.format(song_name))
            except request_errors as e:
                # One failed song must not abort the remaining downloads.
                print(e)
if __name__ == '__main__':
    # Collect singers from the static index page.
    song_static()
    # Collect singers from the paginated listing.
    song_List()
    # Prompt for singer names and download their songs.
    song_search()
以上是关于“网页爬虫---音乐”的主要内容。如果未能解决你的问题,请参考以下文章: