网页爬虫---音乐

Posted by sheshidu

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了网页爬虫---音乐相关的知识,希望对你有一定的参考价值。

import requests
import time
import re
import os

# Shared registry filled by the crawler functions: singer name -> singer page URL.
song_dict = dict()

def song_static():
    """Collect singer names and page URLs from the static singer index.

    Fetches ``http://www.9ku.com/music/T_Singer.htm``, extracts every
    ``<a href="..." class="t-t">name</a>`` link and records
    ``name -> absolute URL`` in the module-level ``song_dict``.

    Request failures (timeout, connection error, HTTP error status) are
    printed instead of raised, so the caller always gets the dictionary
    back, possibly without any new entries.

    Returns:
        dict: the shared ``song_dict``.
    """
    link_pattern = r'<a href="(.*?)" class="t-t">(.*?)</a>'
    try:
        response = requests.get('http://www.9ku.com/music/T_Singer.htm', timeout=30)
        # requests only raises HTTPError when asked to; without this call an
        # error page would simply yield zero matches and fail silently.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of Timeout and HTTPError and also
        # covers ConnectionError, which would otherwise crash the script.
        print(e)
        return song_dict

    for ul, title in re.findall(link_pattern, response.text):
        # The hrefs on the page are site-relative.
        song_dict[title] = 'http://www.9ku.com' + ul
    return song_dict


# Crawl the paginated ("dynamic") singer listing.
def song_List():
    """Collect singer names and page URLs from the paginated singer listing.

    Requests ``http://www.9ku.com/geshou/all-all-all/<page>.htm`` for
    page = 2, 3, ... and records ``name -> absolute URL`` in the
    module-level ``song_dict``. The crawl stops at the first page that
    contains no singer links, or at the first request failure (timeout,
    connection error, HTTP error status), which is printed rather than
    raised. Entries collected before a failure are kept.

    NOTE(review): numbering starts at page 2, as in the original code;
    presumably page 1 is obtained elsewhere - confirm.

    Returns:
        dict: the shared ``song_dict``.
    """
    link_pattern = r'<a href="(.*?)" class="t-t">(.*?)</a>'
    page = 2
    print('数据采集中......')
    try:
        while True:
            print('正在采集第{}页数据'.format(page))
            response = requests.get(
                "http://www.9ku.com/geshou/all-all-all/{}.htm".format(page),
                timeout=30,
            )
            # Makes the HTTPError case reachable; requests does not raise on
            # 4xx/5xx responses unless this is called.
            response.raise_for_status()
            links = re.findall(link_pattern, response.text)
            if not links:
                # An empty page marks the end of the listing.
                break
            for ul, title in links:
                song_dict[title] = 'http://www.9ku.com' + ul
            page += 1
    except requests.exceptions.RequestException as e:
        # Base class of Timeout/HTTPError; also covers ConnectionError.
        print(e)
    print('数据采集完成')
    return song_dict


def _strip_affix(text, affix):
    """Remove ``affix`` once from the start and once from the end of ``text``."""
    if affix and text.startswith(affix):
        text = text[len(affix):]
    if affix and text.endswith(affix):
        text = text[:-len(affix)]
    return text


def song_search():
    """Interactively download every song of a singer chosen by the user.

    Repeatedly prompts for a singer name. When the name is a key of the
    module-level ``song_dict``, the singer page is fetched, each
    ``/play/<id>`` link on it is mapped to ``http://www.9ku.com/down/<id>``,
    and every hidden download link found on that page is saved as
    ``<title>.mp3`` inside ``path`` (the current working directory when
    ``path`` is empty). Unknown names print a "not found" message.

    Network and file errors are printed and the affected song is skipped,
    so one bad link does not end the session. The loop runs until standard
    input is exhausted (EOF) or the process is interrupted.

    Returns:
        None
    """
    path = ""  # Download directory; "" means the current working directory.
    play_pattern = r'<div class="songName"><a target="_1" href="(.*?)" class="songNameA">'
    down_pattern = r'<a href="(.*?)" style="display:none">(.*?)</a>'

    while True:
        try:
            name = input("请输入歌手名称:")
        except EOFError:
            # No more input available: leave instead of dying with a traceback.
            return

        if name not in song_dict:
            print("未找到歌手")
            continue

        try:
            singer_page = requests.get(song_dict[name], timeout=30)
            singer_page.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(e)
            continue

        for play_href in re.findall(play_pattern, singer_page.text):
            # str.strip('/play/') would strip any of the characters
            # '/', 'p', 'l', 'a', 'y' from both ends, not the literal prefix.
            song_id = _strip_affix(play_href, '/play/')
            try:
                down_page = requests.get('http://www.9ku.com/down/' + song_id, timeout=30)
                down_page.raise_for_status()
                for src, title in re.findall(down_pattern, down_page.text):
                    # Same pitfall as above: remove the literal label only.
                    song_name = _strip_affix(title, 'Mp3下载')
                    audio = requests.get(src, timeout=30)
                    # Avoid saving an HTML error page as an .mp3 file.
                    audio.raise_for_status()
                    time.sleep(1)  # Pause between downloads to go easy on the server.
                    # The title comes from scraped HTML: neutralise characters
                    # that are path separators or illegal in file names.
                    file_name = re.sub(r'[\\/:*?"<>|]', '_', song_name) + '.mp3'
                    # os.path.join avoids the leading "/" that '%s/%s.mp3'
                    # produced for an empty path (i.e. the filesystem root).
                    with open(os.path.join(path, file_name), 'wb') as f:
                        f.write(audio.content)
                    print('{}:下载成功'.format(song_name))
            except (requests.exceptions.RequestException, OSError) as e:
                # Skip this song but keep going with the remaining ones.
                print(e)


if __name__ == '__main__':
    # Collect singers from the static index page.
    song_static()
    # Collect singers from the paginated listing.
    song_List()
    # Prompt for a singer and download their songs.
    song_search()

以上是关于网页爬虫---音乐的主要内容,如果未能解决你的问题,请参考以下文章:

Java爬虫:运用 JSP 和 Servlet 实现一个可以下载音乐的网页

编写一个网易云音乐爬虫程序

如何用爬虫获取网易云音乐歌单中的歌曲

如何用爬虫爬取网页上的数据

小白学爬虫:网易云音乐歌单

SEO禁用蜘蛛(爬虫)搜索收录网页(全)