Python实例---获取酷狗音乐Top100

Posted 小a玖拾柒

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python实例---获取酷狗音乐Top100相关的知识,希望对你有一定的参考价值。

项目一:获取酷狗TOP 100

http://www.kugou.com/yy/rank/home/1-8888.html

排名

image

文件&&歌手

image

时长

image

效果:

image

附源码:

import time
import json
from bs4 import BeautifulSoup
import requests


class Kugou(object):
    """Scrape rank, title, singer and duration from a Kugou ranking page."""

    def __init__(self):
        # Browser-like User-Agent header sent with every request.
        self.header = {
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'
        }

    @staticmethod
    def _split_song(text):
        """Split link text of the form ``"singer - title"``.

        The spaced separator ``" - "`` is tried first so hyphens inside a
        singer name or title are kept; a bare ``-`` is the fallback.

        Args:
            text: Visible text of a song link.

        Returns:
            A ``(singer, title)`` tuple of stripped strings. When no
            separator is present, singer is ``''`` and title is the
            whole text.
        """
        for sep in (' - ', '-'):
            singer, found, title = text.partition(sep)
            if found:
                return singer.strip(), title.strip()
        return '', text.strip()

    def getInfo(self, url):
        """Fetch one ranking page, print every song and append it to hhh.txt.

        Args:
            url: Address of the ranking page to scrape.
        """
        html = requests.get(url, headers=self.header, timeout=10)
        soup = BeautifulSoup(html.text, 'html.parser')
        ranks = soup.select('.pc_temp_num')
        # Descend tag by tag to the song links.
        titles = soup.select('.pc_temp_songlist > ul > li > a')
        times = soup.select('.pc_temp_time')
        # Open the output file once per page rather than once per song.
        with open('hhh.txt', 'a', encoding='utf8') as f:
            for rank, title, songTime in zip(ranks, titles, times):
                singer, song = self._split_song(title.get_text())
                data = {
                    # get_text() drops the HTML tags around the value.
                    'rank': rank.get_text().strip(),
                    'title': song,
                    'singer': singer,
                    'songTime': songTime.get_text().strip()
                }
                print('rank:%2s\t' % data['rank'], 'title:%2s\t' % data['title'], 'singer:%2s\t' % data['singer'], 'songTime:%2s\t' % data['songTime'])
                f.write(str(data) + '\n')

if __name__ == '__main__':
    # Build the ranking page URLs for pages 0..29.
    # NOTE(review): the sample URL in this article starts at page 1; confirm
    # whether page 0 is valid or duplicates page 1.
    urls = [
        'http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(30)
    ]

    kugou = Kugou()
    for url in urls:
        kugou.getInfo(url)
        # Pause one second between page requests.
        time.sleep(1)

 

部分代码解析

--------------------------------------------------------------------
# Build the URLs of ranking pages 1 to 4 and print each one.
urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(1, 5)]
for url in urls:
    print(url)

结果打印:
	http://www.kugou.com/yy/rank/home/1-8888.html
	http://www.kugou.com/yy/rank/home/2-8888.html
	http://www.kugou.com/yy/rank/home/3-8888.html
	http://www.kugou.com/yy/rank/home/4-8888.html
--------------------------------------------------------------------
for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # Printing rank directly would include its HTML tags, so take the text.
        'rank': rank.get_text().strip(),
        # The link text reads "singer - title": index 0 is the singer and
        # index 1 is the song title.
        'title': title.get_text().split('-')[1].strip(),
        'singer': title.get_text().split('-')[0].strip(),
        'songTime': songTime.get_text()
    }
    print(data['rank'])
    print(data['title'])
    print(data['singer'])
    print(data['songTime'])
	
结果打印:
    1
    飞驰于你
    许嵩
    4: 04
--------------------------------------------------------------------   
for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # The tag objects are stored as-is, so printing them shows the HTML.
        'rank': rank,
        'title': title,
        'songTime': songTime
    }
    print(data['rank'])
    print(data['title'])
    print(data['songTime'])
结果打印:
<span class="pc_temp_num">
	<strong>1</strong>
</span>
<a class="pc_temp_songname" data-active="playDwn" data-index="0" hidefocus="true" href="http://www.kugou.com/song/pjn5xaa.html" title="许嵩 - 飞驰于你">许嵩 - 飞驰于你</a>
<span class="pc_temp_time">	4:04 </span>

 

项目二:搜索曲目获取URL

根据关键字搜索后的结果:

http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword=%E5%9B%AD%E6%B8%B8%E4%BC%9A&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641

image

# encoding=utf-8
# Time    : 2018/4/27
# Email   : z2615@163.com
# Software: PyCharm
# Language: Python 3
import requests
import json


class KgDownLoader(object):
    """Search Kugou by keyword and look up the play URL of each result."""

    def __init__(self):
        # JSONP search endpoint; the placeholder takes the search keyword,
        # e.g. .format('园游会').
        self.search_url = 'http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641'

        # Play-data endpoint; the placeholder takes a file hash.
        self.play_url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash={}'
        # Details of the song most recently processed by this instance.
        self.song_info = {
            '歌名': None,
            '演唱者': None,
            '专辑': None,
            'filehash': None,
            'mp3url': None
        }

    @staticmethod
    def _strip_jsonp(text):
        """Return the JSON payload of a ``callback(...)`` JSONP response.

        Only the outermost wrapper is removed, so parentheses inside the
        payload (for example in song titles) are preserved. Text that is
        already plain JSON is returned unchanged apart from stripping
        surrounding whitespace.
        """
        text = text.strip()
        if text.startswith(('{', '[')):
            return text
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end <= start:
            return text
        return text[start + 1:end]

    @staticmethod
    def _strip_em(text):
        """Remove the ``<em>`` highlight tags from a search result field."""
        return text.replace('<em>', '').replace('</em>', '')

    def get_search_data(self, keys):
        """Yield one info dict per search result for the keyword ``keys``.

        ``self.song_info`` is updated for each result and a copy is yielded,
        so dicts collected by the caller are independent of each other.

        Args:
            keys: Search keyword inserted into the search URL.

        Yields:
            A dict with the keys of ``self.song_info``; ``'mp3url'`` is None.
        """
        search_file = requests.get(self.search_url.format(keys), timeout=10)
        search_html = self._strip_jsonp(search_file.content.decode())
        views = json.loads(search_html)
        for view in views['data']['lists']:
            album_name = self._strip_em(view['AlbumName'])
            new_info = {
                '歌名': self._strip_em(view['SongName']),
                '演唱者': self._strip_em(view['SingerName']),
                '专辑': album_name if album_name else None,
                'filehash': view['FileHash'],
                'mp3url': None
            }
            self.song_info.update(new_info)
            yield dict(self.song_info)

    def get_mp3_url(self, filehash):
        """Look up the play URL for ``filehash`` and yield the updated info.

        Args:
            filehash: File hash of the song, as found under ``'filehash'``.

        Yields:
            A single copy of ``self.song_info`` with ``'mp3url'`` filled in.
        """
        mp3_file = requests.get(self.play_url.format(filehash), timeout=10).content.decode()
        mp3_json = json.loads(mp3_file)
        real_url = mp3_json['data']['play_url']
        self.song_info['mp3url'] = real_url
        yield dict(self.song_info)

    def save_mp3(self, song_name, real_url):
        """Download ``real_url`` and write it to ``<song_name>.mp3``.

        Args:
            song_name: Output file name without the ``.mp3`` extension.
            real_url: Address the audio content is downloaded from.
        """
        with open(song_name + ".mp3", "wb") as fp:
            fp.write(requests.get(real_url, timeout=10).content)


if __name__ == '__main__':
    kg = KgDownLoader()
    # Prompt for a song name and search for it.
    mp3_info = kg.get_search_data(input('请输入歌名:'))
    for x in mp3_info:
        # Look up the play URL of each result and print the completed info.
        mp3info = kg.get_mp3_url(x['filehash'])
        for i in mp3info:
            print(i)

image

项目三:搜索下载歌曲

代码仅供学习参考

from selenium import webdriver

from bs4 import BeautifulSoup

import urllib.request

from selenium.webdriver.common.action_chains import ActionChains

# Locator constants for driver.find_element(); imported here so this script
# section is self-contained.
from selenium.webdriver.common.by import By

# XPath of the title link of the N-th search result (1-based).
RESULT_LINK_XPATH = '/html/body/div[4]/div[1]/div[2]/ul[2]/li[%s]/div[1]/a'

input_string = input('>>>please input the search key:')

driver = webdriver.Chrome()
driver.get('http://www.kugou.com/')

# Type the keyword into the search box, then click the search icon.
a = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[1]/input')
a.send_keys(input_string)
driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[1]/div/i').click()

# Continue in the last window handle; this has to be repeated after every
# action that may open another window.
driver.switch_to.window(driver.window_handles[-1])

# Count the result rows, then print each result's title with its number.
soup = BeautifulSoup(driver.page_source, 'lxml')
PageAll = len(soup.select('ul.list_content.clearfix > li'))
print(PageAll)

for i in range(1, PageAll + 1):
    j = driver.find_element(By.XPATH, RESULT_LINK_XPATH % i).get_attribute('title')
    print('%d.' % i + j)

choice = input("请输入你要下载的歌曲(输入序号):")

# Locate the chosen result once; keep its title for the output file name.
a = driver.find_element(By.XPATH, RESULT_LINK_XPATH % choice)
b = a.get_attribute('title')

# Move the mouse to the chosen link and click it.
actions = ActionChains(driver)
actions.move_to_element(a)
actions.click(a)
actions.perform()

# Continue in the last window handle again after the click.
driver.switch_to.window(driver.window_handles[-1])

# The src attribute of the audio element is the download address.
Local = driver.find_element(By.XPATH, '//*[@id="myAudio"]').get_attribute('src')
print(Local)

def cbk(a, b, c):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size in bytes; zero or negative when it is not known.
    """
    if c <= 0:
        # Without a usable total no percentage can be computed, so report
        # the byte count instead of dividing by zero.
        print('%d bytes' % (a * b))
        return
    # The last block may overshoot the total, so cap the value at 100.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)

# Reduce the chosen title to plain text; the parser is named explicitly so
# the result does not depend on which parsers happen to be installed.
soup = BeautifulSoup(b, 'lxml')
name = soup.get_text()

# Save the file as D:\<name>.mp3.
path = 'D:\\%s.mp3' % name

# Download the audio, reporting progress through cbk.
urllib.request.urlretrieve(Local, path, cbk)

print('finish downloading %s.mp3' % name + '\n\n')

【更多参考】https://blog.csdn.net/abc_123456___/article/details/81101845

以上是关于Python实例---获取酷狗音乐Top100的主要内容,如果未能解决你的问题,请参考以下文章

Python代码搜索并下载酷狗音乐

Java爬虫系列之实战:爬取酷狗音乐网 TOP500 的歌曲

爬取酷狗音乐Top500榜单

python下载酷狗音乐源码

最新酷狗音乐反爬来袭,Python掌握酷狗排行榜加密规则

最新酷狗音乐反爬来袭,Python掌握酷狗排行榜加密规则