Python Crawler: Baotu (ibaotu.com) Case Study

Posted by smilyroy


This post walks through a Python crawler example for Baotu (ibaotu.com): scraping the video listing page with requests and lxml, then downloading the video files it links to.

# coding=utf-8
import requests
from lxml import etree

# Request the listing page and fetch its HTML
response = requests.get("https://ibaotu.com/shipin/")
# Parse the page text into an element tree
html = etree.HTML(response.text)
# Locate the video titles on the page
title_list = html.xpath('//span[@class="video-title"]/text()')
print(title_list)
# Locate the video source URLs
src_list = html.xpath('//div[@class="video-play"]/video/@src')
print(src_list)
# Iterate over the data, pairing each file name with its download link
for tit, src in zip(title_list, src_list):
    # The src attribute is protocol-relative, so prepend "http:"
    response = requests.get("http:" + src)
    # Build the file name and extension
    file_name = tit + ".mp4"
    print("Saving video file: {}".format(file_name))
    # Write the response body to the file in binary mode
    with open(file_name, 'wb') as f:
        f.write(response.content)

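The script above fetches each clip with a plain requests.get call and buffers the entire response in memory before writing it out. Below is a minimal variation of the same idea, assuming the page layout and XPath selectors stay as shown; it adds a browser-like User-Agent header (some sites reject requests without one) and streams each video to disk in chunks. The header string and chunk size are illustrative choices, not part of the original post.

# coding=utf-8
import requests
from lxml import etree

# Assumed browser-like header; the exact value is only illustrative
HEADERS = {"User-Agent": "Mozilla/5.0"}

response = requests.get("https://ibaotu.com/shipin/", headers=HEADERS)
html = etree.HTML(response.text)
title_list = html.xpath('//span[@class="video-title"]/text()')
src_list = html.xpath('//div[@class="video-play"]/video/@src')

for tit, src in zip(title_list, src_list):
    file_name = tit + ".mp4"
    # stream=True keeps the video out of memory; write it chunk by chunk
    with requests.get("http:" + src, headers=HEADERS, stream=True) as video:
        with open(file_name, 'wb') as f:
            for chunk in video.iter_content(chunk_size=1024 * 64):
                f.write(chunk)
    print("Saved: {}".format(file_name))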

import requests
from lxml import etree


class Spider:
    def geturl(self, url):
        # Fetch the page and decode it with the detected encoding
        response = requests.get(url)
        response.encoding = response.apparent_encoding
        return response.text

    # def download(self, url):
    #     response = requests.Request(url)
    #     return response.url

    def getinfo(self, html_text):
        # Parse the HTML text and extract the titles and video URLs
        html = etree.HTML(html_text)
        tit = html.xpath('//span[@class="video-title"]/text()')
        src = html.xpath('//div[@class="video-play"]/video/@src')
        return tit, src

    def saveinfo(self, name, data):
        # Pair each title with its link and download the video
        for n, l in zip(name, data):
            response = requests.get("http:" + l)
            file_name = n + ".mp4"
            print("Downloading: {}".format(file_name))
            with open(file_name, 'wb') as f:
                f.write(response.content)

    def run(self, firsturl):
        # Fetch the listing page, extract titles and links, then save the videos
        html = self.geturl(firsturl)
        name, src = self.getinfo(html)
        self.saveinfo(name, src)


if __name__ == '__main__':
    spider = Spider()
    spider.run("https://ibaotu.com/shipin/")
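One practical caveat for both versions: the scraped title is used directly as a file name, which fails if a title contains a character such as "/" or "?" that the filesystem rejects. A small sketch of a sanitizing helper follows; the function name safe_file_name and the underscore replacement are illustrative choices, not part of the original post.

import re

def safe_file_name(title):
    # Replace characters that common filesystems do not allow in file
    # names with an underscore, then trim surrounding whitespace
    return re.sub(r'[\\/:*?"<>|]', "_", title).strip()

# Inside Spider.saveinfo one could then write:
#     file_name = safe_file_name(n) + ".mp4"
print(safe_file_name('demo/video: "clip" 01'))  # -> demo_video_ _clip_ 01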
