python yyds

Posted J哥。

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python yyds相关的知识,希望对你有一定的参考价值。

爬取:http://www.5uz.net/xiaochengxu/list/201338/

应用名、标签及内容,里面有很多细节。

# import glob

import requests
from lxml import etree

# import csv
# from concurrent.futures import ThreadPoolExecutor

# f = open("数据.csv" , mode='w' , encoding= 'utf-8 ')
# csvwriter = csv.writer(f)

def main():
    """Scrape the mini-program listing page and print details of each app.

    Fetches the listing page, collects the detail-page links found in its
    ``<li>`` entries, then requests every detail page and prints the app
    name, the company field and the description text.

    All requests go through a single session that carries the browser-like
    headers, so the detail pages are requested the same way as the listing.
    """
    url = 'http://www.5uz.net/xiaochengxu/list/201338/'

    headers = {
        # "KHTML" is the spelling used by real Chrome User-Agent strings.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Cache-Control': 'no-cache',
    }
    # Seconds to wait for the server; without it a stalled connection
    # would block the script forever.
    timeout = 10

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        session.headers.update(headers)

        resp = session.get(url, timeout=timeout)
        resp.encoding = 'utf-8'

        html = etree.HTML(resp.text)
        lis = html.xpath('/html/body/div[10]/div[1]/div[1]/div/div/li')
        # Keep each href verbatim: calling strip('/') on it would remove the
        # slashes at both ends of the link.
        hrefs = [link for li in lis for link in li.xpath('./a/@href')]

        for link in hrefs:
            detail = session.get(link, timeout=timeout)
            # Decode detail pages like the listing page (assumes the whole
            # site serves UTF-8 -- TODO confirm).
            detail.encoding = 'utf-8'
            subpage = etree.HTML(detail.text)

            names = subpage.xpath('/html/body/div[10]/div[1]/div[1]/div/div[1]/div[1]/div[1]/h1/text()')
            # A page without the expected <h1> yields an empty list; fall
            # back to an empty name instead of raising IndexError.
            name = names[0] if names else ''
            # normalize-space() returns one string; [3:] drops its first
            # three characters (presumably a label prefix -- confirm
            # against the page).
            company_name = subpage.xpath('normalize-space(//div[3]/span[2]/text())')[3:]
            desc_content = subpage.xpath('normalize-space(//*[@id="article"]/p/text())')
            print(name, ':', company_name, '介绍', desc_content)


if __name__ == '__main__':
    main()

# 处理 \\r\\n
# 把数据晒出来,然后用replace('\\r\\n','')把不需要的符号去掉
# passage1 = re.sub("</?\\w+[^>]*>", "", str(passage))
#
# passage2 = passage1.replace('\\\\r', '\\r').replace('\\\\n', ' \\n').replace('\\\\t', '\\t').replace(']', '').replace('[',
#                                                                                                              '').replace(
#     '&nbsp;', '   ')
#
# print(passage2)

# name1 = ''.join(name).replace('\\r\\n','')

# @Python-学生  name1 = ''.join(name).replace('\\r\\n','')


# html.xpath('normalize-space(//div//tr/td[@class="inquiry_intitleb"]/span/text())')

简约版的过程思路:

import requests
from lxml import etree


def main():
    """Print the title node text of every app linked from the listing page.

    Simplified walk-through: fetch the listing page, gather the detail-page
    links from its ``<li>`` entries, then fetch each detail page and print
    the list of text nodes found in its ``<h1>`` title.

    All requests go through a single session that carries the browser-like
    headers, so the detail pages are requested the same way as the listing.
    """
    url = 'http://www.5uz.net/xiaochengxu/list/201338/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Cache-Control': 'no-cache',
    }
    # Seconds to wait for the server; without it a stalled connection
    # would block the script forever.
    timeout = 10

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        session.headers.update(headers)

        resp = session.get(url, timeout=timeout)
        resp.encoding = 'utf-8'
        html = etree.HTML(resp.text)
        lis = html.xpath('/html/body/div[10]/div[1]/div[1]/div/div/li')
        hrefs = [link for li in lis for link in li.xpath('./a/@href')]

        for link in hrefs:
            detail = session.get(link, timeout=timeout)
            # Decode detail pages like the listing page (assumes the whole
            # site serves UTF-8 -- TODO confirm).
            detail.encoding = 'utf-8'
            subpage = etree.HTML(detail.text)
            # xpath() returns a list of text nodes; it is printed as-is.
            name = subpage.xpath('/html/body/div[10]/div[1]/div[1]/div/div[1]/div[1]/div[1]/h1/text()')
            print(name)


if __name__ == '__main__':
    main()

获取图片链接:

import requests
from lxml import etree
# Listing page whose entries carry the thumbnail images.
url = 'http://www.5uz.net/xiaochengxu/list/201338/'

# Browser-like headers sent along with the listing request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Cache-Control': 'no-cache',
}

resp = requests.get(url=url, headers=headers)
# Force UTF-8 decoding of the response text.
resp.encoding = 'utf-8'

html = etree.HTML(resp.text)
lis = html.xpath('/html/body/div[10]/div[1]/div[1]/div/div/li')

# For every list item, print the list of image sources under its link.
for entry in lis:
    print(entry.xpath('./a/img/@src'))

 

以上是关于python yyds的主要内容,如果未能解决你的问题,请参考以下文章:

YYDS!Dexplot:一行 Python 代码轻松绘制统计图表!

# yyds干货盘点 # 盘点一份JS逆向代码转换为Python代码的教程

腾讯云AI用1行Python代码识别增值税发票,YYDS

YYDS!一行 Python 代码即可实现数据可视化大屏

#yyds干货盘点# mybatis源码解读:executor包(语句处理功能)

YYDS!一行Python代码即可实现数据可视化大屏