淘宝商品信息并且按表格排列。

Posted 2021-01-12 cwkcwk

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了“淘宝商品信息并且按表格排列”的相关知识，希望对你有一定的参考价值。

import requests
import re

def gethtmltext(url):  # fetch page content
    """Download ``url`` and return the response body as text.

    The response encoding is switched to the one ``requests`` detects from
    the body (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: failure is signalled with an empty string.
        # Only request-related errors are caught, so programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        return ""
def parsepage(ilt, html):  # parse page content
    """Extract price/title pairs from page text and append them to ``ilt``.

    Every ``"view_price":"..."`` value is paired, in order of appearance,
    with the ``"raw_title":"..."`` value at the same position, and each pair
    is appended to ``ilt`` as ``[price, title]`` (both strings).

    Args:
        ilt: List that receives the ``[price, title]`` entries; mutated in
            place.
        html: Page text to search.

    Returns:
        None. Results are delivered through ``ilt``.
    """
    # Capture groups pull the quoted values out directly, so the scraped text
    # never has to go through eval() and titles containing ":" stay intact.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)  # digits and dots
    titles = re.findall(r'"raw_title":"(.*?)"', html)  # shortest match up to the next quote

    # zip() stops at the shorter list, so a page with fewer titles than
    # prices no longer raises IndexError.
    for price, title in zip(prices, titles):
        ilt.append([price, title])



def printgoodlist(ilt):  # print the collected goods
    """Print the goods in ``ilt`` as a tab-separated table with a header row.

    Args:
        ilt: Iterable of ``[price, title]`` entries; only the first two
            items of each entry are printed.

    Returns:
        None. The table is written to standard output.
    """
    # Three columns with minimum widths 4, 8 and 16, separated by tabs (\t).
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "名称"))  # header row
    # Rows are numbered from 1 in the first column.
    for count, g in enumerate(ilt, start=1):
        print(tplt.format(count, g[0], g[1]))

def main():  # entry point
    """Search Taobao for a fixed keyword and print the results as a table.

    Builds one search URL per result page, fetches each with
    ``gethtmltext``, collects ``[price, title]`` entries with ``parsepage``
    and finally prints everything with ``printgoodlist``.
    """
    import sys  # local import: only needed for the warning output below

    goods = "书包"  # search keyword
    depth = 2  # number of result pages to fetch
    start_url = "https://s.taobao.com/search?q=" + goods
    infolist = []
    for page in range(depth):
        # Pagination: each further page advances the "s" parameter by 44.
        url = start_url + "&s=" + str(44 * page)
        try:
            # The function is named gethtmltext; calling it as "getHTMLtext"
            # raised a NameError on every page, which the bare except hid.
            html = gethtmltext(url)
            parsepage(infolist, html)
        except Exception as exc:
            # Stay best-effort (one bad page must not abort the run), but
            # report the problem instead of discarding it silently.
            print("skipping {}: {!r}".format(url, exc), file=sys.stderr)
            continue
    printgoodlist(infolist)
# Run the scraper.
# NOTE(review): this call is unguarded, so it also runs when the file is
# imported as a module; consider `if __name__ == "__main__":` if that is
# not intended.
main()

以上是关于“淘宝商品信息并且按表格排列”的主要内容，如果未能解决你的问题，请参考以下文章：

淘宝商品信息 并且按表格排列。

淘宝商品信息并且按表格排列。