python python - selenium #python #lenovo #selenium

Posted 2021-05-10

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python python - selenium #python #lenovo #selenium相关的知识，希望对你有一定的参考价值。

## Parse everything in a single page (urllib + BeautifulSoup)

import time
from urllib import request
from bs4 import BeautifulSoup


# Benchmark: download the outlet catalog page ten times and, for each
# download, count the rows that have both an add-to-cart button image and a
# spec list.
# The URL is constant, so it is built once rather than on every iteration.
url = "http://outlet.lenovo.com/SEUILibrary/controller/e/outlet_us/LenovoPortal/en_US/catalog.workflow:show-category-with-items?acc=true&category-id=7FCAD587E909113E3FB719E59569CDAC&&results-mode=1&RQ_SORT_ORDER1=1&page-size=1000"

t1 = time.time()
for _ in range(10):
    with request.urlopen(url) as connection:
        doc = connection.read()
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed, so results can differ between machines.
    soup = BeautifulSoup(doc, "html.parser")

    items = soup.find_all("tr", attrs={"valign": "top"})
    item_counter = 0
    # The timer starts after the document is parsed and the rows are found,
    # so "Parsing time" covers only the per-row selector loop below.
    t2 = time.time()
    for item in items:
        try:
            btn_cart = item.select("img.htButton")
            xml_specs = item.select("ul.std-bullet-list")
        except Exception:
            # Best effort: skip a row that cannot be queried. Unlike a bare
            # `except:`, this still lets Ctrl-C / SystemExit stop the run.
            continue
        if btn_cart and xml_specs:
            item_counter += 1
    print("Parsing time: %d" % ((time.time() - t2) * 1000))
    print("Items: %d" % item_counter)
# Total wall-clock seconds for all ten download + scan rounds.
print(time.time() - t1)

from selenium import webdriver
import time, re

def scanPage(browser, url, screenshot_path="C:\\Users\\Lance\\Desktop\\ss.png",
             max_retries=None):
    """Load *url* in *browser*, print each listed item, and report counts.

    The page is scrolled and given one second to settle, then every element
    with class ``facet-result`` is collected and the total is read from the
    ``"<n> Results"`` text of the ``search-results-header-area`` element.
    If any of that fails the page is refreshed and the attempt repeated.

    Args:
        browser: A Selenium WebDriver instance (must provide the legacy
            ``find_element(s)_by_class_name`` methods used here).
        url: Address of the listing page to scan.
        screenshot_path: Where the final screenshot is written. Defaults to
            the path the script originally hard-coded.
        max_retries: Maximum number of refresh-and-retry rounds before the
            last error is re-raised. ``None`` (default) retries indefinitely,
            as the original code did.

    Returns:
        None. Results are printed to stdout.
    """
    started = time.time()

    browser.get(url)

    items = []
    nResults = 0
    failures = 0
    while True:
        try:
            browser.execute_script("scroll(0,20000)")
            time.sleep(1)
            items = browser.find_elements_by_class_name("facet-result")
            header = browser.find_element_by_class_name("search-results-header-area")
            # Raw string: "\d" in a normal literal is an invalid escape.
            nResults = int(re.findall(r"\d+(?= Results)", header.text)[0])
            break
        except Exception:
            # `except Exception` (not bare) so Ctrl-C can stop the retry loop.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            browser.refresh()

    for item in items:
        print("-" * 20)
        try:
            # Only probing for the cart button; its href is not used further.
            item.find_element_by_class_name("button").get_attribute("href")
        except Exception:
            # No cart button: treated as sold out (per the original comment).
            continue
        try:
            name = item.find_element_by_class_name("fbr-description")
            print(name.text)
        except Exception:
            # Fall back to the item's full text when no description is found.
            print(item.text)
            continue
    browser.save_screenshot(screenshot_path)

    print("Scanned: %d | Total: %d | Time: %d" % (len(items), nResults, (time.time() - started)))

#######################################################################################################
# Configuration shared by the test_* browser runners.
# Directory containing the WebDriver executables. The runners append a file
# name by plain string concatenation, hence the trailing backslash.
driver_dir = "C:\\Dropbox\\_java\\_packages\\webdrivers\\"
# Listing page passed to scanPage by every runner.
url = "http://outlet.lenovo.com/outlet_us/desktops/#/?page-index=1&sort-criteria=1"
# url_laptop = "http://outlet.lenovo.com/outlet_us/laptops/#/?page-index=1&sort-criteria=1"
def test_firefox():
    """Scan the module-level ``url`` with Firefox, then close the browser."""
    browser = webdriver.Firefox()
    try:
        scanPage(browser, url)
    finally:
        # Always shut the browser down so repeated runs do not leak processes.
        browser.quit()

def test_chrome():
    """Scan the module-level ``url`` with Chrome, then close the browser."""
    browser = webdriver.Chrome(driver_dir+"chromedriver2.10.exe")
    try:
        scanPage(browser, url)
    finally:
        # Always shut the browser down so repeated runs do not leak processes.
        browser.quit()

def test_phantom():
    """Scan the module-level ``url`` with PhantomJS, then close the browser."""
    # The original assigned `browser = browser = ...`; one binding is enough.
    browser = webdriver.PhantomJS(driver_dir+"phantomjs1.9.7.exe")
    try:
        scanPage(browser, url)
    finally:
        # Always shut the browser down so repeated runs do not leak processes.
        browser.quit()

# Run the scan once per browser. The comment lines after each call match the
# format of scanPage's summary print ("Scanned | Total | Time") and appear to
# be output recorded from earlier runs -- presumably one line per run; the
# Time column would then be whole seconds.
test_chrome()
# Scanned: 12 | Total: 12 | Time: 8
# Scanned: 12 | Total: 12 | Time: 15
# Scanned: 9 | Total: 12 | Time: 8
# Scanned: 9 | Total: 12 | Time: 10
# Scanned: 12 | Total: 12 | Time: 12
test_firefox()
# Scanned: 12 | Total: 12 | Time: 14
# Scanned: 12 | Total: 12 | Time: 12
# Scanned: 12 | Total: 12 | Time: 18
# Scanned: 13 | Total: 13 | Time: 19
# Scanned: 8 | Total: 12 | Time: 14
test_phantom()
# Scanned: 2 | Total: 12 | Time: 15
# Scanned: 1 | Total: 13 | Time: 21
# Scanned: 9 | Total: 12 | Time: 13
# Scanned: 6 | Total: 12 | Time: 9
# Scanned: 5 | Total: 12 | Time: 8

以上是关于python python - selenium #python #lenovo #selenium的主要内容，如果未能解决你的问题，请参考以下文章