python python - selenium #python #lenovo #selenium
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 Python Selenium(#python #lenovo #selenium)相关的知识,希望对你有一定的参考价值。
## parse everything in a single page
import time
from urllib import request
from bs4 import BeautifulSoup
# Benchmark: download the complete outlet listing ten times and, for each
# download, count the rows that carry both a cart button and a spec list.
# NOTE(review): the indentation of this snippet was lost in the paste; the
# extent of the outer loop below is a reconstruction -- confirm against the
# original script.
t1 = time.time()
# The URL never changes, so build it once instead of on every iteration.
url = "http://outlet.lenovo.com/SEUILibrary/controller/e/outlet_us/LenovoPortal/en_US/catalog.workflow:show-category-with-items?acc=true&category-id=7FCAD587E909113E3FB719E59569CDAC&&results-mode=1&RQ_SORT_ORDER1=1&page-size=1000"
for _ in range(10):
    with request.urlopen(url) as connection:
        doc = connection.read()
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed, so results can differ between machines.
    soup = BeautifulSoup(doc, "html.parser")
    items = soup.find_all("tr", attrs={"valign": "top"})
    item_counter = 0
    t2 = time.time()
    for item in items:
        try:
            btn_cart = item.select("img.htButton")
            xml_specs = item.select("ul.std-bullet-list")
        except Exception:
            # Skip rows that cannot be queried, but let KeyboardInterrupt
            # and SystemExit propagate (a bare ``except`` would hide them).
            continue
        # Only rows with both a cart button and a spec list are counted.
        if not btn_cart or not xml_specs:
            continue
        item_counter += 1
    # Parsing time is reported in milliseconds.
    print("Parsing time: %d" % ((time.time() - t2)*1000))
    print("Items: %d" % item_counter)
# Total wall-clock time in seconds for all ten fetch-and-parse rounds.
print(time.time() - t1)
from selenium import webdriver
import time, re
def scanPage(browser, url, *,
             screenshot_path="C:\\Users\\Lance\\Desktop\\ss.png",
             settle_seconds=1):
    """Load *url* in *browser*, print the listed items and a summary line.

    The page is scrolled, given ``settle_seconds`` to settle, and then read.
    If the result elements or the "<n> Results" header text cannot be read,
    the page is refreshed and the attempt repeated; there is no upper bound
    on the number of retries.

    Args:
        browser: WebDriver-like object providing ``get``, ``refresh``,
            ``execute_script``, ``find_element(s)_by_class_name`` and
            ``save_screenshot``.
        url: Address of the page to scan.
        screenshot_path: Where the final screenshot is written. Defaults to
            the path that used to be hard-coded here.
        settle_seconds: Pause after scrolling, before elements are read.

    Returns:
        None. Results are printed to stdout.
    """
    # NOTE(review): the find_element(s)_by_class_name helpers are the older
    # Selenium locator API; confirm the installed Selenium still has them.
    t1 = time.time()
    browser.get(url)
    while True:
        try:
            browser.execute_script("scroll(0,20000)")
            time.sleep(settle_seconds)
            items = browser.find_elements_by_class_name("facet-result")
            header = browser.find_element_by_class_name("search-results-header-area")
            # Raw string: "\d" in a plain literal is an invalid escape
            # sequence. An empty match list raises IndexError, which
            # triggers the refresh-and-retry path below.
            nResults = int(re.findall(r"\d+(?= Results)", header.text)[0])
            break
        except Exception:
            # Page not ready (or header not parseable yet): reload and retry.
            # ``Exception`` rather than a bare ``except`` keeps Ctrl-C usable.
            browser.refresh()
    for item in items:
        print("-"*20)
        try:
            # Only the success of the lookup matters; the href is not used.
            item.find_element_by_class_name("button").get_attribute("href")
        except Exception:
            # No cart button could be read: treated as sold out, skip it.
            continue
        try:
            name = item.find_element_by_class_name("fbr-description")
            print(name.text)
        except Exception:
            # No description element: fall back to the item's full text.
            print(item.text)
    browser.save_screenshot(screenshot_path)
    # Elapsed time is printed in whole seconds (%d truncates the float).
    print("Scanned: %d | Total: %d | Time: %d" % (len(items), nResults, (time.time() - t1)))
#######################################################################################################
# Directory holding the webdriver executables; test_chrome and test_phantom
# append an executable file name to this prefix.
driver_dir = "C:\\Dropbox\\_java\\_packages\\webdrivers\\"
# Page passed to scanPage by the test_* functions below.
url = "http://outlet.lenovo.com/outlet_us/desktops/#/?page-index=1&sort-criteria=1"
# Alternative target page, currently disabled.
# url_laptop = "http://outlet.lenovo.com/outlet_us/laptops/#/?page-index=1&sort-criteria=1"
def test_firefox():
    """Run scanPage on the module-level ``url`` in a Firefox session."""
    browser = webdriver.Firefox()
    try:
        scanPage(browser, url)
    finally:
        # Always end the session so a failed scan does not leave a stray
        # browser process behind.
        browser.quit()
def test_chrome():
    """Run scanPage on the module-level ``url`` in a Chrome session.

    The chromedriver executable is looked up under ``driver_dir``.
    """
    browser = webdriver.Chrome(driver_dir+"chromedriver2.10.exe")
    try:
        scanPage(browser, url)
    finally:
        # Always end the session so a failed scan does not leave a stray
        # browser/driver process behind.
        browser.quit()
def test_phantom():
    """Run scanPage on the module-level ``url`` in a PhantomJS session.

    The PhantomJS executable is looked up under ``driver_dir``.
    """
    # The original assigned ``browser = browser = ...``; one binding suffices.
    browser = webdriver.PhantomJS(driver_dir+"phantomjs1.9.7.exe")
    try:
        scanPage(browser, url)
    finally:
        # Always end the session so a failed scan does not leave a stray
        # PhantomJS process behind.
        browser.quit()
# Run the scan once per browser. The commented lines after each call have the
# shape of scanPage's summary line ("Scanned | Total | Time", with Time in
# whole seconds); presumably they are outputs recorded from five earlier runs
# of that browser -- verify before relying on them.
test_chrome()
# Scanned: 12 | Total: 12 | Time: 8
# Scanned: 12 | Total: 12 | Time: 15
# Scanned: 9 | Total: 12 | Time: 8
# Scanned: 9 | Total: 12 | Time: 10
# Scanned: 12 | Total: 12 | Time: 12
test_firefox()
# Scanned: 12 | Total: 12 | Time: 14
# Scanned: 12 | Total: 12 | Time: 12
# Scanned: 12 | Total: 12 | Time: 18
# Scanned: 13 | Total: 13 | Time: 19
# Scanned: 8 | Total: 12 | Time: 14
test_phantom()
# Scanned: 2 | Total: 12 | Time: 15
# Scanned: 1 | Total: 13 | Time: 21
# Scanned: 9 | Total: 12 | Time: 13
# Scanned: 6 | Total: 12 | Time: 9
# Scanned: 5 | Total: 12 | Time: 8
以上是关于 Python Selenium(#python #lenovo #selenium)的主要内容,如果未能解决你的问题,请参考以下文章。