爬虫--selenium
Posted zhuifeng-mayi
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了爬虫--selenium相关的知识,希望对你有一定的参考价值。
什么是selenium?
基本使用
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Basic usage: search Baidu for "Python" and print the result URL and cookies.
browser = webdriver.Chrome()  # start a Chrome session
try:
    browser.get("https://www.baidu.com")  # open the Baidu home page
    # Locate the search box by its id. The generic find_element(By.ID, ...)
    # form is used here; By is already imported for the wait below.
    search_box = browser.find_element(By.ID, "kw")
    search_box.send_keys("Python")  # type the query
    search_box.send_keys(Keys.ENTER)  # press Enter to submit
    # 10 is a timeout, not a fixed sleep: until() returns as soon as the
    # condition holds and raises TimeoutException if it never does.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, "content_left")))
    print(browser.current_url)  # URL of the results page
    print(browser.get_cookies())  # cookies set for the session
    # print(browser.page_source)  # uncomment to dump the page HTML
finally:
    # Always close the browser window, even if a step above failed.
    browser.close()
https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=c618fa5900004b25&rsv_t=c25fWJbEN2wl13gOxRoocQDIAUMPaoguAnEu9Rg4KGX4uoRC0lynG5EjFGY&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=162&rsv_sug4=162 [{‘path‘: ‘/‘, ‘value‘: ‘1468_21104_18559_26350_22075‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘httpOnly‘: False, ‘name‘: ‘H_PS_PSSID‘}, {‘path‘: ‘/‘, ‘value‘: ‘3FFEC0A0709997465509BC1AFB51F757:FG=1‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘expiry‘: 3685018278.537968, ‘httpOnly‘: False, ‘name‘: ‘BAIDUID‘}, {‘path‘: ‘/‘, ‘value‘: ‘3FFEC0A0709997465509BC1AFB51F757‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘expiry‘: 3685018278.538044, ‘httpOnly‘: False, ‘name‘: ‘BIDUPSID‘}, {‘path‘: ‘/‘, ‘value‘: ‘1537534637‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘expiry‘: 3685018278.538084, ‘httpOnly‘: False, ‘name‘: ‘PSTM‘}, {‘path‘: ‘/‘, ‘value‘: ‘0‘, ‘secure‘: False, ‘domain‘: ‘www.baidu.com‘, ‘expiry‘: 2483614633.353963, ‘httpOnly‘: False, ‘name‘: ‘delPer‘}, {‘path‘: ‘/‘, ‘value‘: ‘0‘, ‘secure‘: False, ‘domain‘: ‘www.baidu.com‘, ‘httpOnly‘: False, ‘name‘: ‘BD_HOME‘}, {‘path‘: ‘/‘, ‘value‘: ‘B490B5EBF6F3CD402E515D22BCDA1598‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘expiry‘: 1537621032.332359, ‘httpOnly‘: False, ‘name‘: ‘BDORZ‘}, {‘path‘: ‘/‘, ‘value‘: ‘12314353‘, ‘secure‘: False, ‘domain‘: ‘www.baidu.com‘, ‘expiry‘: 1538398632, ‘httpOnly‘: False, ‘name‘: ‘BD_UPN‘}, {‘path‘: ‘/‘, ‘value‘: ‘1‘, ‘secure‘: False, ‘domain‘: ‘www.baidu.com‘, ‘httpOnly‘: False, ‘name‘: ‘BD_CK_SAM‘}, {‘path‘: ‘/‘, ‘value‘: ‘1‘, ‘secure‘: False, ‘domain‘: ‘.baidu.com‘, ‘httpOnly‘: False, ‘name‘: ‘PSINO‘}, {‘path‘: ‘/‘, ‘value‘: ‘bf67l36p%2FgarFggwpeficZXTG5zE%2FhBZEp2ev5JvDSo8venU134svju%2FJL4‘, ‘secure‘: False, ‘domain‘: ‘www.baidu.com‘, ‘expiry‘: 1537537225, ‘httpOnly‘: False, ‘name‘: ‘H_PS_645EC‘}]
声明浏览器对象
from selenium import webdriver

# Declare one driver object per browser backend; each constructor call
# starts a session for the corresponding browser.
browser_1 = webdriver.Chrome()
browser_2 = webdriver.Firefox()
browser_3 = webdriver.Edge()
browser_4 = webdriver.PhantomJS()
browser_5 = webdriver.Safari()
访问页面
from selenium import webdriver

# Visit a page with Chrome, then close the window.
browser = webdriver.Chrome()
# The host was misspelled "tabao.com"; every other snippet targets taobao.com.
browser.get("http://www.taobao.com")
# NOTE: get_cookie is referenced without being called, so this prints the
# bound-method object itself (which is what the sample output after this
# snippet shows). Call browser.get_cookies() to print the actual cookies.
print(browser.get_cookie)
browser.close()
<bound method WebDriver.get_cookie of <selenium.webdriver.chrome.webdriver.WebDriver (session="65ad512a5c81e7d9f6f3bd81a4ba3495")>>
查找元素
单个元素
from selenium import webdriver

# Locate the same element (the box with id "q") in three different ways:
# by id, by XPath and by CSS selector, then print each result.
SEPARATOR = "----------------------------------------------------------------"

browser = webdriver.Chrome()
browser.get("https://www.taobao.com")

by_id = browser.find_element_by_id("q")
by_xpath = browser.find_element_by_xpath('//*[@id="q"]')
by_css = browser.find_element_by_css_selector("#q")

print(by_id)
print(SEPARATOR)
print(by_xpath)
print(SEPARATOR)
print(by_css)

browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="72af913bda52ce0848486b08ba93d3a1", element="0.3093750540466209-1")> ---------------------------------------------------------------- <selenium.webdriver.remote.webelement.WebElement (session="72af913bda52ce0848486b08ba93d3a1", element="0.3093750540466209-1")> ---------------------------------------------------------------- <selenium.webdriver.remote.webelement.WebElement (session="72af913bda52ce0848486b08ba93d3a1", element="0.3093750540466209-1")>
比较通用的查找方式
from selenium import webdriver
# By was used below without being imported, which raises NameError.
from selenium.webdriver.common.by import By

# Generic lookup: pass the locator strategy (By.ID) and its value to
# find_element instead of calling a strategy-specific helper.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
input_first = browser.find_element(By.ID, 'q')
print(input_first)
browser.close()
运行的结果与上面一样!
多个元素
from selenium import webdriver

# Collect every element matching the CSS selector ".service-bd li" and
# print the resulting list of WebElement objects.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")

service_items = browser.find_elements_by_css_selector(".service-bd li")
print(service_items)

browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-1")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-2")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-3")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-4")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-5")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-6")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-7")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-8")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-9")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-10")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-11")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-12")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-13")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-14")>, <selenium.webdriver.remote.webelement.WebElement (session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="d52b2ef4ead695decaf705a0c331eb06", element="0.7934950105071805-16")>]
元素交互操作
对获取的元素调用交互方法
import time

from selenium import webdriver

# Interact with elements: type into the search box, clear it, type a new
# query and click the search button.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")

search_box = browser.find_element_by_id("q")
search_box.send_keys('iPhone')
time.sleep(1)  # pause so the first query is visible before clearing it
search_box.clear()
search_box.send_keys("iPad")

search_button = browser.find_element_by_class_name('btn-search')
search_button.click()

browser.close()
交互动作
将动作附加到动作链中串行执行
from selenium import webdriver
from selenium.webdriver import ActionChains

# Drag the "#draggable" element onto "#droppable" using an action chain.
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)  # request the demo page
browser.switch_to.frame('iframeResult')  # the demo lives inside this frame
# drag_and_drop needs single elements. The plural find_elements_* helpers
# return lists, so the singular finder is used for both ends of the drag.
source = browser.find_element_by_css_selector('#draggable')
target = browser.find_element_by_css_selector('#droppable')
actions = ActionChains(browser)  # declare the action chain
actions.drag_and_drop(source, target)  # queue the drag; nothing runs yet
actions.perform()  # execute the queued actions
from selenium import webdriver

# Run JavaScript in the page: scroll to the bottom, then raise an alert.
SCROLL_TO_BOTTOM = 'window.scrollTo(0,document.body.scrollHeight)'
SHOW_ALERT = 'alert("To Bottom")'

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
browser.execute_script(SCROLL_TO_BOTTOM)  # drag the scroll bar to the very bottom
browser.execute_script(SHOW_ALERT)
执行后的结果为:
获取元素信息
获取属性
from selenium import webdriver
from selenium.webdriver import ActionChains

# Read an attribute from an element: print the logo element itself and
# then the value of its "class" attribute.
browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")

logo = browser.find_element_by_id('zh-top-link-logo')
logo_class = logo.get_attribute('class')

print(logo)
print(logo_class)
<selenium.webdriver.remote.webelement.WebElement (session="ef2d80c82e37098c4c702fe5c0e2df31", element="0.9948931372437708-1")> zu-top-link-logo
获取文本值
from selenium import webdriver
from selenium.webdriver import ActionChains

# Print the visible text of the first element with class "post-link".
browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")

post_link = browser.find_element_by_class_name('post-link')
print(post_link.text)  # the element's text content
《红色警戒》的世界:没有希特勒 二战死了1亿人
获取ID、位置、标签名、大小
from selenium import webdriver
from selenium.webdriver import ActionChains

# Print the text, id, location, tag name and size of the first element
# with class "post-link", in that order.
browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")

post_link = browser.find_element_by_class_name('post-link')
for detail in (
    post_link.text,      # text content
    post_link.id,        # element id
    post_link.location,  # position on the page
    post_link.tag_name,  # tag name
    post_link.size,      # rendered size
):
    print(detail)

browser.close()
《红色警戒》的世界:没有希特勒 二战死了1亿人 0.22091173377675544-1 {'y': 304, 'x': 32} a {'height': 16, 'width': 306}
Frame
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Frames: look up elements inside the "iframeResult" frame, show that the
# page logo cannot be found from there, then switch back to the parent
# frame and find it.
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)

browser.switch_to.frame('iframeResult')
draggable = browser.find_element_by_css_selector("#draggable")
print(draggable)

try:
    logo = browser.find_element_by_class_name('logo')
except NoSuchElementException:
    print("NO LOGO")

browser.switch_to.parent_frame()
logo = browser.find_element_by_class_name('logo')
print(logo)
print(logo.text)
<selenium.webdriver.remote.webelement.WebElement (session="ecfecc0e705df8976f5241726b66e273", element="0.27322378119978463-1")> NO LOGO <selenium.webdriver.remote.webelement.WebElement (session="ecfecc0e705df8976f5241726b66e273", element="0.8128333237150809-2")> RUNOOB.COM
等待
隐式等待
当使用了隐式等待执行测试的时候,如果WebDriver没有在DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常,换句话说,当查找元素或元素并没有立即出现的时候,隐式等待将等待一段时间再查找DOM,默认的时间是0。
from selenium import webdriver

# Implicit wait: set a 10-second implicit wait on the driver before
# loading the page, then look up an element and print it.
browser = webdriver.Chrome()
browser.implicitly_wait(10)
browser.get("https://www.zhihu.com/explore")

add_question = browser.find_element_by_class_name('zu-top-add-question')
print(add_question)
<selenium.webdriver.remote.webelement.WebElement (session="87a2e958b9a3b58334e8c2ec76d0419e", element="0.014192877625801792-1")>
显式等待
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Explicit wait: block until specific conditions hold, for at most 10 s.
browser = webdriver.Chrome()
browser.get("http://www.taobao.com/")
wait = WebDriverWait(browser, 10)
input = wait.until(EC.presence_of_all_elements_located((By.ID, 'q')))
# The selector needs the leading "." to match the class "btn-search".
# Without it, 'btn-search' is read as a tag name, nothing matches, and
# until() raises the TimeoutException reproduced after this snippet.
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, button)
Traceback (most recent call last):
  File "C:/Users/Administrator/Desktop/正则表达式/正则表达式.py", line 10, in <module>
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'btn-search')))
  File "C:\Users\Administrator\Desktop\正则表达式\venv\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
·title_is 标题是某内容
·title_contains 标题包含某内容
·presence_of_element_located 元素加载出,传入定位元组,如(By.ID, 'p')
·visibility_of_element_located 元素可见,传入定位元组
·visibility_of 元素可见,传入元素对象
·presence_of_all_elements_located 所有元素加载出
·text_to_be_present_in_element 某个元素文本包含某文字
·text_to_be_present_in_element_value 某个元素值包含某文字
·frame_to_be_available_and_switch_to_it frame加载并切换
·invisibility_of_element_located 元素不可见
·element_to_be_clickable 元素可点击
·staleness_of 判断一个元素是否仍在DOM,可判断页面是否已经刷新
·element_to_be_selected 元素可选择,传元素对象
·element_located_to_be_selected 元素可选择,传入定位元组
·element_selection_state_to_be 传入元素对象以及状态,相等返回True,否则返回False
·element_located_selection_state_to_be 传入定位元组以及状态,相等返回True,否则返回False
·alert_is_present 是否出现Alert
前进后退
import time

from selenium import webdriver

# History navigation: visit three pages in order, go back one step, pause
# for a second, go forward again, then close the window.
PAGES = (
    "https://www.taobao.com/",
    "https://www.baidu.com/",
    "https://www.jingdong.com/",
)

browser = webdriver.Chrome()
for page in PAGES:
    browser.get(page)

browser.back()
time.sleep(1)
browser.forward()
browser.close()
Cookies
from selenium import webdriver

# Cookies: print the session cookies, add one, print again, then delete
# them all and print the (now empty) list.
browser = webdriver.Chrome()
browser.get("http://www.zhihu.com/explore")
print(browser.get_cookies())
# The domain was written "www,zhihu.com" (comma instead of dot). That is
# not a valid host, and the sample output after this snippet shows the
# cookie was never added: the second listing is identical to the first.
# With the corrected domain the second listing should include "name".
browser.add_cookie({"name": "name", "domain": "www.zhihu.com", "value": "germey"})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())
[{‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘l_n_c‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘tgw_l7_route‘, ‘domain‘: ‘www.zhihu.com‘, ‘expiry‘: 1537604591.643548, ‘secure‘: False, ‘value‘: ‘156dfd931a77f9586c0da07030f2df36‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘d_c0‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1632211696.283527, ‘secure‘: False, ‘value‘: ‘"AFCk5k4oQA6PTrVJvEIdM1iDREt1Ez3H0lw=|1537603702"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘_xsrf‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1615363691.643627, ‘secure‘: False, ‘value‘: ‘ZnQElKxeWBcoeNFASCTcgdhk56NJ83hf‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmb‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1537605497, ‘secure‘: False, ‘value‘: ‘51854390.0.10.1537603697‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘q_c1‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1632211692.325099, ‘secure‘: False, ‘value‘: ‘104b20902a9f4159b0c1811e7dd3959c|1537603698000|1537603698000‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘r_cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325146, ‘secure‘: False, ‘value‘: ‘"MWIwYjU3YmI1OWVkNGEwYmJhZGM0MTY5ZDQzZWU3MmQ=|1537603698|85c9986946afde9f1823ce067dc29aa2ea19d5f3"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325191, ‘secure‘: False, ‘value‘: ‘"OTVhZmIwZDkzYzliNDAzNWI4ZTJiNWM2NzY0NWFmMjQ=|1537603698|67c5e766f10d39421a3d3afc84d45dfed316ae18"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘l_cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325236, ‘secure‘: False, ‘value‘: ‘"MDUzODliMmYwN2VlNDU1YjkwNGU0MjEwZDU0OTdkMjI=|1537603698|e44d90334d2319d2934c4b5cccb4d8d5a549247d"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘n_c‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘_zap‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘c7dbed92-9690-47b9-886e-d5539b1f74b8‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utma‘, 
‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘51854390.1486460487.1537603697.1537603697.1537603697.1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmc‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘51854390‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmz‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1553371697, ‘secure‘: False, ‘value‘: ‘51854390.1537603697.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmv‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘51854390.000--|3=entry_date=20180922=1‘}] [{‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘l_n_c‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘tgw_l7_route‘, ‘domain‘: ‘www.zhihu.com‘, ‘expiry‘: 1537604591.643548, ‘secure‘: False, ‘value‘: ‘156dfd931a77f9586c0da07030f2df36‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘d_c0‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1632211696.283527, ‘secure‘: False, ‘value‘: ‘"AFCk5k4oQA6PTrVJvEIdM1iDREt1Ez3H0lw=|1537603702"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘_xsrf‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1615363691.643627, ‘secure‘: False, ‘value‘: ‘ZnQElKxeWBcoeNFASCTcgdhk56NJ83hf‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmb‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1537605497, ‘secure‘: False, ‘value‘: ‘51854390.0.10.1537603697‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘q_c1‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1632211692.325099, ‘secure‘: False, ‘value‘: ‘104b20902a9f4159b0c1811e7dd3959c|1537603698000|1537603698000‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘r_cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325146, ‘secure‘: False, ‘value‘: ‘"MWIwYjU3YmI1OWVkNGEwYmJhZGM0MTY5ZDQzZWU3MmQ=|1537603698|85c9986946afde9f1823ce067dc29aa2ea19d5f3"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325191, ‘secure‘: False, ‘value‘: 
‘"OTVhZmIwZDkzYzliNDAzNWI4ZTJiNWM2NzY0NWFmMjQ=|1537603698|67c5e766f10d39421a3d3afc84d45dfed316ae18"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘l_cap_id‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1540195692.325236, ‘secure‘: False, ‘value‘: ‘"MDUzODliMmYwN2VlNDU1YjkwNGU0MjEwZDU0OTdkMjI=|1537603698|e44d90334d2319d2934c4b5cccb4d8d5a549247d"‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘n_c‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘_zap‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘c7dbed92-9690-47b9-886e-d5539b1f74b8‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utma‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘51854390.1486460487.1537603697.1537603697.1537603697.1‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmc‘, ‘domain‘: ‘.zhihu.com‘, ‘secure‘: False, ‘value‘: ‘51854390‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmz‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1553371697, ‘secure‘: False, ‘value‘: ‘51854390.1537603697.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)‘}, {‘path‘: ‘/‘, ‘httpOnly‘: False, ‘name‘: ‘__utmv‘, ‘domain‘: ‘.zhihu.com‘, ‘expiry‘: 1600675697, ‘secure‘: False, ‘value‘: ‘51854390.000--|3=entry_date=20180922=1‘}] []
选项卡管理
import time

from selenium import webdriver

# Tab management: open a second tab via JavaScript and switch between the
# two tabs using their window handles.
browser = webdriver.Chrome()
browser.get("http://www.baidu.com")
browser.execute_script("window.open()")  # open a new tab
print(browser.window_handles)  # handles of all open windows/tabs
# Use the switch_to.window(...) form, matching the switch_to.frame(...)
# calls elsewhere in this article, instead of the legacy
# switch_to_window(...) helper.
browser.switch_to.window(browser.window_handles[1])  # switch to the second tab
browser.get("https://www.taobao.com")
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])  # back to the first tab
browser.get("https://www.taobao.com")
browser.close()
['CDwindow-65E31D9BF9FDC0B83D2821ABB85DB273', 'CDwindow-C2A8A67F87828D4AEA0A9D391203121E']
异常处理
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# Exception handling: catch a page-load timeout and a failed element
# lookup separately, and close the browser whatever the lookup does.
browser = webdriver.Chrome()
browser.get("http://www.baidu.com")

# Step 1: a page load that reports a timeout instead of propagating it.
try:
    browser.get("https://www.baidu.com")
except TimeoutException:
    print("TIME OUT")

# Step 2: look up an element by id "name"; the sample output shows the
# lookup failing with NoSuchElementException.
try:
    browser.find_element_by_id("name")
except NoSuchElementException:
    print("NO ELEMENT")
finally:
    browser.close()
NO ELEMENT
以上是关于爬虫--selenium的主要内容,如果未能解决你的问题,请参考以下文章