Python爬虫 —— 知乎之selenium模拟登陆+requests.Session()获取cookies
Posted h_z_cong
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python爬虫 —— 知乎之selenium模拟登陆+requests.Session()获取cookies相关的知识,希望对你有一定的参考价值。
代码如下:
# coding:utf-8
"""Log in to Zhihu through a real Chrome browser (Selenium), transplant the
browser's cookies into a requests.Session, then scrape answer metadata from
the home feed.

Fixes over the original blog snippet:
- curly quotes replaced with real quotes (the original was a syntax error);
- Python 3 print() calls instead of Python 2 print statements;
- json.loads() instead of eval() on untrusted page content;
- browser.quit() guaranteed via try/finally;
- credentials parameterized instead of hard-coded.
"""
import json
import time

import requests
from lxml import etree
from selenium import webdriver


class Zhihu:
    """Scrapes the Zhihu home feed after a Selenium-driven login."""

    def __init__(self, homeurl, username="13060882373", password="XXXXXX"):
        # homeurl: page to scrape after login, e.g. "https://www.zhihu.com/".
        # username/password keep the original values as defaults so existing
        # callers (Zhihu(homeurl)) behave identically.
        self.homeurl = homeurl
        self.username = username
        self.password = password

    def GetCookies(self):
        """Log in via Chrome and return the browser's cookie list.

        Returns:
            The list of cookie dicts from selenium's ``get_cookies()``
            (each has at least ``name`` and ``value`` keys).

        NOTE(review): ``find_element_by_css_selector`` was removed in
        Selenium 4; on modern Selenium use
        ``find_element(By.CSS_SELECTOR, ...)`` — kept here to preserve the
        original's Selenium-3 API.
        """
        browser = webdriver.Chrome()
        try:
            browser.get("https://www.zhihu.com/signin")
            browser.find_element_by_css_selector(
                ".SignFlow-accountInput.Input-wrapper input"
            ).send_keys(self.username)
            browser.find_element_by_css_selector(
                ".SignFlow-password input"
            ).send_keys(self.password)
            browser.find_element_by_css_selector(
                ".Button.SignFlow-submitButton"
            ).click()
            time.sleep(3)  # crude wait for the post-login redirect to settle
            return browser.get_cookies()
        finally:
            # Always release the browser, even if a selector fails to match.
            browser.quit()

    def Crawl(self):
        """Fetch self.homeurl with the login cookies and print each answer."""
        session = requests.Session()
        session.headers.clear()
        # Copy the Selenium cookies into the requests session so the GET
        # below is authenticated.
        for cookie in self.GetCookies():
            session.cookies.set(cookie['name'], cookie['value'])
        html = session.get(self.homeurl).text
        html_tree = etree.HTML(html)
        items = html_tree.xpath(
            '//*[@id="root"]/div/main/div/div/div[1]/div[2]/div'
            '//div[@class="ContentItem AnswerItem"]/@data-zop'
        )
        for item in items:
            # data-zop holds a JSON object; json.loads is safe on untrusted
            # page content, unlike the original eval().
            content = json.loads(item)
            print(content['authorName'] + "回答了:" + content['title'])


if __name__ == "__main__":
    zhihu = Zhihu('https://www.zhihu.com/')
    zhihu.Crawl()
以上是关于Python爬虫 —— 知乎之selenium模拟登陆+requests.Session()获取cookies的主要内容,如果未能解决你的问题,请参考以下文章