scrapy cookiejar

Posted 我和你并没有不同

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了scrapy cookiejar相关的知识,希望对你有一定的参考价值。

参考:https://blog.csdn.net/u013210620/article/details/80283637

手动操作cookie点赞

# -*- coding: utf-8 -*-
import scrapy
from bs4 import BeautifulSoup
from scrapy.selector import htmlXPathSelector
from scrapy.http import Request
from ..items import XianglongItem
from scrapy.http import HtmlResponse
from scrapy.http.response.html import HtmlResponse


class ChoutiSpider(scrapy.Spider):
    """Log in to dig.chouti.com and upvote a link, passing cookies by hand.

    Flow: fetch the start page -> copy the cookies it sets into
    ``cookie_dict`` -> POST the login form with those cookies -> POST the
    vote request with the same cookies -> print the vote response.
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://dig.chouti.com/']

    # Cookie name -> cookie value, filled in by parse_index().
    # NOTE: this is a class attribute, so the dict is shared by every
    # instance of the spider.
    cookie_dict = {}

    def start_requests(self):
        """Yield one request per start URL, handled by parse_index()."""
        for url in self.start_urls:
            yield Request(url=url, callback=self.parse_index)

    def parse_index(self, response):
        """Collect the cookies set by the start page, then submit the login form.

        Args:
            response: response for one of ``start_urls``.

        Yields:
            The login POST request, handled by parse_check_login().
        """
        # Raw cookies (unparsed Set-Cookie headers), for debugging:
        # print(response.headers.getlist('Set-Cookie'))

        # Parsed cookies: let a CookieJar read them out of the response.
        from scrapy.http.cookies import CookieJar
        cookie_jar = CookieJar()
        cookie_jar.extract_cookies(response, response.request)
        print("cookie_jar._cookies", cookie_jar._cookies)

        # _cookies is walked as three nested mappings; the innermost maps a
        # cookie name to a cookie object carrying ``.value`` (presumably
        # domain -> path -> name, as in http.cookiejar -- confirm).
        for _domain, by_path in cookie_jar._cookies.items():
            for _path, by_name in by_path.items():
                for cookie_name, cookie in by_name.items():
                    self.cookie_dict[cookie_name] = cookie.value

        req = Request(
            url='http://dig.chouti.com/login',
            method='POST',
            headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            # Placeholder credentials; replace "xxx" with real values.
            body='phone=xxx&password=xxx&oneMonth=1',
            cookies=self.cookie_dict,
            callback=self.parse_check_login
        )
        yield req

    def parse_check_login(self, response):
        """Print the login response and send the vote request.

        Args:
            response: response to the login POST.

        Yields:
            The vote POST request, handled by parse_show_result().
        """
        print("parse_check_login", response.text)
        yield Request(
            url='https://dig.chouti.com/link/vote?linksId=19440976',
            method='POST',
            # Re-send the cookies collected in parse_index().
            cookies=self.cookie_dict,
            callback=self.parse_show_result
        )

    def parse_show_result(self, response):
        """Print the body of the vote response."""
        print("parse_show_result", response.text)

自动操作cookie点赞

# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request
class ChoutiSpider(scrapy.Spider):
    """Log in to dig.chouti.com and upvote a link, letting Scrapy track cookies.

    Every request carries ``meta={'cookiejar': True}``; no cookies are
    read or passed explicitly anywhere in this spider.

    Flow: fetch the start page -> POST the login form -> POST the vote
    request -> print the vote response.
    """

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://dig.chouti.com/']

    def start_requests(self):
        """Yield one request per start URL, handled by parse_index()."""
        for url in self.start_urls:
            yield Request(url=url, callback=self.parse_index, meta={'cookiejar': True})

    def parse_index(self, response):
        """Submit the login form.

        Args:
            response: response for one of ``start_urls`` (not inspected here).

        Yields:
            The login POST request, handled by parse_check_login().
        """
        req = Request(
            url='http://dig.chouti.com/login',
            method='POST',
            headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            # NOTE(review): credentials are hard-coded in source; they should
            # be loaded from settings or the environment instead.
            body='phone=8613121758648&password=woshiniba&oneMonth=1',
            callback=self.parse_check_login,
            # Same 'cookiejar' key as the first request.
            meta={'cookiejar': True}
        )
        yield req

    def parse_check_login(self, response):
        """Send the vote request once the login response has arrived.

        Args:
            response: response to the login POST (not inspected here).

        Yields:
            The vote POST request, handled by parse_show_result().
        """
        # print(response.text)
        yield Request(
            url='https://dig.chouti.com/link/vote?linksId=19440976',
            method='POST',
            callback=self.parse_show_result,
            meta={'cookiejar': True}
        )

    def parse_show_result(self, response):
        """Print the body of the vote response."""
        print(response.text)

 

以上是关于scrapy cookiejar的主要内容,如果未能解决你的问题,请参考以下文章:

requests.Session()从CookieJar加载cookie

python爬虫之模拟登录将cookie保存到代码中

cookie 和 cookiejar 有啥区别?

关于cookiejar(),requests.session()中cookie的详细操作方法

cookie

爬虫之cookiejar模块