Scrapy源码 Request对象

Posted by yinminbo

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Scrapy源码 Request对象相关的知识,希望对你有一定的参考价值。

Scrapy源码 Request对象

"""
This module implements the Request class which is used to represent HTTP
requests in Scrapy.

See documentation in docs/topics/request-response.rst
"""
import six
from w3lib.url import safe_url_string

from scrapy.http.headers import Headers
from scrapy.utils.python import to_bytes
from scrapy.utils.trackref import object_ref
from scrapy.utils.url import escape_ajax
from scrapy.http.common import obsolete_setter
from scrapy.utils.curl import curl_to_request_kwargs


class Request(object_ref):
    """Represent a single HTTP request.

    A request carries the target URL, the HTTP method, headers, body and
    cookies, plus scheduling information (``priority``, ``dont_filter``) and
    the callables that will receive the outcome (``callback``, ``errback``).

    ``url`` and ``body`` are exposed as properties whose setters are wrapped
    with ``obsolete_setter``; use :meth:`replace` to obtain a request with a
    different URL or body instead of assigning to them.
    """

    def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                 cookies=None, meta=None, encoding='utf-8', priority=0,
                 dont_filter=False, errback=None, flags=None, cb_kwargs=None):
        """Build a request.

        :param url: target URL; must be a string containing a ``:``.
        :param callback: callable invoked with the result, or ``None``.
        :param method: HTTP method; stored upper-cased.
        :param headers: initial headers, passed to ``Headers``.
        :param body: request body; ``None`` becomes ``b''``, anything else is
            converted with ``to_bytes`` using ``encoding``.
        :param cookies: cookies to send; a falsy value becomes ``{}``.
        :param meta: arbitrary metadata; shallow-copied into a new dict.
        :param encoding: encoding used for the URL, the body and the headers.
        :param priority: integer priority.
        :param dont_filter: stored as-is on the request.
        :param errback: callable invoked on failure, or ``None``; requires
            ``callback`` to be set as well.
        :param flags: iterable of flags; copied into a new list.
        :param cb_kwargs: keyword arguments for the callback; shallow-copied.
        :raises TypeError: if ``url`` is not a string, or ``callback`` /
            ``errback`` is given but not callable.
        :raises ValueError: if the processed URL contains no ``:``.
        :raises AssertionError: if ``priority`` is not an ``int`` or an
            ``errback`` is supplied without a ``callback``.
        """
        # The encoding is read by _set_url() and _set_body(), so it must be
        # assigned before either of them runs.
        self._encoding = encoding
        self.method = str(method).upper()
        self._set_url(url)
        self._set_body(body)
        # NOTE(review): asserts are stripped when Python runs with -O, so this
        # check (and the errback one below) is skipped in optimized mode. Kept
        # as asserts so callers catching AssertionError keep working.
        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
        self.priority = priority

        if callback is not None and not callable(callback):
            raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
        if errback is not None and not callable(errback):
            raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
        assert callback or not errback, "Cannot use errback without a callback"
        self.callback = callback
        self.errback = errback

        self.cookies = cookies or {}
        self.headers = Headers(headers or {}, encoding=encoding)
        self.dont_filter = dont_filter

        # meta and cb_kwargs are stored as None when empty; the matching
        # properties create the dict lazily on first access.
        self._meta = dict(meta) if meta else None
        self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else None
        self.flags = [] if flags is None else list(flags)

    @property
    def cb_kwargs(self):
        """Return the callback keyword-argument dict, creating it on first use."""
        if self._cb_kwargs is None:
            self._cb_kwargs = {}
        return self._cb_kwargs

    @property
    def meta(self):
        """Return the metadata dict, creating it on first use."""
        if self._meta is None:
            self._meta = {}
        return self._meta

    def _get_url(self):
        """Return the stored (already processed) URL."""
        return self._url

    def _set_url(self, url):
        """Validate ``url``, process it and store the result in ``self._url``.

        :raises TypeError: if ``url`` is not a string type.
        :raises ValueError: if the processed URL contains no ``:``.
        """
        if not isinstance(url, six.string_types):
            raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)

        # The URL goes through safe_url_string() with the request encoding and
        # then through escape_ajax() before being stored.
        s = safe_url_string(url, self.encoding)
        self._url = escape_ajax(s)

        # A URL without ':' cannot carry a scheme.
        if ':' not in self._url:
            raise ValueError('Missing scheme in request url: %s' % self._url)

    # NOTE(review): obsolete_setter is defined in scrapy.http.common; it
    # presumably rejects direct assignment in favour of replace() - confirm.
    url = property(_get_url, obsolete_setter(_set_url, 'url'))

    def _get_body(self):
        """Return the stored body bytes."""
        return self._body

    def _set_body(self, body):
        """Store ``body`` as bytes; ``None`` is stored as ``b''``."""
        if body is None:
            self._body = b''
        else:
            self._body = to_bytes(body, self.encoding)

    body = property(_get_body, obsolete_setter(_set_body, 'body'))

    @property
    def encoding(self):
        """Return the encoding given at construction time."""
        return self._encoding

    def __str__(self):
        """Return ``<METHOD url>``."""
        return "<%s %s>" % (self.method, self.url)

    __repr__ = __str__

    def copy(self):
        """Return a copy of this Request"""
        return self.replace()

    def replace(self, *args, **kwargs):
        """Create a new Request with the same attributes except for those
        given new values.

        A ``cls`` keyword argument, if present, selects the class to
        instantiate; it defaults to the class of this request.
        """
        # Fill in every constructor argument the caller did not override with
        # this request's current value.
        for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta', 'flags',
                  'encoding', 'priority', 'dont_filter', 'callback', 'errback', 'cb_kwargs']:
            kwargs.setdefault(x, getattr(self, x))
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)

    @classmethod
    def from_curl(cls, curl_command, ignore_unknown_options=True, **kwargs):
        """Create a Request object from a string containing a `cURL
        <https://curl.haxx.se/>`_ command. It populates the HTTP method, the
        URL, the headers, the cookies and the body. It accepts the same
        arguments as the :class:`Request` class, taking preference and
        overriding the values of the same arguments contained in the cURL
        command.

        Unrecognized options are ignored by default. To raise an error when
        finding unknown options call this method by passing
        ``ignore_unknown_options=False``.

        .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
                     subclasses, such as :class:`~scrapy.http.JSONRequest`, or
                     :class:`~scrapy.http.XmlRpcRequest`, as well as having
                     :ref:`downloader middlewares <topics-downloader-middleware>`
                     and
                     :ref:`spider middlewares <topics-spider-middleware>`
                     enabled, such as
                     :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
                     :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
                     or
                     :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
                     may modify the :class:`~scrapy.http.Request` object.

        """
        request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
        # Explicit keyword arguments win over values parsed from the command.
        request_kwargs.update(kwargs)
        return cls(**request_kwargs)

以上是关于Scrapy源码 Request对象的主要内容,如果未能解决你的问题,请参考以下文章:

Scrapy框架----- Request/Response

玩转 Scrapy 框架:Scrapy 架构、Request 和 Response 介绍

玩转 Scrapy 框架:Scrapy 架构、Request 和 Response 介绍

scrapy之Request对象

NO.5 Scrapy爬虫框架中的 Request 和 Response 对象

C#-WebForm-★内置对象简介★Request-获取请求对象、Response-响应请求对象、Session-全局变量(私有)、Cookie-全局变量(私有)、Application-全局公共变量、Vi…(代码片段)