scrapy / scrapy

@@ -1,95 +1,18 @@
-"""
-Helper functions for serializing (and deserializing) requests.
-"""
-import inspect
+import warnings
+from typing import Optional
 
-from scrapy.http import Request
-from scrapy.utils.python import to_unicode
-from scrapy.utils.misc import load_object
+import scrapy
+from scrapy.exceptions import ScrapyDeprecationWarning
+from scrapy.utils.request import request_from_dict  # noqa: F401
 
 
-def request_to_dict(request, spider=None):
-    """Convert Request object to a dict.
+warnings.warn(
+    ("Module scrapy.utils.reqser is deprecated, please use scrapy.Request.to_dict "
+     " and/or scrapy.utils.request.request_from_dict instead"),
+    category=ScrapyDeprecationWarning,
+    stacklevel=2,
+)
 
-    If a spider is given, it will try to find out the name of the spider method
-    used in the callback and store that as the callback.
-    """
-    cb = request.callback
-    if callable(cb):
-        cb = _find_method(spider, cb)
-    eb = request.errback
-    if callable(eb):
-        eb = _find_method(spider, eb)
-    d = {
-        'url': to_unicode(request.url),  # urls should be safe (safe_string_url)
-        'callback': cb,
-        'errback': eb,
-        'method': request.method,
-        'headers': dict(request.headers),
-        'body': request.body,
-        'cookies': request.cookies,
-        'meta': request.meta,
-        '_encoding': request._encoding,
-        'priority': request.priority,
-        'dont_filter': request.dont_filter,
-        'flags': request.flags,
-        'cb_kwargs': request.cb_kwargs,
-    }
-    if type(request) is not Request:
-        d['_class'] = request.__module__ + '.' + request.__class__.__name__
-    return d
 
-
-def request_from_dict(d, spider=None):
-    """Create Request object from a dict.
-
-    If a spider is given, it will try to resolve the callbacks looking at the
-    spider for methods with the same name.
-    """
-    cb = d['callback']
-    if cb and spider:
-        cb = _get_method(spider, cb)
-    eb = d['errback']
-    if eb and spider:
-        eb = _get_method(spider, eb)
-    request_cls = load_object(d['_class']) if '_class' in d else Request
-    return request_cls(
-        url=to_unicode(d['url']),
-        callback=cb,
-        errback=eb,
-        method=d['method'],
-        headers=d['headers'],
-        body=d['body'],
-        cookies=d['cookies'],
-        meta=d['meta'],
-        encoding=d['_encoding'],
-        priority=d['priority'],
-        dont_filter=d['dont_filter'],
-        flags=d.get('flags'),
-        cb_kwargs=d.get('cb_kwargs'),
-    )
-
-
-def _find_method(obj, func):
-    # Only instance methods contain ``__func__``
-    if obj and hasattr(func, '__func__'):
-        members = inspect.getmembers(obj, predicate=inspect.ismethod)
-        for name, obj_func in members:
-            # We need to use __func__ to access the original
-            # function object because instance method objects
-            # are generated each time attribute is retrieved from
-            # instance.
-            #
-            # Reference: The standard type hierarchy
-            # https://docs.python.org/3/reference/datamodel.html
-            if obj_func.__func__ is func.__func__:
-                return name
-    raise ValueError(f"Function {func} is not an instance method in: {obj}")
-
-
-def _get_method(obj, name):
-    name = str(name)
-    try:
-        return getattr(obj, name)
-    except AttributeError:
-        raise ValueError(f"Method {name!r} not found in: {obj}")
+def request_to_dict(request: "scrapy.Request", spider: Optional["scrapy.Spider"] = None) -> dict:
+    return request.to_dict(spider=spider)
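The hunk above reduces scrapy/utils/reqser.py to a backward-compatibility shim: the module still imports, but it emits a ScrapyDeprecationWarning and simply delegates to the new Request.to_dict method and scrapy.utils.request.request_from_dict. A minimal sketch of what existing callers see (the example URL and meta are invented):

import warnings

from scrapy import Request

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence the shim's ScrapyDeprecationWarning
    from scrapy.utils.reqser import request_to_dict  # old import path still resolves

req = Request("https://example.com", meta={"page": 1})
assert request_to_dict(req) == req.to_dict()  # the shim simply delegates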

@@ -9,7 +9,7 @@
 from queuelib import queue
 
 from scrapy.utils.deprecate import create_deprecated_class
-from scrapy.utils.reqser import request_to_dict, request_from_dict
+from scrapy.utils.request import request_from_dict
 
 
 def _with_mkdir(queue_class):
@@ -68,7 +68,7 @@
             return cls(crawler, key)
 
         def push(self, request):
-            request = request_to_dict(request, self.spider)
+            request = request.to_dict(spider=self.spider)
             return super().push(request)
 
         def pop(self):
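With the import switched to scrapy.utils.request, the queue wrapper's push() now serializes through the request's own to_dict() method; the retained request_from_dict import suggests the pop() side keeps rebuilding requests from the stored dict. A rough stand-in for that round trip outside of an actual queuelib queue (URL and values are made up):

from scrapy import Request
from scrapy.utils.request import request_from_dict

req = Request("https://example.com/page", priority=5, dont_filter=True)
serialized = req.to_dict(spider=None)     # what push() hands to the underlying queue
restored = request_from_dict(serialized)  # what the pop() side rebuilds from the dict
assert restored.priority == 5 and restored.dont_filter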

@@ -8,12 +8,16 @@
 import copy
 import json
 import warnings
+from typing import Tuple
 
 from scrapy.http.request import Request
 from scrapy.utils.deprecate import create_deprecated_class
 
 
 class JsonRequest(Request):
+
+    attributes: Tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
+
     def __init__(self, *args, **kwargs):
         dumps_kwargs = copy.deepcopy(kwargs.pop('dumps_kwargs', {}))
         dumps_kwargs.setdefault('sort_keys', True)
@@ -36,6 +40,12 @@
         self.headers.setdefault('Content-Type', 'application/json')
         self.headers.setdefault('Accept', 'application/json, text/javascript, */*; q=0.01')
 
+    @property
+    def dumps_kwargs(self):
+        if self._dumps_kwargs is None:
+            self._dumps_kwargs = {}
+        return self._dumps_kwargs
+
     def replace(self, *args, **kwargs):
         body_passed = kwargs.get('body', None) is not None
         data = kwargs.pop('data', None)
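Listing "dumps_kwargs" in JsonRequest.attributes means the JSON-encoder options now travel with the request through replace() and the new to_dict() serialization, with the property falling back to an empty dict when nothing was stored. A quick sketch (endpoint and options are invented; note that __init__ also defaults sort_keys to True, so the stored dict gains that key):

from scrapy.http import JsonRequest

req = JsonRequest("https://example.com/api", data={"q": "scrapy"},
                  dumps_kwargs={"indent": 2})
clone = req.replace(url="https://example.com/api/v2")
assert clone.dumps_kwargs["indent"] == 2      # carried over by replace()
assert "dumps_kwargs" in req.to_dict()        # included in the serialized dict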

@@ -11,8 +11,9 @@
 from w3lib.http import basic_auth_header
 from w3lib.url import canonicalize_url
 
-from scrapy.http import Request
+from scrapy import Request, Spider
 from scrapy.utils.httpobj import urlparse_cached
+from scrapy.utils.misc import load_object
 from scrapy.utils.python import to_bytes, to_unicode
 
 
@@ -106,3 +107,27 @@
     if referrer is None:
         return referrer
     return to_unicode(referrer, errors='replace')
+
+
+def request_from_dict(d: dict, spider: Optional[Spider] = None) -> Request:
+    """Create a :class:`~scrapy.Request` object from a dict.
+
+    If a spider is given, it will try to resolve the callbacks looking at the
+    spider for methods with the same name.
+    """
+    request_cls = load_object(d["_class"]) if "_class" in d else Request
+    kwargs = {key: value for key, value in d.items() if key in request_cls.attributes}
+    if d.get("callback") and spider:
+        kwargs["callback"] = _get_method(spider, d["callback"])
+    if d.get("errback") and spider:
+        kwargs["errback"] = _get_method(spider, d["errback"])
+    return request_cls(**kwargs)
+
+
+def _get_method(obj, name):
+    """Helper function for request_from_dict"""
+    name = str(name)
+    try:
+        return getattr(obj, name)
+    except AttributeError:
+        raise ValueError(f"Method {name!r} not found in: {obj}")
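Unlike the old reqser helper, request_from_dict builds its keyword arguments from request_cls.attributes, so subclass-specific fields survive as long as they are declared there, and callbacks stored by name are resolved against the spider. A sketch of the callback round trip, using a hypothetical spider:

import scrapy
from scrapy.utils.request import request_from_dict

class QuotesSpider(scrapy.Spider):   # hypothetical spider for illustration
    name = "quotes"

    def parse_quote(self, response):
        yield {"url": response.url}

spider = QuotesSpider()
req = scrapy.Request("https://quotes.toscrape.com", callback=spider.parse_quote)
d = req.to_dict(spider=spider)                  # callback stored as the string "parse_quote"
restored = request_from_dict(d, spider=spider)  # string resolved back to the bound method
assert restored.callback == spider.parse_quote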
similarity index 75%
rename from tests/test_utils_reqser.py
rename to tests/test_request_dict.py

@@ -4,18 +4,27 @@
 
 See documentation in docs/topics/request-response.rst
 """
+import inspect
+from typing import Optional, Tuple
+
 from w3lib.url import safe_url_string
 
+import scrapy
+from scrapy.http.common import obsolete_setter
 from scrapy.http.headers import Headers
-from scrapy.utils.python import to_bytes
+from scrapy.utils.curl import curl_to_request_kwargs
+from scrapy.utils.python import to_bytes, to_unicode
 from scrapy.utils.trackref import object_ref
 from scrapy.utils.url import escape_ajax
-from scrapy.http.common import obsolete_setter
-from scrapy.utils.curl import curl_to_request_kwargs
 
 
 class Request(object_ref):
 
+    attributes: Tuple[str, ...] = (
+        "url", "method", "headers", "body", "cookies", "meta", "flags",
+        "encoding", "priority", "dont_filter", "callback", "errback", "cb_kwargs",
+    )
+
     def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                  cookies=None, meta=None, encoding='utf-8', priority=0,
                  dont_filter=False, errback=None, flags=None, cb_kwargs=None):
@@ -99,11 +108,8 @@
         return self.replace()
 
     def replace(self, *args, **kwargs):
-        """Create a new Request with the same attributes except for those
-        given new values.
-        """
-        for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta', 'flags',
-                  'encoding', 'priority', 'dont_filter', 'callback', 'errback', 'cb_kwargs']:
+        """Create a new Request with the same attributes except for those given new values"""
+        for x in self.attributes:
             kwargs.setdefault(x, getattr(self, x))
         cls = kwargs.pop('cls', self.__class__)
         return cls(*args, **kwargs)
@@ -136,8 +142,43 @@
 
         To translate a cURL command into a Scrapy request,
         you may use `curl2scrapy <https://michael-shub.github.io/curl2scrapy/>`_.
-
-       """
+        """
         request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
         request_kwargs.update(kwargs)
         return cls(**request_kwargs)
+
+    def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> dict:
+        """Return a dictionary containing the Request's data.
+
+        Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
+
+        If a spider is given, this method will try to find out the name of the spider method used
+        as callback and include it in the output dict, raising an exception if it cannot be found.
+        """
+        d = {
+            "url": to_unicode(self.url),  # urls are safe (safe_string_url)
+            "callback": _find_method(spider, self.callback) if callable(self.callback) else self.callback,
+            "errback": _find_method(spider, self.errback) if callable(self.errback) else self.errback,
+            "headers": dict(self.headers),
+        }
+        for attr in self.attributes:
+            d.setdefault(attr, getattr(self, attr))
+        if type(self) is not Request:
+            d["_class"] = self.__module__ + '.' + self.__class__.__name__
+        return d
+
+
+def _find_method(obj, func):
+    """Helper function for Request.to_dict"""
+    # Only instance methods contain ``__func__``
+    if obj and hasattr(func, '__func__'):
+        members = inspect.getmembers(obj, predicate=inspect.ismethod)
+        for name, obj_func in members:
+            # We need to use __func__ to access the original function object because instance
+            # method objects are generated each time attribute is retrieved from instance.
+            #
+            # Reference: The standard type hierarchy
+            # https://docs.python.org/3/reference/datamodel.html
+            if obj_func.__func__ is func.__func__:
+                return name
+    raise ValueError(f"Function {func} is not an instance method in: {obj}")
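to_dict() records the subclass import path under "_class" whenever the request is not a plain Request, and _find_method turns bound callbacks into method names; request_from_dict then uses load_object on that path to rebuild the original type. A small round-trip sketch using the built-in JsonRequest subclass (example URL and payload are invented):

from scrapy.http import JsonRequest
from scrapy.utils.request import request_from_dict

req = JsonRequest("https://example.com/api", data={"page": 1})
d = req.to_dict()
# d["_class"] holds the dotted path, e.g. "scrapy.http.request.json_request.JsonRequest"
restored = request_from_dict(d)
assert isinstance(restored, JsonRequest)
assert restored.body == req.body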
Files                          Coverage
scrapy                         88.29%
Project Totals (162 files)     88.29%