scrapy / scrapy

@@ -4,7 +4,7 @@
Loading
4 4
5 5
See documentation in docs/topics/request-response.rst
6 6
"""
7 -
from typing import Generator
7 +
from typing import Generator, Tuple
8 8
from urllib.parse import urljoin
9 9
10 10
from scrapy.exceptions import NotSupported
@@ -12,11 +12,16 @@
Loading
12 12
from scrapy.http.headers import Headers
13 13
from scrapy.http.request import Request
14 14
from scrapy.link import Link
15 +
from scrapy.utils.python import to_unicode
15 16
from scrapy.utils.trackref import object_ref
16 17
17 18
18 19
class Response(object_ref):
19 20
21 +
    attributes: Tuple[str, ...] = (
22 +
        "url", "status", "headers", "body", "request", "flags", "certificate", "ip_address", "protocol",
23 +
    )
24 +
20 25
    def __init__(
21 26
        self,
22 27
        url,
@@ -97,12 +102,8 @@
Loading
97 102
        return self.replace()
98 103
99 104
    def replace(self, *args, **kwargs):
        """Create a new Response with the same attributes except for those given new values"""
        # Snapshot every declared attribute, then let caller-supplied kwargs win.
        current = {attr: getattr(self, attr) for attr in self.attributes}
        for attr, value in current.items():
            kwargs.setdefault(attr, value)
        # 'cls' lets the caller build a different Response subclass.
        response_cls = kwargs.pop('cls', self.__class__)
        return response_cls(*args, **kwargs)
@@ -206,3 +207,21 @@
Loading
206 207
            )
207 208
            for url in urls
208 209
        )
210 +
211 +
    def to_dict(self) -> dict:
        """Return a dictionary containing the Response's data.

        Use :func:`~scrapy.utils.response.response_from_dict` to convert
        back into a :class:`~scrapy.http.response.Response` object.
        """
        # Start with the attributes that need an explicit serializable form.
        serialized = {
            "url": to_unicode(self.url),  # urls are safe (safe_string_url)
            "headers": dict(self.headers),
            "ip_address": str(self.ip_address) if self.ip_address is not None else None,
            "certificate": self.certificate.dumpPEM() if self.certificate is not None else None,
        }
        # Every remaining declared attribute is copied through unchanged.
        for name in self.attributes:
            if name not in serialized:
                serialized[name] = getattr(self, name)
        # Record the concrete subclass so deserialization can rebuild it.
        if type(self) is not Response:
            serialized["_class"] = self.__module__ + "." + self.__class__.__name__
        return serialized

@@ -3,17 +3,20 @@
Loading
3 3
scrapy.http.Response objects
4 4
"""
5 5
import os
6 -
import webbrowser
7 6
import tempfile
7 +
import webbrowser
8 +
from ipaddress import ip_address
8 9
from typing import Any, Callable, Iterable, Optional, Tuple, Union
9 10
from weakref import WeakKeyDictionary
10 11
12 +
from twisted.internet.ssl import Certificate
13 +
from twisted.web import http
14 +
from w3lib import html
15 +
11 16
import scrapy
12 17
from scrapy.http.response import Response
13 -
14 -
from twisted.web import http
18 +
from scrapy.utils.misc import load_object
15 19
from scrapy.utils.python import to_bytes, to_unicode
16 -
from w3lib import html
17 20
18 21
19 22
_baseurl_cache: "WeakKeyDictionary[Response, str]" = WeakKeyDictionary()
@@ -86,9 +89,19 @@
Loading
86 89
    elif isinstance(response, TextResponse):
87 90
        ext = '.txt'
88 91
    else:
89 -
        raise TypeError("Unsupported response type: "
90 -
                        f"{response.__class__.__name__}")
92 +
        raise TypeError(f"Unsupported response type: {response.__class__.__name__}")
91 93
    fd, fname = tempfile.mkstemp(ext)
92 94
    os.write(fd, body)
93 95
    os.close(fd)
94 96
    return _openfunc(f"file://{fname}")
97 +
98 +
99 +
def response_from_dict(d: dict) -> Response:
    """Create a :class:`~scrapy.http.response.Response` object from
    a dict whose keys match a Response's ``__init__`` parameters.

    :param d: a mapping such as the one produced by :meth:`Response.to_dict`;
        may include an optional ``_class`` key holding the dotted path of a
        Response subclass to instantiate.
    """
    response_cls = load_object(d["_class"]) if "_class" in d else Response
    kwargs = {key: value for key, value in d.items() if key in response_cls.attributes}
    # Use .get() so hand-built dicts may omit the optional keys entirely
    # instead of raising KeyError; falsy values still map to None.
    kwargs["certificate"] = Certificate.loadPEM(d["certificate"]) if d.get("certificate") else None
    kwargs["ip_address"] = ip_address(d["ip_address"]) if d.get("ip_address") else None
    return response_cls(**kwargs)

@@ -8,7 +8,7 @@
Loading
8 8
import json
9 9
import warnings
10 10
from contextlib import suppress
11 -
from typing import Generator
11 +
from typing import Generator, Tuple
12 12
from urllib.parse import urljoin
13 13
14 14
import parsel
@@ -30,6 +30,8 @@
Loading
30 30
    _DEFAULT_ENCODING = 'ascii'
31 31
    _cached_decoded_json = _NONE
32 32
33 +
    attributes: Tuple[str, ...] = Response.attributes + ("encoding",)
34 +
33 35
    def __init__(self, *args, **kwargs):
34 36
        self._encoding = kwargs.pop('encoding', None)
35 37
        self._cached_benc = None
@@ -53,10 +55,6 @@
Loading
53 55
        else:
54 56
            super()._set_body(body)
55 57
56 -
    def replace(self, *args, **kwargs):
57 -
        kwargs.setdefault('encoding', self.encoding)
58 -
        return Response.replace(self, *args, **kwargs)
59 -
60 58
    @property
61 59
    def encoding(self):
62 60
        return self._declared_encoding() or self._body_inferred_encoding()
Files Coverage
scrapy 88.13%
Project Totals (162 files) 88.13%
Sunburst
The innermost circle represents the entire project; moving away from the center, each ring represents folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, proceeding downward through folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading