scrapy / scrapy
"""
This module implements the Response class which is used to represent HTTP
responses in Scrapy.

See documentation in docs/topics/request-response.rst
"""
7 7
from typing import Generator
8 7
from urllib.parse import urljoin
9

10 7
from scrapy.exceptions import NotSupported
11 7
from scrapy.http.common import obsolete_setter
12 7
from scrapy.http.headers import Headers
13 7
from scrapy.http.request import Request
14 7
from scrapy.link import Link
15 7
from scrapy.utils.trackref import object_ref
16

17

18 7
class Response(object_ref):
    """Base representation of an HTTP response.

    Stores the URL, status code, headers, raw ``bytes`` body and flags of a
    response, together with the request it belongs to (if any) and the
    ``certificate``, ``ip_address`` and ``protocol`` values handed in by the
    caller.

    The text-oriented helpers (:attr:`text`, :meth:`css`, :meth:`xpath`)
    always raise in this class; they are meant to be provided by subclasses
    of TextResponse.
    """

    def __init__(
        self,
        url,
        status=200,
        headers=None,
        body=b"",
        flags=None,
        request=None,
        certificate=None,
        ip_address=None,
        protocol=None,
    ):
        """Build a response.

        :param url: URL of the response; must be a ``str``.
        :param status: status code; coerced with ``int()``.
        :param headers: value wrapped in a ``Headers`` object; a falsy value
            yields empty headers.
        :param body: response body as ``bytes``; ``None`` is stored as
            ``b''``.
        :param flags: optional iterable, copied into a new list.
        :param request: the request this response is tied to, or ``None``.
        :param certificate: stored unchanged on the instance.
        :param ip_address: stored unchanged on the instance.
        :param protocol: stored unchanged on the instance.
        :raises TypeError: if ``url`` is not ``str`` or ``body`` is neither
            ``None`` nor ``bytes``.
        """
        # NOTE(review): headers and status are assigned before the body/url
        # setters run; subclasses may override those setters and rely on
        # this order, so it is kept as-is.
        self.headers = Headers(headers if headers else {})
        self.status = int(status)
        self._set_body(body)
        self._set_url(url)
        self.request = request
        self.flags = list(flags) if flags is not None else []
        self.certificate = certificate
        self.ip_address = ip_address
        self.protocol = protocol

    @property
    def cb_kwargs(self):
        """Return ``cb_kwargs`` of the request tied to this response.

        :raises AttributeError: if the lookup on ``self.request`` fails with
            ``AttributeError`` (for instance when ``request`` is ``None``).
        """
        try:
            return self.request.cb_kwargs
        except AttributeError:
            message = (
                "Response.cb_kwargs not available, this response "
                "is not tied to any request"
            )
            raise AttributeError(message)

    @property
    def meta(self):
        """Return ``meta`` of the request tied to this response.

        :raises AttributeError: if the lookup on ``self.request`` fails with
            ``AttributeError`` (for instance when ``request`` is ``None``).
        """
        try:
            return self.request.meta
        except AttributeError:
            message = (
                "Response.meta not available, this response "
                "is not tied to any request"
            )
            raise AttributeError(message)

    def _get_url(self):
        """Getter backing the ``url`` property."""
        return self._url

    def _set_url(self, url):
        """Store ``url`` after checking that it is a ``str``.

        :raises TypeError: if ``url`` is not a ``str``.
        """
        if not isinstance(url, str):
            raise TypeError(f'{type(self).__name__} url must be str, '
                            f'got {type(url).__name__}')
        self._url = url

    # The setter is wrapped with obsolete_setter (imported from
    # scrapy.http.common); presumably it rejects direct assignment -- confirm
    # against that helper.
    url = property(_get_url, obsolete_setter(_set_url, 'url'))

    def _get_body(self):
        """Getter backing the ``body`` property."""
        return self._body

    def _set_body(self, body):
        """Store ``body``; ``None`` becomes ``b''``.

        :raises TypeError: if ``body`` is neither ``None`` nor ``bytes``.
        """
        if body is None:
            self._body = b''
            return
        if not isinstance(body, bytes):
            raise TypeError(
                "Response body must be bytes. "
                "If you want to pass unicode body use TextResponse "
                "or HtmlResponse.")
        self._body = body

    # Same wrapping as for ``url`` above.
    body = property(_get_body, obsolete_setter(_set_body, 'body'))

    def __str__(self):
        """Render the response as ``<status url>``."""
        return f"<{self.status} {self.url}>"

    # repr() and str() share the same rendering.
    __repr__ = __str__

    def copy(self):
        """Return a copy of this Response (``replace()`` with no changes)."""
        return self.replace()

    def replace(self, *args, **kwargs):
        """Create a new Response carrying over this one's attributes, except
        for those overridden through keyword arguments.

        An optional ``cls`` keyword selects the class to instantiate; it
        defaults to the class of this instance.
        """
        target_cls = kwargs.pop('cls', self.__class__)
        carried_over = (
            "url", "status", "headers", "body", "request", "flags",
            "certificate", "ip_address", "protocol",
        )
        for attr_name in carried_over:
            kwargs.setdefault(attr_name, getattr(self, attr_name))
        return target_cls(*args, **kwargs)

    def urljoin(self, url):
        """Resolve a possibly relative ``url`` against this Response's url
        and return the resulting absolute interpretation."""
        return urljoin(self.url, url)

    @property
    def text(self):
        """Body as ``str``; only available on subclasses of TextResponse.

        :raises AttributeError: always, in this class.
        """
        raise AttributeError("Response content isn't text")

    def css(self, *a, **kw):
        """Shortcut available only on responses whose content is text
        (subclasses of TextResponse).

        :raises NotSupported: always, in this class.
        """
        raise NotSupported("Response content isn't text")

    def xpath(self, *a, **kw):
        """Shortcut available only on responses whose content is text
        (subclasses of TextResponse).

        :raises NotSupported: always, in this class.
        """
        raise NotSupported("Response content isn't text")

    def follow(self, url, callback=None, method='GET', headers=None, body=None,
               cookies=None, meta=None, encoding='utf-8', priority=0,
               dont_filter=False, errback=None, cb_kwargs=None, flags=None):
        # type: (...) -> Request
        """
        Return a :class:`~.Request` instance to follow a link ``url``.
        The arguments are the same as for ``Request.__init__``, except that
        ``url`` may also be a relative URL or a ``scrapy.link.Link`` object
        rather than only an absolute URL.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        :raises ValueError: if ``url`` is ``None``.

        .. versionadded:: 2.0
           The *flags* parameter.
        """
        if url is None:
            raise ValueError("url can't be None")
        # A Link object carries the actual address in its ``url`` attribute.
        target = url.url if isinstance(url, Link) else url
        absolute_url = self.urljoin(target)

        return Request(
            url=absolute_url,
            callback=callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback,
            cb_kwargs=cb_kwargs,
            flags=flags,
        )

    def follow_all(self, urls, callback=None, method='GET', headers=None, body=None,
                   cookies=None, meta=None, encoding='utf-8', priority=0,
                   dont_filter=False, errback=None, cb_kwargs=None, flags=None):
        # type: (...) -> Generator[Request, None, None]
        """
        .. versionadded:: 2.0

        Return an iterable of :class:`~.Request` instances, one per element
        of ``urls``. The arguments are the same as for ``Request.__init__``,
        except that elements of ``urls`` may also be relative URLs or
        :class:`~scrapy.link.Link` objects rather than only absolute URLs.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow_all`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        :raises TypeError: if ``urls`` has no ``__iter__`` attribute.
        """
        if not hasattr(urls, '__iter__'):
            raise TypeError("'urls' argument must be an iterable")
        # Every generated request receives the same keyword arguments; only
        # the URL differs.
        shared = dict(
            callback=callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback,
            cb_kwargs=cb_kwargs,
            flags=flags,
        )
        return (self.follow(url=url, **shared) for url in urls)

Read our documentation on viewing source code .

Loading