scrapy / scrapy

@@ -92,6 +92,7 @@
Loading
92 92
        s3store = cls.STORE_SCHEMES['s3']
93 93
        s3store.AWS_ACCESS_KEY_ID = settings['AWS_ACCESS_KEY_ID']
94 94
        s3store.AWS_SECRET_ACCESS_KEY = settings['AWS_SECRET_ACCESS_KEY']
95 +
        s3store.AWS_SESSION_TOKEN = settings['AWS_SESSION_TOKEN']
95 96
        s3store.AWS_ENDPOINT_URL = settings['AWS_ENDPOINT_URL']
96 97
        s3store.AWS_REGION_NAME = settings['AWS_REGION_NAME']
97 98
        s3store.AWS_USE_SSL = settings['AWS_USE_SSL']

@@ -4,7 +4,7 @@
Loading
4 4
5 5
See documentation in docs/topics/request-response.rst
6 6
"""
7 -
from typing import Generator
7 +
from typing import Generator, Tuple
8 8
from urllib.parse import urljoin
9 9
10 10
from scrapy.exceptions import NotSupported
@@ -16,6 +16,19 @@
Loading
16 16
17 17
18 18
class Response(object_ref):
19 +
    """An object that represents an HTTP response, which is usually
20 +
    downloaded (by the Downloader) and fed to the Spiders for processing.
21 +
    """
22 +
23 +
    attributes: Tuple[str, ...] = (
24 +
        "url", "status", "headers", "body", "flags", "request", "certificate", "ip_address", "protocol",
25 +
    )
26 +
    """A tuple of :class:`str` objects containing the name of all public
27 +
    attributes of the class that are also keyword parameters of the
28 +
    ``__init__`` method.
29 +
30 +
    Currently used by :meth:`Response.replace`.
31 +
    """
19 32
20 33
    def __init__(
21 34
        self,
@@ -97,12 +110,8 @@
Loading
97 110
        return self.replace()
98 111
99 112
    def replace(self, *args, **kwargs):
100 -
        """Create a new Response with the same attributes except for those
101 -
        given new values.
102 -
        """
103 -
        for x in [
104 -
            "url", "status", "headers", "body", "request", "flags", "certificate", "ip_address", "protocol",
105 -
        ]:
113 +
        """Create a new Response with the same attributes except for those given new values"""
114 +
        for x in self.attributes:
106 115
            kwargs.setdefault(x, getattr(self, x))
107 116
        cls = kwargs.pop('cls', self.__class__)
108 117
        return cls(*args, **kwargs)

@@ -79,6 +79,7 @@
Loading
79 79
class S3FilesStore:
80 80
    AWS_ACCESS_KEY_ID = None
81 81
    AWS_SECRET_ACCESS_KEY = None
82 +
    AWS_SESSION_TOKEN = None
82 83
    AWS_ENDPOINT_URL = None
83 84
    AWS_REGION_NAME = None
84 85
    AWS_USE_SSL = None
@@ -98,6 +99,7 @@
Loading
98 99
            's3',
99 100
            aws_access_key_id=self.AWS_ACCESS_KEY_ID,
100 101
            aws_secret_access_key=self.AWS_SECRET_ACCESS_KEY,
102 +
            aws_session_token=self.AWS_SESSION_TOKEN,
101 103
            endpoint_url=self.AWS_ENDPOINT_URL,
102 104
            region_name=self.AWS_REGION_NAME,
103 105
            use_ssl=self.AWS_USE_SSL,
@@ -349,6 +351,7 @@
Loading
349 351
        s3store = cls.STORE_SCHEMES['s3']
350 352
        s3store.AWS_ACCESS_KEY_ID = settings['AWS_ACCESS_KEY_ID']
351 353
        s3store.AWS_SECRET_ACCESS_KEY = settings['AWS_SECRET_ACCESS_KEY']
354 +
        s3store.AWS_SESSION_TOKEN = settings['AWS_SESSION_TOKEN']
352 355
        s3store.AWS_ENDPOINT_URL = settings['AWS_ENDPOINT_URL']
353 356
        s3store.AWS_REGION_NAME = settings['AWS_REGION_NAME']
354 357
        s3store.AWS_USE_SSL = settings['AWS_USE_SSL']

@@ -154,13 +154,14 @@
Loading
154 154
class S3FeedStorage(BlockingFeedStorage):
155 155
156 156
    def __init__(self, uri, access_key=None, secret_key=None, acl=None, endpoint_url=None, *,
157 -
                 feed_options=None):
157 +
                 feed_options=None, session_token=None):
158 158
        if not is_botocore_available():
159 159
            raise NotConfigured('missing botocore library')
160 160
        u = urlparse(uri)
161 161
        self.bucketname = u.hostname
162 162
        self.access_key = u.username or access_key
163 163
        self.secret_key = u.password or secret_key
164 +
        self.session_token = session_token
164 165
        self.keyname = u.path[1:]  # remove first "/"
165 166
        self.acl = acl
166 167
        self.endpoint_url = endpoint_url
@@ -169,6 +170,7 @@
Loading
169 170
        self.s3_client = session.create_client(
170 171
            's3', aws_access_key_id=self.access_key,
171 172
            aws_secret_access_key=self.secret_key,
173 +
            aws_session_token=self.session_token,
172 174
            endpoint_url=self.endpoint_url)
173 175
        if feed_options and feed_options.get('overwrite', True) is False:
174 176
            logger.warning('S3 does not support appending to files. To '
@@ -182,6 +184,7 @@
Loading
182 184
            uri,
183 185
            access_key=crawler.settings['AWS_ACCESS_KEY_ID'],
184 186
            secret_key=crawler.settings['AWS_SECRET_ACCESS_KEY'],
187 +
            session_token=crawler.settings['AWS_SESSION_TOKEN'],
185 188
            acl=crawler.settings['FEED_STORAGE_S3_ACL'] or None,
186 189
            endpoint_url=crawler.settings['AWS_ENDPOINT_URL'] or None,
187 190
            feed_options=feed_options,

@@ -1,5 +1,3 @@
Loading
1 -
from urllib.parse import unquote
2 -
3 1
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
4 2
from scrapy.exceptions import NotConfigured
5 3
from scrapy.utils.boto import is_botocore_available
@@ -12,6 +10,7 @@
Loading
12 10
    def __init__(self, settings, *,
13 11
                 crawler=None,
14 12
                 aws_access_key_id=None, aws_secret_access_key=None,
13 +
                 aws_session_token=None,
15 14
                 httpdownloadhandler=HTTPDownloadHandler, **kw):
16 15
        if not is_botocore_available():
17 16
            raise NotConfigured('missing botocore library')
@@ -20,6 +19,8 @@
Loading
20 19
            aws_access_key_id = settings['AWS_ACCESS_KEY_ID']
21 20
        if not aws_secret_access_key:
22 21
            aws_secret_access_key = settings['AWS_SECRET_ACCESS_KEY']
22 +
        if not aws_session_token:
23 +
            aws_session_token = settings['AWS_SESSION_TOKEN']
23 24
24 25
        # If no credentials could be found anywhere,
25 26
        # consider this an anonymous connection request by default;
@@ -38,7 +39,7 @@
Loading
38 39
        if not self.anon:
39 40
            SignerCls = botocore.auth.AUTH_TYPE_MAPS['s3']
40 41
            self._signer = SignerCls(botocore.credentials.Credentials(
41 -
                aws_access_key_id, aws_secret_access_key))
42 +
                aws_access_key_id, aws_secret_access_key, aws_session_token))
42 43
43 44
        _http_handler = create_instance(
44 45
            objcls=httpdownloadhandler,
@@ -59,7 +60,7 @@
Loading
59 60
        url = f'{scheme}://{bucket}.s3.amazonaws.com{path}'
60 61
        if self.anon:
61 62
            request = request.replace(url=url)
62 -
        elif self._signer is not None:
63 +
        else:
63 64
            import botocore.awsrequest
64 65
            awsrequest = botocore.awsrequest.AWSRequest(
65 66
                method=request.method,
@@ -69,14 +70,4 @@
Loading
69 70
            self._signer.add_auth(awsrequest)
70 71
            request = request.replace(
71 72
                url=url, headers=awsrequest.headers.items())
72 -
        else:
73 -
            signed_headers = self.conn.make_request(
74 -
                method=request.method,
75 -
                bucket=bucket,
76 -
                key=unquote(p.path),
77 -
                query_args=unquote(p.query),
78 -
                headers=request.headers,
79 -
                data=request.body,
80 -
            )
81 -
            request = request.replace(url=url, headers=signed_headers)
82 73
        return self._download_http(request, spider)

@@ -8,7 +8,7 @@
Loading
8 8
import json
9 9
import warnings
10 10
from contextlib import suppress
11 -
from typing import Generator
11 +
from typing import Generator, Tuple
12 12
from urllib.parse import urljoin
13 13
14 14
import parsel
@@ -30,6 +30,8 @@
Loading
30 30
    _DEFAULT_ENCODING = 'ascii'
31 31
    _cached_decoded_json = _NONE
32 32
33 +
    attributes: Tuple[str, ...] = Response.attributes + ("encoding",)
34 +
33 35
    def __init__(self, *args, **kwargs):
34 36
        self._encoding = kwargs.pop('encoding', None)
35 37
        self._cached_benc = None
@@ -53,10 +55,6 @@
Loading
53 55
        else:
54 56
            super()._set_body(body)
55 57
56 -
    def replace(self, *args, **kwargs):
57 -
        kwargs.setdefault('encoding', self.encoding)
58 -
        return Response.replace(self, *args, **kwargs)
59 -
60 58
    @property
61 59
    def encoding(self):
62 60
        return self._declared_encoding() or self._body_inferred_encoding()
Files Coverage
scrapy 88.42%
Project Totals (162 files) 88.42%
Sunburst
The innermost circle represents the entire project; moving outward from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading