scrapy / scrapy
1 5
from scrapy.exceptions import NotConfigured
2 5
from scrapy.utils.python import global_object_name
3 5
from scrapy.utils.request import request_httprepr
4

5

6 5
def get_header_size(headers):
    """Return an estimate of the size, in bytes, of *headers*.

    For every header, each value contributes
    ``len(key) + len(b": ") + len(value)``. A ``bytes`` value is counted
    once; a ``list`` or ``tuple`` value is counted once per element. Values
    of any other type are not counted. One CRLF (2 bytes) is added between
    consecutive header names, i.e. one fewer than the number of names.

    Args:
        headers: a mapping of header names to values, or ``None``.

    Returns:
        ``None`` when *headers* is ``None``; otherwise a non-negative
        integer (``0`` for an empty mapping).
    """
    if headers is None:
        return None

    separator_len = len(b": ")
    size = 0
    for key, value in headers.items():
        if isinstance(value, bytes):
            size += separator_len + len(key) + len(value)
        elif isinstance(value, (list, tuple)):
            size += sum(separator_len + len(key) + len(v) for v in value)

    # Clamp at zero: without it an empty mapping would yield -1 separators
    # and therefore a negative total size.
    crlf_count = max(len(headers.keys()) - 1, 0)
    return size + len(b'\r\n') * crlf_count
17

18

19 5
class DownloaderStats:
    """Record downloader request, response and exception counters.

    Every hook reports through the ``inc_value`` method of the stats
    object given to the constructor, using keys under ``downloader/``.
    """

    def __init__(self, stats):
        # Stats collector; only its ``inc_value`` method is used by this class.
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        """Build an instance from ``crawler.stats``.

        Raises:
            NotConfigured: if the ``DOWNLOADER_STATS`` setting is false.
        """
        if not crawler.settings.getbool('DOWNLOADER_STATS'):
            raise NotConfigured
        return cls(crawler.stats)

    def process_request(self, request, spider):
        """Count the request, its HTTP method and its serialized size."""
        self.stats.inc_value('downloader/request_count', spider=spider)
        self.stats.inc_value(f'downloader/request_method_count/{request.method}', spider=spider)
        reqlen = len(request_httprepr(request))
        self.stats.inc_value('downloader/request_bytes', reqlen, spider=spider)

    def process_response(self, request, response, spider):
        """Count the response, its status and its size; return *response*.

        The recorded size is the body length plus the header size reported
        by ``get_header_size``.
        """
        self.stats.inc_value('downloader/response_count', spider=spider)
        self.stats.inc_value(f'downloader/response_status_count/{response.status}', spider=spider)
        header_size = get_header_size(response.headers)
        if header_size is None:
            # get_header_size returns None for None headers; count them as
            # zero bytes instead of failing on ``int + None``.
            header_size = 0
        reslen = len(response.body) + header_size
        self.stats.inc_value('downloader/response_bytes', reslen, spider=spider)
        return response

    def process_exception(self, request, exception, spider):
        """Count the exception, overall and per exception class name."""
        ex_class = global_object_name(exception.__class__)
        self.stats.inc_value('downloader/exception_count', spider=spider)
        self.stats.inc_value(f'downloader/exception_type_count/{ex_class}', spider=spider)

Read our documentation on viewing source code.

Loading