#3696 Allow customizing export column names

Open Adrián Chaves Gallaecio
Coverage Reach
utils/python.py utils/misc.py utils/conf.py utils/iterators.py utils/log.py utils/defer.py utils/datatypes.py utils/deprecate.py utils/test.py utils/console.py utils/project.py utils/reactor.py utils/curl.py utils/url.py utils/signal.py utils/request.py utils/response.py utils/ssl.py utils/testproc.py utils/reqser.py utils/spider.py utils/trackref.py utils/benchserver.py utils/gz.py utils/testsite.py utils/serialize.py utils/decorators.py utils/display.py utils/sitemap.py utils/ftp.py utils/engine.py utils/boto.py utils/http.py utils/template.py utils/ossignal.py utils/versions.py utils/httpobj.py utils/job.py utils/py36.py utils/markup.py utils/multipart.py core/downloader/handlers/http11.py core/downloader/handlers/ftp.py core/downloader/handlers/__init__.py core/downloader/handlers/s3.py core/downloader/handlers/http10.py core/downloader/handlers/datauri.py core/downloader/handlers/file.py core/downloader/handlers/http.py core/downloader/__init__.py core/downloader/webclient.py core/downloader/middleware.py core/downloader/tls.py core/downloader/contextfactory.py core/engine.py core/scraper.py core/scheduler.py core/spidermw.py extensions/feedexport.py extensions/httpcache.py extensions/memusage.py extensions/telnet.py extensions/throttle.py extensions/closespider.py extensions/debug.py extensions/logstats.py extensions/corestats.py extensions/spiderstate.py extensions/statsmailer.py extensions/memdebug.py commands/parse.py commands/genspider.py commands/startproject.py commands/__init__.py commands/check.py commands/runspider.py commands/fetch.py commands/shell.py commands/bench.py commands/settings.py commands/edit.py commands/crawl.py commands/version.py commands/view.py commands/list.py http/request/form.py http/request/__init__.py http/request/json_request.py http/request/rpc.py http/response/text.py http/response/__init__.py http/response/html.py http/response/xml.py http/cookies.py http/headers.py http/__init__.py http/common.py 
downloadermiddlewares/httpcache.py downloadermiddlewares/cookies.py downloadermiddlewares/robotstxt.py downloadermiddlewares/redirect.py downloadermiddlewares/decompression.py downloadermiddlewares/httpcompression.py downloadermiddlewares/httpproxy.py downloadermiddlewares/retry.py downloadermiddlewares/ajaxcrawl.py downloadermiddlewares/stats.py downloadermiddlewares/httpauth.py downloadermiddlewares/useragent.py downloadermiddlewares/downloadtimeout.py downloadermiddlewares/defaultheaders.py pipelines/files.py pipelines/media.py pipelines/images.py pipelines/__init__.py settings/__init__.py settings/default_settings.py spidermiddlewares/referer.py spidermiddlewares/offsite.py spidermiddlewares/depth.py spidermiddlewares/httperror.py spidermiddlewares/urllength.py spiders/crawl.py spiders/__init__.py spiders/feed.py spiders/sitemap.py spiders/init.py exporters.py contracts/__init__.py contracts/default.py crawler.py linkextractors/lxmlhtml.py linkextractors/__init__.py shell.py cmdline.py pqueues.py robotstxt.py mail.py item.py resolver.py responsetypes.py squeues.py dupefilters.py middleware.py statscollectors.py spiderloader.py logformatter.py selector/unified.py selector/__init__.py exceptions.py loader/__init__.py loader/processors.py loader/common.py signals.py signalmanager.py __init__.py link.py extension.py interfaces.py __main__.py

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -118,7 +118,7 @@
Loading
118 118
    out = feed.copy()
119 119
    out.setdefault("batch_item_count", settings.getint('FEED_EXPORT_BATCH_ITEM_COUNT'))
120 120
    out.setdefault("encoding", settings["FEED_EXPORT_ENCODING"])
121 -
    out.setdefault("fields", settings.getlist("FEED_EXPORT_FIELDS") or None)
121 +
    out.setdefault("fields", settings.getdictorlist("FEED_EXPORT_FIELDS") or None)
122 122
    out.setdefault("store_empty", settings.getbool("FEED_STORE_EMPTY"))
123 123
    out.setdefault("uri_params", settings["FEED_URI_PARAMS"])
124 124
    out.setdefault("item_export_kwargs", dict())

@@ -2,6 +2,7 @@
Loading
2 2
Item Exporters are used to export/serialize items into different formats.
3 3
"""
4 4
5 +
from collections import Mapping
5 6
import csv
6 7
import io
7 8
import marshal
@@ -68,20 +69,32 @@
Loading
68 69
                field_iter = item.field_names()
69 70
            else:
70 71
                field_iter = item.keys()
72 +
        elif isinstance(self.fields_to_export, Mapping):
73 +
            if include_empty:
74 +
                field_iter = self.fields_to_export.items()
75 +
            else:
76 +
                field_iter = (
77 +
                    (x, y) for x, y in self.fields_to_export.items()
78 +
                    if x in item
79 +
                )
71 80
        else:
72 81
            if include_empty:
73 82
                field_iter = self.fields_to_export
74 83
            else:
75 84
                field_iter = (x for x in self.fields_to_export if x in item)
76 85
77 86
        for field_name in field_iter:
78 -
            if field_name in item:
79 -
                field_meta = item.get_field_meta(field_name)
80 -
                value = self.serialize_field(field_meta, field_name, item[field_name])
87 +
            if isinstance(field_name, str):
88 +
                item_field, output_field = field_name, field_name
89 +
            else:
90 +
                item_field, output_field = field_name
91 +
            if item_field in item:
92 +
                field_meta = item.get_field_meta(item_field)
93 +
                value = self.serialize_field(field_meta, output_field, item[item_field])
81 94
            else:
82 95
                value = default_value
83 96
84 -
            yield field_name, value
97 +
            yield output_field, value
85 98
86 99
87 100
class JsonLinesItemExporter(BaseItemExporter):
@@ -246,7 +259,11 @@
Loading
246 259
            if not self.fields_to_export:
247 260
                # use declared field names, or keys if the item is a dict
248 261
                self.fields_to_export = ItemAdapter(item).field_names()
249 -
            row = list(self._build_row(self.fields_to_export))
262 +
            if isinstance(self.fields_to_export, Mapping):
263 +
                fields = self.fields_to_export.values()
264 +
            else:
265 +
                fields = self.fields_to_export
266 +
            row = list(self._build_row(fields))
250 267
            self.csv_writer.writerow(row)
251 268
252 269

@@ -1,5 +1,6 @@
Loading
1 1
import json
2 2
import copy
3 +
from collections import OrderedDict
3 4
from collections.abc import MutableMapping
4 5
from importlib import import_module
5 6
from pprint import pformat
@@ -197,6 +198,38 @@
Loading
197 198
            value = json.loads(value)
198 199
        return dict(value)
199 200
201 +
    def getdictorlist(self, name, default=None):
202 +
        """Get a setting value as either an ``OrderedDict`` or a list.
203 +
204 +
        If the setting is already a dict or a list, a copy of it will be
205 +
        returned.
206 +
207 +
        If it is a string it will be evaluated as JSON, or as a comma-separated
208 +
        list of strings as a fallback.
209 +
210 +
        For example, settings populated from the command line will return:
211 +
212 +
        -   ``OrderedDict([('key1', 'value1'), ('key2', 'value2')])`` if set to
213 +
            ``'{"key1": "value1", "key2": "value2"}'``
214 +
215 +
        -   ``['one', 'two']`` if set to ``'["one", "two"]'`` or ``'one,two'``
216 +
217 +
        :param name: the setting name
218 +
        :type name: string
219 +
220 +
        :param default: the value to return if no setting is found
221 +
        :type default: any
222 +
        """
223 +
        value = self.get(name, default)
224 +
        if value is None:
225 +
            return OrderedDict()
226 +
        if isinstance(value, str):
227 +
            try:
228 +
                return json.loads(value, object_pairs_hook=OrderedDict)
229 +
            except ValueError:
230 +
                return value.split(',')
231 +
        return copy.deepcopy(value)
232 +
200 233
    def getwithbase(self, name):
201 234
        """Get a composition of a dictionary-like setting and its `_BASE`
202 235
        counterpart.

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines, not adjusted in diff, that have changed coverage data.
Files that introduced coverage data that had none before.
Files that have missing coverage data that once were tracked.
Files Coverage
scrapy 0.03% 87.90%
Project Totals (160 files) 87.90%
Loading