scrapy / scrapy
1
"""
2
Base class for Scrapy commands
3
"""
4 7
import os
5 7
from optparse import OptionGroup
6 7
from typing import Any, Dict
7

8 7
from twisted.python import failure
9

10 7
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
11 7
from scrapy.exceptions import UsageError
12

13

14 7
class ScrapyCommand:
    """Base class for Scrapy commands.

    Provides the option declarations and option handling shared by every
    command ("Global Options"), plus overridable hooks for the command's
    syntax, descriptions and :meth:`run` entry point.
    """

    requires_project = False
    crawler_process = None

    # default settings to be used for this command instead of global defaults
    default_settings: Dict[str, Any] = {}

    exitcode = 0

    def __init__(self):
        self.settings = None  # set in scrapy.cmdline

    def set_crawler(self, crawler):
        """Attach ``crawler`` to this command.

        Raises ``RuntimeError`` if a crawler has already been attached.
        """
        if hasattr(self, '_crawler'):
            raise RuntimeError("crawler already set")
        self._crawler = crawler

    def syntax(self):
        """
        Command syntax (preferably one-line). Do not include command name.
        """
        return ""

    def short_desc(self):
        """
        A short description of the command
        """
        return ""

    def long_desc(self):
        """A long description of the command. Return short description when not
        available. It cannot contain newlines, since contents will be formatted
        by optparser which removes newlines and wraps text.
        """
        return self.short_desc()

    def help(self):
        """An extensive help for the command. It will be shown when using the
        "help" command. It can contain newlines, since no post-formatting will
        be applied to its contents.
        """
        return self.long_desc()

    def add_options(self, parser):
        """
        Populate option parser with options available for this command.

        Registers the "Global Options" group on ``parser``. ``self.settings``
        must already be set, since the ``--loglevel`` help text reads
        ``self.settings['LOG_LEVEL']``.
        """
        group = OptionGroup(parser, "Global Options")
        group.add_option("--logfile", metavar="FILE",
                         help="log file. if omitted stderr will be used")
        group.add_option("-L", "--loglevel", metavar="LEVEL", default=None,
                         help=f"log level (default: {self.settings['LOG_LEVEL']})")
        group.add_option("--nolog", action="store_true",
                         help="disable logging completely")
        group.add_option("--profile", metavar="FILE", default=None,
                         help="write python cProfile stats to FILE")
        group.add_option("--pidfile", metavar="FILE",
                         help="write process ID to FILE")
        group.add_option("-s", "--set", action="append", default=[], metavar="NAME=VALUE",
                         help="set/override setting (may be repeated)")
        group.add_option("--pdb", action="store_true", help="enable pdb on failure")

        parser.add_option_group(group)

    def process_options(self, args, opts):
        """Apply the parsed global options ``opts`` to ``self.settings``.

        Every setting is written with ``priority='cmdline'``. Besides updating
        settings, this writes the pidfile when ``--pidfile`` is given and
        starts Twisted's failure debug mode when ``--pdb`` is given.

        Raises ``UsageError`` when a ``-s`` value cannot be converted by
        ``arglist_to_dict`` (i.e. it raises ``ValueError``).
        """
        try:
            self.settings.setdict(arglist_to_dict(opts.set),
                                  priority='cmdline')
        except ValueError as exc:
            # Chain explicitly so the original parsing error stays attached
            # as the cause of the usage error.
            raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False) from exc

        if opts.logfile:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_FILE', opts.logfile, priority='cmdline')

        if opts.loglevel:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_LEVEL', opts.loglevel, priority='cmdline')

        # Checked after --logfile/--loglevel, so --nolog wins over both.
        if opts.nolog:
            self.settings.set('LOG_ENABLED', False, priority='cmdline')

        if opts.pidfile:
            # The file is opened in text mode, where "\n" is already translated
            # to the platform line ending; writing os.linesep instead would
            # produce "\r\r\n" on Windows.
            with open(opts.pidfile, "w") as f:
                f.write(f"{os.getpid()}\n")

        if opts.pdb:
            failure.startDebugMode()

    def run(self, args, opts):
        """
        Entry point for running commands
        """
        raise NotImplementedError
109

110

111 7
class BaseRunSpiderCommand(ScrapyCommand):
    """
    Common class used to share functionality between the crawl, parse and runspider commands
    """

    def add_options(self, parser):
        """Register the parent command's options plus the spider-argument
        (``-a``) and feed-output (``-o``, ``-O``, ``-t``) options on ``parser``.
        """
        super().add_options(parser)
        parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                          help="set spider argument (may be repeated)")
        parser.add_option("-o", "--output", metavar="FILE", action="append",
                          help="append scraped items to the end of FILE (use - for stdout)")
        parser.add_option("-O", "--overwrite-output", metavar="FILE", action="append",
                          help="dump scraped items into FILE, overwriting any existing file")
        parser.add_option("-t", "--output-format", metavar="FORMAT",
                          help="format to use for dumping items")

    def process_options(self, args, opts):
        """Process the parent command's options, then the spider-specific ones.

        Replaces ``opts.spargs`` (a list of ``NAME=VALUE`` strings) with the
        result of ``arglist_to_dict``. When ``-o`` or ``-O`` was given, the
        ``FEEDS`` setting is set with ``priority='cmdline'`` to the value built
        by ``feed_process_params_from_cli``.

        Raises ``UsageError`` when a ``-a`` value cannot be converted by
        ``arglist_to_dict`` (i.e. it raises ``ValueError``).
        """
        super().process_options(args, opts)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError as exc:
            # Chain explicitly so the original parsing error stays attached
            # as the cause of the usage error.
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False) from exc
        # Both options use action="append" with no default, so each is None
        # unless it was passed at least once.
        if opts.output or opts.overwrite_output:
            feeds = feed_process_params_from_cli(
                self.settings,
                opts.output,
                opts.output_format,
                opts.overwrite_output,
            )
            self.settings.set('FEEDS', feeds, priority='cmdline')

Read our documentation on viewing source code.

Loading