scrapy / scrapy
1 7
import traceback
2 7
import warnings
3 7
from collections import defaultdict
4

5 7
from zope.interface import implementer
6

7 7
from scrapy.interfaces import ISpiderLoader
8 7
from scrapy.utils.misc import walk_modules
9 7
from scrapy.utils.spider import iter_spider_classes
10

11

12 7
@implementer(ISpiderLoader)
class SpiderLoader:
    """
    SpiderLoader is a class which locates and loads spiders
    in a Scrapy project.

    Spider classes are collected, at construction time, from the modules
    named in the ``SPIDER_MODULES`` setting and are indexed by their
    ``name`` attribute.
    """

    def __init__(self, settings):
        """
        Read the loader configuration from ``settings`` and load all spiders.

        ``settings`` must provide ``getlist`` and ``getbool``. The
        ``SPIDER_MODULES`` list names the modules to search, and
        ``SPIDER_LOADER_WARN_ONLY`` selects whether an ImportError raised
        while loading is reported as a warning instead of propagating.
        """
        self.spider_modules = settings.getlist('SPIDER_MODULES')
        self.warn_only = settings.getbool('SPIDER_LOADER_WARN_ONLY')
        # Spider name -> spider class. A class registered later under the
        # same name replaces the earlier one.
        self._spiders = {}
        # Spider name -> [(module name, class name), ...] for every class
        # seen under that name; used to report duplicate names.
        self._found = defaultdict(list)
        self._load_all_spiders()

    def _check_name_duplicates(self):
        """
        Emit a single UserWarning listing every spider name that was
        registered by more than one class. Do nothing if there are none.
        """
        dupes = []
        for name, locations in self._found.items():
            # A name seen only once is not a duplicate; decide once per
            # name rather than once per location.
            if len(locations) <= 1:
                continue
            dupes.extend(
                f"  {cls_name} named {name!r} (in {mod})"
                for mod, cls_name in locations
            )

        if dupes:
            dupes_string = "\n\n".join(dupes)
            warnings.warn(
                "There are several spiders with the same name:\n\n"
                f"{dupes_string}\n\n  This can cause unexpected behavior.",
                category=UserWarning,
            )

    def _load_spiders(self, module):
        """
        Register every spider class that ``iter_spider_classes`` yields
        for ``module``, recording where each one was found.
        """
        for spcls in iter_spider_classes(module):
            self._found[spcls.name].append((module.__name__, spcls.__name__))
            self._spiders[spcls.name] = spcls

    def _load_all_spiders(self):
        """
        Load spiders from every entry of ``self.spider_modules``, then
        check for duplicate spider names.

        An ImportError raised while processing an entry is re-raised,
        unless ``self.warn_only`` is true, in which case it is reported
        as a RuntimeWarning (including the traceback) and loading
        continues with the next entry.
        """
        for name in self.spider_modules:
            try:
                for module in walk_modules(name):
                    self._load_spiders(module)
            except ImportError:
                if not self.warn_only:
                    raise
                warnings.warn(
                    f"\n{traceback.format_exc()}Could not load spiders "
                    f"from module '{name}'. "
                    "See above traceback for details.",
                    category=RuntimeWarning,
                )
        self._check_name_duplicates()

    @classmethod
    def from_settings(cls, settings):
        """
        Alternate constructor: return ``cls(settings)``.
        """
        return cls(settings)

    def load(self, spider_name):
        """
        Return the Spider class for the given spider name. If the spider
        name is not found, raise a KeyError.
        """
        try:
            return self._spiders[spider_name]
        except KeyError:
            # The lookup failure carries no extra information, so suppress
            # it and surface only the descriptive error.
            raise KeyError(f"Spider not found: {spider_name}") from None

    def find_by_request(self, request):
        """
        Return the list of spider names that can handle the given request.

        A spider is included when its class's ``handles_request(request)``
        returns a true value.
        """
        return [
            name for name, spcls in self._spiders.items()
            if spcls.handles_request(request)
        ]

    def list(self):
        """
        Return a list with the names of all spiders available in the project.
        """
        return list(self._spiders)

Read our documentation on viewing source code.

Loading