scrapy / scrapy
1
"""
2
Item Loader
3

4
See documentation in docs/topics/loaders.rst
5
"""
6 7
import itemloaders
7

8 7
from scrapy.item import Item
9 7
from scrapy.selector import Selector
10

11

12 7
class ItemLoader(itemloaders.ItemLoader):
    """
    Convenience layer for filling an :ref:`item <topics-items>` with scraped
    data, passing the values through the configured
    :ref:`field processors <topics-loaders-processors>`.
    If a ``selector`` or a ``response`` is supplied at instantiation, the
    loader can also extract data from web pages by means of
    :ref:`selectors <topics-selectors>`.

    :param item: The item instance that later calls to
        :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
        or :meth:`~ItemLoader.add_value` will populate.
    :type item: scrapy.item.Item

    :param selector: The selector data is extracted from by the
        :meth:`add_xpath`, :meth:`add_css`, :meth:`replace_xpath`, or
        :meth:`replace_css` method.
    :type selector: :class:`~scrapy.selector.Selector` object

    :param response: The response from which a selector is built with the
        :attr:`default_selector_class`. It is not used for that purpose when
        the selector argument is given.
    :type response: :class:`~scrapy.http.Response` object

    When no item is passed, a new one is created automatically from the class
    in :attr:`default_item_class`.

    The item, selector, response and any remaining keyword arguments end up
    in the Loader context (available through the :attr:`context` attribute).

    .. attribute:: item

        The item object this Item Loader is parsing.
        It is mostly used as a property, so before trying to override this
        value you may want to look at :attr:`default_item_class` first.

    .. attribute:: context

        The :ref:`Context <loaders-context>` currently active for this
        Item Loader.

    .. attribute:: default_item_class

        An :ref:`item <topics-items>` class (or factory) used to create
        items when none is passed to the ``__init__`` method.

    .. attribute:: default_input_processor

        The input processor applied by default to fields that do not specify
        one.

    .. attribute:: default_output_processor

        The output processor applied by default to fields that do not specify
        one.

    .. attribute:: default_selector_class

        The class used to build the :attr:`selector` of this
        :class:`ItemLoader` when only a response is passed to the
        ``__init__`` method. It is ignored when a selector is passed to the
        ``__init__`` method.
        Subclasses sometimes override this attribute.

    .. attribute:: selector

        The :class:`~scrapy.selector.Selector` object data is extracted from.
        It is either the selector passed to the ``__init__`` method or one
        built from the response passed to the ``__init__`` method with the
        :attr:`default_selector_class`. This attribute is meant to be
        read-only.
    """

    default_item_class = Item
    default_selector_class = Selector

    def __init__(self, item=None, selector=None, response=None, parent=None, **context):
        # Only build a selector when the caller gave a response but no
        # selector of their own; an explicit selector always wins.
        needs_selector = selector is None and response is not None
        if needs_selector:
            selector = self.default_selector_class(response)
        # The response (possibly None) is always recorded in the context
        # that gets forwarded to the parent constructor.
        context["response"] = response
        super().__init__(item=item, selector=selector, parent=parent, **context)

Read our documentation on viewing source code.

Loading