Co-authored-by: alkazaz <alkazaz@kth.se>
Co-authored-by: swill <swill@kth.se>
Co-authored-by: lerjevik <lerjevik@kth.se>
Co-authored-by: aljica <aljica@kth.se>
Showing 1 of 1 files from the diff.
scrapy/linkextractors/__init__.py
changed.
@@ -94,20 +94,31 @@
Loading
94 | 94 | def _link_allowed(self, link): |
|
95 | 95 | if not _is_valid_url(link.url): |
|
96 | 96 | return False |
|
97 | - | if self.allow_res and not _matches(link.url, self.allow_res): |
|
98 | - | return False |
|
99 | - | if self.deny_res and _matches(link.url, self.deny_res): |
|
97 | + | if not self._check_link_res(link, self.allow_res, self.deny_res): |
|
100 | 98 | return False |
|
101 | 99 | parsed_url = urlparse(link.url) |
|
102 | - | if self.allow_domains and not url_is_from_any_domain(parsed_url, self.allow_domains): |
|
103 | - | return False |
|
104 | - | if self.deny_domains and url_is_from_any_domain(parsed_url, self.deny_domains): |
|
100 | + | if not self._check_link_domains(parsed_url, self.allow_domains, self.deny_domains): |
|
105 | 101 | return False |
|
106 | 102 | if self.deny_extensions and url_has_any_extension(parsed_url, self.deny_extensions): |
|
107 | 103 | return False |
|
108 | 104 | if self.restrict_text and not _matches(link.text, self.restrict_text): |
|
109 | 105 | return False |
|
110 | 106 | return True |
|
107 | + | ||
108 | + | def _check_link_res(self, link, allow_res, deny_res): |
|
109 | + | if allow_res and not _matches(link.url, allow_res): |
|
110 | + | return False |
|
111 | + | if deny_res and _matches(link.url, deny_res): |
|
112 | + | return False |
|
113 | + | return True |
|
114 | + | ||
115 | + | def _check_link_domains(self, parsed_url, allow_domains, deny_domains): |
|
116 | + | if allow_domains and not url_is_from_any_domain(parsed_url, allow_domains): |
|
117 | + | return False |
|
118 | + | if deny_domains and url_is_from_any_domain(parsed_url, deny_domains): |
|
119 | + | return False |
|
120 | + | return True |
|
121 | + | ||
111 | 122 | ||
112 | 123 | def matches(self, url): |
|
113 | 124 |
Files | Coverage |
---|---|
scrapy | 88.03% |
Project Totals (158 files) | 88.03% |
592915698
592915698
592915698
592915698
592915698
592915698
Sunburst
The innermost circle represents the entire project; moving away from the center are folders and then, finally, individual files.
The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files.
The size and color of each slice represent the number of statements and the coverage, respectively.