2 files changed: +10 -2 lines changed
lines changed Original file line number Diff line number Diff line change @@ -173,7 +173,7 @@ def should_download(self, url_split):
173173 url = url_split .geturl ()
174174
175175 for exclude_url in self .excluded_urls :
176- if re .search (exclude_url , url ):
176+ if exclude_url .search (url ):
177177 return False
178178
179179 for ignored_prefix in self .ignored_prefixes :
@@ -215,7 +215,8 @@ def _parse_config(self):
215215 self .ignored_prefixes = self .options .ignored_prefixes .split (',' )
216216
217217 if self .options .excluded_urls :
218- self .excluded_urls = self .options .excluded_urls .split (',' )
218+ self .excluded_urls = [re .compile (pattern ) for pattern in self .options .excluded_urls .split (',' )]
219+
219220
220221 if self .options .workers :
221222 self .worker_size = self .options .workers
Original file line number | Diff line number | Diff line change
@@ -331,6 +331,13 @@ def test_run_once(self):
331331 self .assertEqual (8 , len (site .pages ))
332332 self .assertEqual (0 , len (site .error_pages ))
333333
334+ def test_exclude (self ):
335+ site = self ._run_crawler_plain (ThreadSiteCrawler , ["--exclude=/sub/" ])
336+
337+ # exclude /sub/ directory = 4 pages linked on the index
338+ self .assertEqual (4 , len (site .pages ))
339+ self .assertEqual (0 , len (site .error_pages ))
340+
334341 def test_depth_0 (self ):
335342 site = self ._run_crawler_plain (
336343 ThreadSiteCrawler , ["--depth" , "0" ], "/depth/root.html" )
You can’t perform that action at this time.
0 commit comments