-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcrawl.py
More file actions
27 lines (23 loc) · 764 Bytes
/
crawl.py
File metadata and controls
27 lines (23 loc) · 764 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from WebScraper.Sitemap import Sitemap
from WebScraper.TQueue import TaskQueue
from WebScraper.ChromeBrowser import ChromeBrowser
from WebScraper.Scraper import Scraper
import json
def main(sitemap_path: str = "Sitemaps/wegsman.json") -> None:
    """Run the scraper for one sitemap file and print what it returns.

    The sitemap JSON is read from ``sitemap_path``; its ``_id``,
    ``startUrl`` and ``selectors`` entries are handed to ``Sitemap``
    (a missing key is passed through as ``None``). A ``Scraper`` is then
    built from a fresh ``TaskQueue``, that sitemap and a ``ChromeBrowser``,
    and the value returned by ``Scraper.run()`` is printed.

    Args:
        sitemap_path: Path of the sitemap JSON file to load. Defaults to
            the file this script has always used.

    Raises:
        Any exception raised while reading the file or while scraping is
        propagated to the caller unchanged; the browser, if one was
        created, is quit first.
    """
    # No browser exists yet, so a failure here needs no cleanup.
    with open(sitemap_path, encoding="utf-8") as f:
        json_sitemap = json.load(f)

    browser = None
    try:
        sitemap = Sitemap(
            json_sitemap.get("_id"),
            json_sitemap.get("startUrl"),
            json_sitemap.get("selectors"),
        )
        queue = TaskQueue()
        # NOTE(review): the meaning of the two empty arguments is defined by
        # ChromeBrowser, which is not visible here; they are passed as before.
        browser = ChromeBrowser([], {})
        scraper = Scraper(queue, sitemap, browser)
        data = scraper.run()
        print(data)
    finally:
        # `finally` (rather than `except Exception`) also covers
        # KeyboardInterrupt/SystemExit, and guarantees quit() runs exactly
        # once on both the success and the failure path.
        if browser is not None:
            browser.quit()
# Only start a crawl when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()