From c8ff9bbc1566ad278a9558773c9ddc0120a3395d Mon Sep 17 00:00:00 2001 From: Aidan Mischke Date: Mon, 17 Mar 2025 10:27:22 +1000 Subject: [PATCH 1/2] Fix UnboundLocalError: cannot access local variable 'group_urls' where it is not associated with a value. --- web_scraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web_scraper.py b/web_scraper.py index 40fa4bf..8fd6ce1 100644 --- a/web_scraper.py +++ b/web_scraper.py @@ -107,10 +107,10 @@ def main(): domain_name = get_domain_name(args.url) - for group, group_urls in grouped_urls.items(): + for group, urls_in_group in grouped_urls.items(): output_filename = os.path.join(os.getcwd(), f"{domain_name}_{group}_content.txt") with open(output_filename, 'w', encoding='utf-8') as output_file: - for url in group_urls: + for url in urls_in_group: print(f"\nScraping: {url}") output_file.write(f"\n\n--- Content from: {url} ---\n\n") extract_text_from_url(url, output_file) From 0e94232756b5f544c43b2aa3449f0c7174b04303 Mon Sep 17 00:00:00 2001 From: Aidan Mischke Date: Mon, 17 Mar 2025 10:28:02 +1000 Subject: [PATCH 2/2] Fix NameError: name 'defaultdict' is not defined. --- web_scraper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/web_scraper.py b/web_scraper.py index 8fd6ce1..53f7faf 100644 --- a/web_scraper.py +++ b/web_scraper.py @@ -5,6 +5,7 @@ import os from urllib.parse import urlparse, urljoin import string +from collections import defaultdict def get_domain_name(url): parsed_url = urlparse(url)