diff --git a/playwright_news_crawler/daum_news_splitter.py b/playwright_news_crawler/daum_news_splitter.py
index 1690c7d..f3f0363 100644
--- a/playwright_news_crawler/daum_news_splitter.py
+++ b/playwright_news_crawler/daum_news_splitter.py
@@ -13,6 +13,7 @@ class DaumNewsSplitter:
     def __init__(self):
         self.NEWS_URLS_JSONL = "news_urls.jsonl"
         self.NEWS_SPLITTED_JSONL = "news_splitted.jsonl"
+        self.CLOUDFRONT_DOMAIN_NAME = "d169623rfmadt6.cloudfront.net"
         self.REQ_TIMEOUT = 10
         self.TIMEOUT = aiohttp.ClientTimeout(total=self.REQ_TIMEOUT)
         self.MAX_RETRIES = 2
@@ -96,7 +97,7 @@ async def download_and_upload_image(self, session, img_url: str, doc_id: str):
             print("S3 업로드 실패:", e)
             return None
 
-        return f"https://{s3_bucket}.s3.amazonaws.com/{s3_key}"
+        return f"https://{self.CLOUDFRONT_DOMAIN_NAME}/{s3_key}"
 
     async def parse_article_body(self, session, html: str):
         soup = BeautifulSoup(html, "lxml")