From 818594d977023c1443d3786998766b87da5a49c4 Mon Sep 17 00:00:00 2001 From: TTOAI <2ju1230@gamil.com> Date: Mon, 17 Nov 2025 06:38:07 +0900 Subject: [PATCH] =?UTF-8?q?S3=20url=20->=20CloudFront=20url=EB=A1=9C=20?= =?UTF-8?q?=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- playwright_news_crawler/daum_news_splitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/playwright_news_crawler/daum_news_splitter.py b/playwright_news_crawler/daum_news_splitter.py index 1690c7d..f3f0363 100644 --- a/playwright_news_crawler/daum_news_splitter.py +++ b/playwright_news_crawler/daum_news_splitter.py @@ -13,6 +13,7 @@ class DaumNewsSplitter: def __init__(self): self.NEWS_URLS_JSONL = "news_urls.jsonl" self.NEWS_SPLITTED_JSONL = "news_splitted.jsonl" + self.CLOUDFRONT_DOMAIN_NAME = "d169623rfmadt6.cloudfront.net" self.REQ_TIMEOUT = 10 self.TIMEOUT = aiohttp.ClientTimeout(total=self.REQ_TIMEOUT) self.MAX_RETRIES = 2 @@ -96,7 +97,7 @@ async def download_and_upload_image(self, session, img_url: str, doc_id: str): print("S3 업로드 실패:", e) return None - return f"https://{s3_bucket}.s3.amazonaws.com/{s3_key}" + return f"https://{self.CLOUDFRONT_DOMAIN_NAME}/{s3_key}" async def parse_article_body(self, session, html: str): soup = BeautifulSoup(html, "lxml")