From f032fc31e0ee6f2e2727a22c2891ef2e181a3e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=84=9D=EC=A7=80=EC=98=81/=EC=B1=85=EC=9E=84=EC=97=B0?= =?UTF-8?q?=EA=B5=AC=EC=9B=90/SW=EA=B3=B5=ED=95=99=28=EC=97=B0=29Open=20So?= =?UTF-8?q?urce=20TP?= Date: Fri, 20 Mar 2026 16:39:39 +0900 Subject: [PATCH] fix(download): check downloadable link with get header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 석지영/책임연구원/SW공학(연)Open Source TP --- src/fosslight_util/download.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/fosslight_util/download.py b/src/fosslight_util/download.py index 43202de..bafa71e 100755 --- a/src/fosslight_util/download.py +++ b/src/fosslight_util/download.py @@ -60,15 +60,14 @@ def alarm_handler(signum, frame): def is_downloadable(url): try: - h = requests.head(url, allow_redirects=True) - header = h.headers - content_type = header.get('content-type') - if 'text/html' in content_type.lower(): - return False - content_disposition = header.get('content-disposition') - if content_disposition and 'attachment' in content_disposition.lower(): + with requests.get(url, stream=True, allow_redirects=True, timeout=10) as r: + if r.status_code >= 400: + return False + content_type = r.headers.get('content-type', '').lower() + if 'text/html' in content_type: + logger.warning(f"Content-Type is text/html, not a downloadable link: {url}") + return False return True - return True except Exception as e: logger.warning(f"is_downloadable - failed: {e}") return False