Skip to content

Commit 8d2ac7c

Browse files
authored
security fix: use requests for downloading data packages (#34)
1 parent c8226e3 commit 8d2ac7c

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

src/tasks.py

+9 -11
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
 
 from celery import Celery, current_task
 
-import subprocess
 import traceback
 
 import pandas as pd
@@ -30,6 +29,7 @@
 
 import os
 from collections import defaultdict
+import requests
 
 from db import update_progress, update_step, fetch_package_status, SavedPackageData, Session, PackageProcessStatus
 from util import (
@@ -74,19 +74,17 @@ def download_file(package_status_id, package_id, link, session):
 
     if not check_whitelisted_link(link):
         print('checking content type')
-        command = f"curl -L -I {link}"
-        process = subprocess.run(command, shell=True, capture_output=True, text=True)
-
-        if "application/octet-stream" not in process.stdout or "HTTP/2 400" in process.stdout:
-            print('The link does not point to a zip file.')
-            raise Exception('EXPIRED_LINK')
+        r = requests.head(link, allow_redirects=True)
+        if r.status_code != 200 or 'content-type' not in r.headers or 'application/octet-stream' not in r.headers['content-type']:
+            print('The link does not point to a valid file.')
+            raise Exception('INVALID_LINK')
 
     print('downloading')
     update_step(package_status_id, package_id, 'DOWNLOADING', session)
-    command = f"curl -L -o {path} {link}"
-
-    process = subprocess.Popen(command, shell=True)
-    process.wait()
+    r = requests.get(link, allow_redirects=True, stream=True)
+    with open(path, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=10*1024):
+            f.write(chunk)
 
     return path
 
src/util.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
 import jwt
 import requests
 
-discord_link_regex = r'https:\/\/click\.discord\.com\/ls\/click\?upn=([A-Za-z0-9-_]{500,})'
+discord_link_regex = r'^https:\/\/click\.discord\.com\/ls\/click\?upn=([A-Za-z0-9-_]{500,})$'
 dl_whitelisted_domains_raw = os.getenv('DL_ZIP_WHITELISTED_DOMAINS')
 dl_whitelisted_domains = dl_whitelisted_domains_raw and dl_whitelisted_domains_raw.split(',') or []
 
0 commit comments

Comments
 (0)