|
3 | 3 |
|
4 | 4 | from celery import Celery, current_task
|
5 | 5 |
|
6 |
| -import subprocess |
7 | 6 | import traceback
|
8 | 7 |
|
9 | 8 | import pandas as pd
|
|
30 | 29 |
|
31 | 30 | import os
|
32 | 31 | from collections import defaultdict
|
| 32 | +import requests |
33 | 33 |
|
34 | 34 | from db import update_progress, update_step, fetch_package_status, SavedPackageData, Session, PackageProcessStatus
|
35 | 35 | from util import (
|
@@ -74,19 +74,17 @@ def download_file(package_status_id, package_id, link, session):
|
74 | 74 |
|
75 | 75 | if not check_whitelisted_link(link):
|
76 | 76 | print('checking content type')
|
77 |
| - command = f"curl -L -I {link}" |
78 |
| - process = subprocess.run(command, shell=True, capture_output=True, text=True) |
79 |
| - |
80 |
| - if "application/octet-stream" not in process.stdout or "HTTP/2 400" in process.stdout: |
81 |
| - print('The link does not point to a zip file.') |
82 |
| - raise Exception('EXPIRED_LINK') |
| 77 | + r = requests.head(link, allow_redirects=True) |
| 78 | + if r.status_code != 200 or 'content-type' not in r.headers or 'application/octet-stream' not in r.headers['content-type']: |
| 79 | + print('The link does not point to a valid file.') |
| 80 | + raise Exception('INVALID_LINK') |
83 | 81 |
|
84 | 82 | print('downloading')
|
85 | 83 | update_step(package_status_id, package_id, 'DOWNLOADING', session)
|
86 |
| - command = f"curl -L -o {path} {link}" |
87 |
| - |
88 |
| - process = subprocess.Popen(command, shell=True) |
89 |
| - process.wait() |
| 84 | + r = requests.get(link, allow_redirects=True, stream=True) |
| 85 | + with open(path, 'wb') as f: |
| 86 | + for chunk in r.iter_content(chunk_size=10*1024): |
| 87 | + f.write(chunk) |
90 | 88 |
|
91 | 89 | return path
|
92 | 90 |
|
|
0 commit comments