-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsafe_download.py
96 lines (77 loc) · 2.71 KB
/
safe_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import tempfile as tmp
from contextlib import contextmanager
from typing import Iterator

import requests
from tqdm import tqdm
# Compatibility shim: when the running interpreter's ``os`` module has no
# ``replace`` attribute, alias it to ``os.rename`` so the name always exists.
try:
    os.replace
except AttributeError:
    os.replace = os.rename
def get_valid_filename(s: str):
    """Return *s* with every disallowed character replaced by ``'_'``.

    A character is kept when ``str.isalnum`` accepts it or when it is one of
    ``.``, ``_``, ``-``, space, ``(`` or ``)``; every other character is
    replaced by an underscore. The result has the same length as *s*.
    """
    allowed_extras = '._- ()'
    sanitized = []
    for char in s:
        keep = char.isalnum() or char in allowed_extras
        sanitized.append(char if keep else '_')
    return ''.join(sanitized)
# temp file funcs taken from https://stackoverflow.com/a/29491523/12946163
@contextmanager
def tempfile(suffix='', dir=None) -> Iterator[str]:
    """ Context for temporary file.

    Will find a free temporary filename upon entering
    and will try to delete the file on leaving, even in case of an exception.
    It is not an error if the file no longer exists at that point (for
    example because the caller renamed it away).

    Parameters
    ----------
    suffix : string
        optional file suffix
    dir : string
        optional directory to save temporary file in

    Yields
    ------
    string
        path of the (already created, closed) temporary file
    """
    # delete=False: the file must survive close() so the caller can reopen it
    # by name; removal is handled explicitly in the ``finally`` below.
    tf = tmp.NamedTemporaryFile(delete=False, suffix=suffix, dir=dir)
    tf.close()
    try:
        yield tf.name
    finally:
        try:
            os.remove(tf.name)
        except FileNotFoundError:
            # Already removed or moved by the caller; nothing left to clean up.
            # Any other OSError (permissions, etc.) still propagates.
            pass
@contextmanager
def open_atomic(filepath, *args, **kwargs):
    """ Open temporary file object that atomically moves to destination upon
    exiting.

    Allows reading and writing to and from the same filename.

    The file will not be moved to destination in case of an exception.

    Parameters
    ----------
    filepath : string
        the file path to be opened
    fsync : bool
        whether to force write the file to disk (keyword-only, default True)
    *args : mixed
        Any valid arguments for :code:`open`
    **kwargs : mixed
        Any valid keyword arguments for :code:`open`
    """
    # pop (not get): ``fsync`` is this function's own option and must not be
    # forwarded to open(), which would reject it with a TypeError.
    fsync = kwargs.pop('fsync', True)
    # The temporary file is created in the destination's directory so the
    # final move does not have to cross directories/filesystems.
    target_dir = os.path.dirname(os.path.abspath(filepath))
    with tempfile(dir=target_dir) as tmppath:
        with open(tmppath, *args, **kwargs) as file:
            try:
                yield file
            finally:
                if fsync:
                    file.flush()
                    os.fsync(file.fileno())
        # Only reached when the caller's body finished without raising; an
        # exception propagates out of the inner ``with`` and skips the move.
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when the target already exists.
        os.replace(tmppath, filepath)
def safe_download_url(url: str, path: str):
    """Download *url* to *path* while displaying a progress bar.

    The body is streamed in 1 KiB chunks into a temporary file via
    ``open_atomic``, so *path* is only replaced once the whole transfer has
    finished without an exception. The progress total comes from the
    ``content-length`` response header (0 when the header is absent).

    Parameters
    ----------
    url : string
        address to fetch (passed to ``safe_request``)
    path : string
        destination file path; also used as the progress bar label
    """
    r = safe_request(url, stream=True)
    # NOTE(review): the HTTP status is not checked here, so an error response
    # body would be written to ``path`` like any other content - confirm
    # whether callers want ``r.raise_for_status()``.
    try:
        r.raw.decode_content = True
        total = int(r.headers.get('content-length', 0))
        with open_atomic(path, 'wb') as f, tqdm(desc=path,
                                                total=total,
                                                unit='B',
                                                unit_scale=True,
                                                unit_divisor=1024) as bar:
            for data in r.iter_content(chunk_size=1024):
                bar.update(f.write(data))
    finally:
        # A streamed response holds its connection until fully consumed or
        # closed; close explicitly so a failure mid-download does not leak it.
        r.close()
def safe_request(*args, **kwargs):
    """This function handles the timeouts by simply retrying.

    All arguments are forwarded to ``requests.get``. A ``timeout`` of 10
    seconds is applied unless the caller supplies their own. The request is
    repeated for as long as it raises ``requests.Timeout``; any other
    exception propagates to the caller.

    Returns
    -------
    The response object returned by ``requests.get``.
    """
    # setdefault instead of a hard-coded keyword: a caller-provided timeout
    # would otherwise collide with ours and raise TypeError.
    kwargs.setdefault('timeout', 10)
    # Retry in a loop rather than recursively so a long run of timeouts
    # cannot exhaust the interpreter's recursion limit.
    while True:
        try:
            return requests.get(*args, **kwargs)
        except requests.Timeout:
            continue