5
5
import logging
6
6
import mimetypes
7
7
import os
8
+ from http import HTTPStatus
8
9
from typing import Iterable , Optional , Tuple
9
10
10
11
from pip ._vendor .requests .models import Response
@@ -28,13 +29,21 @@ def _get_http_response_size(resp: Response) -> Optional[int]:
28
29
return None
29
30
30
31
32
+ def _get_http_response_etag_or_date (resp : Response ) -> Optional [str ]:
33
+ """
34
+ Return either the ETag or Date header (or None if neither exists).
35
+ The return value can be used in an If-Range header.
36
+ """
37
+ return resp .headers .get ("etag" , resp .headers .get ("date" ))
38
+
39
+
31
40
def _prepare_download (
32
41
resp : Response ,
33
42
link : Link ,
34
43
progress_bar : str ,
44
+ total_length : Optional [int ],
45
+ range_start : Optional [int ] = None ,
35
46
) -> Iterable [bytes ]:
36
- total_length = _get_http_response_size (resp )
37
-
38
47
if link .netloc == PyPI .file_storage_domain :
39
48
url = link .show_url
40
49
else :
@@ -43,10 +52,17 @@ def _prepare_download(
43
52
logged_url = redact_auth_from_url (url )
44
53
45
54
if total_length :
46
- logged_url = f"{ logged_url } ({ format_size (total_length )} )"
55
+ if range_start is not None :
56
+ logged_url = "{} ({}/{})" .format (
57
+ logged_url , format_size (range_start ), format_size (total_length )
58
+ )
59
+ else :
60
+ logged_url = "{} ({})" .format (logged_url , format_size (total_length ))
47
61
48
62
if is_from_cache (resp ):
49
63
logger .info ("Using cached %s" , logged_url )
64
+ elif range_start is not None :
65
+ logger .info ("Resume download %s" , logged_url )
50
66
else :
51
67
logger .info ("Downloading %s" , logged_url )
52
68
@@ -66,7 +82,9 @@ def _prepare_download(
66
82
if not show_progress :
67
83
return chunks
68
84
69
- renderer = get_download_progress_renderer (bar_type = progress_bar , size = total_length )
85
+ renderer = get_download_progress_renderer (
86
+ bar_type = progress_bar , size = total_length , initial_progress = range_start
87
+ )
70
88
return renderer (chunks )
71
89
72
90
@@ -113,10 +131,27 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
113
131
return filename
114
132
115
133
116
def _http_get_download(
    session: PipSession,
    link: Link,
    range_start: Optional[int] = None,
    if_range: Optional[str] = None,
) -> Response:
    """
    Issue a streaming GET request for the file behind ``link``.

    :param session: session used to perform the request.
    :param link: the link to download; any URL fragment is stripped first.
    :param range_start: when given, request only the bytes from this offset
        onward via a ``Range`` header (used to resume partial downloads).
    :param if_range: validator (ETag or Date) sent as ``If-Range`` so the
        server ignores the range when the file has changed.
    :raises NetworkConnectionError: when the response status indicates an
        HTTP error; the status is logged before re-raising.
    """
    target_url = link.url.split("#", 1)[0]
    request_headers = dict(HEADERS)
    if range_start is not None:
        # Request a partial download.
        request_headers["Range"] = f"bytes={range_start}-"
    if if_range is not None:
        # Make sure the file hasn't changed on the server.
        request_headers["If-Range"] = if_range
    try:
        resp = session.get(target_url, headers=request_headers, stream=True)
        raise_for_status(resp)
    except NetworkConnectionError as exc:
        assert exc.response is not None
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link
        )
        raise
    return resp
121
156
122
157
@@ -125,28 +160,91 @@ def __init__(
125
160
self ,
126
161
session : PipSession ,
127
162
progress_bar : str ,
163
+ resume_incomplete : bool ,
164
+ resume_attempts : int ,
128
165
) -> None :
129
166
self ._session = session
130
167
self ._progress_bar = progress_bar
168
+ self ._resume_incomplete = resume_incomplete
169
+ assert (
170
+ resume_attempts > 0
171
+ ), "Number of max incomplete download retries must be positive"
172
+ self ._resume_attempts = resume_attempts
131
173
132
174
def __call__(self, link: Link, location: str) -> Tuple[str, str]:
    """Download the file given by link into location.

    Returns a ``(filepath, content_type)`` tuple.  If the connection is
    closed before the whole file arrives and resumption is enabled, retry
    with HTTP Range requests up to ``self._resume_attempts`` times; if the
    file still cannot be fully retrieved, remove the partial file and raise
    ``RuntimeError``.
    """
    resp = _http_get_download(self._session, link)
    total_length = _get_http_response_size(resp)
    # Remember a validator so resumed range requests can detect whether the
    # file changed on the server in the meantime.
    etag_or_date = _get_http_response_etag_or_date(resp)

    filename = _get_http_response_filename(resp, link)
    filepath = os.path.join(location, filename)

    chunks = _prepare_download(resp, link, self._progress_bar, total_length)
    bytes_received = 0

    with open(filepath, "wb") as content_file:

        # Process the initial response
        for chunk in chunks:
            bytes_received += len(chunk)
            content_file.write(chunk)

        if self._resume_incomplete:
            attempts_left = self._resume_attempts

            while total_length is not None and bytes_received < total_length:
                if attempts_left <= 0:
                    break
                attempts_left -= 1

                # Attempt to resume download
                resume_resp = _http_get_download(
                    self._session,
                    link,
                    range_start=bytes_received,
                    if_range=etag_or_date,
                )

                restart = resume_resp.status_code != HTTPStatus.PARTIAL_CONTENT
                # If the server responded with 200 (e.g. when the file has
                # been modified on the server or the server doesn't support
                # range requests), reset the download to start from the
                # beginning.
                if restart:
                    content_file.seek(0)
                    content_file.truncate()
                    bytes_received = 0
                    total_length = _get_http_response_size(resume_resp)
                    etag_or_date = _get_http_response_etag_or_date(resume_resp)

                chunks = _prepare_download(
                    resume_resp,
                    link,
                    self._progress_bar,
                    total_length,
                    range_start=bytes_received,
                )
                for chunk in chunks:
                    bytes_received += len(chunk)
                    content_file.write(chunk)

    if total_length is not None and bytes_received < total_length:
        if self._resume_incomplete:
            logger.critical(
                "Failed to download %s after %d resumption attempts.",
                link,
                self._resume_attempts,
            )
        else:
            # NOTE: the trailing fragment starts with a space so the joined
            # message reads "... automatically resume ..." (the original
            # concatenation produced "automaticallyresume").
            logger.critical(
                "Failed to download %s."
                " Set --incomplete-downloads=resume to automatically"
                " resume incomplete download.",
                link,
            )
        # Remove the incomplete file only after the `with` block has closed
        # it, so the removal also succeeds on Windows.
        os.remove(filepath)
        raise RuntimeError("Incomplete download")

    content_type = resp.headers.get("Content-Type", "")
    return filepath, content_type
152
250
@@ -156,32 +254,17 @@ def __init__(
156
254
def __init__(
    self,
    session: PipSession,
    progress_bar: str,
    resume_incomplete: bool,
    resume_attempts: int,
) -> None:
    """Set up batch downloading by wrapping a single-file ``Downloader``.

    All per-link work is delegated to one shared ``Downloader`` configured
    with the same session, progress-bar style, and resumption options.
    """
    self._downloader = Downloader(
        session,
        progress_bar,
        resume_incomplete,
        resume_attempts,
    )
162
263
163
264
def __call__ (
164
265
self , links : Iterable [Link ], location : str
165
266
) -> Iterable [Tuple [Link , Tuple [str , str ]]]:
166
267
"""Download the files given by links into location."""
167
268
for link in links :
168
- try :
169
- resp = _http_get_download (self ._session , link )
170
- except NetworkConnectionError as e :
171
- assert e .response is not None
172
- logger .critical (
173
- "HTTP error %s while getting %s" ,
174
- e .response .status_code ,
175
- link ,
176
- )
177
- raise
178
-
179
- filename = _get_http_response_filename (resp , link )
180
- filepath = os .path .join (location , filename )
181
-
182
- chunks = _prepare_download (resp , link , self ._progress_bar )
183
- with open (filepath , "wb" ) as content_file :
184
- for chunk in chunks :
185
- content_file .write (chunk )
186
- content_type = resp .headers .get ("Content-Type" , "" )
269
+ filepath , content_type = self ._downloader (link , location )
187
270
yield link , (filepath , content_type )
0 commit comments