Skip to content

Commit 0617d7c

Browse files
yichi-yang authored and gmargaritis committed
Add support to resume incomplete download
1 parent 102d818 commit 0617d7c

File tree

9 files changed

+413
-48
lines changed

9 files changed

+413
-48
lines changed

news/11180.feature.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for resuming incomplete downloads. The behavior can be controlled using the flags ``--incomplete-downloads`` and ``--incomplete-download-retries``.

src/pip/_internal/cli/cmdoptions.py

+21
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,25 @@ def check_list_path_option(options: Values) -> None:
10281028
help=("Enable deprecated functionality, that will be removed in the future."),
10291029
)
10301030

1031+
incomplete_downloads: Callable[..., Option] = partial(
1032+
Option,
1033+
"--incomplete-downloads",
1034+
dest="resume_incomplete",
1035+
choices=["resume", "discard"],
1036+
default="discard",
1037+
metavar="policy",
1038+
help="How to handle an incomplete download: resume, discard (default to %default).",
1039+
)
1040+
1041+
incomplete_download_retries: Callable[..., Option] = partial(
1042+
Option,
1043+
"--incomplete-download-retries",
1044+
dest="resume_attempts",
1045+
type="int",
1046+
default=5,
1047+
help="Maximum number of resumption retries for incomplete download "
1048+
"(default %default times).",
1049+
)
10311050

10321051
##########
10331052
# groups #
@@ -1061,6 +1080,8 @@ def check_list_path_option(options: Values) -> None:
10611080
no_python_version_warning,
10621081
use_new_feature,
10631082
use_deprecated_feature,
1083+
incomplete_downloads,
1084+
incomplete_download_retries,
10641085
],
10651086
}
10661087

src/pip/_internal/cli/progress_bars.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def _rich_progress_bar(
2626
*,
2727
bar_type: str,
2828
size: int,
29+
initial_progress: Optional[int] = None,
2930
) -> Generator[bytes, None, None]:
3031
assert bar_type == "on", "This should only be used in the default mode."
3132

@@ -51,6 +52,8 @@ def _rich_progress_bar(
5152

5253
progress = Progress(*columns, refresh_per_second=5)
5354
task_id = progress.add_task(" " * (get_indentation() + 2), total=total)
55+
if initial_progress is not None:
56+
progress.update(task_id, advance=initial_progress)
5457
with progress:
5558
for chunk in iterable:
5659
yield chunk
@@ -80,15 +83,15 @@ def write_progress(current: int, total: int) -> None:
8083

8184

8285
def get_download_progress_renderer(
83-
*, bar_type: str, size: Optional[int] = None
86+
*, bar_type: str, size: Optional[int] = None, initial_progress: Optional[int] = None
8487
) -> DownloadProgressRenderer:
8588
"""Get an object that can be used to render the download progress.
8689
8790
Returns a callable, that takes an iterable to "wrap".
8891
"""
8992
if bar_type == "on":
90-
return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size)
93+
return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size, initial_progress=initial_progress,)
9194
elif bar_type == "raw":
92-
return functools.partial(_raw_progress_bar, size=size)
95+
return functools.partial(_raw_progress_bar, size=size, initial_progress=initial_progress,)
9396
else:
9497
return iter # no-op, when passed an iterator

src/pip/_internal/cli/req_command.py

+4
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ def make_requirement_preparer(
127127
"fast-deps has no effect when used with the legacy resolver."
128128
)
129129

130+
resume_incomplete = options.resume_incomplete == "resume"
131+
130132
return RequirementPreparer(
131133
build_dir=temp_build_dir_path,
132134
src_dir=options.src_dir,
@@ -142,6 +144,8 @@ def make_requirement_preparer(
142144
lazy_wheel=lazy_wheel,
143145
verbosity=verbosity,
144146
legacy_resolver=legacy_resolver,
147+
resume_incomplete=resume_incomplete,
148+
resume_attempts=options.resume_attempts,
145149
)
146150

147151
@classmethod

src/pip/_internal/network/download.py

+120-37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import mimetypes
77
import os
8+
from http import HTTPStatus
89
from typing import Iterable, Optional, Tuple
910

1011
from pip._vendor.requests.models import Response
@@ -28,13 +29,21 @@ def _get_http_response_size(resp: Response) -> Optional[int]:
2829
return None
2930

3031

32+
def _get_http_response_etag_or_date(resp: Response) -> Optional[str]:
33+
"""
34+
Return either the ETag or Date header (or None if neither exists).
35+
The return value can be used in an If-Range header.
36+
"""
37+
return resp.headers.get("etag", resp.headers.get("date"))
38+
39+
3140
def _prepare_download(
3241
resp: Response,
3342
link: Link,
3443
progress_bar: str,
44+
total_length: Optional[int],
45+
range_start: Optional[int] = None,
3546
) -> Iterable[bytes]:
36-
total_length = _get_http_response_size(resp)
37-
3847
if link.netloc == PyPI.file_storage_domain:
3948
url = link.show_url
4049
else:
@@ -43,10 +52,17 @@ def _prepare_download(
4352
logged_url = redact_auth_from_url(url)
4453

4554
if total_length:
46-
logged_url = f"{logged_url} ({format_size(total_length)})"
55+
if range_start is not None:
56+
logged_url = "{} ({}/{})".format(
57+
logged_url, format_size(range_start), format_size(total_length)
58+
)
59+
else:
60+
logged_url = "{} ({})".format(logged_url, format_size(total_length))
4761

4862
if is_from_cache(resp):
4963
logger.info("Using cached %s", logged_url)
64+
elif range_start is not None:
65+
logger.info("Resume download %s", logged_url)
5066
else:
5167
logger.info("Downloading %s", logged_url)
5268

@@ -66,7 +82,9 @@ def _prepare_download(
6682
if not show_progress:
6783
return chunks
6884

69-
renderer = get_download_progress_renderer(bar_type=progress_bar, size=total_length)
85+
renderer = get_download_progress_renderer(
86+
bar_type=progress_bar, size=total_length, initial_progress=range_start
87+
)
7088
return renderer(chunks)
7189

7290

@@ -113,10 +131,27 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
113131
return filename
114132

115133

116-
def _http_get_download(session: PipSession, link: Link) -> Response:
134+
def _http_get_download(
135+
session: PipSession,
136+
link: Link,
137+
range_start: Optional[int] = None,
138+
if_range: Optional[str] = None,
139+
) -> Response:
117140
target_url = link.url.split("#", 1)[0]
118-
resp = session.get(target_url, headers=HEADERS, stream=True)
119-
raise_for_status(resp)
141+
headers = {**HEADERS}
142+
# request a partial download
143+
if range_start is not None:
144+
headers["Range"] = "bytes={}-".format(range_start)
145+
# make sure the file hasn't changed
146+
if if_range is not None:
147+
headers["If-Range"] = if_range
148+
try:
149+
resp = session.get(target_url, headers=headers, stream=True)
150+
raise_for_status(resp)
151+
except NetworkConnectionError as e:
152+
assert e.response is not None
153+
logger.critical("HTTP error %s while getting %s", e.response.status_code, link)
154+
raise
120155
return resp
121156

122157

@@ -125,28 +160,91 @@ def __init__(
125160
self,
126161
session: PipSession,
127162
progress_bar: str,
163+
resume_incomplete: bool,
164+
resume_attempts: int,
128165
) -> None:
129166
self._session = session
130167
self._progress_bar = progress_bar
168+
self._resume_incomplete = resume_incomplete
169+
assert (
170+
resume_attempts > 0
171+
), "Number of max incomplete download retries must be positive"
172+
self._resume_attempts = resume_attempts
131173

132174
def __call__(self, link: Link, location: str) -> Tuple[str, str]:
133175
"""Download the file given by link into location."""
134-
try:
135-
resp = _http_get_download(self._session, link)
136-
except NetworkConnectionError as e:
137-
assert e.response is not None
138-
logger.critical(
139-
"HTTP error %s while getting %s", e.response.status_code, link
140-
)
141-
raise
176+
resp = _http_get_download(self._session, link)
177+
total_length = _get_http_response_size(resp)
178+
etag_or_date = _get_http_response_etag_or_date(resp)
142179

143180
filename = _get_http_response_filename(resp, link)
144181
filepath = os.path.join(location, filename)
145182

146-
chunks = _prepare_download(resp, link, self._progress_bar)
183+
chunks = _prepare_download(resp, link, self._progress_bar, total_length)
184+
bytes_received = 0
185+
147186
with open(filepath, "wb") as content_file:
187+
188+
# Process the initial response
148189
for chunk in chunks:
190+
bytes_received += len(chunk)
149191
content_file.write(chunk)
192+
193+
if self._resume_incomplete:
194+
attempts_left = self._resume_attempts
195+
196+
while total_length is not None and bytes_received < total_length:
197+
if attempts_left <= 0:
198+
break
199+
attempts_left -= 1
200+
201+
# Attempt to resume download
202+
resume_resp = _http_get_download(
203+
self._session,
204+
link,
205+
range_start=bytes_received,
206+
if_range=etag_or_date,
207+
)
208+
209+
restart = resume_resp.status_code != HTTPStatus.PARTIAL_CONTENT
210+
# If the server responded with 200 (e.g. when the file has been
211+
# modified on the server or the server doesn't support range
212+
# requests), reset the download to start from the beginning.
213+
if restart:
214+
content_file.seek(0)
215+
content_file.truncate()
216+
bytes_received = 0
217+
total_length = _get_http_response_size(resume_resp)
218+
etag_or_date = _get_http_response_etag_or_date(resume_resp)
219+
220+
chunks = _prepare_download(
221+
resume_resp,
222+
link,
223+
self._progress_bar,
224+
total_length,
225+
range_start=bytes_received,
226+
)
227+
for chunk in chunks:
228+
bytes_received += len(chunk)
229+
content_file.write(chunk)
230+
231+
if total_length is not None and bytes_received < total_length:
232+
if self._resume_incomplete:
233+
logger.critical(
234+
"Failed to download %s after %d resumption attempts.",
235+
link,
236+
self._resume_attempts,
237+
)
238+
else:
239+
logger.critical(
240+
"Failed to download %s."
241+
" Set --incomplete-downloads=resume to automatically"
242+
"resume incomplete download.",
243+
link,
244+
)
245+
os.remove(filepath)
246+
raise RuntimeError("Incomplete download")
247+
150248
content_type = resp.headers.get("Content-Type", "")
151249
return filepath, content_type
152250

@@ -156,32 +254,17 @@ def __init__(
156254
self,
157255
session: PipSession,
158256
progress_bar: str,
257+
resume_incomplete: bool,
258+
resume_attempts: int,
159259
) -> None:
160-
self._session = session
161-
self._progress_bar = progress_bar
260+
self._downloader = Downloader(
261+
session, progress_bar, resume_incomplete, resume_attempts
262+
)
162263

163264
def __call__(
164265
self, links: Iterable[Link], location: str
165266
) -> Iterable[Tuple[Link, Tuple[str, str]]]:
166267
"""Download the files given by links into location."""
167268
for link in links:
168-
try:
169-
resp = _http_get_download(self._session, link)
170-
except NetworkConnectionError as e:
171-
assert e.response is not None
172-
logger.critical(
173-
"HTTP error %s while getting %s",
174-
e.response.status_code,
175-
link,
176-
)
177-
raise
178-
179-
filename = _get_http_response_filename(resp, link)
180-
filepath = os.path.join(location, filename)
181-
182-
chunks = _prepare_download(resp, link, self._progress_bar)
183-
with open(filepath, "wb") as content_file:
184-
for chunk in chunks:
185-
content_file.write(chunk)
186-
content_type = resp.headers.get("Content-Type", "")
269+
filepath, content_type = self._downloader(link, location)
187270
yield link, (filepath, content_type)

src/pip/_internal/operations/prepare.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -231,15 +231,21 @@ def __init__(
231231
lazy_wheel: bool,
232232
verbosity: int,
233233
legacy_resolver: bool,
234+
resume_incomplete: bool,
235+
resume_attempts: int,
234236
) -> None:
235237
super().__init__()
236238

237239
self.src_dir = src_dir
238240
self.build_dir = build_dir
239241
self.build_tracker = build_tracker
240242
self._session = session
241-
self._download = Downloader(session, progress_bar)
242-
self._batch_download = BatchDownloader(session, progress_bar)
243+
self._download = Downloader(
244+
session, progress_bar, resume_incomplete, resume_attempts
245+
)
246+
self._batch_download = BatchDownloader(
247+
session, progress_bar, resume_incomplete, resume_attempts
248+
)
243249
self.finder = finder
244250

245251
# Where still-packed archives should be written to. If None, they are

0 commit comments

Comments
 (0)