Skip to content

Commit 9010df2

Browse files
authored
Merge pull request #493 from ddalcino/retry_checksums
[Security] Use SHA-256 checksums from trusted mirrors only
2 parents edad377 + b62db9e commit 9010df2

13 files changed

+284
-128
lines changed

aqt/archives.py

+37-40
Original file line number | Diff line number | Diff line change
@@ -19,15 +19,15 @@
1919
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
2020
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2121
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22-
22+
import binascii
2323
import posixpath
2424
import xml.etree.ElementTree as ElementTree
2525
from dataclasses import dataclass, field
2626
from logging import getLogger
2727
from typing import Dict, Iterable, List, Optional, Tuple
2828

29-
from aqt.exceptions import ArchiveDownloadError, ArchiveListError, NoPackageFound
30-
from aqt.helper import Settings, getUrl, ssplit
29+
from aqt.exceptions import ArchiveDownloadError, ArchiveListError, ChecksumDownloadFailure, NoPackageFound
30+
from aqt.helper import Settings, get_hash, getUrl, ssplit
3131
from aqt.metadata import QtRepoProperty, Version
3232

3333

@@ -45,10 +45,10 @@ def __post_init__(self):
4545
@dataclass
4646
class QtPackage:
4747
name: str
48-
archive_url: str
48+
base_url: str
49+
archive_path: str
4950
archive: str
5051
package_desc: str
51-
hashurl: str
5252
pkg_update_name: str
5353
version: Optional[Version] = field(default=None)
5454

@@ -59,9 +59,9 @@ def __repr__(self):
5959
def __str__(self):
6060
v_info = f", version={self.version}" if self.version else ""
6161
return (
62-
f"QtPackage(name={self.name}, url={self.archive_url}, "
62+
f"QtPackage(name={self.name}, url={self.archive_path}, "
6363
f"archive={self.archive}, desc={self.package_desc}"
64-
f"hashurl={self.hashurl}{v_info})"
64+
f"{v_info})"
6565
)
6666

6767

@@ -140,7 +140,7 @@ def __init__(
140140
self.os_name: str = os_name
141141
self.all_extra: bool = all_extra
142142
self.arch_list: List[str] = [item.get("arch") for item in Settings.qt_combinations]
143-
self.base: str = posixpath.join(base, "online/qtsdkrepository")
143+
self.base: str = base
144144
self.logger = getLogger("aqt.archives")
145145
self.archives: List[QtPackage] = []
146146
self.subarchives: Optional[Iterable[str]] = subarchives
@@ -217,22 +217,26 @@ def _target_packages(self) -> ModuleToPackage:
217217
def _get_archives(self):
218218
# Get packages index
219219

220-
# archive_path: windows_x86/desktop/qt5_59_src_doc_examples
221-
archive_path = posixpath.join(
220+
# os_target_folder: online/qtsdkrepository/windows_x86/desktop/qt5_59_src_doc_examples
221+
os_target_folder = posixpath.join(
222+
"online/qtsdkrepository",
222223
self.os_name + ("_x86" if self.os_name == "windows" else "_x64"),
223224
self.target,
224225
f"qt{self.version.major}_{self._version_str()}{self._arch_ext()}",
225226
)
226-
update_xml_url = posixpath.join(self.base, archive_path, "Updates.xml")
227-
archive_url = posixpath.join(self.base, archive_path)
228-
self._download_update_xml(update_xml_url)
229-
self._parse_update_xml(archive_url, self._target_packages())
227+
update_xml_path = posixpath.join(os_target_folder, "Updates.xml")
228+
self._download_update_xml(update_xml_path)
229+
self._parse_update_xml(os_target_folder, self._target_packages())
230230

231-
def _download_update_xml(self, update_xml_path):
    """Hook for unit test.

    Retrieve the expected SHA-256 of ``update_xml_path`` with ``get_hash``, then
    download the file from ``self.base`` and store its text in
    ``self.update_xml_text``. The expected digest is handed to ``getUrl`` as its
    ``expected_hash`` argument.

    :param update_xml_path: path of Updates.xml relative to the repository base URL
    :raises ChecksumDownloadFailure: if the retrieved checksum is empty
    """
    xml_hash = binascii.unhexlify(get_hash(update_xml_path, "sha256", self.timeout))
    # unhexlify() returns bytes: an empty checksum is b"", which never compares
    # equal to the str "", so test for emptiness instead of comparing to "".
    if not xml_hash:
        raise ChecksumDownloadFailure(f"Checksum for '{update_xml_path}' is empty")
    self.update_xml_text = getUrl(posixpath.join(self.base, update_xml_path), self.timeout, xml_hash)
234238

235-
def _parse_update_xml(self, archive_url, target_packages: Optional[ModuleToPackage]):
239+
def _parse_update_xml(self, os_target_folder, target_packages: Optional[ModuleToPackage]):
236240
if not target_packages:
237241
target_packages = ModuleToPackage({})
238242
try:
@@ -270,22 +274,21 @@ def _parse_update_xml(self, archive_url, target_packages: Optional[ModuleToPacka
270274
archive_name = archive.split("-", maxsplit=1)[0]
271275
if should_filter_archives and archive_name not in self.subarchives:
272276
continue
273-
package_url = posixpath.join(
274-
# https://download.qt.io/online/qtsdkrepository/linux_x64/desktop/qt5_5150/
275-
archive_url,
277+
archive_path = posixpath.join(
278+
# online/qtsdkrepository/linux_x64/desktop/qt5_5150/
279+
os_target_folder,
276280
# qt.qt5.5150.gcc_64/
277281
pkg_name,
278282
# 5.15.0-0-202005140804qtbase-Linux-RHEL_7_6-GCC-Linux-RHEL_7_6-X86_64.7z
279283
full_version + archive,
280284
)
281-
hashurl = package_url + ".sha1"
282285
self.archives.append(
283286
QtPackage(
284287
name=archive_name,
285-
archive_url=package_url,
288+
base_url=self.base,
289+
archive_path=archive_path,
286290
archive=archive,
287291
package_desc=package_desc,
288-
hashurl=hashurl,
289292
pkg_update_name=pkg_name, # For testing purposes
290293
)
291294
)
@@ -431,25 +434,20 @@ def handle_missing_updates_xml(self, e: ArchiveDownloadError):
431434
raise ArchiveListError(msg, suggested_action=[help_msg]) from e
432435

433436
def _get_archives(self):
434-
_a = "_x64"
435-
if self.os_name == "windows":
436-
_a = "_x86"
437-
438-
archive_url = posixpath.join(
439-
# https://download.qt.io/online/qtsdkrepository/
440-
self.base,
437+
os_target_folder = posixpath.join(
438+
"online/qtsdkrepository",
441439
# linux_x64/
442-
self.os_name + _a,
440+
self.os_name + ("_x86" if self.os_name == "windows" else "_x64"),
443441
# desktop/
444442
self.target,
445443
# tools_ifw/
446444
self.tool_name,
447445
)
448-
update_xml_url = posixpath.join(archive_url, "Updates.xml")
446+
update_xml_url = posixpath.join(os_target_folder, "Updates.xml")
449447
self._download_update_xml(update_xml_url) # call super method.
450-
self._parse_update_xml(archive_url, None)
448+
self._parse_update_xml(os_target_folder, None)
451449

452-
def _parse_update_xml(self, archive_url, *ignored):
450+
def _parse_update_xml(self, os_target_folder, *ignored):
453451
try:
454452
self.update_xml = ElementTree.fromstring(self.update_xml_text)
455453
except ElementTree.ParseError as perror:
@@ -472,22 +470,21 @@ def _parse_update_xml(self, archive_url, *ignored):
472470
message = f"The package '{self.arch}' contains no downloadable archives!"
473471
raise NoPackageFound(message)
474472
for archive in ssplit(downloadable_archives):
475-
package_url = posixpath.join(
476-
# https://download.qt.io/online/qtsdkrepository/linux_x64/desktop/tools_ifw/
477-
archive_url,
473+
archive_path = posixpath.join(
474+
# online/qtsdkrepository/linux_x64/desktop/tools_ifw/
475+
os_target_folder,
478476
# qt.tools.ifw.41/
479477
name,
480478
# 4.1.1-202105261130ifw-linux-x64.7z
481479
f"{named_version}{archive}",
482480
)
483-
hashurl = package_url + ".sha1"
484481
self.archives.append(
485482
QtPackage(
486483
name=name,
487-
archive_url=package_url,
484+
base_url=self.base,
485+
archive_path=archive_path,
488486
archive=archive,
489487
package_desc=package_desc,
490-
hashurl=hashurl,
491488
pkg_update_name=name, # Redundant
492489
)
493490
)

aqt/exceptions.py

+15
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@
2020
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
from typing import Iterable
2222

23+
DOCS_CONFIG = "https://aqtinstall.readthedocs.io/en/stable/configuration.html#configuration"
24+
2325

2426
class AqtException(Exception):
2527
def __init__(self, *args, **kwargs):
@@ -47,6 +49,19 @@ class ArchiveChecksumError(ArchiveDownloadError):
4749
pass
4850

4951

52+
class ChecksumDownloadFailure(ArchiveDownloadError):
    """Raised when a checksum for a file could not be obtained.

    The constructor appends troubleshooting hints to any ``suggested_action``
    passed by the caller and always sets ``should_show_help`` to True before
    delegating to the parent constructor.
    """

    def __init__(self, *args, **kwargs):
        # list.extend() mutates in place and returns None, so its result must not
        # be stored. Copy the caller's value so it is not mutated either.
        suggested_action = list(kwargs.pop("suggested_action", None) or [])
        suggested_action.extend(
            [
                "Check your internet connection",
                "Consider modifying `requests.max_retries_to_retrieve_hash` in settings.ini",
                f"Consider modifying `mirrors.trusted_mirrors` in settings.ini (see {DOCS_CONFIG})",
            ]
        )
        kwargs["suggested_action"] = suggested_action
        kwargs["should_show_help"] = True
        super(ChecksumDownloadFailure, self).__init__(*args, **kwargs)
63+
64+
5065
class ArchiveConnectionError(AqtException):
5166
pass
5267

aqt/helper.py

+72-4
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,26 @@
2424
import json
2525
import logging.config
2626
import os
27+
import posixpath
28+
import secrets
2729
import sys
2830
import xml.etree.ElementTree as ElementTree
2931
from logging import getLogger
3032
from logging.handlers import QueueListener
3133
from pathlib import Path
32-
from typing import Callable, Dict, List, Tuple
34+
from typing import Callable, Dict, Generator, List, Optional, Tuple
3335
from urllib.parse import urlparse
3436

3537
import requests
3638
import requests.adapters
3739

38-
from aqt.exceptions import ArchiveChecksumError, ArchiveConnectionError, ArchiveDownloadError, ArchiveListError
40+
from aqt.exceptions import (
41+
ArchiveChecksumError,
42+
ArchiveConnectionError,
43+
ArchiveDownloadError,
44+
ArchiveListError,
45+
ChecksumDownloadFailure,
46+
)
3947

4048

4149
def _get_meta(url: str):
@@ -47,7 +55,13 @@ def _check_content_type(ct: str) -> bool:
4755
return any(ct.startswith(t) for t in candidate)
4856

4957

50-
def getUrl(url: str, timeout) -> str:
58+
def getUrl(url: str, timeout, expected_hash: Optional[bytes] = None) -> str:
59+
"""
60+
Gets a file from `url` via HTTP GET.
61+
62+
No caller should call this function without providing an expected_hash, unless
63+
the caller is `get_hash`, which cannot know what the expected hash should be.
64+
"""
5165
logger = getLogger("aqt.helper")
5266
with requests.sessions.Session() as session:
5367
retries = requests.adapters.Retry(
@@ -76,6 +90,14 @@ def getUrl(url: str, timeout) -> str:
7690
msg = f"Failed to retrieve file at {url}\nServer response code: {r.status_code}, reason: {r.reason}"
7791
raise ArchiveDownloadError(msg)
7892
result = r.text
93+
filename = url.split("/")[-1]
94+
actual_hash = hashlib.sha256(bytes(result, "utf-8")).digest()
95+
if expected_hash is not None and expected_hash != actual_hash:
96+
raise ArchiveChecksumError(
97+
f"Downloaded file {filename} is corrupted! Detect checksum error.\n"
98+
f"Expect {expected_hash.hex()}: {url}\n"
99+
f"Actual {actual_hash.hex()}: {filename}"
100+
)
79101
return result
80102

81103

@@ -134,6 +156,42 @@ def retry_on_errors(action: Callable[[], any], acceptable_errors: Tuple, num_ret
134156
raise e from e
135157

136158

159+
def retry_on_bad_connection(function: Callable[[str], any], base_url: str):
    """Call ``function(base_url)``; on a connection error, retry once with a fallback.

    :param function: callable that accepts a base URL
    :param base_url: the URL to try first
    :return: whatever ``function`` returns
    :raises ArchiveConnectionError: if the first attempt fails and
        ``Settings.fallbacks`` is empty; an error from the retry also propagates
    """
    logger = getLogger("aqt.helper")
    try:
        return function(base_url)
    except ArchiveConnectionError:
        fallbacks = Settings.fallbacks
        if not fallbacks:
            # secrets.choice() raises IndexError on an empty sequence; with no
            # fallback to try, surface the original connection error instead.
            raise
        # Pick the fallback only once it is actually needed.
        fallback_url = secrets.choice(fallbacks)
        logger.warning(f"Connection to '{base_url}' failed. Retrying with fallback '{fallback_url}'.")
        return function(fallback_url)
167+
168+
169+
def iter_list_reps(_list: List, num_reps: int) -> Generator:
    """Yield ``num_reps`` items from ``_list``, wrapping to the start at the end.

    :param _list: items to cycle through
    :param num_reps: total number of items to yield
    :return: generator over the repeated items; yields nothing when ``_list``
        is empty or ``num_reps`` is not positive
    """
    if not _list:
        # Nothing to repeat; indexing an empty list would raise IndexError.
        return
    size = len(_list)
    for i in range(num_reps):
        yield _list[i % size]
176+
177+
178+
def get_hash(archive_path: str, algorithm: str, timeout) -> str:
    """Download the checksum for ``archive_path`` from the trusted mirrors.

    The mirrors in ``Settings.trusted_mirrors`` are tried in turn, wrapping
    around, for at most ``Settings.max_retries_to_retrieve_hash`` attempts.

    :param archive_path: path of the file, relative to a mirror's base URL
    :param algorithm: checksum file extension appended to the path, e.g. "sha256"
    :param timeout: passed through to ``getUrl``
    :return: the first space-delimited field of the downloaded checksum file
    :raises ChecksumDownloadFailure: if no attempt succeeds
    """
    logger = getLogger("aqt.helper")
    for base_url in iter_list_reps(Settings.trusted_mirrors, Settings.max_retries_to_retrieve_hash):
        url = posixpath.join(base_url, f"{archive_path}.{algorithm}")
        logger.debug(f"Attempt to download checksum at {url}")
        try:
            r = getUrl(url, timeout)
            # sha256 & md5 files are: "some_hash archive_filename"
            return r.split(" ")[0]
        except (ArchiveConnectionError, ArchiveDownloadError) as e:
            # Best effort: record why this mirror failed, then try the next one.
            logger.debug(f"Failed to download checksum at {url}: {e}")
    filename = archive_path.split("/")[-1]
    raise ChecksumDownloadFailure(
        f"Failed to download checksum for the file '{filename}' from mirrors '{Settings.trusted_mirrors}'"
    )
193+
194+
137195
def altlink(url: str, alt: str):
138196
"""
139197
Blacklisting redirected(alt) location based on Settings.blacklist configuration.
@@ -225,7 +283,9 @@ def xml_to_modules(
225283

226284
class MyConfigParser(configparser.ConfigParser):
227285
def getlist(self, section: str, option: str, fallback=[]) -> List[str]:
228-
value = self.get(section, option)
286+
value = self.get(section, option, fallback=None)
287+
if value is None:
288+
return fallback
229289
try:
230290
result = list(filter(None, (x.strip() for x in value.splitlines())))
231291
except Exception:
@@ -361,10 +421,18 @@ def max_retries_on_connection_error(self):
361421
def max_retries_on_checksum_error(self):
362422
return self.config.getint("requests", "max_retries_on_checksum_error", fallback=int(self.max_retries))
363423

424+
@property
425+
def max_retries_to_retrieve_hash(self):
426+
return self.config.getint("requests", "max_retries_to_retrieve_hash", fallback=int(self.max_retries))
427+
364428
@property
365429
def backoff_factor(self):
366430
return self.config.getfloat("requests", "retry_backoff", fallback=0.1)
367431

432+
@property
433+
def trusted_mirrors(self):
434+
return self.config.getlist("mirrors", "trusted_mirrors", fallback=[self.baseurl])
435+
368436
@property
369437
def fallbacks(self):
370438
return self.config.getlist("mirrors", "fallbacks", fallback=[])

0 commit comments

Comments
 (0)