From 13ec6e610a1b94c7e7cea61ef95660c9928407d6 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Mon, 24 Feb 2025 19:21:16 +0000 Subject: [PATCH] Revert "fix(pypi): change the parallelisation scheme for querying SimpleAPI (#2531)" https://github.com/bazelbuild/rules_python/issues/2622#issuecomment-2677730047 This reverts commit 475a99e283acbd602d584635a6672cb2b27ca37e. --- CHANGELOG.md | 4 - python/private/pypi/extension.bzl | 5 - python/private/pypi/simpleapi_download.bzl | 92 +++---- tests/pypi/simpleapi_download/BUILD.bazel | 5 - .../simpleapi_download_tests.bzl | 245 ------------------ 5 files changed, 39 insertions(+), 312 deletions(-) delete mode 100644 tests/pypi/simpleapi_download/BUILD.bazel delete mode 100644 tests/pypi/simpleapi_download/simpleapi_download_tests.bzl diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a62ab7840..393d3d2c42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -127,10 +127,6 @@ Unreleased changes template. * Bazel 6 support is dropped and Bazel 7.4.1 is the minimum supported version, per our Bazel support matrix. Earlier versions are not tested by CI, so functionality cannot be guaranteed. -* ({bzl:obj}`pip.parse`) From now we will make fewer calls to indexes when - fetching the metadata from SimpleAPI. The calls will be done in parallel to - each index separately, so the extension evaluation time might slow down if - not using {bzl:obj}`pip.parse.experimental_index_url_overrides`. * ({bzl:obj}`pip.parse`) Only query SimpleAPI for packages that have sha values in the `requirements.txt` file. * (rules) The version-aware rules have been folded into the base rules and diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 405c22f60e..9ee6a5d7fc 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -659,11 +659,6 @@ The indexes must support Simple API as described here: https://packaging.python.org/en/latest/specifications/simple-repository-api/ This is equivalent to `--extra-index-urls` `pip` option. - -:::{versionchanged} 1.1.0 -Starting with this version we will iterate over each index specified until -we find metadata for all references distributions. -::: """, default = [], ), diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index ef39fb8723..f88e001238 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -20,7 +20,6 @@ load("@bazel_features//:features.bzl", "bazel_features") load("//python/private:auth.bzl", _get_auth = "get_auth") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") -load("//python/private:text_util.bzl", "render") load(":parse_simpleapi_html.bzl", "parse_simpleapi_html") def simpleapi_download( @@ -29,9 +28,7 @@ def simpleapi_download( attr, cache, parallel_download = True, - read_simpleapi = None, - get_auth = None, - _fail = fail): + get_auth = None): """Download Simple API HTML. Args: @@ -61,7 +58,6 @@ def simpleapi_download( read_simpleapi: a function for reading and parsing of the SimpleAPI contents. Used in tests. get_auth: A function to get auth information passed to read_simpleapi. Used in tests. - _fail: a function to print a failure. Used in tests. Returns: dict of pkg name to the parsed HTML contents - a list of structs. @@ -77,22 +73,15 @@ def simpleapi_download( # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes # to replicate how `pip` would handle this case. + async_downloads = {} contents = {} index_urls = [attr.index_url] + attr.extra_index_urls - read_simpleapi = read_simpleapi or _read_simpleapi - - found_on_index = {} - warn_overrides = False - for i, index_url in enumerate(index_urls): - if i != 0: - # Warn the user about a potential fix for the overrides - warn_overrides = True - - async_downloads = {} - sources = [pkg for pkg in attr.sources if pkg not in found_on_index] - for pkg in sources: - pkg_normalized = normalize_name(pkg) - result = read_simpleapi( + for pkg in attr.sources: + pkg_normalized = normalize_name(pkg) + + success = False + for index_url in index_urls: + result = _read_simpleapi( ctx = ctx, url = "{}/{}/".format( index_url_overrides.get(pkg_normalized, index_url).rstrip("/"), @@ -105,45 +94,42 @@ def simpleapi_download( ) if hasattr(result, "wait"): # We will process it in a separate loop: - async_downloads[pkg] = struct( - pkg_normalized = pkg_normalized, - wait = result.wait, + async_downloads.setdefault(pkg_normalized, []).append( + struct( + pkg_normalized = pkg_normalized, + wait = result.wait, + ), ) - elif result.success: - contents[pkg_normalized] = result.output - found_on_index[pkg] = index_url + continue - if not async_downloads: - continue - - # If we use `block` == False, then we need to have a second loop that is - # collecting all of the results as they were being downloaded in parallel. - for pkg, download in async_downloads.items(): + if result.success: + contents[pkg_normalized] = result.output + success = True + break + + if not async_downloads and not success: + fail("Failed to download metadata from urls: {}".format( + ", ".join(index_urls), + )) + + if not async_downloads: + return contents + + # If we use `block` == False, then we need to have a second loop that is + # collecting all of the results as they were being downloaded in parallel. + for pkg, downloads in async_downloads.items(): + success = False + for download in downloads: result = download.wait() - if result.success: + if result.success and download.pkg_normalized not in contents: contents[download.pkg_normalized] = result.output - found_on_index[pkg] = index_url - - failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index] - if failed_sources: - _fail("Failed to download metadata for {} for from urls: {}".format( - failed_sources, - index_urls, - )) - return None - - if warn_overrides: - index_url_overrides = { - pkg: found_on_index[pkg] - for pkg in attr.sources - if found_on_index[pkg] != attr.index_url - } - - # buildifier: disable=print - print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format( - render.dict(index_url_overrides), - )) + success = True + + if not success: + fail("Failed to download metadata from urls: {}".format( + ", ".join(index_urls), + )) return contents diff --git a/tests/pypi/simpleapi_download/BUILD.bazel b/tests/pypi/simpleapi_download/BUILD.bazel deleted file mode 100644 index 04747b6246..0000000000 --- a/tests/pypi/simpleapi_download/BUILD.bazel +++ /dev/null @@ -1,5 +0,0 @@ -load("simpleapi_download_tests.bzl", "simpleapi_download_test_suite") - -simpleapi_download_test_suite( - name = "simpleapi_download_tests", -) diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl deleted file mode 100644 index 964d3e25ea..0000000000 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright 2024 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"" - -load("@rules_testing//lib:test_suite.bzl", "test_suite") -load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility - -_tests = [] - -def _test_simple(env): - calls = [] - - def read_simpleapi(ctx, url, attr, cache, get_auth, block): - _ = ctx # buildifier: disable=unused-variable - _ = attr - _ = cache - _ = get_auth - env.expect.that_bool(block).equals(False) - calls.append(url) - if "foo" in url and "main" in url: - return struct( - output = "", - success = False, - ) - else: - return struct( - output = "data from {}".format(url), - success = True, - ) - - contents = simpleapi_download( - ctx = struct( - os = struct(environ = {}), - ), - attr = struct( - index_url_overrides = {}, - index_url = "main", - extra_index_urls = ["extra"], - sources = ["foo", "bar", "baz"], - envsubst = [], - ), - cache = {}, - parallel_download = True, - read_simpleapi = read_simpleapi, - ) - - env.expect.that_collection(calls).contains_exactly([ - "extra/foo/", - "main/bar/", - "main/baz/", - "main/foo/", - ]) - env.expect.that_dict(contents).contains_exactly({ - "bar": "data from main/bar/", - "baz": "data from main/baz/", - "foo": "data from extra/foo/", - }) - -_tests.append(_test_simple) - -def _test_fail(env): - calls = [] - fails = [] - - def read_simpleapi(ctx, url, attr, cache, get_auth, block): - _ = ctx # buildifier: disable=unused-variable - _ = attr - _ = cache - _ = get_auth - env.expect.that_bool(block).equals(False) - calls.append(url) - if "foo" in url: - return struct( - output = "", - success = False, - ) - else: - return struct( - output = "data from {}".format(url), - success = True, - ) - - simpleapi_download( - ctx = struct( - os = struct(environ = {}), - ), - attr = struct( - index_url_overrides = {}, - index_url = "main", - extra_index_urls = ["extra"], - sources = ["foo", "bar", "baz"], - envsubst = [], - ), - cache = {}, - parallel_download = True, - read_simpleapi = read_simpleapi, - _fail = fails.append, - ) - - env.expect.that_collection(fails).contains_exactly([ - """Failed to download metadata for ["foo"] for from urls: ["main", "extra"]""", - ]) - env.expect.that_collection(calls).contains_exactly([ - "extra/foo/", - "main/bar/", - "main/baz/", - "main/foo/", - ]) - -_tests.append(_test_fail) - -def _test_download_url(env): - downloads = {} - - def download(url, output, **kwargs): - _ = kwargs # buildifier: disable=unused-variable - downloads[url[0]] = output - return struct(success = True) - - simpleapi_download( - ctx = struct( - os = struct(environ = {}), - download = download, - read = lambda i: "contents of " + i, - path = lambda i: "path/for/" + i, - ), - attr = struct( - index_url_overrides = {}, - index_url = "https://example.com/main/simple/", - extra_index_urls = [], - sources = ["foo", "bar", "baz"], - envsubst = [], - ), - cache = {}, - parallel_download = False, - get_auth = lambda ctx, urls, ctx_attr: struct(), - ) - - env.expect.that_dict(downloads).contains_exactly({ - "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", - "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html", - "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", - }) - -_tests.append(_test_download_url) - -def _test_download_url_parallel(env): - downloads = {} - - def download(url, output, **kwargs): - _ = kwargs # buildifier: disable=unused-variable - downloads[url[0]] = output - return struct(wait = lambda: struct(success = True)) - - simpleapi_download( - ctx = struct( - os = struct(environ = {}), - download = download, - read = lambda i: "contents of " + i, - path = lambda i: "path/for/" + i, - ), - attr = struct( - index_url_overrides = {}, - index_url = "https://example.com/main/simple/", - extra_index_urls = [], - sources = ["foo", "bar", "baz"], - envsubst = [], - ), - cache = {}, - parallel_download = True, - get_auth = lambda ctx, urls, ctx_attr: struct(), - ) - - env.expect.that_dict(downloads).contains_exactly({ - "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", - "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html", - "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", - }) - -_tests.append(_test_download_url_parallel) - -def _test_download_envsubst_url(env): - downloads = {} - - def download(url, output, **kwargs): - _ = kwargs # buildifier: disable=unused-variable - downloads[url[0]] = output - return struct(success = True) - - simpleapi_download( - ctx = struct( - os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}), - download = download, - read = lambda i: "contents of " + i, - path = lambda i: "path/for/" + i, - ), - attr = struct( - index_url_overrides = {}, - index_url = "$INDEX_URL", - extra_index_urls = [], - sources = ["foo", "bar", "baz"], - envsubst = ["INDEX_URL"], - ), - cache = {}, - parallel_download = False, - get_auth = lambda ctx, urls, ctx_attr: struct(), - ) - - env.expect.that_dict(downloads).contains_exactly({ - "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html", - "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html", - "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html", - }) - -_tests.append(_test_download_envsubst_url) - -def _test_strip_empty_path_segments(env): - env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged") - env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments") - env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments") - env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments") - env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/") - env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/") - -_tests.append(_test_strip_empty_path_segments) - -def simpleapi_download_test_suite(name): - """Create the test suite. - - Args: - name: the name of the test suite - """ - test_suite(name = name, basic_tests = _tests)