From 13ec6e610a1b94c7e7cea61ef95660c9928407d6 Mon Sep 17 00:00:00 2001
From: Keith Smiley <keithbsmiley@gmail.com>
Date: Mon, 24 Feb 2025 19:21:16 +0000
Subject: [PATCH] Revert "fix(pypi): change the parallelisation scheme for
 querying SimpleAPI (#2531)"

https://github.com/bazelbuild/rules_python/issues/2622#issuecomment-2677730047

This reverts commit 475a99e283acbd602d584635a6672cb2b27ca37e.
---
 CHANGELOG.md                                  |   4 -
 python/private/pypi/extension.bzl             |   5 -
 python/private/pypi/simpleapi_download.bzl    |  92 +++----
 tests/pypi/simpleapi_download/BUILD.bazel     |   5 -
 .../simpleapi_download_tests.bzl              | 245 ------------------
 5 files changed, 39 insertions(+), 312 deletions(-)
 delete mode 100644 tests/pypi/simpleapi_download/BUILD.bazel
 delete mode 100644 tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a62ab7840..393d3d2c42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -127,10 +127,6 @@ Unreleased changes template.
 * Bazel 6 support is dropped and Bazel 7.4.1 is the minimum supported
   version, per our Bazel support matrix. Earlier versions are not
   tested by CI, so functionality cannot be guaranteed.
-* ({bzl:obj}`pip.parse`) From now we will make fewer calls to indexes when
-  fetching the metadata from SimpleAPI. The calls will be done in parallel to
-  each index separately, so the extension evaluation time might slow down if
-  not using {bzl:obj}`pip.parse.experimental_index_url_overrides`.
 * ({bzl:obj}`pip.parse`) Only query SimpleAPI for packages that have
   sha values in the `requirements.txt` file.
 * (rules) The version-aware rules have been folded into the base rules and
diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl
index 405c22f60e..9ee6a5d7fc 100644
--- a/python/private/pypi/extension.bzl
+++ b/python/private/pypi/extension.bzl
@@ -659,11 +659,6 @@ The indexes must support Simple API as described here:
 https://packaging.python.org/en/latest/specifications/simple-repository-api/
 
 This is equivalent to `--extra-index-urls` `pip` option.
-
-:::{versionchanged} 1.1.0
-Starting with this version we will iterate over each index specified until
-we find metadata for all references distributions.
-:::
 """,
             default = [],
         ),
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index ef39fb8723..f88e001238 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -20,7 +20,6 @@ load("@bazel_features//:features.bzl", "bazel_features")
 load("//python/private:auth.bzl", _get_auth = "get_auth")
 load("//python/private:envsubst.bzl", "envsubst")
 load("//python/private:normalize_name.bzl", "normalize_name")
-load("//python/private:text_util.bzl", "render")
 load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
 
 def simpleapi_download(
@@ -29,9 +28,7 @@ def simpleapi_download(
         attr,
         cache,
         parallel_download = True,
-        read_simpleapi = None,
-        get_auth = None,
-        _fail = fail):
+        get_auth = None):
     """Download Simple API HTML.
 
     Args:
@@ -61,7 +58,6 @@ def simpleapi_download(
         read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
             Used in tests.
         get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
-        _fail: a function to print a failure. Used in tests.
 
     Returns:
         dict of pkg name to the parsed HTML contents - a list of structs.
@@ -77,22 +73,15 @@ def simpleapi_download(
 
     # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
     # to replicate how `pip` would handle this case.
+    async_downloads = {}
     contents = {}
     index_urls = [attr.index_url] + attr.extra_index_urls
-    read_simpleapi = read_simpleapi or _read_simpleapi
-
-    found_on_index = {}
-    warn_overrides = False
-    for i, index_url in enumerate(index_urls):
-        if i != 0:
-            # Warn the user about a potential fix for the overrides
-            warn_overrides = True
-
-        async_downloads = {}
-        sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
-        for pkg in sources:
-            pkg_normalized = normalize_name(pkg)
-            result = read_simpleapi(
+    for pkg in attr.sources:
+        pkg_normalized = normalize_name(pkg)
+
+        success = False
+        for index_url in index_urls:
+            result = _read_simpleapi(
                 ctx = ctx,
                 url = "{}/{}/".format(
                     index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
@@ -105,45 +94,42 @@ def simpleapi_download(
             )
             if hasattr(result, "wait"):
                 # We will process it in a separate loop:
-                async_downloads[pkg] = struct(
-                    pkg_normalized = pkg_normalized,
-                    wait = result.wait,
+                async_downloads.setdefault(pkg_normalized, []).append(
+                    struct(
+                        pkg_normalized = pkg_normalized,
+                        wait = result.wait,
+                    ),
                 )
-            elif result.success:
-                contents[pkg_normalized] = result.output
-                found_on_index[pkg] = index_url
+                continue
 
-        if not async_downloads:
-            continue
-
-        # If we use `block` == False, then we need to have a second loop that is
-        # collecting all of the results as they were being downloaded in parallel.
-        for pkg, download in async_downloads.items():
+            if result.success:
+                contents[pkg_normalized] = result.output
+                success = True
+                break
+
+        if not async_downloads and not success:
+            fail("Failed to download metadata from urls: {}".format(
+                ", ".join(index_urls),
+            ))
+
+    if not async_downloads:
+        return contents
+
+    # If we use `block` == False, then we need to have a second loop that is
+    # collecting all of the results as they were being downloaded in parallel.
+    for pkg, downloads in async_downloads.items():
+        success = False
+        for download in downloads:
             result = download.wait()
 
-            if result.success:
+            if result.success and download.pkg_normalized not in contents:
                 contents[download.pkg_normalized] = result.output
-                found_on_index[pkg] = index_url
-
-    failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
-    if failed_sources:
-        _fail("Failed to download metadata for {} for from urls: {}".format(
-            failed_sources,
-            index_urls,
-        ))
-        return None
-
-    if warn_overrides:
-        index_url_overrides = {
-            pkg: found_on_index[pkg]
-            for pkg in attr.sources
-            if found_on_index[pkg] != attr.index_url
-        }
-
-        # buildifier: disable=print
-        print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
-            render.dict(index_url_overrides),
-        ))
+                success = True
+
+        if not success:
+            fail("Failed to download metadata from urls: {}".format(
+                ", ".join(index_urls),
+            ))
 
     return contents
 
diff --git a/tests/pypi/simpleapi_download/BUILD.bazel b/tests/pypi/simpleapi_download/BUILD.bazel
deleted file mode 100644
index 04747b6246..0000000000
--- a/tests/pypi/simpleapi_download/BUILD.bazel
+++ /dev/null
@@ -1,5 +0,0 @@
-load("simpleapi_download_tests.bzl", "simpleapi_download_test_suite")
-
-simpleapi_download_test_suite(
-    name = "simpleapi_download_tests",
-)
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
deleted file mode 100644
index 964d3e25ea..0000000000
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ /dev/null
@@ -1,245 +0,0 @@
-# Copyright 2024 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""
-
-load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments")  # buildifier: disable=bzl-visibility
-
-_tests = []
-
-def _test_simple(env):
-    calls = []
-
-    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
-        _ = ctx  # buildifier: disable=unused-variable
-        _ = attr
-        _ = cache
-        _ = get_auth
-        env.expect.that_bool(block).equals(False)
-        calls.append(url)
-        if "foo" in url and "main" in url:
-            return struct(
-                output = "",
-                success = False,
-            )
-        else:
-            return struct(
-                output = "data from {}".format(url),
-                success = True,
-            )
-
-    contents = simpleapi_download(
-        ctx = struct(
-            os = struct(environ = {}),
-        ),
-        attr = struct(
-            index_url_overrides = {},
-            index_url = "main",
-            extra_index_urls = ["extra"],
-            sources = ["foo", "bar", "baz"],
-            envsubst = [],
-        ),
-        cache = {},
-        parallel_download = True,
-        read_simpleapi = read_simpleapi,
-    )
-
-    env.expect.that_collection(calls).contains_exactly([
-        "extra/foo/",
-        "main/bar/",
-        "main/baz/",
-        "main/foo/",
-    ])
-    env.expect.that_dict(contents).contains_exactly({
-        "bar": "data from main/bar/",
-        "baz": "data from main/baz/",
-        "foo": "data from extra/foo/",
-    })
-
-_tests.append(_test_simple)
-
-def _test_fail(env):
-    calls = []
-    fails = []
-
-    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
-        _ = ctx  # buildifier: disable=unused-variable
-        _ = attr
-        _ = cache
-        _ = get_auth
-        env.expect.that_bool(block).equals(False)
-        calls.append(url)
-        if "foo" in url:
-            return struct(
-                output = "",
-                success = False,
-            )
-        else:
-            return struct(
-                output = "data from {}".format(url),
-                success = True,
-            )
-
-    simpleapi_download(
-        ctx = struct(
-            os = struct(environ = {}),
-        ),
-        attr = struct(
-            index_url_overrides = {},
-            index_url = "main",
-            extra_index_urls = ["extra"],
-            sources = ["foo", "bar", "baz"],
-            envsubst = [],
-        ),
-        cache = {},
-        parallel_download = True,
-        read_simpleapi = read_simpleapi,
-        _fail = fails.append,
-    )
-
-    env.expect.that_collection(fails).contains_exactly([
-        """Failed to download metadata for ["foo"] for from urls: ["main", "extra"]""",
-    ])
-    env.expect.that_collection(calls).contains_exactly([
-        "extra/foo/",
-        "main/bar/",
-        "main/baz/",
-        "main/foo/",
-    ])
-
-_tests.append(_test_fail)
-
-def _test_download_url(env):
-    downloads = {}
-
-    def download(url, output, **kwargs):
-        _ = kwargs  # buildifier: disable=unused-variable
-        downloads[url[0]] = output
-        return struct(success = True)
-
-    simpleapi_download(
-        ctx = struct(
-            os = struct(environ = {}),
-            download = download,
-            read = lambda i: "contents of " + i,
-            path = lambda i: "path/for/" + i,
-        ),
-        attr = struct(
-            index_url_overrides = {},
-            index_url = "https://example.com/main/simple/",
-            extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
-            envsubst = [],
-        ),
-        cache = {},
-        parallel_download = False,
-        get_auth = lambda ctx, urls, ctx_attr: struct(),
-    )
-
-    env.expect.that_dict(downloads).contains_exactly({
-        "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
-        "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
-        "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
-    })
-
-_tests.append(_test_download_url)
-
-def _test_download_url_parallel(env):
-    downloads = {}
-
-    def download(url, output, **kwargs):
-        _ = kwargs  # buildifier: disable=unused-variable
-        downloads[url[0]] = output
-        return struct(wait = lambda: struct(success = True))
-
-    simpleapi_download(
-        ctx = struct(
-            os = struct(environ = {}),
-            download = download,
-            read = lambda i: "contents of " + i,
-            path = lambda i: "path/for/" + i,
-        ),
-        attr = struct(
-            index_url_overrides = {},
-            index_url = "https://example.com/main/simple/",
-            extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
-            envsubst = [],
-        ),
-        cache = {},
-        parallel_download = True,
-        get_auth = lambda ctx, urls, ctx_attr: struct(),
-    )
-
-    env.expect.that_dict(downloads).contains_exactly({
-        "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
-        "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
-        "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
-    })
-
-_tests.append(_test_download_url_parallel)
-
-def _test_download_envsubst_url(env):
-    downloads = {}
-
-    def download(url, output, **kwargs):
-        _ = kwargs  # buildifier: disable=unused-variable
-        downloads[url[0]] = output
-        return struct(success = True)
-
-    simpleapi_download(
-        ctx = struct(
-            os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
-            download = download,
-            read = lambda i: "contents of " + i,
-            path = lambda i: "path/for/" + i,
-        ),
-        attr = struct(
-            index_url_overrides = {},
-            index_url = "$INDEX_URL",
-            extra_index_urls = [],
-            sources = ["foo", "bar", "baz"],
-            envsubst = ["INDEX_URL"],
-        ),
-        cache = {},
-        parallel_download = False,
-        get_auth = lambda ctx, urls, ctx_attr: struct(),
-    )
-
-    env.expect.that_dict(downloads).contains_exactly({
-        "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
-        "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
-        "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
-    })
-
-_tests.append(_test_download_envsubst_url)
-
-def _test_strip_empty_path_segments(env):
-    env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
-    env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
-    env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
-    env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
-    env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
-    env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
-
-_tests.append(_test_strip_empty_path_segments)
-
-def simpleapi_download_test_suite(name):
-    """Create the test suite.
-
-    Args:
-        name: the name of the test suite
-    """
-    test_suite(name = name, basic_tests = _tests)