diff --git a/.github/workflows/update-s3-html.yml b/.github/workflows/update-s3-html.yml
index a64d33855b..85ef57d5d7 100644
--- a/.github/workflows/update-s3-html.yml
+++ b/.github/workflows/update-s3-html.yml
@@ -17,7 +17,6 @@ jobs:
     strategy:
       matrix:
         prefix: ["whl", "whl/test", "whl/nightly", "libtorch", "libtorch/nightly"]
-        subdir-pattern: ['cu[0-9]+', 'rocm[0-9]+\.[0-9]+', 'cpu', 'xpu']
       fail-fast: False
     container:
       image: continuumio/miniconda3:23.10.0-1
@@ -43,4 +42,4 @@ jobs:
 
           # Install requirements
           pip install -r s3_management/requirements.txt
-          python s3_management/manage.py --subdir-pattern '${{ matrix.subdir-pattern }}' ${{ matrix.prefix }}
+          python s3_management/manage.py ${{ matrix.prefix }}
diff --git a/s3_management/assume-role-output.txt b/s3_management/assume-role-output.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/s3_management/manage.py b/s3_management/manage.py
index 5262e933bc..1f5ae9e16c 100755
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -8,7 +8,7 @@ import time
 
 from collections import defaultdict
 from os import makedirs, path
-from re import compile, match, sub
+from re import match, sub
 from typing import Dict, Iterable, List, Optional, Set, TypeVar
 
 import boto3  # type: ignore[import]
@@ -630,12 +630,15 @@ def grant_public_read(cls, key: str) -> None:
         CLIENT.put_object_acl(Bucket=BUCKET.name, Key=key, ACL="public-read")
 
     @classmethod
-    def fetch_object_names(cls, prefix: str, pattern: str) -> List[str]:
+    def fetch_object_names(cls, prefix: str) -> List[str]:
         obj_names = []
         for obj in BUCKET.objects.filter(Prefix=prefix):
             is_acceptable = any(
                 [path.dirname(obj.key) == prefix]
-                + [match(compile(f"{prefix}/{pattern}"), path.dirname(obj.key))]
+                + [
+                    match(f"{prefix}/{pattern}", path.dirname(obj.key))
+                    for pattern in ACCEPTED_SUBDIR_PATTERNS
+                ]
             ) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS)
             if not is_acceptable:
                 continue
@@ -703,11 +706,9 @@ def _fetch_metadata(key: str) -> str:
                     self.objects[idx].pep658 = response
 
     @classmethod
-    def from_S3(
-        cls, prefix: str, pattern: str, with_metadata: bool = True
-    ) -> "S3Index":
+    def from_S3(cls, prefix: str, with_metadata: bool = True) -> "S3Index":
         prefix = prefix.rstrip("/")
-        obj_names = cls.fetch_object_names(prefix, pattern)
+        obj_names = cls.fetch_object_names(prefix)
 
         def sanitize_key(key: str) -> str:
             return key.replace("+", "%2B")
@@ -748,12 +749,6 @@ def undelete_prefix(cls, prefix: str) -> None:
 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch")
     parser.add_argument("prefix", type=str, choices=PREFIXES + ["all"])
-    parser.add_argument(
-        "--subdir-pattern",
-        type=str,
-        choices=ACCEPTED_SUBDIR_PATTERNS + ["all"],
-        default="all",
-    )
     parser.add_argument("--do-not-upload", action="store_true")
     parser.add_argument("--compute-sha256", action="store_true")
     return parser
@@ -766,22 +761,14 @@ def main() -> None:
     if args.compute_sha256:
         action = "Computing checksums"
 
-    patterns = (
-        ACCEPTED_SUBDIR_PATTERNS
-        if args.subdir_pattern == "all"
-        else [args.subdir_pattern]
-    )
     prefixes = PREFIXES if args.prefix == "all" else [args.prefix]
     for prefix in prefixes:
        generate_pep503 = prefix.startswith("whl")
        print(f"INFO: {action} for '{prefix}'")
        stime = time.time()
-        for pattern in patterns:
-            idx = S3Index.from_S3(
-                prefix=prefix,
-                pattern=pattern,
-                with_metadata=generate_pep503 or args.compute_sha256,
-            )
+        idx = S3Index.from_S3(
+            prefix=prefix, with_metadata=generate_pep503 or args.compute_sha256
+        )
         etime = time.time()
         print(
             f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime-stime:.2f} seconds"
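
For context on the behavioral change: fetch_object_names previously filtered subdirectories against a single caller-supplied pattern, so the workflow needed a matrix entry per pattern; it now checks every entry in ACCEPTED_SUBDIR_PATTERNS, so one invocation per prefix covers all accelerator subdirectories. Below is a minimal standalone sketch of that check, assuming ACCEPTED_SUBDIR_PATTERNS holds the same patterns that were dropped from the workflow matrix; the helper name is illustrative, not manage.py's API, and the real check additionally filters on ACCEPTED_FILE_EXTENSIONS.

    # Standalone sketch of the new subdirectory filtering (the
    # is_acceptable check in fetch_object_names above).
    from os import path
    from re import match

    # Assumed to mirror the patterns removed from the workflow matrix.
    ACCEPTED_SUBDIR_PATTERNS = [r"cu[0-9]+", r"rocm[0-9]+\.[0-9]+", "cpu", "xpu"]

    def is_acceptable_key(key: str, prefix: str) -> bool:
        # Keep keys sitting directly under the prefix, or under a
        # subdirectory matching any accepted pattern (e.g. "whl/cu121/...").
        return any(
            [path.dirname(key) == prefix]
            + [
                bool(match(f"{prefix}/{pattern}", path.dirname(key)))
                for pattern in ACCEPTED_SUBDIR_PATTERNS
            ]
        )

    assert is_acceptable_key("whl/cu121/torch-2.1.0-cp310-linux_x86_64.whl", "whl")
    assert is_acceptable_key("whl/torch-2.1.0-cp310-linux_x86_64.whl", "whl")
    assert not is_acceptable_key("whl/unknown/torch-2.1.0-cp310-linux_x86_64.whl", "whl")

Since each run now covers all patterns, the workflow matrix collapses from 20 jobs (5 prefixes x 4 patterns) to 5, and because re.match accepts a pattern string directly, the compile import can be dropped.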