diff --git a/src/python/pants/backend/url_handlers/s3/integration_test.py b/src/python/pants/backend/url_handlers/s3/integration_test.py index 418e734845e..acb2174831d 100644 --- a/src/python/pants/backend/url_handlers/s3/integration_test.py +++ b/src/python/pants/backend/url_handlers/s3/integration_test.py @@ -55,6 +55,12 @@ def fake_resolver_creator(session): assert session is fake_session return SimpleNamespace(load_credentials=lambda: fake_creds) + # Replace the simple fake_session object with a SimpleNamespace that has get_config_variable + fake_session = SimpleNamespace( + get_config_variable=lambda name: "us-west-2" if name == "region" else None + ) + botocore.session = SimpleNamespace(get_session=lambda: fake_session) + def fake_creds_ctor(access_key, secret_key): assert access_key == fake_creds.access_key assert secret_key == fake_creds.secret_key @@ -64,8 +70,10 @@ def fake_creds_ctor(access_key, secret_key): create_credential_resolver=fake_resolver_creator, Credentials=fake_creds_ctor ) - def fake_auth_ctor(creds): + def fake_auth_ctor(creds, service_name, region_name): assert creds is fake_creds + assert service_name == "s3" + assert region_name in ["us-east-1", "us-west-2"] def add_auth(request): request.url == expected_url @@ -73,7 +81,7 @@ def add_auth(request): return SimpleNamespace(add_auth=add_auth) - botocore.auth = SimpleNamespace(HmacV1Auth=fake_auth_ctor) + botocore.auth = SimpleNamespace(SigV4Auth=fake_auth_ctor) monkeypatch.setitem(sys.modules, "botocore", botocore) diff --git a/src/python/pants/backend/url_handlers/s3/register.py b/src/python/pants/backend/url_handlers/s3/register.py index ec756e6ef27..66af9bdea30 100644 --- a/src/python/pants/backend/url_handlers/s3/register.py +++ b/src/python/pants/backend/url_handlers/s3/register.py @@ -18,7 +18,7 @@ from pants.util.strutil import softwrap CONTENT_TYPE = "binary/octet-stream" - +EMPTY_SHA256_HASH = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" logger = logging.getLogger(__name__) @@ -26,6 +26,7 @@ @dataclass(frozen=True) class AWSCredentials: creds: Any + default_region: str | None @rule @@ -48,10 +49,11 @@ async def access_aws_credentials() -> AWSCredentials: ) raise - session = session.get_session() - creds = credentials.create_credential_resolver(session).load_credentials() + boto_session = session.get_session() + creds = credentials.create_credential_resolver(boto_session).load_credentials() + default_region = boto_session.get_config_variable("region") - return AWSCredentials(creds) + return AWSCredentials(creds=creds, default_region=default_region) @dataclass(frozen=True) @@ -69,36 +71,46 @@ async def download_from_s3( ) -> Digest: from botocore import auth, compat, exceptions # pants: no-infer-dep - # NB: The URL for auth is expected to be in path-style - path_style_url = "https://s3" + virtual_hosted_url = f"https://{request.bucket}.s3.amazonaws.com/{request.key}" if request.region: - path_style_url += f".{request.region}" - path_style_url += f".amazonaws.com/{request.bucket}/{request.key}" + virtual_hosted_url = ( + f"https://{request.bucket}.s3.{request.region}.amazonaws.com/{request.key}" + ) + if request.query: - path_style_url += f"?{request.query}" + virtual_hosted_url += f"?{request.query}" headers = compat.HTTPHeaders() http_request = SimpleNamespace( - url=path_style_url, + url=virtual_hosted_url, headers=headers, method="GET", auth_path=None, + data=None, + params={}, + context={}, + body={}, ) # NB: The added Auth header doesn't need to be valid when accessing a public bucket. When # hand-testing, you MUST test against a private bucket to ensure it works for private buckets too. - signer = auth.HmacV1Auth(aws_credentials.creds) + # adding x-amz-content-SHA256 as per boto code + # ref link - https://github.com/boto/botocore/blob/547b20801770c8ea4255ee9c3b809fea6b9f6bc4/botocore/auth.py#L52C1-L54C2 + headers.add_header( + "X-Amz-Content-SHA256", + EMPTY_SHA256_HASH, + ) + + # We require a region to sign the request with sigv4 + # If we don't know where the bucket is, default to the region from the credentials + # and fallback to us-east-1 + signing_region = request.region or aws_credentials.default_region or "us-east-1" + + signer = auth.SigV4Auth(aws_credentials.creds, "s3", signing_region) try: signer.add_auth(http_request) except exceptions.NoCredentialsError: pass # The user can still access public S3 buckets without credentials - virtual_hosted_url = f"https://{request.bucket}.s3" - if request.region: - virtual_hosted_url += f".{request.region}" - virtual_hosted_url += f".amazonaws.com/{request.key}" - if request.query: - virtual_hosted_url += f"?{request.query}" - return await Get( Digest, NativeDownloadFile(