implement sigv4 signing for s3 downloads
chris-smith-zocdoc committed Feb 14, 2025
1 parent dd87b85 commit f75e8eb
Showing 2 changed files with 40 additions and 20 deletions.
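For context on the change itself: the handler previously signed requests with botocore's legacy `HmacV1Auth` signer; this commit switches it to `SigV4Auth`, which is scoped to a service name and region. A minimal sketch of SigV4 signing with botocore outside of Pants (the bucket, key, and region below are placeholder values, and credentials come from whatever the local environment provides):

```python
# Minimal SigV4 signing sketch using botocore directly (not Pants code).
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.session import get_session

session = get_session()
creds = session.get_credentials()  # env vars, ~/.aws config, instance metadata, ...

# Placeholder bucket/key/region; SigV4 signatures are scoped to service + region.
request = AWSRequest(
    method="GET",
    url="https://my-bucket.s3.us-west-2.amazonaws.com/path/to/key",
)
SigV4Auth(creds, "s3", "us-west-2").add_auth(request)

print(request.headers["Authorization"])  # AWS4-HMAC-SHA256 Credential=...
```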
12 changes: 10 additions & 2 deletions src/python/pants/backend/url_handlers/s3/integration_test.py
@@ -55,6 +55,12 @@ def fake_resolver_creator(session):
assert session is fake_session
return SimpleNamespace(load_credentials=lambda: fake_creds)

# Replace the simple fake_session object with a SimpleNamespace that has get_config_variable
fake_session = SimpleNamespace(
get_config_variable=lambda name: "us-west-2" if name == "region" else None
)
botocore.session = SimpleNamespace(get_session=lambda: fake_session)

def fake_creds_ctor(access_key, secret_key):
assert access_key == fake_creds.access_key
assert secret_key == fake_creds.secret_key
@@ -64,16 +70,18 @@ def fake_creds_ctor(access_key, secret_key):
create_credential_resolver=fake_resolver_creator, Credentials=fake_creds_ctor
)

def fake_auth_ctor(creds):
def fake_auth_ctor(creds, service_name, region_name):
assert creds is fake_creds
assert service_name == "s3"
assert region_name in ["us-east-1", "us-west-2"]

def add_auth(request):
assert request.url == expected_url
request.headers["AUTH"] = "TOKEN"

return SimpleNamespace(add_auth=add_auth)

botocore.auth = SimpleNamespace(HmacV1Auth=fake_auth_ctor)
botocore.auth = SimpleNamespace(SigV4Auth=fake_auth_ctor)

monkeypatch.setitem(sys.modules, "botocore", botocore)

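The fixture above never imports the real botocore: it assembles a stand-in module out of `SimpleNamespace` objects and installs it with `monkeypatch.setitem(sys.modules, "botocore", botocore)`, so the handler's deferred `from botocore import ...` resolves to the fakes. A self-contained sketch of the same pattern (the fake signer and test names here are illustrative, not part of the Pants fixture):

```python
# Standalone illustration of faking a module via sys.modules (pytest style).
import sys
from types import SimpleNamespace


def fake_sigv4_ctor(creds, service_name, region_name):
    # Mimics botocore.auth.SigV4Auth just enough for the code under test.
    return SimpleNamespace(add_auth=lambda request: request.headers.update({"AUTH": "TOKEN"}))


def test_code_sees_fake_botocore(monkeypatch):
    fake_botocore = SimpleNamespace(auth=SimpleNamespace(SigV4Auth=fake_sigv4_ctor))
    monkeypatch.setitem(sys.modules, "botocore", fake_botocore)

    # Any import performed after this point gets the stand-in, not the real library.
    from botocore import auth  # resolves to fake_botocore.auth

    request = SimpleNamespace(headers={})
    auth.SigV4Auth(None, "s3", "us-east-1").add_auth(request)
    assert request.headers["AUTH"] == "TOKEN"
```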
48 changes: 30 additions & 18 deletions src/python/pants/backend/url_handlers/s3/register.py
@@ -18,14 +18,15 @@
from pants.util.strutil import softwrap

CONTENT_TYPE = "binary/octet-stream"

EMPTY_SHA256_HASH = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class AWSCredentials:
creds: Any
default_region: str | None


@rule
@@ -48,10 +49,11 @@ async def access_aws_credentials() -> AWSCredentials:
)
raise

session = session.get_session()
creds = credentials.create_credential_resolver(session).load_credentials()
boto_session = session.get_session()
creds = credentials.create_credential_resolver(boto_session).load_credentials()
default_region = boto_session.get_config_variable("region")

return AWSCredentials(creds)
return AWSCredentials(creds=creds, default_region=default_region)


@dataclass(frozen=True)
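`default_region` records whatever region the local AWS configuration resolves to; botocore's `get_config_variable("region")` consults the usual sources (`AWS_REGION`/`AWS_DEFAULT_REGION`, the profile in `~/.aws/config`) and returns `None` when nothing is configured, which is why the download rule below still needs a final fallback. A quick way to see what it resolves to locally (assuming botocore is installed):

```python
# Prints the region botocore resolves from the local environment/config, or None.
import botocore.session

session = botocore.session.get_session()
print(session.get_config_variable("region"))  # e.g. "us-west-2", or None if unset
```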
@@ -69,36 +71,46 @@ async def download_from_s3(
) -> Digest:
from botocore import auth, compat, exceptions # pants: no-infer-dep

# NB: The URL for auth is expected to be in path-style
path_style_url = "https://s3"
virtual_hosted_url = f"https://{request.bucket}.s3.amazonaws.com/{request.key}"
if request.region:
path_style_url += f".{request.region}"
path_style_url += f".amazonaws.com/{request.bucket}/{request.key}"
virtual_hosted_url = (
f"https://{request.bucket}.s3.{request.region}.amazonaws.com/{request.key}"
)

if request.query:
path_style_url += f"?{request.query}"
virtual_hosted_url += f"?{request.query}"

headers = compat.HTTPHeaders()
http_request = SimpleNamespace(
url=path_style_url,
url=virtual_hosted_url,
headers=headers,
method="GET",
auth_path=None,
data=None,
params={},
context={},
body={},
)
# NB: The added Auth header doesn't need to be valid when accessing a public bucket. When
# hand-testing, you MUST test against a private bucket to ensure it works for private buckets too.
signer = auth.HmacV1Auth(aws_credentials.creds)
# adding x-amz-content-SHA256 as per boto code
# ref link - https://github.com/boto/botocore/blob/547b20801770c8ea4255ee9c3b809fea6b9f6bc4/botocore/auth.py#L52C1-L54C2
headers.add_header(
"X-Amz-Content-SHA256",
EMPTY_SHA256_HASH,
)

# We require a region to sign the request with SigV4.
# If we don't know where the bucket is, default to the region from the AWS config
# and fall back to us-east-1.
signing_region = request.region or aws_credentials.default_region or "us-east-1"

signer = auth.SigV4Auth(aws_credentials.creds, "s3", signing_region)
try:
signer.add_auth(http_request)
except exceptions.NoCredentialsError:
pass # The user can still access public S3 buckets without credentials

virtual_hosted_url = f"https://{request.bucket}.s3"
if request.region:
virtual_hosted_url += f".{request.region}"
virtual_hosted_url += f".amazonaws.com/{request.key}"
if request.query:
virtual_hosted_url += f"?{request.query}"

return await Get(
Digest,
NativeDownloadFile(
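Two details in the rule above are easy to verify in isolation: the `X-Amz-Content-SHA256` header is pre-set to the SHA-256 of an empty body (the download is a GET with no payload), and the URL being signed is the virtual-hosted-style form `<bucket>.s3.<region>.amazonaws.com/<key>`. A small sanity-check sketch (bucket, key, and region are placeholders):

```python
# Sanity checks for the constants/URL shape used in download_from_s3 (not Pants code).
import hashlib

# SHA-256 of an empty payload, matching EMPTY_SHA256_HASH above.
assert hashlib.sha256(b"").hexdigest() == (
    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)

# Virtual-hosted-style URL construction mirroring the rule (placeholder values).
bucket, key, region, query = "my-bucket", "path/to/artifact.tar.gz", "us-west-2", None
url = f"https://{bucket}.s3.amazonaws.com/{key}"
if region:
    url = f"https://{bucket}.s3.{region}.amazonaws.com/{key}"
if query:
    url += f"?{query}"
print(url)  # https://my-bucket.s3.us-west-2.amazonaws.com/path/to/artifact.tar.gz
```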
