From d9b2fb39a89b86c8f912c1966f1c60b7b25e4f78 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Thu, 13 Mar 2025 19:53:37 -0400 Subject: [PATCH 01/18] fix: add auth --- py_hamt/store.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/py_hamt/store.py b/py_hamt/store.py index a439915..3c387a2 100644 --- a/py_hamt/store.py +++ b/py_hamt/store.py @@ -76,6 +76,11 @@ def __init__( hasher: str = "blake3", pin_on_add: bool = False, debug: bool = False, + # Authentication parameters + basic_auth: tuple[str, str] | None = None, # (username, password) + bearer_token: str | None = None, + api_key: str | None = None, + api_key_header: str = "X-API-Key" # Customizable API key header ): self.timeout_seconds = timeout_seconds """ @@ -100,6 +105,26 @@ def __init__( self.total_received: None | int = 0 if debug else None """Total bytes in responses from IPFS for blocks. Used for debugging purposes.""" + # Authentication settings + self.basic_auth = basic_auth + """Tuple of (username, password) for Basic Authentication""" + self.bearer_token = bearer_token + """Bearer token for token-based authentication""" + self.api_key = api_key + """API key for API key-based authentication""" + self.api_key_header = api_key_header + """Header name to use for API key authentication""" + + def _get_request_headers(self) -> dict[str, str]: + """Helper method to construct authentication headers""" + headers = {} + + # Apply authentication based on provided credentials + if self.bearer_token: + headers["Authorization"] = f"Bearer {self.bearer_token}" + elif self.api_key: + headers[self.api_key_header] = self.api_key + def save(self, data: bytes, cid_codec: str) -> CID: """ This saves the data to an ipfs daemon by calling the RPC API, and then returns the CID, with a multicodec set by the input cid_codec. We need to do this since the API always returns either a multicodec of raw or dag-pb if it had to shard the input data. @@ -116,9 +141,18 @@ def save(self, data: bytes, cid_codec: str) -> CID: """ pin_string: str = "true" if self.pin_on_add else "false" + # Prepare request parameters + url = f"{self.rpc_uri_stem}/api/v0/add?hash={self.hasher}&pin={pin_string}" + headers = self._get_request_headers() + auth = self.basic_auth if self.basic_auth else None + + # Make the request with appropriate authentication response = requests.post( - f"{self.rpc_uri_stem}/api/v0/add?hash={self.hasher}&pin={pin_string}", + url, files={"file": data}, + headers=headers, + auth=auth, + timeout=self.timeout_seconds ) response.raise_for_status() From e2bd2337b519029ffb6d81620eb78ab61f179012 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:03:21 -0400 Subject: [PATCH 02/18] fix: return headers --- py_hamt/store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/py_hamt/store.py b/py_hamt/store.py index 3c387a2..4859189 100644 --- a/py_hamt/store.py +++ b/py_hamt/store.py @@ -124,6 +124,7 @@ def _get_request_headers(self) -> dict[str, str]: headers["Authorization"] = f"Bearer {self.bearer_token}" elif self.api_key: headers[self.api_key_header] = self.api_key + return headers def save(self, data: bytes, cid_codec: str) -> CID: """ From 6b9764862d4e413ca5e60c01b9e28e96609037e0 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:04:59 -0400 Subject: [PATCH 03/18] fix: update documentation and refactor --- py_hamt/store.py | 24 ++++++++++++------------ tests/test_zarr_ipfs.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/py_hamt/store.py b/py_hamt/store.py index 4859189..f19c4a6 100644 --- a/py_hamt/store.py +++ b/py_hamt/store.py @@ -66,6 +66,11 @@ class IPFSStore(Store): Use IPFS as a backing store for a HAMT. The IDs returned from save and used by load are IPFS CIDs. Save methods use the RPC API but `load` uses the HTTP Gateway, so read-only HAMTs will only access the HTTP Gateway. This allows for connection to remote gateways as well. + + You can write to an authenticated IPFS node by providing credentials in the constructor. The following authentication methods are supported: + - Basic Authentication: Provide a tuple of (username, password) to the `basic_auth` parameter. + - Bearer Token: Provide a bearer token to the `bearer_token` parameter. + - API Key: Provide an API key to the `api_key` parameter. You can customize the header name for the API key by setting the `api_key_header` parameter. """ def __init__( @@ -115,17 +120,6 @@ def __init__( self.api_key_header = api_key_header """Header name to use for API key authentication""" - def _get_request_headers(self) -> dict[str, str]: - """Helper method to construct authentication headers""" - headers = {} - - # Apply authentication based on provided credentials - if self.bearer_token: - headers["Authorization"] = f"Bearer {self.bearer_token}" - elif self.api_key: - headers[self.api_key_header] = self.api_key - return headers - def save(self, data: bytes, cid_codec: str) -> CID: """ This saves the data to an ipfs daemon by calling the RPC API, and then returns the CID, with a multicodec set by the input cid_codec. We need to do this since the API always returns either a multicodec of raw or dag-pb if it had to shard the input data. @@ -142,9 +136,15 @@ def save(self, data: bytes, cid_codec: str) -> CID: """ pin_string: str = "true" if self.pin_on_add else "false" + # Apply authentication based on provided credentials + headers = {} + if self.bearer_token: + headers["Authorization"] = f"Bearer {self.bearer_token}" + elif self.api_key: + headers[self.api_key_header] = self.api_key + # Prepare request parameters url = f"{self.rpc_uri_stem}/api/v0/add?hash={self.hasher}&pin={pin_string}" - headers = self._get_request_headers() auth = self.basic_auth if self.basic_auth else None # Make the request with appropriate authentication diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 3406ad7..6196c43 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -137,3 +137,44 @@ def test_encryption(random_zarr_dataset: tuple[str, xr.Dataset]): # We should be unable to read precipitation values which are still encrypted with pytest.raises(Exception): ds.precip.sum() + + +def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): + zarr_path, expected_ds = random_zarr_dataset + test_ds = xr.open_zarr(zarr_path) + + with pytest.raises(ValueError, match="Encryption key is not 32 bytes"): + create_zarr_encryption_transformers(bytes(), bytes()) + + encryption_key = bytes(32) + # Encrypt only precipitation, not temperature + encrypt, decrypt = create_zarr_encryption_transformers( + encryption_key, header="sample-header".encode(), exclude_vars=["temp"] + ) + hamt = HAMT( + store=IPFSStore( + api_key="test", + bearer_token="Test", + basic_auth=("test", "test") + ), transformer_encode=encrypt, transformer_decode=decrypt + ) + test_ds.to_zarr(store=hamt, mode="w") + + hamt.make_read_only() + loaded_ds = xr.open_zarr(store=hamt) + xr.testing.assert_identical(loaded_ds, expected_ds) + + # Now trying to load without a decryptor, xarray should be able to read the metadata and still perform operations on the unencrypted variable + print("Attempting to read and print metadata of partially encrypted zarr") + ds = xr.open_zarr( + store=HAMT(store=IPFSStore( + api_key="test", + bearer_token="Test", + basic_auth=("test", "test") + ), root_node_id=hamt.root_node_id, read_only=True) + ) + print(ds) + assert ds.temp.sum() == expected_ds.temp.sum() + # We should be unable to read precipitation values which are still encrypted + with pytest.raises(Exception): + ds.precip.sum() \ No newline at end of file From f8b6eeb322c9cb4438b1a1918dc42e907f0f20b4 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:06:36 -0400 Subject: [PATCH 04/18] fix: remove bearer token --- tests/test_zarr_ipfs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 6196c43..bec3b6b 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -169,7 +169,6 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): ds = xr.open_zarr( store=HAMT(store=IPFSStore( api_key="test", - bearer_token="Test", basic_auth=("test", "test") ), root_node_id=hamt.root_node_id, read_only=True) ) From 9a212109df066b4090af9105798e5094c1a1e834 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:09:50 -0400 Subject: [PATCH 05/18] fix: use only api key --- tests/test_zarr_ipfs.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index bec3b6b..60a1521 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -164,13 +164,22 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): loaded_ds = xr.open_zarr(store=hamt) xr.testing.assert_identical(loaded_ds, expected_ds) + # Test with just api key + hamt = HAMT( + store=IPFSStore( + api_key="test" + ), transformer_encode=encrypt, transformer_decode=decrypt + ) + test_ds.to_zarr(store=hamt, mode="w") + + hamt.make_read_only() + loaded_ds = xr.open_zarr(store=hamt) + xr.testing.assert_identical(loaded_ds, expected_ds) + # Now trying to load without a decryptor, xarray should be able to read the metadata and still perform operations on the unencrypted variable print("Attempting to read and print metadata of partially encrypted zarr") ds = xr.open_zarr( - store=HAMT(store=IPFSStore( - api_key="test", - basic_auth=("test", "test") - ), root_node_id=hamt.root_node_id, read_only=True) + store=HAMT(store=IPFSStore(), root_node_id=hamt.root_node_id, read_only=True) ) print(ds) assert ds.temp.sum() == expected_ds.temp.sum() From a6080af4367258d3edaa78d4ff5b299014facaa3 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:22:38 -0400 Subject: [PATCH 06/18] fix: configure action to setup gateway restrictions --- .github/workflows/run-checks.yaml | 16 ++++++++++++++++ py_hamt/store.py | 6 +++--- tests/test_zarr_ipfs.py | 16 ++++++++-------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index 68c08d4..3720f53 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -25,6 +25,22 @@ jobs: run_daemon: true id: ipfs_setup + - name: Configure IPFS with multiple authentication methods + run: | + # Set CORS headers + ipfs config --json API.HTTPHeaders.Access-Control-Allow-Origin '["*"]' + ipfs config --json Gateway.HTTPHeaders.Access-Control-Allow-Origin '["*"]' + + # Configure authentication methods with test credentials + ipfs config --json Gateway.HTTPHeaders.Authorization "{\"Bearer\": \"test\"}" + ipfs config --json Gateway.HTTPHeaders.X-API-Key "\"test\"" + + # Basic auth with test:test + echo "test:test" > ~/.ipfs/credentials + BASIC_AUTH_BASE64=$(base64 ~/.ipfs/credentials | tr -d '\n') + ipfs config --json Gateway.HTTPHeaders.Basic-Auth "{\"Authorization\": \"Basic $BASIC_AUTH_BASE64\"}" + + - name: Run pytest with coverage run: uv run pytest --cov=py_hamt tests/ --cov-report=xml diff --git a/py_hamt/store.py b/py_hamt/store.py index f19c4a6..15961a9 100644 --- a/py_hamt/store.py +++ b/py_hamt/store.py @@ -66,7 +66,7 @@ class IPFSStore(Store): Use IPFS as a backing store for a HAMT. The IDs returned from save and used by load are IPFS CIDs. Save methods use the RPC API but `load` uses the HTTP Gateway, so read-only HAMTs will only access the HTTP Gateway. This allows for connection to remote gateways as well. - + You can write to an authenticated IPFS node by providing credentials in the constructor. The following authentication methods are supported: - Basic Authentication: Provide a tuple of (username, password) to the `basic_auth` parameter. - Bearer Token: Provide a bearer token to the `bearer_token` parameter. @@ -85,7 +85,7 @@ def __init__( basic_auth: tuple[str, str] | None = None, # (username, password) bearer_token: str | None = None, api_key: str | None = None, - api_key_header: str = "X-API-Key" # Customizable API key header + api_key_header: str = "X-API-Key", # Customizable API key header ): self.timeout_seconds = timeout_seconds """ @@ -153,7 +153,7 @@ def save(self, data: bytes, cid_codec: str) -> CID: files={"file": data}, headers=headers, auth=auth, - timeout=self.timeout_seconds + timeout=self.timeout_seconds, ) response.raise_for_status() diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 60a1521..670e063 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -153,10 +153,10 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): ) hamt = HAMT( store=IPFSStore( - api_key="test", - bearer_token="Test", - basic_auth=("test", "test") - ), transformer_encode=encrypt, transformer_decode=decrypt + api_key="test", bearer_token="Test", basic_auth=("test", "test") + ), + transformer_encode=encrypt, + transformer_decode=decrypt, ) test_ds.to_zarr(store=hamt, mode="w") @@ -166,9 +166,9 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # Test with just api key hamt = HAMT( - store=IPFSStore( - api_key="test" - ), transformer_encode=encrypt, transformer_decode=decrypt + store=IPFSStore(api_key="test"), + transformer_encode=encrypt, + transformer_decode=decrypt, ) test_ds.to_zarr(store=hamt, mode="w") @@ -185,4 +185,4 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): assert ds.temp.sum() == expected_ds.temp.sum() # We should be unable to read precipitation values which are still encrypted with pytest.raises(Exception): - ds.precip.sum() \ No newline at end of file + ds.precip.sum() From 005937849668248d72f8267c2721e164775decbe Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:35:00 -0400 Subject: [PATCH 07/18] fix: remove authentication attempt --- .github/workflows/run-checks.yaml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index 3720f53..68c08d4 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -25,22 +25,6 @@ jobs: run_daemon: true id: ipfs_setup - - name: Configure IPFS with multiple authentication methods - run: | - # Set CORS headers - ipfs config --json API.HTTPHeaders.Access-Control-Allow-Origin '["*"]' - ipfs config --json Gateway.HTTPHeaders.Access-Control-Allow-Origin '["*"]' - - # Configure authentication methods with test credentials - ipfs config --json Gateway.HTTPHeaders.Authorization "{\"Bearer\": \"test\"}" - ipfs config --json Gateway.HTTPHeaders.X-API-Key "\"test\"" - - # Basic auth with test:test - echo "test:test" > ~/.ipfs/credentials - BASIC_AUTH_BASE64=$(base64 ~/.ipfs/credentials | tr -d '\n') - ipfs config --json Gateway.HTTPHeaders.Basic-Auth "{\"Authorization\": \"Basic $BASIC_AUTH_BASE64\"}" - - - name: Run pytest with coverage run: uv run pytest --cov=py_hamt tests/ --cov-report=xml From 1858c4c47b9762acebe997cc2a893abb1158d605 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:29:50 -0400 Subject: [PATCH 08/18] fix: test with nginx --- .github/workflows/run-checks.yaml | 48 +++++++++++++++++++++++++++++++ tests/test_zarr_ipfs.py | 28 ++++++++++++++++-- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index 68c08d4..c2efa44 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -25,6 +25,54 @@ jobs: run_daemon: true id: ipfs_setup + + - name: Install and configure Nginx + run: | + # Install Nginx + sudo apt-get update + sudo apt-get install -y nginx + + # Create Nginx config for reverse proxy with auth + cat < Date: Fri, 14 Mar 2025 20:31:10 -0400 Subject: [PATCH 09/18] fix: run checks --- .github/workflows/run-checks.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index c2efa44..052f32f 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -61,17 +61,17 @@ jobs: # Test Nginx config sudo nginx -t - - name: Start Nginx and restart IPFS daemon - run: | - # Start Nginx - sudo systemctl start nginx + - name: Start Nginx and restart IPFS daemon + run: | + # Start Nginx + sudo systemctl start nginx - # Restart IPFS daemon to ensure it’s running - ipfs shutdown - ipfs daemon & + # Restart IPFS daemon to ensure it’s running + ipfs shutdown + ipfs daemon & - # Wait for IPFS and Nginx to be ready - sleep 5 + # Wait for IPFS and Nginx to be ready + sleep 5 - name: Run pytest with coverage run: uv run pytest --cov=py_hamt tests/ --cov-report=xml From 9f1db735a66fa808ca40c6c203aceea13e69e883 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:32:40 -0400 Subject: [PATCH 10/18] fix: indent --- .github/workflows/run-checks.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index 052f32f..ac88fd5 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -62,16 +62,16 @@ jobs: sudo nginx -t - name: Start Nginx and restart IPFS daemon - run: | - # Start Nginx - sudo systemctl start nginx + run: | + # Start Nginx + sudo systemctl start nginx - # Restart IPFS daemon to ensure it’s running - ipfs shutdown - ipfs daemon & + # Restart IPFS daemon to ensure it’s running + ipfs shutdown + ipfs daemon & - # Wait for IPFS and Nginx to be ready - sleep 5 + # Wait for IPFS and Nginx to be ready + sleep 5 - name: Run pytest with coverage run: uv run pytest --cov=py_hamt tests/ --cov-report=xml From 20229bb0d232019cc39e623ca9fa601816b1d837 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:35:48 -0400 Subject: [PATCH 11/18] fix: test wrong api key --- tests/test_zarr_ipfs.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 908774d..56212cd 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -185,20 +185,17 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # xr.testing.assert_identical(loaded_ds, expected_ds) - # Test with just api key + # Test with wrong API Key hamt = HAMT( store=IPFSStore( rpc_uri_stem = "http://127.0.0.1:5002", - api_key="test", + api_key="badKey", ), transformer_encode=encrypt, transformer_decode=decrypt, ) - test_ds.to_zarr(store=hamt, mode="w") - - hamt.make_read_only() - loaded_ds = xr.open_zarr(store=hamt) - xr.testing.assert_identical(loaded_ds, expected_ds) + with pytest.raises(Exception): + test_ds.to_zarr(store=hamt, mode="w") # Now trying to load without a decryptor, xarray should be able to read the metadata and still perform operations on the unencrypted variable print("Attempting to read and print metadata of partially encrypted zarr") From 773397d9e7eef1a178c901b15cff5ebf217f0713 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:42:20 -0400 Subject: [PATCH 12/18] fix: three types of auth --- .github/workflows/run-checks.yaml | 23 +++++++++++++-- tests/test_zarr_ipfs.py | 49 +++++++++++++++++++------------ 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/.github/workflows/run-checks.yaml b/.github/workflows/run-checks.yaml index ac88fd5..3477381 100644 --- a/.github/workflows/run-checks.yaml +++ b/.github/workflows/run-checks.yaml @@ -25,7 +25,6 @@ jobs: run_daemon: true id: ipfs_setup - - name: Install and configure Nginx run: | # Install Nginx @@ -38,10 +37,28 @@ jobs: listen 5002; server_name localhost; + # Default deny unless authenticated + set \$auth_valid 0; + location /api/v0/ { # Enforce X-API-Key for API key auth - if (\$http_x_api_key != "test") { - return 401 "Unauthorized: Invalid or missing X-API-Key"; + if (\$http_x_api_key = "test") { + set \$auth_valid 1; + } + + # Check Bearer token + if (\$http_authorization = "Bearer test") { + set \$auth_valid 1; + } + + # Check Basic Auth (test:test = dGVzdDp0ZXN0) + if (\$http_authorization = "Basic dGVzdDp0ZXN0") { + set \$auth_valid 1; + } + + # Deny if no valid auth method + if (\$auth_valid = 0) { + return 401 "Unauthorized: Invalid or missing authentication"; } # Proxy to IPFS RPC API diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 56212cd..9a64d0f 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -169,33 +169,44 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): xr.testing.assert_identical(loaded_ds, expected_ds) # # Test with just bearer_token key - # hamt = HAMT( - # store=IPFSStore( - # # api_key="test", - # # basic_auth=("test", "test"), - # bearer_token="test", - # ), - # transformer_encode=encrypt, - # transformer_decode=decrypt, - # ) - # test_ds.to_zarr(store=hamt, mode="w") - - # hamt.make_read_only() - # loaded_ds = xr.open_zarr(store=hamt) - # xr.testing.assert_identical(loaded_ds, expected_ds) + hamt = HAMT( + store=IPFSStore( + bearer_token="test", + ), + transformer_encode=encrypt, + transformer_decode=decrypt, + ) + test_ds.to_zarr(store=hamt, mode="w") + hamt.make_read_only() + loaded_ds = xr.open_zarr(store=hamt) + xr.testing.assert_identical(loaded_ds, expected_ds) - # Test with wrong API Key + # # Test with just basic auth hamt = HAMT( - store=IPFSStore( - rpc_uri_stem = "http://127.0.0.1:5002", - api_key="badKey", + store=IPFSStore( + basic_auth=("test", "test"), ), transformer_encode=encrypt, transformer_decode=decrypt, ) + test_ds.to_zarr(store=hamt, mode="w") + + hamt.make_read_only() + loaded_ds = xr.open_zarr(store=hamt) + xr.testing.assert_identical(loaded_ds, expected_ds) + + + # Test with wrong API Key with pytest.raises(Exception): - test_ds.to_zarr(store=hamt, mode="w") + hamt = HAMT( + store=IPFSStore( + rpc_uri_stem = "http://127.0.0.1:5002", + api_key="badKey", + ), + transformer_encode=encrypt, + transformer_decode=decrypt, + ) # Now trying to load without a decryptor, xarray should be able to read the metadata and still perform operations on the unencrypted variable print("Attempting to read and print metadata of partially encrypted zarr") From ca1e67a61be1493164aeef65f0a032ceff459ec5 Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:44:57 -0400 Subject: [PATCH 13/18] fix: test wrong --- tests/test_zarr_ipfs.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 9a64d0f..31923e0 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -168,10 +168,23 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): loaded_ds = xr.open_zarr(store=hamt) xr.testing.assert_identical(loaded_ds, expected_ds) - # # Test with just bearer_token key + # Test with wrong API Key + with pytest.raises(Exception): + hamt = HAMT( + store=IPFSStore( + rpc_uri_stem = "http://127.0.0.1:5002", + api_key="badKey", + ), + transformer_encode=encrypt, + transformer_decode=decrypt, + ) + + + # Test with just bearer_token key hamt = HAMT( store=IPFSStore( bearer_token="test", + rpc_uri_stem = "http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, @@ -182,10 +195,22 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): loaded_ds = xr.open_zarr(store=hamt) xr.testing.assert_identical(loaded_ds, expected_ds) - # # Test with just basic auth + # Test with wrong bearer + with pytest.raises(Exception): + hamt = HAMT( + store=IPFSStore( + bearer_token="wrongBearer", + rpc_uri_stem = "http://127.0.0.1:5002", + ), + transformer_encode=encrypt, + transformer_decode=decrypt, + ) + + # Test with just basic auth hamt = HAMT( store=IPFSStore( basic_auth=("test", "test"), + rpc_uri_stem = "http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, @@ -196,13 +221,12 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): loaded_ds = xr.open_zarr(store=hamt) xr.testing.assert_identical(loaded_ds, expected_ds) - - # Test with wrong API Key + # Test with wrong basic auth with pytest.raises(Exception): hamt = HAMT( - store=IPFSStore( - rpc_uri_stem = "http://127.0.0.1:5002", - api_key="badKey", + store=IPFSStore( + basic_auth=("wrong", "wrong"), + rpc_uri_stem = "http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, From 40fadc154da28eaf06d30e1a5f64921c0407854c Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Fri, 14 Mar 2025 20:46:35 -0400 Subject: [PATCH 14/18] fix: file reformat --- tests/test_zarr_ipfs.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 31923e0..eb1d425 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -156,7 +156,7 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): hamt = HAMT( store=IPFSStore( # Reverse proxy on port 5002 - rpc_uri_stem = "http://127.0.0.1:5002", + rpc_uri_stem="http://127.0.0.1:5002", api_key="test", ), transformer_encode=encrypt, @@ -171,20 +171,19 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # Test with wrong API Key with pytest.raises(Exception): hamt = HAMT( - store=IPFSStore( - rpc_uri_stem = "http://127.0.0.1:5002", + store=IPFSStore( + rpc_uri_stem="http://127.0.0.1:5002", api_key="badKey", ), transformer_encode=encrypt, transformer_decode=decrypt, ) - # Test with just bearer_token key hamt = HAMT( - store=IPFSStore( + store=IPFSStore( bearer_token="test", - rpc_uri_stem = "http://127.0.0.1:5002", + rpc_uri_stem="http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, @@ -198,9 +197,9 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # Test with wrong bearer with pytest.raises(Exception): hamt = HAMT( - store=IPFSStore( + store=IPFSStore( bearer_token="wrongBearer", - rpc_uri_stem = "http://127.0.0.1:5002", + rpc_uri_stem="http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, @@ -208,9 +207,9 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # Test with just basic auth hamt = HAMT( - store=IPFSStore( + store=IPFSStore( basic_auth=("test", "test"), - rpc_uri_stem = "http://127.0.0.1:5002", + rpc_uri_stem="http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, @@ -224,9 +223,9 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): # Test with wrong basic auth with pytest.raises(Exception): hamt = HAMT( - store=IPFSStore( + store=IPFSStore( basic_auth=("wrong", "wrong"), - rpc_uri_stem = "http://127.0.0.1:5002", + rpc_uri_stem="http://127.0.0.1:5002", ), transformer_encode=encrypt, transformer_decode=decrypt, From 933650cd8aa7d97dea764a7839e340fe48ac81a2 Mon Sep 17 00:00:00 2001 From: Abid Sikder <41392423+abidsikder@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:19:34 -0400 Subject: [PATCH 15/18] simplify auth passing in store --- py_hamt/store.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/py_hamt/store.py b/py_hamt/store.py index 15961a9..13cb352 100644 --- a/py_hamt/store.py +++ b/py_hamt/store.py @@ -145,14 +145,13 @@ def save(self, data: bytes, cid_codec: str) -> CID: # Prepare request parameters url = f"{self.rpc_uri_stem}/api/v0/add?hash={self.hasher}&pin={pin_string}" - auth = self.basic_auth if self.basic_auth else None # Make the request with appropriate authentication response = requests.post( url, files={"file": data}, headers=headers, - auth=auth, + auth=self.basic_auth, timeout=self.timeout_seconds, ) response.raise_for_status() From 7fbc90113a54329761adb426f01e1e615bc0fa8e Mon Sep 17 00:00:00 2001 From: Abid Sikder <41392423+abidsikder@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:19:51 -0400 Subject: [PATCH 16/18] shorten authenticated gateway test --- tests/test_zarr_ipfs.py | 120 ++++++++++------------------------------ 1 file changed, 28 insertions(+), 92 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index eb1d425..eaebc02 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -12,7 +12,7 @@ from py_hamt import HAMT, IPFSStore, create_zarr_encryption_transformers -@pytest.fixture +@pytest.fixture(scope="module") def random_zarr_dataset(): """Creates a random xarray Dataset and saves it to a temporary zarr store. @@ -139,105 +139,41 @@ def test_encryption(random_zarr_dataset: tuple[str, xr.Dataset]): ds.precip.sum() +# This test assumes the other IPFSStore zarr ipfs tests are working fine, so if other things are breaking check those first def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): - zarr_path, expected_ds = random_zarr_dataset - test_ds = xr.open_zarr(zarr_path) - - with pytest.raises(ValueError, match="Encryption key is not 32 bytes"): - create_zarr_encryption_transformers(bytes(), bytes()) - - encryption_key = bytes(32) - # Encrypt only precipitation, not temperature - encrypt, decrypt = create_zarr_encryption_transformers( - encryption_key, header="sample-header".encode(), exclude_vars=["temp"] - ) + zarr_path, test_ds = random_zarr_dataset + + def write_and_check(store: IPFSStore) -> bool: + store.rpc_uri_stem = "http://127.0.0.1:5002" # 5002 is the port configured in the run-checks.yaml actions file for nginx to serve the proxy on + hamt = HAMT(store=store) + test_ds.to_zarr(store=hamt, mode="w") + loaded_ds = xr.open_zarr(store=hamt) + try: + xr.testing.assert_identical(test_ds, loaded_ds) + return True + except AssertionError as _: + return False # Test with API Key - hamt = HAMT( - store=IPFSStore( - # Reverse proxy on port 5002 - rpc_uri_stem="http://127.0.0.1:5002", - api_key="test", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) - test_ds.to_zarr(store=hamt, mode="w") - - hamt.make_read_only() - loaded_ds = xr.open_zarr(store=hamt) - xr.testing.assert_identical(loaded_ds, expected_ds) + api_key_store = IPFSStore(api_key="test") + assert write_and_check(api_key_store) - # Test with wrong API Key - with pytest.raises(Exception): - hamt = HAMT( - store=IPFSStore( - rpc_uri_stem="http://127.0.0.1:5002", - api_key="badKey", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) + # Test that wrong API Key fails + bad_api_key_store = IPFSStore(api_key="badKey") + assert not write_and_check(bad_api_key_store) - # Test with just bearer_token key - hamt = HAMT( - store=IPFSStore( - bearer_token="test", - rpc_uri_stem="http://127.0.0.1:5002", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) - test_ds.to_zarr(store=hamt, mode="w") - - hamt.make_read_only() - loaded_ds = xr.open_zarr(store=hamt) - xr.testing.assert_identical(loaded_ds, expected_ds) + # Test just bearer token + bearer_ipfs_store = IPFSStore(bearer_token="test") + assert write_and_check(bearer_ipfs_store) # Test with wrong bearer - with pytest.raises(Exception): - hamt = HAMT( - store=IPFSStore( - bearer_token="wrongBearer", - rpc_uri_stem="http://127.0.0.1:5002", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) + bad_bearer_store = IPFSStore(bearer_token="wrongBearer") + assert not write_and_check(bad_bearer_store) # Test with just basic auth - hamt = HAMT( - store=IPFSStore( - basic_auth=("test", "test"), - rpc_uri_stem="http://127.0.0.1:5002", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) - test_ds.to_zarr(store=hamt, mode="w") - - hamt.make_read_only() - loaded_ds = xr.open_zarr(store=hamt) - xr.testing.assert_identical(loaded_ds, expected_ds) + basic_auth_store = IPFSStore(basic_auth=("test", "test")) + assert write_and_check(basic_auth_store) # Test with wrong basic auth - with pytest.raises(Exception): - hamt = HAMT( - store=IPFSStore( - basic_auth=("wrong", "wrong"), - rpc_uri_stem="http://127.0.0.1:5002", - ), - transformer_encode=encrypt, - transformer_decode=decrypt, - ) - - # Now trying to load without a decryptor, xarray should be able to read the metadata and still perform operations on the unencrypted variable - print("Attempting to read and print metadata of partially encrypted zarr") - ds = xr.open_zarr( - store=HAMT(store=IPFSStore(), root_node_id=hamt.root_node_id, read_only=True) - ) - print(ds) - assert ds.temp.sum() == expected_ds.temp.sum() - # We should be unable to read precipitation values which are still encrypted - with pytest.raises(Exception): - ds.precip.sum() + bad_basic_auth_store = IPFSStore(basic_auth=("wrong", "wrong")) + assert not write_and_check(bad_basic_auth_store) From b30900f0cc006b5a30b6c1e025424ddcc696b5a9 Mon Sep 17 00:00:00 2001 From: Abid Sikder <41392423+abidsikder@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:22:35 -0400 Subject: [PATCH 17/18] broaden errors caught to web server errors --- tests/test_zarr_ipfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index eaebc02..30189ad 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -151,7 +151,7 @@ def write_and_check(store: IPFSStore) -> bool: try: xr.testing.assert_identical(test_ds, loaded_ds) return True - except AssertionError as _: + except Exception as _: return False # Test with API Key From dba091f8e013eee6869d0aef3fc255bfe69799b8 Mon Sep 17 00:00:00 2001 From: Abid Sikder <41392423+abidsikder@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:24:37 -0400 Subject: [PATCH 18/18] broaden where the exception can be caught --- tests/test_zarr_ipfs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_zarr_ipfs.py b/tests/test_zarr_ipfs.py index 30189ad..0ccab52 100644 --- a/tests/test_zarr_ipfs.py +++ b/tests/test_zarr_ipfs.py @@ -144,11 +144,11 @@ def test_authenticated_gateway(random_zarr_dataset: tuple[str, xr.Dataset]): zarr_path, test_ds = random_zarr_dataset def write_and_check(store: IPFSStore) -> bool: - store.rpc_uri_stem = "http://127.0.0.1:5002" # 5002 is the port configured in the run-checks.yaml actions file for nginx to serve the proxy on - hamt = HAMT(store=store) - test_ds.to_zarr(store=hamt, mode="w") - loaded_ds = xr.open_zarr(store=hamt) try: + store.rpc_uri_stem = "http://127.0.0.1:5002" # 5002 is the port configured in the run-checks.yaml actions file for nginx to serve the proxy on + hamt = HAMT(store=store) + test_ds.to_zarr(store=hamt, mode="w") + loaded_ds = xr.open_zarr(store=hamt) xr.testing.assert_identical(test_ds, loaded_ds) return True except Exception as _: