class TokenError(Exception):
    """Raised when the access token is expired, revoked, or invalid.

    Keycloak returns HTTP 401 in this case.
    """

    def __init__(self, detail: str = "Access token is expired or invalid"):
        """Store ``detail``; it doubles as the exception message."""
        self.detail = detail
        super().__init__(detail)


class ResourceNotFoundError(Exception):
    """Raised when Keycloak returns HTTP 400 with an ``invalid_resource`` error.

    This means the requested resource (tenant) does not exist in the
    resource server.
    """

    def __init__(self, resource_id: str):
        """Record the resource ID that was not found."""
        self.resource_id = resource_id
        super().__init__(f"Resource not found: {resource_id}")


class PermissionDeniedError(Exception):
    """Raised when the user lacks permission for a resource and scope.

    Keycloak signals this with HTTP 403.
    """

    def __init__(self, resource_id: str, scope: Optional[str] = None):
        """Record the denied resource ID and, when known, the denied scope."""
        self.resource_id = resource_id
        self.scope = scope
        message = f"Permission denied: {resource_id}"
        if scope:
            # The scope is stored on the instance; surface it in the message
            # too so plain log lines show what was denied.
            message = f"{message} (scope: {scope})"
        super().__init__(message)


def parse_keycloak_from_openid_url(
    openid_configuration_url: Union[str, Any]
) -> Tuple[str, str]:
    """Extract the Keycloak base URL and realm from an OpenID discovery URL.

    The expected shape is
    ``https://<host>[/<prefix>]/realms/<realm>/.well-known/openid-configuration``.
    Any path prefix in front of ``/realms/`` (for example ``/auth``) is kept
    as part of the returned base URL.

    Args:
        openid_configuration_url: The discovery URL; any object whose
            ``str()`` is the URL is accepted.

    Returns:
        ``(keycloak_url, realm)``.

    Raises:
        ValueError: If the URL is empty, is not absolute, has no
            ``/realms/<realm>`` segment, or the realm is empty.
    """
    if not openid_configuration_url:
        raise ValueError("Missing or empty OpenID configuration URL")
    url_str = str(openid_configuration_url).strip()

    parsed = urlparse(url_str)
    if not parsed.scheme or not parsed.netloc:
        # Without scheme and host the result would be the useless "://".
        raise ValueError(
            "OpenID configuration URL must be absolute (scheme and host). "
            f"Got: {url_str!r}"
        )

    path = (parsed.path or "").rstrip("/")
    prefix, marker, remainder = path.partition("/realms/")
    if not marker:
        raise ValueError(
            "OpenID configuration URL must contain /realms/<realm>/ "
            "(e.g. .../realms/my-realm/.well-known/openid-configuration). "
            f"Got path: {path!r}"
        )

    realm = remainder.split("/")[0]
    if not realm:
        raise ValueError("Could not extract realm from OpenID configuration URL")

    # Keep whatever precedes /realms/ so deployments served under a relative
    # path still get a usable base URL.
    keycloak_url = f"{parsed.scheme}://{parsed.netloc}{prefix}"
    return keycloak_url, realm


def _resolve_permissions(self, rpt_response: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the permissions of an RPT response, falling back to its JWT.

    Uses the top-level ``permissions`` entry when it is non-empty; otherwise
    decodes them from the ``access_token`` JWT if one is present. Always
    returns a list, never ``None``.
    """
    permissions = rpt_response.get("permissions") or []
    if not permissions:
        rpt_jwt = rpt_response.get("access_token")
        if rpt_jwt:
            permissions = self._extract_permissions_from_jwt(rpt_jwt) or []
            logger.debug("Extracted %d permissions from RPT JWT", len(permissions))
    return permissions


def _has_matching_permission(
    self,
    permissions: List[Dict[str, Any]],
    resource_id: str,
    scope: str,
) -> bool:
    """Return True if ``permissions`` grant ``scope`` on ``resource_id``.

    A permission matches when ``resource_id`` equals any of its ``rsname``,
    ``rsid`` or ``resource_id`` entries and ``scope`` is in its ``scopes``.

    See https://www.keycloak.org/docs/latest/authorization_services/#_service_rpt_overview
    """
    if not resource_id:
        # An empty ID must never match a permission that lacks identifiers.
        return False
    for permission in permissions or ():
        identifiers = (
            permission.get("rsname"),
            permission.get("rsid"),
            permission.get("resource_id"),
        )
        if resource_id in identifiers and scope in (permission.get("scopes") or ()):
            return True
    return False


def _handle_rpt_http_error(
    self,
    error: "httpx.HTTPStatusError",
    resource_id: str,
    scope: Optional[str] = None,
) -> None:
    """Translate an HTTPStatusError from get_rpt into a domain exception.

    This method never returns normally.

    Raises:
        TokenError: For HTTP 401.
        PermissionDeniedError: For HTTP 403.
        ResourceNotFoundError: For HTTP 400 whose JSON body has
            ``error == "invalid_resource"``.
        httpx.HTTPStatusError: ``error`` itself, for any other response.
    """
    status = error.response.status_code
    if status == 401:
        logger.warning("Token rejected (401): %s", error.response.text)
        raise TokenError(
            "Access token is expired or invalid. Please re-authenticate."
        ) from error
    if status == 403:
        raise PermissionDeniedError(resource_id=resource_id, scope=scope) from error
    if status == 400:
        try:
            error_body = error.response.json()
        except Exception:
            # Best effort: an unparseable body just means "not invalid_resource".
            error_body = {}
        # The body may be valid JSON but not an object; only a dict has "error".
        if isinstance(error_body, dict) and error_body.get("error") == "invalid_resource":
            raise ResourceNotFoundError(resource_id=resource_id) from error
    logger.error("Permission check failed: %s %s", status, error.response.text)
    raise error
@dataclass(frozen=True)
class ProtectedRoute:
    """Routes protected by policy decision point

    path_re: regex pattern applied to request path
    method: HTTP method
    scope: Keycloak resource scope name to check (e.g. "create", "update", "delete")
    """

    path_re: str
    method: str
    scope: str


DEFAULT_PROTECTED_ROUTES: Sequence[ProtectedRoute] = (
    ProtectedRoute(path_re=COLLECTIONS_CREATE_PATH_RE, method="POST", scope="create"),
)


def pep_error_response(
    status_code: int,
    detail: str,
    headers: Optional[dict] = None,
) -> JSONResponse:
    """Build a JSON error response of the form ``{"detail": detail}``."""
    return JSONResponse(
        status_code=status_code,
        content={"detail": detail},
        headers=headers or {},
    )


class PEPMiddleware(BaseHTTPMiddleware):
    """Middleware that enforces UMA authorization on protected routes.

    Requests that match no protected route pass straight through. For a
    match, the request must carry a Bearer token and the PDP client must
    grant the route's scope on the extracted resource ID; every other
    outcome is answered here with an error response.
    """

    def __init__(
        self,
        app: ASGIApp,
        *,
        pdp_client: Callable[[], KeycloakPDPClient],
        resource_extractor: Callable[[Request], Awaitable[Optional[str]]],
        protected_routes: Optional[Sequence[ProtectedRoute]] = None,
    ):
        """Configure PEP middleware with a PDP client, resource extractor, and protected routes.

        Args:
            app: The wrapped ASGI application.
            pdp_client: Zero-argument factory returning the PDP client; it is
                called once per protected request.
            resource_extractor: Coroutine function mapping a request to the
                resource ID to authorize, or ``None`` when undeterminable.
            protected_routes: Routes to enforce; ``None`` selects
                ``DEFAULT_PROTECTED_ROUTES``.
        """
        super().__init__(app)
        self._get_pdp_client = pdp_client
        self._extract_resource_id = resource_extractor
        routes = (
            protected_routes
            if protected_routes is not None
            else DEFAULT_PROTECTED_ROUTES
        )
        # Compile once here so per-request matching does no regex compilation.
        self._compiled = [
            (re.compile(r.path_re), r.method.upper(), r.scope) for r in routes
        ]

    def _get_matching_scope_and_route(
        self, request: Request
    ) -> Optional[tuple[str, str]]:
        """Return (scope, method) for the route that matches, otherwise return None"""
        # Ignore a trailing slash so "/collections/" matches like "/collections".
        path = request.url.path.rstrip("/") or "/"
        method = request.method.upper()
        for pattern, route_method, scope in self._compiled:
            if route_method == method and pattern.search(path):
                return (scope, route_method)
        return None

    def _get_bearer_token(self, request: Request) -> Optional[str]:
        """Extract the Bearer token from the Authorization header.

        The scheme name is compared case-insensitively. Returns ``None`` when
        the header is absent, uses another scheme, or carries no credentials.
        """
        auth = request.headers.get("Authorization")
        if not auth:
            return None
        scheme, _, credentials = auth.partition(" ")
        if scheme.lower() != "bearer":
            return None
        return credentials.strip() or None

    def _permission_denied_response(
        self, method: str, path: str, resource_id: str, scope: str
    ) -> JSONResponse:
        """Log a denial and build the matching 403 response."""
        logger.warning(
            "PEP: denied %s %s resource_id=%s, scope=%s",
            method,
            path,
            resource_id,
            scope,
        )
        return pep_error_response(
            403,
            (
                f"You do not have permission to {scope} this resource "
                f"({resource_id}). Verify that your user belongs to "
                f"the required tenant and role needed."
            ),
        )

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Check UMA authorization for protected routes, pass through otherwise.

        Responses produced here: 401 for a missing or rejected token, 403
        when the resource cannot be determined or permission is not granted,
        404 when the PDP reports an unknown resource, and 502 when building
        or calling the PDP client fails.
        """
        matched_request = self._get_matching_scope_and_route(request)
        if matched_request is None:
            logger.debug(
                "PEP: no protected route match for %s %s... continuing",
                request.method,
                request.url.path,
            )
            return await call_next(request)

        scope, _method = matched_request
        logger.info(
            "PEP: matched protected route %s %s and scope=%s",
            _method,
            request.url.path,
            scope,
        )

        token = self._get_bearer_token(request)
        if not token:
            logger.warning(
                "PEP: missing Bearer token for %s %s", _method, request.url.path
            )
            return pep_error_response(
                401,
                "Missing or invalid Authorization header (Bearer token required)",
                {"WWW-Authenticate": "Bearer"},
            )

        resource_id = await self._extract_resource_id(request)
        if not resource_id:
            logger.warning("PEP: no resource ID for %s %s", _method, request.url.path)
            return pep_error_response(
                403, "Could not determine resource for authorization"
            )

        logger.info(
            "PEP: checking permission resource_id=%s, scope=%s, path=%s",
            resource_id,
            scope,
            request.url.path,
        )

        try:
            # Build the client inside the try: a configuration or secret
            # lookup failure must become a 502, not an unhandled error.
            pdp_client = self._get_pdp_client()
            granted = pdp_client.check_permission(
                access_token=token,
                resource_id=resource_id,
                scope=scope,
            )
        except TokenError as e:
            logger.warning(
                "PEP: token error for %s %s: %s", _method, request.url.path, e.detail
            )
            return pep_error_response(
                401, e.detail, {"WWW-Authenticate": 'Bearer error="invalid_token"'}
            )
        except ResourceNotFoundError as e:
            logger.warning(
                "PEP: resource not found for %s %s: %s",
                _method,
                request.url.path,
                e.resource_id,
            )
            return pep_error_response(
                404,
                f"The requested tenant resource ({e.resource_id}) does not exist. "
                "Verify that the tenant name is correct.",
            )
        except PermissionDeniedError as e:
            return self._permission_denied_response(
                _method, request.url.path, e.resource_id, e.scope or scope
            )
        except Exception as e:
            logger.exception(
                "PEP: Keycloak check failed for resource_id=%s scope=%s: %s",
                resource_id,
                scope,
                e,
            )
            return pep_error_response(
                502, "Authorization service temporarily unavailable"
            )

        if not granted:
            # Fail closed: a falsy result (no matching grant in the RPT) is a
            # denial even though no exception was raised.
            return self._permission_denied_response(
                _method, request.url.path, resource_id, scope
            )

        logger.info(
            "PEP: authorized for resource_id=%s, scope=%s, path=%s",
            resource_id,
            scope,
            request.url.path,
        )

        return await call_next(request)
def _parse_keycloak_config() -> tuple[str, str]:
    """Return ``(keycloak_url, realm)`` parsed from the configured OIDC URL.

    Raises:
        HTTPException: 503, carrying the parser's message, when
            ``parse_keycloak_from_openid_url`` rejects the URL.
    """
    oidc_url = auth_settings.openid_configuration_url
    try:
        keycloak_config = parse_keycloak_from_openid_url(oidc_url)
    except ValueError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    return keycloak_config


def _get_keycloak_pdp_client() -> KeycloakPDPClient:
    """Create the Keycloak PDP client used by the PEP middleware.

    Combines the parsed Keycloak URL and realm with the UMA resource server
    client credentials.
    """
    base_url, realm_name = _parse_keycloak_config()
    uma_client_id, uma_client_secret = _get_keycloak_credentials()
    pdp_client = KeycloakPDPClient(
        keycloak_url=base_url,
        realm=realm_name,
        client_id=uma_client_id,
        client_secret=uma_client_secret,
    )
    return pdp_client


# PEP is only registered when both the OIDC URL and the UMA secret name are set.
if not (
    auth_settings.openid_configuration_url
    and settings.keycloak_uma_resource_server_client_secret_name
):
    pep_logger.info(
        "PEP middleware disabled for Ingest API, openid_url=%s, secret_name=%s",
        bool(auth_settings.openid_configuration_url),
        bool(settings.keycloak_uma_resource_server_client_secret_name),
    )
else:
    pep_logger.info(
        "PEP middleware enabled for Ingest API, secret_name=%s",
        settings.keycloak_uma_resource_server_client_secret_name,
    )
    app.add_middleware(
        PEPMiddleware,
        pdp_client=_get_keycloak_pdp_client,
        resource_extractor=extract_ingest_resource_id,
    )
"description": "Integration test collection for PEP middleware", + "extent": { + "spatial": {"bbox": [[-180, -90, 180, 90]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", None]]}, + }, + "license": "MIT", + "stac_version": "1.0.0", + "stac_extensions": [], +} + +COLLECTIONS_ENDPOINT = "/collections" + +MOCK_KEYCLOAK_SECRET = { + "id": "test-uma-client", + "secret": "test-uma-secret", +} + + +@pytest.fixture(autouse=True) +def pep_environ(test_environ): + """Set UMA env vars""" + os.environ[ + "KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME" + ] = "test/keycloak-uma-secret" + yield + os.environ.pop("KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME", None) + + +@pytest.fixture +def mock_pdp_client(): + """A mock PDP client with mocked check_permission function""" + client = MagicMock() + client.check_permission = MagicMock(return_value=True) + return client + + +@pytest.fixture +def pep_app(mock_pdp_client, mock_ssm_parameter_store): + """Reload the Ingest app with PEP middleware enabled and mocked dependencies""" + import src.auth + import src.config + import src.main + + # reload to re-read the environment + importlib.reload(src.config) + importlib.reload(src.auth) + + with patch( + "src.utils.get_keycloak_client_credentials", + return_value=MOCK_KEYCLOAK_SECRET, + ), patch( + "veda_auth.keycloak_client.KeycloakPDPClient", + return_value=mock_pdp_client, + ), patch( + "src.collection_publisher.CollectionPublisher.ingest", + ): + # reload now that we've patched the mocked dependencies + importlib.reload(src.main) + app = src.main.app + yield app + + # restore original module state + importlib.reload(src.config) + importlib.reload(src.auth) + importlib.reload(src.main) + + +@pytest.fixture +def pep_client(pep_app): + """TestClient wrapping the PEP enabled Ingest app""" + return TestClient(pep_app) + + +def _collection(tenant: Optional[str] = None) -> dict: + """Builds a valid collection body""" + body = dict(VALID_COLLECTION_TEMPLATE) + body["id"] = 
class TestIngestPEPIntegration:
    """Integration tests for PEP middleware on Ingest API POST /collections endpoint"""

    def test_post_collection_no_token_returns_401(self, pep_client):
        """POST /collections without Authorization header should return 401."""
        response = pep_client.post(COLLECTIONS_ENDPOINT, json=_collection())
        assert response.status_code == 401
        assert response.headers.get("www-authenticate") == "Bearer"

    def test_post_collection_invalid_token_returns_401(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections where the PDP rejects the token should return 401"""
        # Imported here so this test does not rely on the module import list.
        from veda_auth.keycloak_client import TokenError

        mock_pdp_client.check_permission.side_effect = TokenError()

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers={"Authorization": "Bearer fake-expired-token"},
        )
        assert response.status_code == 401
        assert (
            response.headers.get("www-authenticate") == 'Bearer error="invalid_token"'
        )
        assert response.json()["detail"] == "Access token is expired or invalid"

    def test_post_collection_authorized_succeeds(self, pep_client, mock_pdp_client):
        """POST /collections with valid Bearer and PDP allows should succeed"""
        mock_pdp_client.check_permission.return_value = True

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 201

        mock_pdp_client.check_permission.assert_called_once()
        call_kwargs = mock_pdp_client.check_permission.call_args
        assert (call_kwargs.kwargs.get("access_token")) == "fake-valid-token"
        assert (call_kwargs.kwargs.get("scope")) == "create"

    def test_post_collection_denied_returns_403(self, pep_client, mock_pdp_client):
        """POST /collections with valid Bearer where PDP denies should return 403"""
        mock_pdp_client.check_permission.side_effect = PermissionDeniedError(
            resource_id="stac:collection:test", scope="create"
        )

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 403
        assert "do not have permission" in response.json()["detail"]

    def test_post_collection_pdp_error_returns_502(self, pep_client, mock_pdp_client):
        """POST /collections where PDP raises an exception should return 502"""
        mock_pdp_client.check_permission.side_effect = Exception("Keycloak unavailable")

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 502
        assert "Authorization service" in response.json()["detail"]

    def test_post_collection_with_tenant_uses_tenant_resource(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with eic:tenant in body should check the tenant resource ID"""
        mock_pdp_client.check_permission.return_value = True

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(tenant="veda"),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 201

        call_kwargs = mock_pdp_client.check_permission.call_args
        resource_id = call_kwargs.kwargs.get("resource_id")
        assert resource_id == "stac:collection:veda:*"

    def test_post_collection_without_tenant_uses_public_resource(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections without eic:tenant should check the public resource ID"""
        mock_pdp_client.check_permission.return_value = True

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 201

        call_kwargs = mock_pdp_client.check_permission.call_args
        resource_id = call_kwargs.kwargs.get("resource_id")
        assert resource_id == "stac:collection:public:*"

    def test_post_collection_nonexistent_tenant_returns_404(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with a tenant that doesn't exist in Keycloak should return 404"""
        mock_pdp_client.check_permission.side_effect = ResourceNotFoundError(
            resource_id="stac:collection:nonexistent-tenant:*"
        )

        response = pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(tenant="nonexistent-tenant"),
            headers={"Authorization": "Bearer fake-valid-token"},
        )
        assert response.status_code == 404
        assert "does not exist" in response.json()["detail"]
        assert "nonexistent-tenant" in response.json()["detail"]
diff --git a/local/Dockerfile.stac b/local/Dockerfile.stac index c6112d9c..bc050c70 100644 --- a/local/Dockerfile.stac +++ b/local/Dockerfile.stac @@ -5,14 +5,18 @@ FROM ghcr.io/vincentsarago/uvicorn-gunicorn:${PYTHON_VERSION} ENV CURL_CA_BUNDLE /etc/ssl/certs/ca-certificates.crt # fixes rust toolchain error -RUN pip install --upgrade pip +RUN pip install --upgrade pip # Installing boto3, which isn't needed in the lambda container instance # since lambda execution environment includes boto3 by default RUN pip install boto3 +COPY common/auth /tmp/common_auth +RUN pip install /tmp/common_auth +RUN rm -rf /tmp/common_auth + COPY stac_api/runtime /tmp/stac -RUN pip install /tmp/stac +RUN pip install /tmp/stac RUN rm -rf /tmp/stac ENV MODULE_NAME src.app diff --git a/stac_api/infrastructure/config.py b/stac_api/infrastructure/config.py index fda3a83e..4acfd0b6 100644 --- a/stac_api/infrastructure/config.py +++ b/stac_api/infrastructure/config.py @@ -72,6 +72,14 @@ class vedaSTACSettings(BaseSettings): False, description="Whether to enable STAC Auth Proxy. 
If enable_transactions is True, this must also be True.", ) + keycloak_uma_resource_server_client_secret_name: Optional[str] = Field( + None, + description="Name of AWS Secrets Manager secret containing Keycloak UMA resource server client_id and client_secret", + ) + keycloak_secret_kms_key_arn: Optional[str] = Field( + None, + description="ARN of KMS key used to encrypt the Keycloak secret", + ) @model_validator(mode="before") def check_transaction_fields(cls, values): diff --git a/stac_api/infrastructure/construct.py b/stac_api/infrastructure/construct.py index 6ae63cb7..40d0c529 100644 --- a/stac_api/infrastructure/construct.py +++ b/stac_api/infrastructure/construct.py @@ -1,6 +1,7 @@ """CDK Construct for a Lambda backed API implementing stac-fastapi.""" import os +from typing import Optional from aws_cdk import ( CfnOutput, @@ -9,9 +10,10 @@ aws_apigatewayv2_alpha, aws_apigatewayv2_integrations_alpha, aws_ec2, - aws_lambda, - aws_logs, ) +from aws_cdk import aws_kms as kms +from aws_cdk import aws_lambda, aws_logs +from aws_cdk import aws_secretsmanager as secretsmanager from constructs import Construct from .config import veda_stac_settings @@ -77,6 +79,10 @@ def __init__( lambda_env["VEDA_STAC_OPENID_CONFIGURATION_URL"] = str( veda_stac_settings.openid_configuration_url ) + if veda_stac_settings.keycloak_uma_resource_server_client_secret_name: + lambda_env[ + "VEDA_STAC_KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME" + ] = veda_stac_settings.keycloak_uma_resource_server_client_secret_name lambda_function = aws_lambda.Function( self, @@ -97,6 +103,19 @@ def __init__( # # lambda_function.add_environment(key="TITILER_ENDPOINT", value=raster_api.url) database.pgstac.secret.grant_read(lambda_function) + + keycloak_secret = _get_keycloak_secret( + self, veda_stac_settings.keycloak_uma_resource_server_client_secret_name + ) + if keycloak_secret: + keycloak_secret.grant_read(lambda_function) + if veda_stac_settings.keycloak_secret_kms_key_arn: + kms_key = 
def _get_keycloak_pdp_client():
    """Build Keycloak PDP client for PEP from UMA resource server credentials stored in AWS Secrets Manager.

    Raises:
        RuntimeError: If the secret lacks ``id`` (client_id) or ``secret``
            (client_secret).
        ValueError: If the configured OpenID configuration URL is rejected by
            ``parse_keycloak_from_openid_url``.
    """
    # Imported lazily, at call time rather than at module import.
    from src.config import get_secret_dict
    from veda_auth.keycloak_client import (
        KeycloakPDPClient,
        parse_keycloak_from_openid_url,
    )

    keycloak_url, realm = parse_keycloak_from_openid_url(
        api_settings.openid_configuration_url
    )

    secret = get_secret_dict(
        api_settings.keycloak_uma_resource_server_client_secret_name
    )
    client_id = secret.get("id")
    client_secret = secret.get("secret")
    if not client_id:
        raise RuntimeError("Keycloak UMA secret is missing 'id' (client_id)")
    if not client_secret:
        # Fail fast here instead of sending an empty secret to Keycloak.
        raise RuntimeError("Keycloak UMA secret is missing 'secret' (client_secret)")

    return KeycloakPDPClient(
        keycloak_url=keycloak_url,
        realm=realm,
        client_id=client_id,
        client_secret=client_secret,
    )


# PEP is only registered when both the OIDC URL and the UMA secret name are set.
if (
    api_settings.openid_configuration_url
    and api_settings.keycloak_uma_resource_server_client_secret_name
):
    logger.info(
        "PEP middleware enabled, secret_name=%s",
        api_settings.keycloak_uma_resource_server_client_secret_name,
    )
    from veda_auth.pep_middleware import PEPMiddleware
    from veda_auth.resource_extractors import extract_stac_resource_id

    app.add_middleware(
        PEPMiddleware,
        pdp_client=_get_keycloak_pdp_client,
        resource_extractor=extract_stac_resource_id,
    )
else:
    logger.info(
        "PEP middleware disabled, openid_url=%s, secret_name=%s",
        bool(api_settings.openid_configuration_url),
        bool(api_settings.keycloak_uma_resource_server_client_secret_name),
    )
When set with openid_configuration_url, PEP enforces UMA.", + ) @field_validator("cors_origins") @classmethod diff --git a/stac_api/runtime/tests/conftest.py b/stac_api/runtime/tests/conftest.py index 08ddd251..56ffcad7 100644 --- a/stac_api/runtime/tests/conftest.py +++ b/stac_api/runtime/tests/conftest.py @@ -7,6 +7,7 @@ """ import copy +import importlib import os import uuid from unittest.mock import MagicMock, patch @@ -311,7 +312,7 @@ def mock_validate_dict_side_effect(data, stac_type): @pytest.fixture -async def app(): +async def app(test_environ): """ Fixture to initialize the FastAPI application. @@ -324,7 +325,13 @@ async def app(): Returns: FastAPI: The FastAPI application instance. """ - from src.app import app + import src.app + import src.config + + src.config.ApiSettings.cache_clear() + importlib.reload(src.config) + importlib.reload(src.app) + app = src.app.app await connect_to_db(app, add_write_connection_pool=True) yield app diff --git a/stac_api/runtime/tests/test_pep_integration.py b/stac_api/runtime/tests/test_pep_integration.py new file mode 100644 index 00000000..1ddf9ce6 --- /dev/null +++ b/stac_api/runtime/tests/test_pep_integration.py @@ -0,0 +1,245 @@ +"""Integration tests for PEP middleware""" +import importlib +import os +import uuid +from typing import Optional +from unittest.mock import MagicMock, patch + +import pytest +import src.app +import src.config +from httpx import ASGITransport, AsyncClient +from veda_auth.keycloak_client import PermissionDeniedError, ResourceNotFoundError + +from stac_fastapi.pgstac.db import close_db_connection, connect_to_db + +VALID_COLLECTION_TEMPLATE = { + "type": "Collection", + "title": "Test Collection for PEP", + "links": [], + "description": "Integration test collection for PEP middleware", + "extent": { + "spatial": {"bbox": [[-180, -90, 180, 90]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", None]]}, + }, + "license": "MIT", + "stac_version": "1.0.0", +} + +ROOT_PATH = "/api/stac" 
COLLECTIONS_ENDPOINT = f"{ROOT_PATH}/collections"


# Payload handed back by the patched ``get_secret_dict``.
MOCK_KEYCLOAK_SECRET = {
    "id": "test-uma-client",
    "secret": "test-uma-secret",
}

# Environment overrides applied for the duration of every test in this module.
_PEP_ENVIRON = {
    "VEDA_STAC_KEYCLOAK_UMA_RESOURCE_SERVER_CLIENT_SECRET_NAME": (
        "test/keycloak-uma-secret"
    ),
    "VEDA_STAC_OPENID_CONFIGURATION_URL": (
        "https://auth.example.com/realms/test-realm/.well-known/openid-configuration"
    ),
    "VEDA_STAC_ENABLE_TRANSACTIONS": "True",
    "VEDA_STAC_ENABLE_STAC_AUTH_PROXY": "True",
    "VEDA_STAC_ROOT_PATH": ROOT_PATH,
}

# Bearer header sent by every authenticated request in these tests.
_AUTH_HEADERS = {"Authorization": "Bearer fake-valid-token"}


@pytest.fixture(autouse=True)
def pep_environ():
    """Set UMA and transaction env vars for PEP middleware tests.

    ``patch.dict`` snapshots ``os.environ`` on entry and restores it on exit,
    so every override (including the OpenID configuration URL) is removed and
    any value that existed before the test is put back.
    """
    with patch.dict(os.environ, _PEP_ENVIRON):
        yield


@pytest.fixture
def mock_pdp_client():
    """A mock PDP client whose ``check_permission`` grants access by default."""
    client = MagicMock()
    client.check_permission = MagicMock(return_value=True)
    return client


@pytest.fixture
async def pep_app(mock_pdp_client):
    """Load the STAC app with PEP enabled and the PDP client mocked.

    Yields:
        The reloaded ``src.app.app`` instance, connected to the database.
    """
    # Clear the settings cache and reload so the config module picks up
    # root_path/transactions from pep_environ.
    src.config.ApiSettings.cache_clear()
    importlib.reload(src.config)

    with patch("src.config.get_secret_dict", return_value=MOCK_KEYCLOAK_SECRET), patch(
        "veda_auth.keycloak_client.KeycloakPDPClient", return_value=mock_pdp_client
    ):
        # Reload the app module while the mocked dependencies are in place.
        importlib.reload(src.app)
        app = src.app.app

    await connect_to_db(app, add_write_connection_pool=True)
    try:
        yield app
        await close_db_connection(app)
    finally:
        # Reset the config module even if closing the DB connection fails.
        src.config.ApiSettings.cache_clear()
        importlib.reload(src.config)


@pytest.fixture
async def pep_client(pep_app):
    """HTTP client bound to the PEP-enabled app through an ASGI transport."""
    async with AsyncClient(
        transport=ASGITransport(app=pep_app), base_url="http://test"
    ) as client:
        yield client


def _collection(tenant: Optional[str] = None) -> dict:
    """Build a valid STAC collection with a unique ID.

    Args:
        tenant: When given, stored under the ``eic:tenant`` key.

    Returns:
        A new dict based on ``VALID_COLLECTION_TEMPLATE``. The copy is shallow;
        only top-level keys are added here, so the template is not modified.
    """
    body = dict(VALID_COLLECTION_TEMPLATE)
    body["id"] = f"pep-test-{uuid.uuid4().hex[:8]}"
    if tenant:
        body["eic:tenant"] = tenant
    return body


async def _delete_collection(client, collection_id: str) -> None:
    """Best-effort removal of a collection created by a test."""
    await client.delete(
        f"{COLLECTIONS_ENDPOINT}/{collection_id}",
        headers=_AUTH_HEADERS,
    )


class TestPEPIntegration:
    """Integration tests for PEP middleware for POST /collections endpoint"""

    @pytest.mark.asyncio
    async def test_post_collection_no_token_returns_401(self, pep_client):
        """POST /collections without Authorization header should return 401"""
        response = await pep_client.post(COLLECTIONS_ENDPOINT, json=_collection())
        assert response.status_code == 401
        assert response.headers.get("www-authenticate") == "Bearer"

    @pytest.mark.asyncio
    async def test_post_collection_authorized_succeeds(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with a valid Bearer token and a granting PDP"""
        mock_pdp_client.check_permission.return_value = True
        collection = _collection()

        try:
            response = await pep_client.post(
                COLLECTIONS_ENDPOINT,
                json=collection,
                headers=_AUTH_HEADERS,
            )
            # Checked before cleanup: the DELETE below is a second request.
            mock_pdp_client.check_permission.assert_called_once()
            pdp_call = mock_pdp_client.check_permission.call_args

            assert response.status_code == 201
            assert pdp_call.kwargs.get("access_token") == "fake-valid-token"
            assert pdp_call.kwargs.get("scope") == "create"
        finally:
            # Clean up even when an assertion above fails.
            await _delete_collection(pep_client, collection["id"])

    @pytest.mark.asyncio
    async def test_post_collection_denied_returns_403(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with valid Bearer where PDP denies with 403"""
        mock_pdp_client.check_permission.side_effect = PermissionDeniedError(
            resource_id="stac:collection:test", scope="create"
        )

        response = await pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers=_AUTH_HEADERS,
        )
        assert response.status_code == 403
        assert "do not have permission" in response.json()["detail"]

    @pytest.mark.asyncio
    async def test_post_collection_pdp_error_returns_502(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections where the PDP call fails unexpectedly should return 502"""
        mock_pdp_client.check_permission.side_effect = Exception("Keycloak Unavailable")

        response = await pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(),
            headers=_AUTH_HEADERS,
        )
        assert response.status_code == 502
        assert "Authorization service" in response.json()["detail"]

    @pytest.mark.asyncio
    async def test_post_collection_with_tenant_uses_tenant_resource(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with tenant in body and PDP called with tenant resource ID"""
        mock_pdp_client.check_permission.return_value = True
        collection = _collection(tenant="veda")

        try:
            response = await pep_client.post(
                COLLECTIONS_ENDPOINT,
                json=collection,
                headers=_AUTH_HEADERS,
            )
            assert response.status_code == 201

            pdp_call = mock_pdp_client.check_permission.call_args
            assert pdp_call.kwargs.get("resource_id") == "stac:collection:veda:*"
        finally:
            # Clean up even when an assertion above fails.
            await _delete_collection(pep_client, collection["id"])

    @pytest.mark.asyncio
    async def test_post_collection_without_tenant_uses_public_resource(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections without tenant in body so PDP is called with public resource ID"""
        mock_pdp_client.check_permission.return_value = True
        collection = _collection()

        try:
            response = await pep_client.post(
                COLLECTIONS_ENDPOINT,
                json=collection,
                headers=_AUTH_HEADERS,
            )
            assert response.status_code == 201

            pdp_call = mock_pdp_client.check_permission.call_args
            assert pdp_call.kwargs.get("resource_id") == "stac:collection:public:*"
        finally:
            # Clean up even when an assertion above fails.
            await _delete_collection(pep_client, collection["id"])

    @pytest.mark.asyncio
    async def test_post_collection_nonexistent_tenant_returns_404(
        self, pep_client, mock_pdp_client
    ):
        """POST /collections with a tenant that doesn't exist in Keycloak should return 404"""
        mock_pdp_client.check_permission.side_effect = ResourceNotFoundError(
            resource_id="stac:collection:nonexistent-tenant:*"
        )

        response = await pep_client.post(
            COLLECTIONS_ENDPOINT,
            json=_collection(tenant="nonexistent-tenant"),
            headers=_AUTH_HEADERS,
        )
        assert response.status_code == 404
        detail = response.json()["detail"]
        assert "does not exist" in detail
        assert "nonexistent-tenant" in detail

    @pytest.mark.asyncio
    async def test_get_collections_not_affected_by_pep(self, pep_client):
        """GET /collections should not be intercepted by PEP (because it's not a protected route)"""
        response = await pep_client.get(COLLECTIONS_ENDPOINT)
        assert response.status_code == 200