diff --git a/docs/pages/deploying-in-production.mdx b/docs/pages/deploying-in-production.mdx index 38374abbb..779e33871 100644 --- a/docs/pages/deploying-in-production.mdx +++ b/docs/pages/deploying-in-production.mdx @@ -98,6 +98,28 @@ so any thread can use any configured credential. Per-user and per-channel scoping is on the roadmap; until then, scope tool and harness access accordingly. See [Security](/security) for the full threat model. +### Optional: route LLM traffic through a gateway + +To send harness LLM calls through a gateway (LiteLLM, Portkey, self-hosted, etc.) +instead of directly to provider APIs, set `CENTAUR_LLM_GATEWAY_HOST` on the API +container and point the harness CLI at the gateway via `*_BASE_URL` on the +sandbox. Iron-proxy injects `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` for the +gateway host instead of the provider host — store the gateway's API key under +the existing `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` name. + +```yaml +api: + extraEnv: + CENTAUR_LLM_GATEWAY_HOST: "litellm.internal.example.com" # hostname only + +sandbox: + extraEnv: + ANTHROPIC_BASE_URL: "https://litellm.internal.example.com" # full URL + # or OPENAI_BASE_URL for the Codex harness +``` + +When `CENTAUR_LLM_GATEWAY_HOST` is unset or empty, behavior is unchanged: keys route to the upstream provider hosts.
+ ### Codex Auth Modes :::warning[Dedicate the account to Centaur] diff --git a/services/api/api/tool_manager.py b/services/api/api/tool_manager.py index 8b03ae886..4b27540ad 100644 --- a/services/api/api/tool_manager.py +++ b/services/api/api/tool_manager.py @@ -17,7 +17,7 @@ import types import uuid from collections.abc import Callable, Mapping -from dataclasses import asdict, dataclass, is_dataclass +from dataclasses import asdict, dataclass, is_dataclass, replace from enum import Enum from pathlib import Path from typing import Any, ClassVar @@ -1875,6 +1875,31 @@ def discover(self) -> list[LoadedTool]: ), ] + @staticmethod + def _llm_gateway_host() -> str: + return os.getenv("CENTAUR_LLM_GATEWAY_HOST", "").strip() + + @classmethod + def _with_llm_gateway_hosts( + cls, secrets: tuple[SecretDef, ...] + ) -> tuple[SecretDef, ...]: + gateway = cls._llm_gateway_host() + if not gateway: + return secrets + out: list[SecretDef] = [] + for secret in secrets: + if isinstance(secret, HttpSecret) and secret.name in { + "ANTHROPIC_API_KEY", + "OPENAI_API_KEY", + }: + out.append(replace(secret, hosts=(gateway,))) + else: + out.append(secret) + return tuple(out) + + def _infra_secrets(self) -> list[HttpSecret]: + return list(self._INFRA_SECRETS) + # Harness-specific credentials, keyed by ``(engine, auth_mode)``. The # per-sandbox iron-proxy gets exactly the tuple that matches the # sandbox's harness and auth-mode env var; the shared API-side proxy @@ -1952,7 +1977,7 @@ def _harness_secrets_for( if mode_key is None: return () mode = (auth_modes.get(mode_key) or "api_key").strip() or "api_key" - return cls._HARNESS_SECRETS.get((engine, mode), ()) + return cls._with_llm_gateway_hosts(cls._HARNESS_SECRETS.get((engine, mode), ())) def secrets_for_sandbox( self, engine: str, auth_modes: Mapping[str, str] @@ -1964,7 +1989,7 @@ def secrets_for_sandbox( engines (e.g. 
``amp``, ``pi-mono``) get no harness extras — they authenticate through entries that already live in ``_INFRA_SECRETS``. """ - out: list[SecretDef] = list(self._INFRA_SECRETS) + out: list[SecretDef] = list(self._infra_secrets()) for lt in self.tools.values(): out.extend(lt.all_secrets) out.extend(self._harness_secrets_for(engine, auth_modes)) @@ -1982,8 +2007,11 @@ def collect_secrets(self) -> list[SecretDef]: out: list[SecretDef] = list(self._INFRA_SECRETS) for lt in self.tools.values(): out.extend(lt.all_secrets) - for harness_set in self._HARNESS_SECRETS.values(): - out.extend(harness_set) + for engine, mode in self._HARNESS_SECRETS: + mode_key = self._HARNESS_AUTH_MODE_ENV.get(engine) + if mode_key is None: + continue + out.extend(self._harness_secrets_for(engine, {mode_key: mode})) return out def reload(self) -> dict[str, Any]: diff --git a/services/api/tests/test_tool_manager.py b/services/api/tests/test_tool_manager.py index f055960d5..e56fa7dc2 100644 --- a/services/api/tests/test_tool_manager.py +++ b/services/api/tests/test_tool_manager.py @@ -8,10 +8,12 @@ import httpx import pytest -from fastapi import FastAPI +from fastapi import FastAPI, Request sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from api.api_keys import APIKeyInfo # noqa: E402 +from api.deps import verify_api_key # noqa: E402 from api.tool_manager import ( # noqa: E402 _LIFECYCLE_METHODS, _describe_method_docstring, @@ -611,6 +613,17 @@ async def test_tool_rest_router_lists_describes_and_invokes_tools( manager = ToolManager(tools_dir) manager.discover() app = FastAPI() + + async def allow_tools(request: Request) -> None: + request.state.api_key_info = APIKeyInfo( + id="test", + name="test", + key_prefix="test", + scopes=["tools:*"], + created_by="test", + ) + + app.dependency_overrides[verify_api_key] = allow_tools app.include_router(manager.create_rest_router()) transport = httpx.ASGITransport(app=app) @@ -662,6 +675,48 @@ async def 
test_tool_rest_router_lists_describes_and_invokes_tools( ) +def _manager_without_tools() -> ToolManager: + tm = ToolManager.__new__(ToolManager) + tm.tools = {} + return tm + + +def _llm_hosts(manager: ToolManager) -> dict[str, tuple[str, ...]]: + secrets = [ + *manager.secrets_for_sandbox( + "claude-code", {"CLAUDE_CODE_AUTH_MODE": "api_key"} + ), + *manager.secrets_for_sandbox("codex", {"CODEX_AUTH_MODE": "api_key"}), + ] + return { + s.name: s.hosts + for s in secrets + if s.name in {"ANTHROPIC_API_KEY", "OPENAI_API_KEY"} + } + + +def test_infra_secrets_default_to_provider_hosts( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.delenv("CENTAUR_LLM_GATEWAY_HOST", raising=False) + hosts = _llm_hosts(_manager_without_tools()) + assert hosts == { + "ANTHROPIC_API_KEY": ("api.anthropic.com",), + "OPENAI_API_KEY": ("api.openai.com",), + } + + +def test_infra_secrets_route_llm_keys_through_gateway_host( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("CENTAUR_LLM_GATEWAY_HOST", "litellm.example.internal") + hosts = _llm_hosts(_manager_without_tools()) + assert hosts == { + "ANTHROPIC_API_KEY": ("litellm.example.internal",), + "OPENAI_API_KEY": ("litellm.example.internal",), + } + + class TestHarnessSecretSelection: """ToolManager picks the right harness credentials for a sandbox based on its engine and auth-mode env vars.""" @@ -670,8 +725,7 @@ def _names(self, secrets: list) -> set[str]: return {getattr(s, "name", None) for s in secrets} def test_claude_code_api_key_includes_anthropic_excludes_openai(self) -> None: - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names( tm.secrets_for_sandbox("claude-code", {"CLAUDE_CODE_AUTH_MODE": "api_key"}) ) @@ -680,8 +734,7 @@ def test_claude_code_api_key_includes_anthropic_excludes_openai(self) -> None: assert "anthropic-claude" not in names def test_claude_code_access_token_swaps_to_brokered(self) -> None: - tm = 
ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names( tm.secrets_for_sandbox( "claude-code", {"CLAUDE_CODE_AUTH_MODE": "access_token"} @@ -692,8 +745,7 @@ def test_claude_code_access_token_swaps_to_brokered(self) -> None: assert "OPENAI_API_KEY" not in names def test_codex_api_key_includes_openai_excludes_anthropic(self) -> None: - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names( tm.secrets_for_sandbox("codex", {"CODEX_AUTH_MODE": "api_key"}) ) @@ -702,8 +754,7 @@ def test_codex_api_key_includes_openai_excludes_anthropic(self) -> None: assert "openai-codex" not in names def test_codex_access_token_swaps_to_brokered_with_account_id(self) -> None: - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names( tm.secrets_for_sandbox("codex", {"CODEX_AUTH_MODE": "access_token"}) ) @@ -712,15 +763,13 @@ def test_codex_access_token_swaps_to_brokered_with_account_id(self) -> None: assert "OPENAI_API_KEY" not in names def test_unset_auth_mode_defaults_to_api_key(self) -> None: - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names(tm.secrets_for_sandbox("claude-code", {})) assert "ANTHROPIC_API_KEY" in names assert "anthropic-claude" not in names def test_unknown_engine_gets_no_harness_extras(self) -> None: - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = self._names( tm.secrets_for_sandbox("amp", {"CLAUDE_CODE_AUTH_MODE": "access_token"}) ) @@ -733,8 +782,7 @@ def test_collect_secrets_returns_union_of_all_harness_variants(self) -> None: """The shared API-side proxy and token broker need every harness credential so they can manage the full set regardless of which mode any individual sandbox is using right now.""" - tm = ToolManager.__new__(ToolManager) - tm.tools = {} + tm = _manager_without_tools() names = 
self._names(tm.collect_secrets()) assert "ANTHROPIC_API_KEY" in names assert "OPENAI_API_KEY" in names