diff --git a/contrib/crewai/tests/test_structured_tool.py b/contrib/crewai/tests/test_structured_tool.py index 18d7654b8..e0654fd30 100644 --- a/contrib/crewai/tests/test_structured_tool.py +++ b/contrib/crewai/tests/test_structured_tool.py @@ -51,7 +51,7 @@ def test_structured_tool_auto_name_description(): def test_structured_tool_validation_errors(): # Test missing docstring with pytest.raises( - ValueError, match="Function must have a docstring if description not provided." + ValueError, match=r"Function must have a docstring if description not provided\." ): StructuredTool.from_function(func=unnamed_function, args_schema=CalculatorInput) diff --git a/examples/mcp_servers/custom_server_with_prebuilt_tools/pyproject.toml b/examples/mcp_servers/custom_server_with_prebuilt_tools/pyproject.toml index fff254952..9de870109 100644 --- a/examples/mcp_servers/custom_server_with_prebuilt_tools/pyproject.toml +++ b/examples/mcp_servers/custom_server_with_prebuilt_tools/pyproject.toml @@ -41,7 +41,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/datacache/README.md b/examples/mcp_servers/datacache/README.md new file mode 100644 index 000000000..10e5148db --- /dev/null +++ b/examples/mcp_servers/datacache/README.md @@ -0,0 +1,50 @@ +## Datacache (DuckDB) Example Server + +This example demonstrates the `@app.tool(datacache={keys:[...], ttl:...})` feature and `context.datacache.*` helpers. + +### What it does +- Uses a DuckDB file as a per-tool, per-identity cache +- Downloads the DuckDB file from S3 before tool execution +- Uploads it back to S3 after tool execution +- Uses a Redis lock to ensure only one tool execution per cache key runs at a time + +### Required environment variables +- `ARCADE_DATACACHE_REDIS_URL` (locking; still required for local storage) + +### Local backend (default for this example) +This example defaults `ARCADE_DATACACHE_STORAGE_BACKEND=local` in code, so you don’t need to set it explicitly unless you want S3. + +Set: +- `ARCADE_DATACACHE_REDIS_URL` (e.g. `redis://localhost:6379/0`) + +Optional: +- `ARCADE_DATACACHE_LOCAL_DIR` (default: `/tmp/arcade_datacache`) + +### S3 backend (how to switch) +Set: +- `ARCADE_DATACACHE_STORAGE_BACKEND=s3` +- `ARCADE_DATACACHE_REDIS_URL` +- `ARCADE_DATACACHE_S3_BUCKET` + +Optional: +- `ARCADE_DATACACHE_S3_PREFIX` (default: `arcade/datacache`) +- `ARCADE_DATACACHE_AWS_REGION` +- `ARCADE_DATACACHE_S3_ENDPOINT_URL` (e.g. MinIO) +- `ARCADE_DATACACHE_AWS_ACCESS_KEY_ID`, `ARCADE_DATACACHE_AWS_SECRET_ACCESS_KEY`, `ARCADE_DATACACHE_AWS_SESSION_TOKEN` + +### Running (http by default) +From this directory: + +```bash +uv sync +uv run python -m datacache.server +``` + +### Calling tools with `_meta` +The datacache keys `organization` and `project` are read from the request `_meta` and propagated into `ToolContext.metadata`. + +Your MCP client must include: +- `_meta.organization` +- `_meta.project` + +`user_id` comes from the server’s normal `ToolContext.user_id` behavior. diff --git a/examples/mcp_servers/datacache/pyproject.toml b/examples/mcp_servers/datacache/pyproject.toml new file mode 100644 index 000000000..c89a0d2bf --- /dev/null +++ b/examples/mcp_servers/datacache/pyproject.toml @@ -0,0 +1,29 @@ +[project] +name = "datacache" +version = "0.1.0" +description = "Example MCP server demonstrating Arcade datacache (DuckDB + S3 + Redis locking)" +requires-python = ">=3.10" +dependencies = [ + "arcade-mcp-server>=1.9.1,<2.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/datacache"] + +[project.entry-points.arcade_toolkits] +toolkit_name = "datacache" + +[tool.ruff] +line-length = 100 +target-version = "py312" + +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/datacache/src/datacache/__init__.py b/examples/mcp_servers/datacache/src/datacache/__init__.py new file mode 100644 index 000000000..2a3d65e6d --- /dev/null +++ b/examples/mcp_servers/datacache/src/datacache/__init__.py @@ -0,0 +1 @@ +"""Datacache DuckDB example server package.""" diff --git a/examples/mcp_servers/datacache/src/datacache/server.py b/examples/mcp_servers/datacache/src/datacache/server.py new file mode 100644 index 000000000..e5634a63b --- /dev/null +++ b/examples/mcp_servers/datacache/src/datacache/server.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Datacache MCP server example.""" + +import os +import sys +from typing import Annotated + +from arcade_mcp_server import Context, MCPApp + +# --------------------------------------------------------------------------- +# How to set organization/project for datacache keying +# +# Datacache keys `organization` and `project` come from the MCP `tools/call` request +# params `_meta`, and are propagated into `ToolContext.metadata` automatically. +# +# Example JSON-RPC call: +# { +# "jsonrpc": "2.0", +# "id": 1, +# "method": "tools/call", +# "params": { +# "name": "datacache_upsert_profile", +# "arguments": { "profile_id": "1", "name": "Alice" }, +# "_meta": { "organization": "acme", "project": "rocket" } +# } +# } +# --------------------------------------------------------------------------- + +# Default the example to local storage so it's easy to run locally. +# (The MCP server requires ARCADE_DATACACHE_STORAGE_BACKEND when datacache is enabled.) +os.environ.setdefault("ARCADE_DATACACHE_STORAGE_BACKEND", "local") +os.environ.setdefault("ARCADE_DATACACHE_REDIS_URL", "redis://localhost:6379/0") + +app = MCPApp(name="datacache_duckdb", version="1.0.0", log_level="DEBUG") + + +@app.tool(datacache={"keys": ["organization", "project", "user_id"], "ttl": 3600}) +async def upsert_profile( + context: Context, + profile_id: Annotated[str, "Unique profile id"], + name: Annotated[str, "Display name"], +) -> dict: + """Upsert a profile row into a DuckDB-backed datacache.""" + profile = {"id": profile_id, "name": name, "kind": "example_profile"} + + response = await context.datacache.set( + "profiles", + profile, + id_col="id", + ) + return response.model_dump(mode="json") + + +@app.tool(datacache={"keys": ["organization", "project", "user_id"], "ttl": 3600}) +async def search_profiles( + context: Context, + term: Annotated[str, "Search term (case-insensitive substring match)"], +) -> list[dict]: + """Search profiles by name using a LIKE query under the hood.""" + return await context.datacache.search("profiles", "name", term) + + +if __name__ == "__main__": + transport = sys.argv[1] if len(sys.argv) > 1 else "http" + app.run(transport=transport, host="127.0.0.1", port=8000) diff --git a/examples/mcp_servers/echo/pyproject.toml b/examples/mcp_servers/echo/pyproject.toml index dc1bc362b..25e001804 100644 --- a/examples/mcp_servers/echo/pyproject.toml +++ b/examples/mcp_servers/echo/pyproject.toml @@ -21,7 +21,9 @@ packages = ["src/echo"] # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/local_filesystem/pyproject.toml b/examples/mcp_servers/local_filesystem/pyproject.toml index e30cf00a6..34b035c99 100644 --- a/examples/mcp_servers/local_filesystem/pyproject.toml +++ b/examples/mcp_servers/local_filesystem/pyproject.toml @@ -38,7 +38,9 @@ toolkit_name = "simple" # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/logging/pyproject.toml b/examples/mcp_servers/logging/pyproject.toml index aec3fb719..60a2f876e 100644 --- a/examples/mcp_servers/logging/pyproject.toml +++ b/examples/mcp_servers/logging/pyproject.toml @@ -21,7 +21,9 @@ packages = ["src/logging"] # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/progress_reporting/pyproject.toml b/examples/mcp_servers/progress_reporting/pyproject.toml index a3b515429..c5b950db6 100644 --- a/examples/mcp_servers/progress_reporting/pyproject.toml +++ b/examples/mcp_servers/progress_reporting/pyproject.toml @@ -39,7 +39,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/sampling/pyproject.toml b/examples/mcp_servers/sampling/pyproject.toml index 10326aaca..edb54d39e 100644 --- a/examples/mcp_servers/sampling/pyproject.toml +++ b/examples/mcp_servers/sampling/pyproject.toml @@ -39,7 +39,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/server_with_evaluations/pyproject.toml b/examples/mcp_servers/server_with_evaluations/pyproject.toml index e3632f402..06e62d621 100644 --- a/examples/mcp_servers/server_with_evaluations/pyproject.toml +++ b/examples/mcp_servers/server_with_evaluations/pyproject.toml @@ -40,7 +40,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/simple/pyproject.toml b/examples/mcp_servers/simple/pyproject.toml index 85227a7f8..158dbdb43 100644 --- a/examples/mcp_servers/simple/pyproject.toml +++ b/examples/mcp_servers/simple/pyproject.toml @@ -39,7 +39,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/tool_chaining/pyproject.toml b/examples/mcp_servers/tool_chaining/pyproject.toml index 52905f18c..cccac7f96 100644 --- a/examples/mcp_servers/tool_chaining/pyproject.toml +++ b/examples/mcp_servers/tool_chaining/pyproject.toml @@ -39,7 +39,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/examples/mcp_servers/user_elicitation/pyproject.toml b/examples/mcp_servers/user_elicitation/pyproject.toml index e0d72f80c..7a37f6e35 100644 --- a/examples/mcp_servers/user_elicitation/pyproject.toml +++ b/examples/mcp_servers/user_elicitation/pyproject.toml @@ -39,7 +39,9 @@ disallow_untyped_defs = false # # Uncomment the following if you are developing inside of the arcade-mcp repo & want to use editable mode # # Otherwise, you will install the following packages from PyPI -# [tool.uv.sources] -# arcade-mcp = { path = "../../../", editable = true } -# arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } -# arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } +[tool.uv.sources] +arcade-mcp = { path = "../../../", editable = true } +arcade-core = { path = "../../../libs/arcade-core/", editable = true } +arcade-tdk = { path = "../../../libs/arcade-tdk/", editable = true } +arcade-serve = { path = "../../../libs/arcade-serve/", editable = true } +arcade-mcp-server = { path = "../../../libs/arcade-mcp-server/", editable = true } diff --git a/libs/arcade-core/arcade_core/catalog.py b/libs/arcade-core/arcade_core/catalog.py index 82f9893f3..366bcc7b5 100644 --- a/libs/arcade-core/arcade_core/catalog.py +++ b/libs/arcade-core/arcade_core/catalog.py @@ -112,6 +112,7 @@ class ToolMeta(BaseModel): toolkit: str | None = None package: str | None = None path: str | None = None + datacache: dict[str, Any] | None = None date_added: datetime = Field(default_factory=datetime.now) date_updated: datetime = Field(default_factory=datetime.now) @@ -261,6 +262,7 @@ def add_tool( toolkit=toolkit_name, package=toolkit.package_name if toolkit else None, path=module.__file__ if module else None, + datacache=getattr(tool_func, "__tool_datacache__", None), ), input_model=input_model, output_model=output_model, diff --git a/libs/arcade-mcp-server/arcade_mcp_server/context.py b/libs/arcade-mcp-server/arcade_mcp_server/context.py index 0bb6fd87d..423dd63d3 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/context.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/context.py @@ -37,6 +37,8 @@ ToolContext, ) +from arcade_mcp_server.datacache.client import DatacacheClient +from arcade_mcp_server.datacache.types import DatacacheSetResult from arcade_mcp_server.resource_server.base import ResourceOwner from arcade_mcp_server.types import ( AudioContent, @@ -147,6 +149,7 @@ def __init__( self._sampling = Sampling(self) self._ui = UI(self) self._notifications = Notifications(self) + self._datacache = Datacache(self) @property def server(self) -> Any: @@ -292,6 +295,15 @@ def notifications(self) -> Notifications: """ return self._notifications + @property + def datacache(self) -> Datacache: + """DuckDB-backed datacache for the currently executing tool. + + This is only available when the tool was declared with: + `@app.tool(datacache={keys:[...]})`. + """ + return self._datacache + @property def request_id(self) -> str | None: """Get the current request ID. @@ -689,6 +701,46 @@ def prompts(self) -> _NotificationsPrompts: return self._prompts +class Datacache(_ContextComponent): + """Namespaced datacache API exposed to tool code as `context.datacache.*`.""" + + def _client(self) -> DatacacheClient: + client = getattr(self._ctx, "_datacache_client", None) + if client is None: + raise RuntimeError( + "Datacache is not enabled for this tool execution. " + "Enable it by setting `datacache={keys:[...]}` on the tool decorator." + ) + return client + + async def discover_databases(self) -> list[dict[str, Any]]: + return await self._client().discover_databases() + + async def discover_tables(self, database: str) -> list[str]: + return await self._client().discover_tables(database) + + async def discover_schema(self, database: str, table: str) -> list[dict[str, Any]]: + return await self._client().discover_schema(database, table) + + async def query(self, database: str, table: str, sql: str) -> list[dict[str, Any]]: + return await self._client().query(database, table, sql) + + async def set( + self, + table_name: str, + record: dict[str, Any], + id_col: str = "id", + ttl: int | None = None, + ) -> DatacacheSetResult: + return await self._client().set(table_name, record, id_col=id_col, ttl=ttl) + + async def get(self, table_name: str, record_id: str) -> dict[str, Any] | None: + return await self._client().get(table_name, record_id) + + async def search(self, table_name: str, field: str, value: str) -> list[dict[str, Any]]: + return await self._client().search(table_name, field, value) + + def get_current_model_context() -> Context | None: """Get the current model context if available.""" return _current_model_context.get() diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/__init__.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/__init__.py new file mode 100644 index 000000000..c77ca47b7 --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/__init__.py @@ -0,0 +1,26 @@ +""" +Datacache support for Arcade MCP Server. + +This package provides: +- a per-tool, per-key DuckDB-backed cache +- S3 persistence (download before tool execution, upload after) +- Redis locking to ensure single-flight execution per cache key +""" + +from arcade_mcp_server.datacache.client import DatacacheClient +from arcade_mcp_server.datacache.config import ( + DatacacheConfigError, + DatacacheKeys, + build_datacache_identity, + is_datacache_enabled, +) +from arcade_mcp_server.datacache.types import DatacacheSetResult + +__all__ = [ + "DatacacheClient", + "DatacacheConfigError", + "DatacacheKeys", + "DatacacheSetResult", + "build_datacache_identity", + "is_datacache_enabled", +] diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/client.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/client.py new file mode 100644 index 000000000..7fbd2750d --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/client.py @@ -0,0 +1,314 @@ +from __future__ import annotations + +import asyncio +import json +import re +import time +from collections.abc import Sequence +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from typing import Any, Callable + +from arcade_mcp_server.datacache.types import DatacacheSetResult + +try: + import duckdb # type: ignore[import-not-found] +except Exception: # pragma: no cover + duckdb = None # type: ignore[assignment] + + +class DatacacheClientError(RuntimeError): + pass + + +_IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def _ident(name: str) -> str: + """Validate a SQL identifier (table/column name) to avoid injection.""" + if not _IDENT_RE.match(name): + raise DatacacheClientError(f"Invalid identifier: {name!r}") + return name + + +def _now_epoch() -> int: + return int(time.time()) + + +@dataclass +class DatacacheClient: + """Async-friendly DuckDB client executed on a dedicated single thread.""" + + path: str + default_ttl: int | None = None + + _executor: ThreadPoolExecutor | None = None + _conn: Any | None = None + + @classmethod + async def open(cls, *, path: str, default_ttl: int | None = None) -> DatacacheClient: + if duckdb is None: + raise DatacacheClientError("duckdb dependency not installed; required for datacache") + self = cls(path=path, default_ttl=default_ttl) + self._executor = ThreadPoolExecutor(max_workers=1) + + def _connect() -> Any: + return duckdb.connect(path) + + loop = asyncio.get_running_loop() + self._conn = await loop.run_in_executor(self._executor, _connect) + return self + + async def aclose(self) -> None: + if self._executor is None: + return + loop = asyncio.get_running_loop() + + def _close() -> None: + try: + if self._conn is not None: + self._conn.close() + finally: + self._conn = None + + try: + await loop.run_in_executor(self._executor, _close) + finally: + self._executor.shutdown(wait=False, cancel_futures=False) + self._executor = None + + async def _run(self, fn: Callable[[Any], Any]) -> Any: + if self._executor is None or self._conn is None: + raise DatacacheClientError("DatacacheClient is not open") + loop = asyncio.get_running_loop() + + def _call() -> Any: + return fn(self._conn) + + return await loop.run_in_executor(self._executor, _call) + + # ---------------------------- + # Discovery / query primitives + # ---------------------------- + + async def discover_databases(self) -> list[dict[str, Any]]: + def _q(conn: Any) -> list[dict[str, Any]]: + rows = conn.execute("PRAGMA database_list").fetchall() + cols = [d[0] for d in conn.description] + return [dict(zip(cols, r)) for r in rows] + + return await self._run(_q) + + async def discover_tables(self, database: str) -> list[str]: + def _q(conn: Any) -> list[str]: + # DuckDB uses \"main\" by default; information_schema is global. + rows = conn.execute( + """ + SELECT table_name + FROM information_schema.tables + WHERE table_catalog = ? + AND table_schema = 'main' + ORDER BY table_name + """, + [database], + ).fetchall() + return [r[0] for r in rows] + + return await self._run(_q) + + async def discover_schema(self, database: str, table: str) -> list[dict[str, Any]]: + def _q(conn: Any) -> list[dict[str, Any]]: + rows = conn.execute( + """ + SELECT column_name, data_type, is_nullable + FROM information_schema.columns + WHERE table_catalog = ? + AND table_schema = 'main' + AND table_name = ? + ORDER BY ordinal_position + """, + [database, table], + ).fetchall() + return [{"column_name": r[0], "data_type": r[1], "is_nullable": r[2]} for r in rows] + + return await self._run(_q) + + async def query(self, database: str, table: str, sql: str) -> list[dict[str, Any]]: + def _q(conn: Any) -> list[dict[str, Any]]: + if not sql: + safe_table = _ident(table) + sql_to_run = f'SELECT * FROM "{safe_table}"' # noqa: S608 + cur = conn.execute(sql_to_run) + else: + # Caller-provided SQL: treat as trusted by the tool author. + cur = conn.execute(sql) + rows = cur.fetchall() + cols = [d[0] for d in cur.description] + return [dict(zip(cols, r)) for r in rows] + + return await self._run(_q) + + # ---------------------------- + # Higher-level cache helpers + # ---------------------------- + + async def set( + self, + table_name: str, + obj: dict[str, Any], + *, + id_col: str = "id", + ttl: int | None = None, + ) -> DatacacheSetResult: + safe_table = _ident(table_name) + effective_ttl = self.default_ttl if ttl is None else ttl + if id_col not in obj: + raise DatacacheClientError(f"Object missing id_col '{id_col}'") + + # Approximate size of what we received (pre-flattening). + try: + bytes_saved = len(json.dumps(obj, sort_keys=True, default=str).encode("utf-8")) + except Exception: + bytes_saved = len(str(obj).encode("utf-8")) + + # Flatten: top-level keys only; complex values stored as JSON. + row: dict[str, Any] = {} + for k, v in obj.items(): + if isinstance(v, (dict, list)): + row[k] = json.dumps(v, sort_keys=True) + else: + row[k] = v + + # meta columns always present + now = _now_epoch() + row.setdefault("id", row.get(id_col)) + row["updated_at"] = now + row["ttl"] = effective_ttl + + record_id = str(row.get("id")) + + def _q(conn: Any) -> tuple[str, dict[str, Any] | None]: + # Ensure table exists with meta columns. + conn.execute( + f""" + CREATE TABLE IF NOT EXISTS "{safe_table}" ( + id VARCHAR PRIMARY KEY, + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + ttl BIGINT + ) + """ + ) + + # Determine if this is an insert or update, and preserve created_at for updates. + existing = conn.execute( + f'SELECT created_at FROM "{safe_table}" WHERE id = ?', # noqa: S608 + [record_id], + ).fetchone() + if existing is None: + action: str = "inserted" + row["created_at"] = now + else: + action = "updated" + # Legacy safety: if an older row has NULL created_at, backfill. + row["created_at"] = existing[0] if existing[0] is not None else now + + # Ensure user columns exist. + existing_cols = { + r[0] + for r in conn.execute( + """ + SELECT column_name + FROM information_schema.columns + WHERE table_schema = 'main' AND table_name = ? + """, + [safe_table], + ).fetchall() + } + for col in row: + if col in existing_cols: + continue + # store as VARCHAR by default (safe + flexible) + safe_col = _ident(str(col)) + conn.execute(f'ALTER TABLE "{safe_table}" ADD COLUMN "{safe_col}" VARCHAR') + + # Upsert. + cols = list(row.keys()) + placeholders = ", ".join(["?"] * len(cols)) + col_sql = ", ".join([f'"{_ident(str(c))}"' for c in cols]) + updates = ", ".join([ + f'"{_ident(str(c))}"=excluded."{_ident(str(c))}"' for c in cols if c != "id" + ]) + values: Sequence[Any] = [row[c] for c in cols] + sql_upsert = ( + f'INSERT INTO "{safe_table}" ({col_sql}) VALUES ({placeholders}) ' # noqa: S608 + f"ON CONFLICT(id) DO UPDATE SET {updates}" + ) + conn.execute(sql_upsert, list(values)) + + # Return the row we just saved. + cur = conn.execute(f'SELECT * FROM "{safe_table}" WHERE id = ?', [record_id]) # noqa: S608 + saved = cur.fetchone() + if saved is None: + return action, None + cols = [d[0] for d in cur.description] + return action, dict(zip(cols, saved)) + + action, saved_row = await self._run(_q) + if saved_row is None: + raise DatacacheClientError( + "Datacache set() succeeded but could not read back saved row" + ) + + if saved_row.get("created_at") is None or saved_row.get("updated_at") is None: + raise DatacacheClientError( + "Datacache set() read back a row missing created_at/updated_at; this should not happen" + ) + created_at = int(saved_row["created_at"]) + updated_at = int(saved_row["updated_at"]) + return DatacacheSetResult( + table=table_name, + id=record_id, + action=action, # type: ignore[arg-type] + record=saved_row, + created_at=created_at, + updated_at=updated_at, + bytes_saved=bytes_saved, + ) + + async def get(self, table_name: str, id: str) -> dict[str, Any] | None: # noqa: A002 + now = _now_epoch() + safe_table = _ident(table_name) + + def _q(conn: Any) -> dict[str, Any] | None: + sql_get = ( + f'SELECT * FROM "{safe_table}" ' # noqa: S608 + "WHERE id = ? AND (ttl IS NULL OR ttl = 0 OR (updated_at + ttl) >= ?)" + ) + cur = conn.execute(sql_get, [id, now]) + row = cur.fetchone() + if row is None: + return None + cols = [d[0] for d in cur.description] + return dict(zip(cols, row)) + + return await self._run(_q) + + async def search(self, table_name: str, property: str, value: str) -> list[dict[str, Any]]: # noqa: A002 + now = _now_epoch() + needle = f"%{value}%" + safe_table = _ident(table_name) + safe_prop = _ident(property) + + def _q(conn: Any) -> list[dict[str, Any]]: + sql_search = ( + f'SELECT * FROM "{safe_table}" ' # noqa: S608 + f'WHERE lower(CAST("{safe_prop}" AS VARCHAR)) LIKE lower(?) ' + "AND (ttl IS NULL OR ttl = 0 OR (updated_at + ttl) >= ?)" + ) + cur = conn.execute(sql_search, [needle, now]) + rows = cur.fetchall() + cols = [d[0] for d in cur.description] + return [dict(zip(cols, r)) for r in rows] + + return await self._run(_q) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/config.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/config.py new file mode 100644 index 000000000..22e80db8c --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/config.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import Any, Literal, TypedDict, cast + +from arcade_core.schema import ToolContext + +DatacacheKey = Literal["organization", "project", "user_id"] +DatacacheKeys = list[DatacacheKey] +DEFAULT_ORGANIZATION_IDENTITY = "default" +DEFAULT_PROJECT_IDENTITY = "default" + + +class DatacacheConfig(TypedDict, total=False): + keys: DatacacheKeys + ttl: int + + +class DatacacheConfigError(ValueError): + pass + + +def is_datacache_enabled(cfg: dict[str, Any] | None) -> bool: + """Datacache is enabled only if `keys` exists (per product spec).""" + return bool(cfg) and "keys" in cast(dict[str, Any], cfg) + + +def _normalize_keys(keys: Any) -> DatacacheKeys: + if not isinstance(keys, list) or any(not isinstance(k, str) for k in keys): + raise DatacacheConfigError("datacache.keys must be a list of strings") + allowed: set[str] = {"organization", "project", "user_id"} + normalized: list[DatacacheKey] = [] + for k in keys: + k_lower = str(k).strip().lower() + if k_lower not in allowed: + raise DatacacheConfigError( + f"Unsupported datacache key '{k}'. Allowed: organization, project, user_id" + ) + normalized.append(cast(DatacacheKey, k_lower)) + # de-dupe while preserving order + seen: set[DatacacheKey] = set() + out: list[DatacacheKey] = [] + for k in normalized: + if k not in seen: + seen.add(k) + out.append(k) + return out + + +def _normalize_ttl(ttl: Any) -> int | None: + if ttl is None: + return None + if not isinstance(ttl, int) or ttl < 0: + raise DatacacheConfigError("datacache.ttl must be a non-negative integer (seconds)") + return ttl + + +def parse_datacache_config(cfg: dict[str, Any] | None) -> DatacacheConfig | None: + if not cfg: + return None + if not isinstance(cfg, dict): + raise DatacacheConfigError("datacache must be an object/dict") + out: DatacacheConfig = {} + if "keys" in cfg: + out["keys"] = _normalize_keys(cfg.get("keys")) + if "ttl" in cfg: + ttl = _normalize_ttl(cfg.get("ttl")) + if ttl is not None: + out["ttl"] = ttl + return out + + +def _get_metadata(tool_context: ToolContext, key: str) -> str | None: + if not tool_context.metadata: + return None + key_norm = key.lower() + for item in tool_context.metadata: + if item.key.lower() == key_norm: + return item.value + return None + + +@dataclass(frozen=True) +class DatacacheIdentity: + """Resolved identity for a datacache instance.""" + + toolkit: str + key_parts: dict[str, str] + cache_key: str + cache_key_slug: str + + +_SLUG_RE = re.compile(r"[^A-Za-z0-9._-]+") + + +def _slugify(value: str, *, max_len: int = 200) -> str: + """Create a filesystem/redis/s3-safe slug.""" + value = value.strip() + value = _SLUG_RE.sub("_", value) + value = value.strip("._-") + if not value: + value = "default" + if len(value) > max_len: + value = value[:max_len] + return value + + +def build_datacache_identity( + *, + tool_fqn: str, + cfg: DatacacheConfig, + tool_context: ToolContext, +) -> DatacacheIdentity: + # Identity is scoped to the toolkit (not the individual tool) so tools can share tables. + toolkit = tool_fqn.split(".", 1)[0] if "." in tool_fqn else tool_fqn + keys = cfg.get("keys") or [] + key_parts: dict[str, str] = {} + for k in keys: + if k == "user_id": + if not tool_context.user_id: + raise DatacacheConfigError( + "datacache key 'user_id' requested but ToolContext.user_id is empty" + ) + key_parts["user_id"] = tool_context.user_id + elif k == "organization": + val = _get_metadata(tool_context, "organization") + # If organization is missing/null, use the default identity. + key_parts["organization"] = val if val else DEFAULT_ORGANIZATION_IDENTITY + elif k == "project": + val = _get_metadata(tool_context, "project") + # If project is missing/null, use the default identity. + key_parts["project"] = val if val else DEFAULT_PROJECT_IDENTITY + + # Human-readable cache key for debugging/ops (also slugified for storage/locks). + # Keep ASCII-safe and avoid '=' in the slug. Use `--` separators. + cache_key = ( + f"toolkit--{toolkit}--" + f"org--{key_parts.get('organization', '')}--" + f"project--{key_parts.get('project', '')}--" + f"user--{key_parts.get('user_id', '')}" + ) + cache_key_slug = _slugify(cache_key) + return DatacacheIdentity( + toolkit=toolkit, + key_parts=key_parts, + cache_key=cache_key, + cache_key_slug=cache_key_slug, + ) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/lock.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/lock.py new file mode 100644 index 000000000..aca1bf081 --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/lock.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import asyncio +import contextlib +import os +import time +import uuid +from dataclasses import dataclass + +try: + import redis.asyncio as redis_async +except Exception: # pragma: no cover + redis_async = None # type: ignore[assignment] + + +class DatacacheLockError(RuntimeError): + pass + + +@dataclass +class RedisLock: + redis_url: str + key: str + ttl_seconds: int + wait_seconds: int + value: str = "" + + async def __aenter__(self) -> RedisLock: + if redis_async is None: + raise DatacacheLockError( + "redis dependency not installed; required for datacache locking" + ) + + # Unique lock value so we can safely release. + self.value = f"{os.getpid()}:{uuid.uuid4()}" + client = redis_async.from_url(self.redis_url, decode_responses=True) + + deadline = time.time() + float(self.wait_seconds) + try: + while True: + ok = await client.set(self.key, self.value, ex=self.ttl_seconds, nx=True) + if ok: + self._client = client # type: ignore[attr-defined] + return self + if time.time() >= deadline: + break + await asyncio.sleep(0.1) + finally: + if not hasattr(self, "_client"): + with contextlib.suppress(Exception): + await client.aclose() + + raise DatacacheLockError(f"Timed out acquiring datacache lock: {self.key}") + + async def __aexit__(self, exc_type, exc, tb) -> None: + client = getattr(self, "_client", None) + if client is None: + return + + # Release only if value matches (Lua CAS). + lua = """ + if redis.call("get", KEYS[1]) == ARGV[1] then + return redis.call("del", KEYS[1]) + else + return 0 + end + """ + try: + await client.eval(lua, 1, self.key, self.value) + finally: + with contextlib.suppress(Exception): + await client.aclose() diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/storage.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/storage.py new file mode 100644 index 000000000..20e0c32ab --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/storage.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import asyncio +import os +import shutil +from dataclasses import dataclass +from typing import Any, Protocol + +try: + import boto3 # type: ignore[import-not-found] +except Exception: # pragma: no cover + boto3 = None # type: ignore[assignment] + + +class DatacacheStorageError(RuntimeError): + pass + + +class DatacacheLocation(Protocol): + """Marker protocol for storage-specific locations.""" + + +class DatacacheStorage(Protocol): + def location_for_digest(self, digest: str) -> DatacacheLocation: ... + + async def download_if_exists(self, loc: DatacacheLocation, local_path: str) -> bool: ... + + async def upload(self, loc: DatacacheLocation, local_path: str) -> None: ... + + +@dataclass(frozen=True) +class S3Location: + bucket: str + key: str + + +class S3DatacacheStorage: + def __init__( + self, + *, + bucket: str, + prefix: str, + endpoint_url: str | None = None, + region_name: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + ) -> None: + if boto3 is None: + raise DatacacheStorageError( + "boto3 dependency not installed; required for datacache S3 sync" + ) + self._bucket = bucket + self._prefix = prefix.strip("/").strip() + self._client_kwargs: dict[str, Any] = {} + if endpoint_url: + self._client_kwargs["endpoint_url"] = endpoint_url + if region_name: + self._client_kwargs["region_name"] = region_name + if aws_access_key_id and aws_secret_access_key: + self._client_kwargs["aws_access_key_id"] = aws_access_key_id + self._client_kwargs["aws_secret_access_key"] = aws_secret_access_key + if aws_session_token: + self._client_kwargs["aws_session_token"] = aws_session_token + + def location_for_digest(self, digest: str) -> S3Location: + key = f"{self._prefix}/{digest}.duckdb" + return S3Location(bucket=self._bucket, key=key) + + async def download_if_exists(self, loc: DatacacheLocation, local_path: str) -> bool: + loc = loc # type: ignore[assignment] + if not isinstance(loc, S3Location): + raise DatacacheStorageError("Invalid S3 location type") + os.makedirs(os.path.dirname(local_path), exist_ok=True) + + def _download() -> bool: + s3 = boto3.client("s3", **self._client_kwargs) + try: + s3.head_object(Bucket=loc.bucket, Key=loc.key) + except Exception: + return False + s3.download_file(loc.bucket, loc.key, local_path) + return True + + return await asyncio.to_thread(_download) + + async def upload(self, loc: DatacacheLocation, local_path: str) -> None: + loc = loc # type: ignore[assignment] + if not isinstance(loc, S3Location): + raise DatacacheStorageError("Invalid S3 location type") + if not os.path.exists(local_path): + raise DatacacheStorageError(f"Local datacache file missing: {local_path}") + + def _upload() -> None: + s3 = boto3.client("s3", **self._client_kwargs) + s3.upload_file(local_path, loc.bucket, loc.key) + + await asyncio.to_thread(_upload) + + +@dataclass(frozen=True) +class LocalFileLocation: + path: str + + +class LocalFileDatacacheStorage: + """Local filesystem storage backend. + + This is useful for local-only MCP servers and tests. It stores a canonical file per digest + under `storage_dir`, and each tool execution works on a separate `local_path` which is + copied from/to the canonical location. + """ + + def __init__(self, *, storage_dir: str) -> None: + self._storage_dir = storage_dir + + def location_for_digest(self, digest: str) -> LocalFileLocation: + os.makedirs(self._storage_dir, exist_ok=True) + return LocalFileLocation(path=os.path.join(self._storage_dir, f"{digest}.duckdb")) + + async def download_if_exists(self, loc: DatacacheLocation, local_path: str) -> bool: + loc = loc # type: ignore[assignment] + if not isinstance(loc, LocalFileLocation): + raise DatacacheStorageError("Invalid local file location type") + os.makedirs(os.path.dirname(local_path), exist_ok=True) + if not os.path.exists(loc.path): + return False + + def _copy_in() -> None: + shutil.copy2(loc.path, local_path) + + await asyncio.to_thread(_copy_in) + return True + + async def upload(self, loc: DatacacheLocation, local_path: str) -> None: + loc = loc # type: ignore[assignment] + if not isinstance(loc, LocalFileLocation): + raise DatacacheStorageError("Invalid local file location type") + if not os.path.exists(local_path): + raise DatacacheStorageError(f"Local datacache file missing: {local_path}") + + os.makedirs(os.path.dirname(loc.path), exist_ok=True) + + def _copy_out() -> None: + shutil.copy2(local_path, loc.path) + + await asyncio.to_thread(_copy_out) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/datacache/types.py b/libs/arcade-mcp-server/arcade_mcp_server/datacache/types.py new file mode 100644 index 000000000..d785b0d36 --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/datacache/types.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, Field + + +class DatacacheSetResult(BaseModel): + table: str + id: str + action: Literal["inserted", "updated"] + record: dict[str, Any] | None = None + created_at: int = Field(..., ge=0) + updated_at: int = Field(..., ge=0) + bytes_saved: int = Field(..., ge=0) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/exceptions.py b/libs/arcade-mcp-server/arcade_mcp_server/exceptions.py index d2deee898..2d362a050 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/exceptions.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/exceptions.py @@ -16,33 +16,29 @@ ) __all__ = [ - # Re-exports + "AuthorizationError", + "ContextRequiredToolError", "ErrorKind", "FatalToolError", - "RetryableToolError", - "ToolExecutionError", - "ToolRuntimeError", - "UpstreamError", - "UpstreamRateLimitError", - "ContextRequiredToolError", - # Base exceptions - "MCPError", - "MCPRuntimeError", - # Server exceptions - "ServerError", - "SessionError", - "RequestError", - "ResponseError", - "ServerRequestError", "LifespanError", - # Context exceptions "MCPContextError", + "MCPError", + "MCPRuntimeError", "NotFoundError", - "AuthorizationError", "PromptError", + "ProtocolError", + "RequestError", + "ResponseError", + "RetryableToolError", "ResourceError", + "ServerError", + "ServerRequestError", + "SessionError", + "ToolExecutionError", + "ToolRuntimeError", "TransportError", - "ProtocolError", + "UpstreamError", + "UpstreamRateLimitError", ] diff --git a/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py b/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py index 91c594715..c708fe345 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/mcp_app.py @@ -224,6 +224,7 @@ def add_tool( requires_auth: ToolAuthorization | None = None, requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, + datacache: dict[str, Any] | None = None, adapters: list[ErrorAdapter] | None = None, ) -> Callable[P, T]: """Add a tool for build-time materialization (pre-server).""" @@ -235,6 +236,7 @@ def add_tool( requires_auth=requires_auth, requires_secrets=requires_secrets, requires_metadata=requires_metadata, + datacache=datacache, adapters=adapters, ) try: @@ -263,6 +265,7 @@ def tool( requires_auth: ToolAuthorization | None = None, requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, + datacache: dict[str, Any] | None = None, adapters: list[ErrorAdapter] | None = None, ) -> Callable[[Callable[P, T]], Callable[P, T]] | Callable[P, T]: """Decorator for adding tools with optional parameters.""" @@ -275,6 +278,7 @@ def decorator(f: Callable[P, T]) -> Callable[P, T]: requires_auth=requires_auth, requires_secrets=requires_secrets, requires_metadata=requires_metadata, + datacache=datacache, adapters=adapters, ) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/middleware/base.py b/libs/arcade-mcp-server/arcade_mcp_server/middleware/base.py index 93765d957..adf8b6a56 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/middleware/base.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/middleware/base.py @@ -94,28 +94,26 @@ async def _build_handler_chain( # Method-specific handlers if context.method: - match context.method: - case "tools/call": - handler = partial(self.on_call_tool, call_next=handler) - case "tools/list": - handler = partial(self.on_list_tools, call_next=handler) - case "resources/read": - handler = partial(self.on_read_resource, call_next=handler) - case "resources/list": - handler = partial(self.on_list_resources, call_next=handler) - case "resources/templates/list": - handler = partial(self.on_list_resource_templates, call_next=handler) - case "prompts/get": - handler = partial(self.on_get_prompt, call_next=handler) - case "prompts/list": - handler = partial(self.on_list_prompts, call_next=handler) + if context.method == "tools/call": + handler = partial(self.on_call_tool, call_next=handler) + elif context.method == "tools/list": + handler = partial(self.on_list_tools, call_next=handler) + elif context.method == "resources/read": + handler = partial(self.on_read_resource, call_next=handler) + elif context.method == "resources/list": + handler = partial(self.on_list_resources, call_next=handler) + elif context.method == "resources/templates/list": + handler = partial(self.on_list_resource_templates, call_next=handler) + elif context.method == "prompts/get": + handler = partial(self.on_get_prompt, call_next=handler) + elif context.method == "prompts/list": + handler = partial(self.on_list_prompts, call_next=handler) # Type-specific handlers - match context.type: - case "request": - handler = partial(self.on_request, call_next=handler) - case "notification": - handler = partial(self.on_notification, call_next=handler) + if context.type == "request": + handler = partial(self.on_request, call_next=handler) + elif context.type == "notification": + handler = partial(self.on_notification, call_next=handler) # Generic message handler (always runs) handler = partial(self.on_message, call_next=handler) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/server.py b/libs/arcade-mcp-server/arcade_mcp_server/server.py index 6ce1ccddf..a4019bacd 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/server.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/server.py @@ -15,21 +15,32 @@ from __future__ import annotations import asyncio +import json import logging import os +import os.path from typing import Any, Callable, cast from arcade_core.auth_tokens import get_valid_access_token from arcade_core.catalog import MaterializedTool, ToolCatalog from arcade_core.executor import ToolExecutor +from arcade_core.schema import ToolAuthorizationContext, ToolContext, ToolMetadataItem from arcade_core.network.org_transport import build_org_scoped_async_http_client -from arcade_core.schema import ToolAuthorizationContext, ToolContext from arcade_core.schema import ToolAuthRequirement as CoreToolAuthRequirement from arcadepy import ArcadeError, AsyncArcade from arcadepy.types.auth_authorize_params import AuthRequirement, AuthRequirementOauth2 from arcade_mcp_server.context import Context, get_current_model_context, set_current_model_context from arcade_mcp_server.convert import convert_content_to_structured_content, convert_to_mcp_content +from arcade_mcp_server.datacache import ( + DatacacheClient, + DatacacheConfigError, + build_datacache_identity, + is_datacache_enabled, +) +from arcade_mcp_server.datacache.config import parse_datacache_config +from arcade_mcp_server.datacache.lock import RedisLock +from arcade_mcp_server.datacache.storage import LocalFileDatacacheStorage, S3DatacacheStorage from arcade_mcp_server.exceptions import NotFoundError, ToolRuntimeError from arcade_mcp_server.lifespan import LifespanManager from arcade_mcp_server.managers import PromptManager, ResourceManager, ToolManager @@ -341,6 +352,30 @@ def _default_instructions(self) -> str: "Use 'tools/list' to see available tools and 'tools/call' to execute them." ) + def _validate_datacache_boot_requirements(self) -> None: + """Fail fast on missing datacache requirements. + + If any tool enables datacache (presence of datacache.keys), we require Redis to be + configured so locking works consistently. + """ + datacache_required = False + for t in self._initial_catalog: + cfg_raw = getattr(t.meta, "datacache", None) + if is_datacache_enabled(cfg_raw): + datacache_required = True + break + + if not datacache_required: + return + + dc = self.settings.datacache + if not dc.redis_url: + logger.error( + "Datacache is enabled for at least one tool, but ARCADE_DATACACHE_REDIS_URL is not set. " + "Set ARCADE_DATACACHE_REDIS_URL and restart the server." + ) + raise SystemExit(1) + async def _start(self) -> None: """Start server components (called by MCPComponent.start).""" await self._tool_manager.start() @@ -379,6 +414,7 @@ async def start(self) -> None: logger.debug(f"{self.name} already started") return logger.info(f"Starting {self.name}") + self._validate_datacache_boot_requirements() try: await self._start() self._started = True @@ -785,6 +821,36 @@ def _select_user_id(self, session: ServerSession | None = None) -> str | None: else: logger.debug("Context user_id set from session (non-dev env)") + # metadata propagation from request _meta -> ToolContext.metadata + # Request meta is stored on the session as a SimpleNamespace in ServerSession.set_request_meta(). + meta_dict: dict[str, Any] = {} + if session is not None and getattr(session, "_request_meta", None) is not None: + try: + meta_dict = vars(session._request_meta) # type: ignore[attr-defined] + except Exception: + meta_dict = {} + + required_meta_keys: set[str] = set() + if tool.definition.requirements and tool.definition.requirements.metadata: + required_meta_keys = {m.key for m in tool.definition.requirements.metadata if m.key} + + # Always allow these common datacache keys if present + allowed_keys = required_meta_keys | {"organization", "project"} + + metadata_items: list[ToolMetadataItem] = [] + for k in sorted(allowed_keys): + if k not in meta_dict: + continue + v = meta_dict.get(k) + if v is None: + continue + v_str = json.dumps(v, sort_keys=True) if isinstance(v, (dict, list)) else str(v) + metadata_items.append(ToolMetadataItem(key=str(k), value=v_str)) + + if metadata_items: + tool_context.metadata = metadata_items + + return tool_context return session.session_id if session else None async def _check_and_warn_missing_secrets(self) -> None: @@ -867,15 +933,99 @@ async def _handle_call_tool( mctx.set_tool_context(tool_context) try: - # Execute tool - result = await ToolExecutor.run( - func=tool.tool, - definition=tool.definition, - input_model=tool.input_model, - output_model=tool.output_model, - context=mctx if mctx is not None else tool_context, - **input_params, - ) + # Optional datacache wrapper + datacache_cfg_raw = getattr(tool.meta, "datacache", None) + if is_datacache_enabled(datacache_cfg_raw): + cfg = parse_datacache_config(datacache_cfg_raw) + if cfg is None or not cfg.get("keys"): + raise DatacacheConfigError("datacache enabled but no keys configured") + + dc_settings = self.settings.datacache + if not dc_settings.redis_url: + raise DatacacheConfigError( + "ARCADE_DATACACHE_REDIS_URL must be set when datacache is enabled" + ) + if not dc_settings.storage_backend: + raise DatacacheConfigError( + "ARCADE_DATACACHE_STORAGE_BACKEND must be set to 's3' or 'local' when datacache is enabled" + ) + + identity = build_datacache_identity( + tool_fqn=tool.definition.fully_qualified_name, + cfg=cfg, + tool_context=tool_context, + ) + lock_key = f"arcade:datacache:lock:{identity.cache_key_slug}" + local_path = os.path.join( + dc_settings.local_dir, f"{identity.cache_key_slug}.duckdb" + ) + + if dc_settings.storage_backend == "s3": + if not dc_settings.s3_bucket: + raise DatacacheConfigError( + "ARCADE_DATACACHE_S3_BUCKET must be set when ARCADE_DATACACHE_STORAGE_BACKEND=s3" + ) + storage = S3DatacacheStorage( + bucket=dc_settings.s3_bucket, + prefix=dc_settings.s3_prefix, + endpoint_url=dc_settings.s3_endpoint_url, + region_name=dc_settings.aws_region, + aws_access_key_id=dc_settings.aws_access_key_id, + aws_secret_access_key=dc_settings.aws_secret_access_key, + aws_session_token=dc_settings.aws_session_token, + ) + loc = storage.location_for_digest(identity.cache_key_slug) + elif dc_settings.storage_backend == "local": + storage = LocalFileDatacacheStorage( + storage_dir=os.path.join(dc_settings.local_dir, "storage") + ) + loc = storage.location_for_digest(identity.cache_key_slug) + else: + raise DatacacheConfigError( + "ARCADE_DATACACHE_STORAGE_BACKEND must be one of: s3, local" + ) + + async with RedisLock( + redis_url=dc_settings.redis_url, + key=lock_key, + ttl_seconds=dc_settings.lock_ttl_seconds, + wait_seconds=dc_settings.lock_wait_seconds, + ): + await storage.download_if_exists(loc, local_path) + + client = await DatacacheClient.open( + path=local_path, default_ttl=cfg.get("ttl") + ) + + saved_client = None + if mctx is not None and isinstance(mctx, Context): + saved_client = getattr(mctx, "_datacache_client", None) + mctx._datacache_client = client # type: ignore[attr-defined] + + try: + result = await ToolExecutor.run( + func=tool.tool, + definition=tool.definition, + input_model=tool.input_model, + output_model=tool.output_model, + context=mctx if mctx is not None else tool_context, + **input_params, + ) + finally: + if mctx is not None and isinstance(mctx, Context): + mctx._datacache_client = saved_client # type: ignore[attr-defined] + await client.aclose() + await storage.upload(loc, local_path) + else: + # Execute tool normally + result = await ToolExecutor.run( + func=tool.tool, + definition=tool.definition, + input_model=tool.input_model, + output_model=tool.output_model, + context=mctx if mctx is not None else tool_context, + **input_params, + ) finally: # Restore the original tool context to prevent context leakage to parent tools in the case of tool chaining. if mctx is not None and saved_tool_context is not None: @@ -913,6 +1063,19 @@ async def _handle_call_tool( isError=True, ), ) + except DatacacheConfigError as e: + error_message = f"Datacache configuration error: {e}" + content = convert_to_mcp_content(error_message) + structured_content = convert_content_to_structured_content({"error": error_message}) + self._tracker.track_tool_call(False, "datacache config error") + return JSONRPCResponse( + id=message.id, + result=CallToolResult( + content=content, + structuredContent=structured_content, + isError=True, + ), + ) except NotFoundError: # Match test expectation: return a normal response with isError=True error_message = f"✗ Unknown tool: {tool_name}\n\n" diff --git a/libs/arcade-mcp-server/arcade_mcp_server/settings.py b/libs/arcade-mcp-server/arcade_mcp_server/settings.py index e8e4b8c1e..f2ae1613f 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/settings.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/settings.py @@ -5,6 +5,7 @@ """ import os +import tempfile from pathlib import Path from typing import Any @@ -224,6 +225,63 @@ class ArcadeSettings(BaseSettings): model_config = {"env_prefix": "ARCADE_"} +class DatacacheSettings(BaseSettings): + """Datacache (DuckDB + S3 + Redis) settings.""" + + storage_backend: str | None = Field( + default=None, + description="Datacache storage backend: 's3' or 'local' (required when datacache is enabled)", + ) + redis_url: str | None = Field( + default=None, + description="Redis URL used for datacache locking (e.g. redis://localhost:6379/0)", + ) + s3_bucket: str | None = Field( + default=None, + description="S3 bucket for storing datacache DuckDB files", + ) + s3_prefix: str = Field( + default="arcade/datacache", + description="S3 key prefix for datacache DuckDB files", + ) + aws_access_key_id: str | None = Field(default=None, description="AWS access key ID") + aws_secret_access_key: str | None = Field(default=None, description="AWS secret access key") + aws_session_token: str | None = Field(default=None, description="AWS session token") + aws_region: str | None = Field(default=None, description="AWS region") + s3_endpoint_url: str | None = Field( + default=None, + description="Custom S3 endpoint URL (e.g. for MinIO)", + ) + local_dir: str = Field( + default_factory=lambda: os.path.join(tempfile.gettempdir(), "arcade_datacache"), + description="Local directory for storing active datacache DuckDB files", + ) + lock_ttl_seconds: int = Field( + default=900, + description="Redis lock TTL in seconds (safety to prevent deadlocks)", + ge=1, + le=86400, + ) + lock_wait_seconds: int = Field( + default=900, + description="How long to wait to acquire the datacache lock before failing", + ge=0, + le=86400, + ) + + @field_validator("storage_backend") + @classmethod + def validate_storage_backend(cls, v: str | None) -> str | None: + if v is None: + return None + v_norm = v.strip().lower() + if v_norm not in {"s3", "local"}: + raise ValueError("ARCADE_DATACACHE_STORAGE_BACKEND must be one of: s3, local") + return v_norm + + model_config = {"env_prefix": "ARCADE_DATACACHE_"} + + class ToolEnvironmentSettings(BaseSettings): """Tool environment settings. @@ -241,7 +299,9 @@ class ToolEnvironmentSettings(BaseSettings): def model_post_init(self, __context: Any) -> None: """Populate tool_environment from process env if not provided.""" if not self.tool_environment: - excluded_prefixes = ("MCP_", "_") + # IMPORTANT: do not leak server/runtime config into tool secrets. + # In particular, ARCADE_DATACACHE_* can include credentials and endpoints. + excluded_prefixes = ("MCP_", "_", "ARCADE_DATACACHE_") self.tool_environment = { key: value for key, value in os.environ.items() @@ -285,6 +345,10 @@ class MCPSettings(BaseSettings): default_factory=ArcadeSettings, description="Arcade integration settings", ) + datacache: DatacacheSettings = Field( + default_factory=DatacacheSettings, + description="Datacache settings (DuckDB + S3 + Redis)", + ) tool_environment: ToolEnvironmentSettings = Field( default_factory=ToolEnvironmentSettings, description="Tool environment settings", diff --git a/libs/arcade-mcp-server/pyproject.toml b/libs/arcade-mcp-server/pyproject.toml index 71feef519..a8a9ba228 100644 --- a/libs/arcade-mcp-server/pyproject.toml +++ b/libs/arcade-mcp-server/pyproject.toml @@ -34,6 +34,9 @@ dependencies = [ "anyio>=4.0.0", "python-dotenv>=1.0.0", "pydantic-settings>=2.10.1", + "duckdb>=1.0.0", + "boto3>=1.34.0", + "redis>=5.0.0", "python-jose[cryptography]>=3.3.0,<4.0.0", "httpx>=0.27.0,<1.0.0", ] diff --git a/libs/arcade-tdk/arcade_tdk/errors.py b/libs/arcade-tdk/arcade_tdk/errors.py index dcd90a5fa..5b9056491 100644 --- a/libs/arcade-tdk/arcade_tdk/errors.py +++ b/libs/arcade-tdk/arcade_tdk/errors.py @@ -10,6 +10,7 @@ ) __all__ = [ + "ContextRequiredToolError", "ErrorKind", "FatalToolError", "RetryableToolError", @@ -18,7 +19,6 @@ "ToolRuntimeError", "UpstreamError", "UpstreamRateLimitError", - "ContextRequiredToolError", "WeightError", ] diff --git a/libs/arcade-tdk/arcade_tdk/tool.py b/libs/arcade-tdk/arcade_tdk/tool.py index efa4b89df..8315f33e9 100644 --- a/libs/arcade-tdk/arcade_tdk/tool.py +++ b/libs/arcade-tdk/arcade_tdk/tool.py @@ -111,6 +111,7 @@ def tool( requires_auth: ToolAuthorization | None = None, requires_secrets: list[str] | None = None, requires_metadata: list[str] | None = None, + datacache: dict[str, Any] | None = None, adapters: list[ErrorAdapter] | None = None, ) -> Callable: def decorator(func: Callable) -> Callable: @@ -122,6 +123,7 @@ def decorator(func: Callable) -> Callable: func.__tool_requires_auth__ = requires_auth # type: ignore[attr-defined] func.__tool_requires_secrets__ = requires_secrets # type: ignore[attr-defined] func.__tool_requires_metadata__ = requires_metadata # type: ignore[attr-defined] + func.__tool_datacache__ = datacache # type: ignore[attr-defined] adapter_chain = _build_adapter_chain(adapters, requires_auth) diff --git a/libs/tests/arcade_mcp_server/test_context.py b/libs/tests/arcade_mcp_server/test_context.py index 39a4c7345..ef8b78788 100644 --- a/libs/tests/arcade_mcp_server/test_context.py +++ b/libs/tests/arcade_mcp_server/test_context.py @@ -8,7 +8,6 @@ from arcade_mcp_server.context import get_current_model_context as get_current_context from arcade_mcp_server.context import set_current_model_context as set_current_context from arcade_mcp_server.types import ( - MCPTool, ModelHint, ModelPreferences, ) @@ -159,7 +158,7 @@ async def test_progress_reporting(self, mcp_server): await context.progress.report(50, 100, "Processing...") session.send_progress_notification.assert_called_once_with( - progress_token="task-123", progress=50, total=100, message="Processing..." + progress_token="task-123", progress=50, total=100, message="Processing..." # noqa: S106 ) # Without total diff --git a/libs/tests/arcade_mcp_server/test_convert.py b/libs/tests/arcade_mcp_server/test_convert.py index 6e39865fc..332f5613f 100644 --- a/libs/tests/arcade_mcp_server/test_convert.py +++ b/libs/tests/arcade_mcp_server/test_convert.py @@ -95,7 +95,7 @@ def test_convert_circular_reference(self): # Should handle gracefully (implementation dependent) # Most JSON encoders will raise an error - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): convert_to_mcp_content(obj) def test_convert_custom_objects(self): diff --git a/libs/tests/arcade_mcp_server/test_datacache.py b/libs/tests/arcade_mcp_server/test_datacache.py new file mode 100644 index 000000000..490e6075c --- /dev/null +++ b/libs/tests/arcade_mcp_server/test_datacache.py @@ -0,0 +1,149 @@ +"""Tests for datacache plumbing and helpers.""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Annotated + +import pytest +from arcade_core.catalog import ToolCatalog +from arcade_core.schema import ToolContext, ToolMetadataItem +from arcade_mcp_server import tool +from arcade_mcp_server.datacache.config import build_datacache_identity, parse_datacache_config +from arcade_mcp_server.datacache.storage import LocalFileDatacacheStorage + + +def test_datacache_decorator_persists_to_tool_meta(): + @tool(datacache={"keys": ["user_id"], "ttl": 123}) + def my_tool(x: Annotated[str, "x"]) -> Annotated[str, "y"]: + """My tool.""" + return x + + catalog = ToolCatalog() + catalog.add_tool(my_tool, "TestToolkit", toolkit_version="1.0.0", toolkit_description="x") + + tools = list(catalog) + assert len(tools) == 1 + mat = tools[0] + assert mat.meta.datacache is not None + assert mat.meta.datacache["keys"] == ["user_id"] + assert mat.meta.datacache["ttl"] == 123 + + +@pytest.mark.asyncio +async def test_request_meta_propagates_to_tool_context_metadata(mcp_server, materialized_tool): + session = SimpleNamespace( + session_id="test-session", + _request_meta=SimpleNamespace(organization="acme", project="rocket"), + ) + tool_ctx = await mcp_server._create_tool_context(materialized_tool, session=session) + assert tool_ctx.metadata is not None + md = {m.key: m.value for m in tool_ctx.metadata} + assert md["organization"] == "acme" + assert md["project"] == "rocket" + + +def test_datacache_identity_includes_selected_keys(): + cfg = parse_datacache_config({"keys": ["organization", "project", "user_id"], "ttl": 5}) + assert cfg is not None + + tc = ToolContext() + tc.user_id = "u1" + tc.metadata = [ + ToolMetadataItem(key="organization", value="org1"), + ToolMetadataItem(key="project", value="proj1"), + ] + + ident1 = build_datacache_identity(tool_fqn="TestToolkit.test_tool", cfg=cfg, tool_context=tc) + assert ident1.digest + + tc2 = ToolContext() + tc2.user_id = "u1" + tc2.metadata = [ + ToolMetadataItem(key="organization", value="org1"), + ToolMetadataItem(key="project", value="proj2"), + ] + ident2 = build_datacache_identity(tool_fqn="TestToolkit.test_tool", cfg=cfg, tool_context=tc2) + assert ident1.digest != ident2.digest + + +def test_datacache_identity_defaults_missing_org_project(): + cfg = parse_datacache_config({"keys": ["organization", "project", "user_id"], "ttl": 5}) + assert cfg is not None + + tc = ToolContext() + tc.user_id = "u1" + tc.metadata = [] # organization/project missing + + ident = build_datacache_identity(tool_fqn="TestToolkit.test_tool", cfg=cfg, tool_context=tc) + assert ident.key_parts["organization"] == "default" + assert ident.key_parts["project"] == "default" + + +def test_datacache_identity_shared_across_tools_in_toolkit(): + cfg = parse_datacache_config({"keys": ["organization", "project", "user_id"], "ttl": 5}) + assert cfg is not None + + tc = ToolContext() + tc.user_id = "u1" + tc.metadata = [ + ToolMetadataItem(key="organization", value="org1"), + ToolMetadataItem(key="project", value="proj1"), + ] + + ident_a = build_datacache_identity(tool_fqn="TestToolkit.tool_a", cfg=cfg, tool_context=tc) + ident_b = build_datacache_identity(tool_fqn="TestToolkit.tool_b", cfg=cfg, tool_context=tc) + assert ident_a.cache_key_slug == ident_b.cache_key_slug + assert ident_a.toolkit == "TestToolkit" + assert ident_a.key_parts == {"organization": "org1", "project": "proj1", "user_id": "u1"} + assert ident_a.cache_key == "toolkit--TestToolkit--org--org1--project--proj1--user--u1" + assert ident_a.cache_key_slug == "toolkit--TestToolkit--org--org1--project--proj1--user--u1" + + +@pytest.mark.asyncio +async def test_datacache_client_set_get_search(tmp_path): + pytest.importorskip("duckdb") + + from arcade_mcp_server.datacache.client import DatacacheClient + + db_path = tmp_path / "cache.duckdb" + client = await DatacacheClient.open(path=str(db_path), default_ttl=3600) + try: + resp1 = await client.set("profiles", {"id": "p1", "name": "Alice"}) + assert resp1.action == "inserted" + assert resp1.record is not None + assert resp1.record["id"] == "p1" + assert resp1.bytes_saved >= 1 + assert resp1.created_at >= 0 + assert resp1.updated_at >= 0 + + resp2 = await client.set("profiles", {"id": "p1", "name": "Alice2"}) + assert resp2.action == "updated" + assert resp2.created_at == resp1.created_at + assert resp2.updated_at >= resp1.updated_at + row = await client.get("profiles", "p1") + assert row is not None + assert row["id"] == "p1" + assert row["name"] == "Alice2" + + results = await client.search("profiles", "name", "ali") + assert len(results) >= 1 + finally: + await client.aclose() + + +@pytest.mark.asyncio +async def test_local_file_storage_roundtrip(tmp_path): + storage = LocalFileDatacacheStorage(storage_dir=str(tmp_path / "storage")) + loc = storage.location_for_digest("deadbeef") + + # Create a source file and upload it into the storage location + src = tmp_path / "src.duckdb" + src.write_bytes(b"hello") + await storage.upload(loc, str(src)) + + # Download from storage into a new local path + dst = tmp_path / "dst.duckdb" + exists = await storage.download_if_exists(loc, str(dst)) + assert exists is True + assert dst.read_bytes() == b"hello" diff --git a/libs/tests/arcade_mcp_server/test_error_handling_middleware.py b/libs/tests/arcade_mcp_server/test_error_handling_middleware.py index ca83dbb9a..9a7654f4f 100644 --- a/libs/tests/arcade_mcp_server/test_error_handling_middleware.py +++ b/libs/tests/arcade_mcp_server/test_error_handling_middleware.py @@ -245,7 +245,8 @@ async def test_chained_error_handling(self, error_middleware, context): async def handler(ctx): try: - raise ValueError("Original error") + msg = "Original error" + raise ValueError(msg) # noqa: TRY301 except ValueError as e: raise RuntimeError("Wrapped error") from e diff --git a/libs/tests/arcade_mcp_server/test_mcp_app.py b/libs/tests/arcade_mcp_server/test_mcp_app.py index 655ed78dd..97f61f67f 100644 --- a/libs/tests/arcade_mcp_server/test_mcp_app.py +++ b/libs/tests/arcade_mcp_server/test_mcp_app.py @@ -3,7 +3,7 @@ import subprocess import sys from typing import Annotated -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock, patch import pytest from arcade_core.catalog import MaterializedTool diff --git a/libs/tests/arcade_mcp_server/test_openapi_docs.py b/libs/tests/arcade_mcp_server/test_openapi_docs.py index eb5046129..cc5700727 100644 --- a/libs/tests/arcade_mcp_server/test_openapi_docs.py +++ b/libs/tests/arcade_mcp_server/test_openapi_docs.py @@ -1,6 +1,5 @@ """Test that MCP routes appear in OpenAPI documentation.""" -import pytest from arcade_core import ToolCatalog from arcade_core.toolkit import Toolkit from arcade_mcp_server.settings import MCPSettings @@ -73,7 +72,6 @@ def test_mcp_routes_in_openapi(monkeypatch): # Verify the actual proxy is mounted (not routes) # The OpenAPI docs should exist but not interfere with the mount - import inspect mounts = [route for route in app.routes if hasattr(route, "app") and hasattr(route, "path")] mcp_mounts = [m for m in mounts if m.path == "/mcp"] diff --git a/libs/tests/arcade_mcp_server/test_server.py b/libs/tests/arcade_mcp_server/test_server.py index cdfae2865..7ee4cecb7 100644 --- a/libs/tests/arcade_mcp_server/test_server.py +++ b/libs/tests/arcade_mcp_server/test_server.py @@ -475,7 +475,7 @@ async def test_error_handling_middleware(self, mcp_server): # Mock a handler to raise an exception async def failing_handler(*args, **kwargs): - raise Exception("Test error") + raise RuntimeError("Test error") mcp_server._handlers["test/fail"] = failing_handler @@ -674,7 +674,7 @@ async def test_check_tool_requirements_auth_completed(self, mcp_server): mock_auth_response = Mock() mock_auth_response.status = "completed" mock_auth_response.context = Mock() - mock_auth_response.context.token = "test-token" + mock_auth_response.context.token = "test-token" # noqa: S105 mock_auth_response.context.user_info = {"user_id": "test-user"} mcp_server._check_authorization = AsyncMock(return_value=mock_auth_response) @@ -694,7 +694,7 @@ async def test_check_tool_requirements_auth_completed(self, mcp_server): # Should return None (no error) and set authorization context assert result is None assert tool_context.authorization is not None - assert tool_context.authorization.token == "test-token" + assert tool_context.authorization.token == "test-token" # noqa: S105 assert tool_context.authorization.user_info == {"user_id": "test-user"} @pytest.mark.asyncio @@ -873,7 +873,7 @@ async def test_check_tool_requirements_combined_auth_and_secrets(self, mcp_serve mock_auth_response = Mock() mock_auth_response.status = "completed" mock_auth_response.context = Mock() - mock_auth_response.context.token = "test-token" + mock_auth_response.context.token = "test-token" # noqa: S105 mock_auth_response.context.user_info = {"user_id": "test-user"} mcp_server._check_authorization = AsyncMock(return_value=mock_auth_response) @@ -1124,7 +1124,7 @@ async def test_stdio_transport_allows_tool_with_auth( mock_auth_response = Mock() mock_auth_response.status = "completed" mock_auth_response.context = Mock() - mock_auth_response.context.token = "test-token" + mock_auth_response.context.token = "test-token" # noqa: S105 mock_auth_response.context.user_info = {} mcp_server._check_authorization = AsyncMock(return_value=mock_auth_response) @@ -1161,7 +1161,7 @@ async def test_no_transport_type_allows_tool_with_auth( mock_auth_response = Mock() mock_auth_response.status = "completed" mock_auth_response.context = Mock() - mock_auth_response.context.token = "test-token" + mock_auth_response.context.token = "test-token" # noqa: S105 mock_auth_response.context.user_info = {} mcp_server._check_authorization = AsyncMock(return_value=mock_auth_response) @@ -1208,6 +1208,26 @@ async def test_http_transport_allows_tool_without_requirements(self, mcp_server) assert isinstance(response.result, CallToolResult) assert response.result.isError is False + @pytest.mark.asyncio + async def test_startup_exits_if_datacache_enabled_without_redis_url(self): + """If any tool enables datacache, missing ARCADE_DATACACHE_REDIS_URL should fail fast.""" + from arcade_core.catalog import ToolCatalog + from arcade_mcp_server.settings import MCPSettings + + @tool(datacache={"keys": ["user_id"], "ttl": 1}) + def dc_tool(text: Annotated[str, "Input text"]) -> Annotated[str, "Output"]: + """Tool with datacache enabled.""" + return text + + catalog = ToolCatalog() + catalog.add_tool(dc_tool, "TestToolkit", toolkit_version="1.0.0", toolkit_description="x") + + settings = MCPSettings() + settings.datacache.redis_url = None + + server = MCPServer(catalog=catalog, settings=settings) + with pytest.raises(SystemExit): + await server.start() class TestMissingSecretsWarnings: """Test startup warnings for missing tool secrets.""" diff --git a/libs/tests/arcade_mcp_server/test_session.py b/libs/tests/arcade_mcp_server/test_session.py index 311f6ecc6..6af6e321e 100644 --- a/libs/tests/arcade_mcp_server/test_session.py +++ b/libs/tests/arcade_mcp_server/test_session.py @@ -133,7 +133,7 @@ async def test_progress_notification(self, server_session): """Test progress notification sending.""" # Send progress notification await server_session.send_progress_notification( - progress_token="task-123", progress=50, total=100, message="Processing..." + progress_token="task-123", progress=50, total=100, message="Processing..." # noqa: S106 ) # Verify notification was sent diff --git a/libs/tests/arcade_mcp_server/test_settings.py b/libs/tests/arcade_mcp_server/test_settings.py index d47dc4912..ec55b6c4a 100644 --- a/libs/tests/arcade_mcp_server/test_settings.py +++ b/libs/tests/arcade_mcp_server/test_settings.py @@ -1,6 +1,5 @@ """Tests for MCP Settings.""" -import pytest from arcade_mcp_server.settings import MCPSettings, ServerSettings @@ -98,3 +97,17 @@ def test_title_field_default(self): """Test that the title field default is 'ArcadeMCP'.""" field_info = ServerSettings.model_fields["title"] assert field_info.default == "ArcadeMCP" + + +class TestToolEnvironmentFiltering: + def test_datacache_env_not_exposed_to_tools(self, monkeypatch): + monkeypatch.setenv("ARCADE_DATACACHE_REDIS_URL", "redis://localhost:6379/0") + monkeypatch.setenv("ARCADE_DATACACHE_S3_BUCKET", "bucket") + monkeypatch.setenv("SOME_TOOL_SECRET", "ok") + + settings = MCPSettings.from_env() + tool_env = settings.tool_secrets() + + assert "SOME_TOOL_SECRET" in tool_env + assert "ARCADE_DATACACHE_REDIS_URL" not in tool_env + assert "ARCADE_DATACACHE_S3_BUCKET" not in tool_env diff --git a/libs/tests/arcade_mcp_server/transports/test_http_session_manager.py b/libs/tests/arcade_mcp_server/transports/test_http_session_manager.py index 61f5bc812..82cd859df 100644 --- a/libs/tests/arcade_mcp_server/transports/test_http_session_manager.py +++ b/libs/tests/arcade_mcp_server/transports/test_http_session_manager.py @@ -1,12 +1,10 @@ -from http import HTTPStatus -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, patch import pytest from arcade_mcp_server.transports.http_session_manager import ( MCP_SESSION_ID_HEADER, HTTPSessionManager, ) -from arcade_mcp_server.transports.http_streamable import HTTPStreamableTransport class TestHTTPSessionManager: diff --git a/libs/tests/arcade_mcp_server/transports/test_http_streamable.py b/libs/tests/arcade_mcp_server/transports/test_http_streamable.py index 37d8ca8a7..4d09f4a11 100644 --- a/libs/tests/arcade_mcp_server/transports/test_http_streamable.py +++ b/libs/tests/arcade_mcp_server/transports/test_http_streamable.py @@ -1,4 +1,3 @@ -import json from unittest.mock import AsyncMock, MagicMock, patch import pytest diff --git a/libs/tests/cli/deploy/test_deploy.py b/libs/tests/cli/deploy/test_deploy.py index 0a6768bc6..ff7bf3638 100644 --- a/libs/tests/cli/deploy/test_deploy.py +++ b/libs/tests/cli/deploy/test_deploy.py @@ -2,7 +2,6 @@ import io import subprocess import tarfile -import time from pathlib import Path import pytest diff --git a/libs/tests/cli/test_secret.py b/libs/tests/cli/test_secret.py index 3ce781675..c8b4f6660 100644 --- a/libs/tests/cli/test_secret.py +++ b/libs/tests/cli/test_secret.py @@ -1,5 +1,4 @@ import tempfile -from io import StringIO from unittest.mock import MagicMock, patch import httpx diff --git a/libs/tests/cli/usage/test_identity.py b/libs/tests/cli/usage/test_identity.py index 1dd6e64e6..b951286e3 100644 --- a/libs/tests/cli/usage/test_identity.py +++ b/libs/tests/cli/usage/test_identity.py @@ -129,9 +129,9 @@ def test_atomic_write_cleans_up_on_failure( "tempfile.mkstemp", return_value=(999, str(temp_config_path / ".usage_temp.tmp")) ), patch("os.fdopen", side_effect=Exception("Write failed")), + pytest.raises(Exception, match=r"Write failed"), ): - with pytest.raises(Exception, match="Write failed"): - identity._write_atomic({"anon_id": "test"}) + identity._write_atomic({"anon_id": "test"}) # Verify no temp files are left behind temp_files = list(temp_config_path.glob(".usage_*.tmp")) diff --git a/libs/tests/core/converters/test_openai.py b/libs/tests/core/converters/test_openai.py index e8d0713ff..2f1fa14ec 100644 --- a/libs/tests/core/converters/test_openai.py +++ b/libs/tests/core/converters/test_openai.py @@ -5,10 +5,7 @@ import pytest from arcade_core.catalog import MaterializedTool, ToolMeta, create_func_models from arcade_core.converters.openai import ( - OpenAIFunctionParameterProperty, OpenAIFunctionParameters, - OpenAIFunctionSchema, - OpenAIToolSchema, _convert_input_parameters_to_json_schema, _convert_value_schema_to_json_schema, _create_tool_schema, diff --git a/libs/tests/core/test_executor.py b/libs/tests/core/test_executor.py index fa8963492..e3c12b52f 100644 --- a/libs/tests/core/test_executor.py +++ b/libs/tests/core/test_executor.py @@ -326,13 +326,13 @@ def check_output_error(output_error: ToolCallError, expected_error: ToolCallErro assert output_error.message == expected_error.message, "message mismatch" assert output_error.kind == expected_error.kind, "kind mismatch" if expected_error.developer_message: - assert ( - output_error.developer_message == expected_error.developer_message - ), "developer message mismatch" + assert output_error.developer_message == expected_error.developer_message, ( + "developer message mismatch" + ) assert output_error.can_retry == expected_error.can_retry, "can retry mismatch" - assert ( - output_error.additional_prompt_content == expected_error.additional_prompt_content - ), "additional prompt content mismatch" + assert output_error.additional_prompt_content == expected_error.additional_prompt_content, ( + "additional prompt content mismatch" + ) assert output_error.retry_after_ms == expected_error.retry_after_ms, "retry after ms mismatch" if expected_error.stacktrace: assert output_error.stacktrace == expected_error.stacktrace, "stacktrace mismatch" diff --git a/libs/tests/core/test_schema.py b/libs/tests/core/test_schema.py index 5cea83552..eb11cd3a5 100644 --- a/libs/tests/core/test_schema.py +++ b/libs/tests/core/test_schema.py @@ -65,14 +65,14 @@ def test_get_secret_when_secrets_is_none(): tool_context = ToolContext(secrets=None) # When no secrets dictionary is provided, get_secret should raise a ValueError. - with pytest.raises(ValueError, match="Secret 'missing_key' not found in context."): + with pytest.raises(ValueError, match=r"Secret 'missing_key' not found in context\."): tool_context.get_secret("missing_key") def test_get_secret_with_empty_key(): tool_context = ToolContext(secrets=[]) - with pytest.raises(ValueError, match="Secret key passed to get_secret cannot be empty."): + with pytest.raises(ValueError, match=r"Secret key passed to get_secret cannot be empty\."): tool_context.get_secret("") @@ -107,12 +107,12 @@ def test_get_metadata_key_not_found(): def test_get_metadata_when_metadata_is_none(): tool_context = ToolContext(metadata=None) - with pytest.raises(ValueError, match="Metadata 'missing_key' not found in context."): + with pytest.raises(ValueError, match=r"Metadata 'missing_key' not found in context\."): tool_context.get_metadata("missing_key") def test_get_metadata_with_empty_key(): tool_context = ToolContext(metadata=[]) - with pytest.raises(ValueError, match="Metadata key passed to get_metadata cannot be empty."): + with pytest.raises(ValueError, match=r"Metadata key passed to get_metadata cannot be empty\."): tool_context.get_metadata("") diff --git a/libs/tests/core/test_schema_mcp_degradation.py b/libs/tests/core/test_schema_mcp_degradation.py index f4d621af3..ec8543050 100644 --- a/libs/tests/core/test_schema_mcp_degradation.py +++ b/libs/tests/core/test_schema_mcp_degradation.py @@ -9,7 +9,6 @@ import pytest from arcade_core.schema import ToolContext - # ===================== # Non-Critical Features (No-Op Tests) # ===================== diff --git a/libs/tests/sdk/test_graphql_adapter.py b/libs/tests/sdk/test_graphql_adapter.py index de6e44dd9..ad0af4165 100644 --- a/libs/tests/sdk/test_graphql_adapter.py +++ b/libs/tests/sdk/test_graphql_adapter.py @@ -176,7 +176,7 @@ def test_server_error_extracts_headers_from_cause(self) -> None: exc = DummyTransportServerError("Error", code=429) # No headers on exc, but on __cause__ cause = Exception("inner") - cause.response = DummyResponse({"retry-after": "10"}) # type: ignore + cause.response = DummyResponse({"retry-after": "10"}) # type: ignore[attr-defined] exc.__cause__ = cause with _patch_loader(): @@ -195,7 +195,7 @@ class FakeRequestInfo: method = "POST" cause = Exception("inner") - cause.request_info = FakeRequestInfo() # type: ignore + cause.request_info = FakeRequestInfo() # type: ignore[attr-defined] exc.__cause__ = cause with _patch_loader(): @@ -219,7 +219,7 @@ class FakeResponse: request = FakeRequest() cause = Exception("inner") - cause.response = FakeResponse() # type: ignore + cause.response = FakeResponse() # type: ignore[attr-defined] exc.__cause__ = cause with _patch_loader(): diff --git a/libs/tests/tool/test_create_tool_definition_new.py b/libs/tests/tool/test_create_tool_definition_new.py index f087aee45..2a330e173 100644 --- a/libs/tests/tool/test_create_tool_definition_new.py +++ b/libs/tests/tool/test_create_tool_definition_new.py @@ -64,13 +64,13 @@ def test_create_tool_def2(test_case): for i, input_type in enumerate(input_types): param = tool_def.input.parameters[i] - assert ( - param.value_schema.val_type == get_wire_type(input_type) - ), f"Parameter {param.name} has value type {param.value_schema.val_type} but {input_type} was expected at index {i}" + assert param.value_schema.val_type == get_wire_type(input_type), ( + f"Parameter {param.name} has value type {param.value_schema.val_type} but {input_type} was expected at index {i}" + ) if output_type: - assert tool_def.output.value_schema.val_type == get_wire_type( - output_type - ), f"Output has value type {tool_def.output.val_type} but {output_type} was expected" + assert tool_def.output.value_schema.val_type == get_wire_type(output_type), ( + f"Output has value type {tool_def.output.val_type} but {output_type} was expected" + ) else: assert tool_def.output.value_schema is None, "Output is not None" diff --git a/libs/tests/worker/test_telemetry.py b/libs/tests/worker/test_telemetry.py index 040575c27..f6f90c028 100644 --- a/libs/tests/worker/test_telemetry.py +++ b/libs/tests/worker/test_telemetry.py @@ -50,7 +50,9 @@ def test_init_with_enable_true( assert handler._log_processor is not None # Verify that FastAPIInstrumentor is used - mock_instrumentor.return_value.instrument_app.assert_called_once_with(app, excluded_urls="/worker/health", exclude_spans=["send", "receive"]) + mock_instrumentor.return_value.instrument_app.assert_called_once_with( + app, excluded_urls="/worker/health", exclude_spans=["send", "receive"] + ) @patch("arcade_serve.fastapi.telemetry.logging") @@ -70,6 +72,7 @@ def test_init_with_enable_false(mock_instrumentor, mock_logging, app): # Verify that FastAPIInstrumentor is not called mock_instrumentor.return_value.instrument_app.assert_not_called() + @patch("arcade_serve.fastapi.telemetry.OTLPLogExporter") @patch("arcade_serve.fastapi.telemetry.OTLPMetricExporter") @patch("arcade_serve.fastapi.telemetry.OTLPSpanExporter") diff --git a/libs/tests/worker/test_worker_base.py b/libs/tests/worker/test_worker_base.py index 19ba74ff1..e35652f2b 100644 --- a/libs/tests/worker/test_worker_base.py +++ b/libs/tests/worker/test_worker_base.py @@ -1,4 +1,3 @@ -import os from typing import Annotated from unittest.mock import MagicMock diff --git a/toolkits/math/arcade_math/tools/__init__.py b/toolkits/math/arcade_math/tools/__init__.py index 4b3d12f8c..bee1b80af 100644 --- a/toolkits/math/arcade_math/tools/__init__.py +++ b/toolkits/math/arcade_math/tools/__init__.py @@ -39,27 +39,27 @@ ) __all__ = [ + "abs_val", "add", - "subtract", - "multiply", + "avg", + "ceil", + "deg_to_rad", "divide", - "sum_list", - "sum_range", - "mod", - "log", - "power", - "abs_val", "factorial", - "sqrt", + "floor", + "gcd", "generate_random_float", "generate_random_int", - "gcd", "lcm", - "ceil", - "floor", - "round_num", - "avg", + "log", "median", - "deg_to_rad", + "mod", + "multiply", + "power", "rad_to_deg", + "round_num", + "sqrt", + "subtract", + "sum_list", + "sum_range", ] diff --git a/toolkits/zendesk/arcade_zendesk/__init__.py b/toolkits/zendesk/arcade_zendesk/__init__.py index 373988def..16ac06623 100644 --- a/toolkits/zendesk/arcade_zendesk/__init__.py +++ b/toolkits/zendesk/arcade_zendesk/__init__.py @@ -7,9 +7,9 @@ ) __all__ = [ - "list_tickets", "add_ticket_comment", "get_ticket_comments", + "list_tickets", "mark_ticket_solved", "search_articles", ] diff --git a/toolkits/zendesk/arcade_zendesk/tools/__init__.py b/toolkits/zendesk/arcade_zendesk/tools/__init__.py index 30cb0c3c1..d2e90cbf8 100644 --- a/toolkits/zendesk/arcade_zendesk/tools/__init__.py +++ b/toolkits/zendesk/arcade_zendesk/tools/__init__.py @@ -8,9 +8,9 @@ ) __all__ = [ - "list_tickets", "add_ticket_comment", "get_ticket_comments", + "list_tickets", "mark_ticket_solved", "search_articles", "who_am_i",