diff --git a/CHANGES.md b/CHANGES.md index 2aa9526..97b0fae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,9 @@ ## Unreleased - MCP: Fixed defunct `get_cratedb_documentation_index` tool +- CLI: Added CLI options for user-defined prompts: `--instructions` and `--conventions`, + both accepting file paths or URLs. +- CLI: Added subcommand `cratedb-mcp show-prompt` to display the system prompt. ## v0.0.4 - 2025-07-21 - Parameters: Added CLI option `--host` and environment variable `CRATEDB_MCP_HOST` diff --git a/README.md b/README.md index 27221bd..df791b6 100644 --- a/README.md +++ b/README.md @@ -309,6 +309,46 @@ All other operations will raise a `PermissionError` exception, unless the `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` environment variable is set to a truthy value. +### System prompt customizations + +The CrateDB MCP server allows users to adjust the system prompt by either +redefining the baseline instructions or extending them with custom conventions. +Additional conventions can capture domain-specific details—such as information +required for particular ER data models—or any other guidelines you develop +over time. + +If you want to **add** custom conventions to the system prompt, +use the `--conventions` option. +```shell +cratedb-mcp serve --conventions="conventions-custom.md" +``` + +If you want to **replace** the standard built-in instructions prompt completely, +use the `--instructions` option. +```shell +cratedb-mcp serve --instructions="instructions-custom.md" +``` + +Alternatively, use the `CRATEDB_MCP_INSTRUCTIONS` and `CRATEDB_MCP_CONVENTIONS` +environment variables instead of the CLI options. + +To retrieve the standard system prompt, use the `show-prompt` subcommand. By +redirecting the output to a file, you can subsequently edit its contents and +reuse it with the MCP server using the command outlined above.
+```shell +cratedb-mcp show-prompt > instructions-custom.md +``` + +Instruction and convention fragments can be loaded from the following sources: + +- HTTP(S) URLs +- Local file paths +- Standard input (when fragment is "-") +- Direct string content + +Because LLMs understand Markdown well, you should also use it for writing +personal instructions or conventions. + ### Operate standalone Start MCP server with `stdio` transport (default). diff --git a/cratedb_mcp/__main__.py b/cratedb_mcp/__main__.py index 87ad6a8..67d4f28 100644 --- a/cratedb_mcp/__main__.py +++ b/cratedb_mcp/__main__.py @@ -1,77 +1,4 @@ -import importlib.resources +from cratedb_mcp.core import CrateDbMcp -from cratedb_about.instruction import GeneralInstructions -from fastmcp import FastMCP -from fastmcp.tools import Tool - -from . import __appname__ -from .tool import ( - fetch_cratedb_docs, - get_cluster_health, - get_cratedb_documentation_index, - get_table_metadata, - query_sql, -) - -instructions_general = GeneralInstructions().render() -instructions_mcp = (importlib.resources.files("cratedb_mcp") / "instructions.md").read_text() - -# Create FastMCP application object. -mcp: FastMCP = FastMCP( - name=__appname__, - instructions=instructions_mcp + instructions_general, -) - - -# ------------------------------------------ -# Text-to-SQL -# ------------------------------------------ -mcp.add_tool( - Tool.from_function( - fn=get_table_metadata, - description="Return column schema and metadata for all tables stored in CrateDB. " - "Use it to inquire entities you don't know about.", - tags={"text-to-sql"}, - ) -) -mcp.add_tool( - Tool.from_function( - fn=query_sql, - description="Send an SQL query to CrateDB and return results. 
" - "Only 'SELECT' queries are allowed.", - tags={"text-to-sql"}, - ) -) - - -# ------------------------------------------ -# Documentation inquiry -# ------------------------------------------ -mcp.add_tool( - Tool.from_function( - fn=get_cratedb_documentation_index, - description="Get an index of CrateDB documentation links for fetching. " - "Should download docs before answering questions. " - "Has documentation title, description, and link.", - tags={"documentation"}, - ) -) -mcp.add_tool( - Tool.from_function( - fn=fetch_cratedb_docs, - description="Download individual CrateDB documentation pages by link.", - tags={"documentation"}, - ) -) - - -# ------------------------------------------ -# Health / Status -# ------------------------------------------ -mcp.add_tool( - Tool.from_function( - fn=get_cluster_health, - description="Return the health of the CrateDB cluster.", - tags={"health", "monitoring", "status"}, - ) -) +# Is that a standard entrypoint that should be kept alive? +mcp = CrateDbMcp().mcp diff --git a/cratedb_mcp/cli.py b/cratedb_mcp/cli.py index e2d4c2d..da2c8af 100644 --- a/cratedb_mcp/cli.py +++ b/cratedb_mcp/cli.py @@ -4,7 +4,8 @@ import click from pueblo.util.cli import boot_click -from cratedb_mcp.__main__ import mcp +from cratedb_mcp.core import CrateDbMcp +from cratedb_mcp.prompt import InstructionsPrompt logger = logging.getLogger(__name__) @@ -54,9 +55,29 @@ def cli(ctx: click.Context) -> None: required=False, help="The URL path to serve on (for sse, http)", ) +@click.option( + "--instructions", + envvar="CRATEDB_MCP_INSTRUCTIONS", + type=str, + required=False, + help="If you want to change the default instructions prompt, use this option", +) +@click.option( + "--conventions", + envvar="CRATEDB_MCP_CONVENTIONS", + type=str, + required=False, + help="If you want to add custom conventions to the prompt, use this option", +) @click.pass_context def serve( - ctx: click.Context, transport: str, host: str, port: int, path: t.Optional[str] = 
None + ctx: click.Context, + transport: str, + host: str, + port: int, + path: t.Optional[str], + instructions: t.Optional[str], + conventions: t.Optional[str], ) -> None: """ Start MCP server. @@ -69,4 +90,13 @@ def serve( "port": port, "path": path, } - mcp.run(transport=t.cast(transport_types, transport), **transport_kwargs) # type: ignore[arg-type] + mcp_cratedb = CrateDbMcp(instructions=instructions, conventions=conventions) + mcp_cratedb.mcp.run(transport=t.cast(transport_types, transport), **transport_kwargs) # type: ignore[arg-type] + + +@cli.command() +def show_prompt() -> None: + """ + Display the system prompt. + """ + print(InstructionsPrompt().render()) # noqa: T201 diff --git a/cratedb_mcp/core.py b/cratedb_mcp/core.py new file mode 100644 index 0000000..9c0caa3 --- /dev/null +++ b/cratedb_mcp/core.py @@ -0,0 +1,90 @@ +from typing import Optional + +from fastmcp import FastMCP +from fastmcp.tools import Tool + +from cratedb_mcp.prompt import InstructionsPrompt + +from . import __appname__, __version__ +from .tool import ( + fetch_cratedb_docs, + get_cluster_health, + get_cratedb_documentation_index, + get_table_metadata, + query_sql, +) + + +class CrateDbMcp: + """ + Small wrapper around the FastMCP API to provide instructions prompt at runtime. + """ + + def __init__( + self, + mcp: Optional[FastMCP] = None, + instructions: Optional[str] = None, + conventions: Optional[str] = None, + ) -> None: + prompt = InstructionsPrompt(instructions=instructions, conventions=conventions) + self.mcp = mcp or FastMCP( + name=__appname__, + version=__version__, + instructions=prompt.render(), + ) + self.add_tools() + + def add_tools(self): + """Register all CrateDB MCP tools with the FastMCP instance.""" + # ------------------------------------------ + # Text-to-SQL + # ------------------------------------------ + self.mcp.add_tool( + Tool.from_function( + fn=get_table_metadata, + description="Return column schema and metadata for all tables stored in CrateDB. 
" + "Use it to inquire entities you don't know about.", + tags={"text-to-sql"}, + ) + ) + self.mcp.add_tool( + Tool.from_function( + fn=query_sql, + description="Send an SQL query to CrateDB and return results. " + "Only 'SELECT' queries are allowed.", + tags={"text-to-sql"}, + ) + ) + + # ------------------------------------------ + # Documentation inquiry + # ------------------------------------------ + self.mcp.add_tool( + Tool.from_function( + fn=get_cratedb_documentation_index, + description="Get an index of CrateDB documentation links for fetching. " + "Should download docs before answering questions. " + "Has documentation title, description, and link.", + tags={"documentation"}, + ) + ) + self.mcp.add_tool( + Tool.from_function( + fn=fetch_cratedb_docs, + description="Download individual CrateDB documentation pages by link.", + tags={"documentation"}, + ) + ) + + # ------------------------------------------ + # Health / Status + # ------------------------------------------ + self.mcp.add_tool( + Tool.from_function( + fn=get_cluster_health, + description="Return the health of the CrateDB cluster.", + tags={"health", "monitoring", "status"}, + ) + ) + + return self diff --git a/cratedb_mcp/prompt/__init__.py b/cratedb_mcp/prompt/__init__.py new file mode 100644 index 0000000..484ea81 --- /dev/null +++ b/cratedb_mcp/prompt/__init__.py @@ -0,0 +1,68 @@ +import importlib.resources +import sys +from pathlib import Path +from typing import List, Optional + +import httpx +from cratedb_about.prompt import GeneralInstructions + + +class InstructionsPrompt: + """ + Bundle instructions how to use MCP tools with general instructions how to work with CrateDB. 
+ + - MCP: https://github.com/crate/cratedb-examples/blob/7f1bc0f94d/topic/chatbot/table-augmented-generation/aws/cratedb_tag_inline_agent.ipynb?short_path=00988ad#L776-L794 + - General: https://github.com/crate/about + """ + + def __init__(self, instructions: Optional[str] = None, conventions: Optional[str] = None): + fragments: List[str] = [] + if instructions: + fragments.append(self.load_fragment(instructions)) + else: + instructions_general = GeneralInstructions().render() + mcp_instructions_file = ( + importlib.resources.files("cratedb_mcp.prompt") / "instructions.md" + ) + if not mcp_instructions_file.is_file(): # pragma: no cover + raise FileNotFoundError(f"MCP instructions file not found: {mcp_instructions_file}") + instructions_mcp = mcp_instructions_file.read_text() + fragments.append(instructions_general) + fragments.append(instructions_mcp) + if conventions: + fragments.append(self.load_fragment(conventions)) + self.fragments = fragments + + def render(self) -> str: + return "\n\n".join(map(str.strip, self.fragments)) + + @staticmethod + def load_fragment(fragment: str) -> str: + """ + Load instruction fragment from various sources. + + Supports loading from: + - HTTP(S) URLs + - Local file paths + - Standard input (when fragment is "-") + - Direct string content + + That's a miniature variant of a "fragment" concept, + adapted from `llm` [1] written by Simon Willison. 
+ + [1] https://github.com/simonw/llm + """ + try: + if fragment.startswith("http://") or fragment.startswith("https://"): + with httpx.Client(follow_redirects=True, max_redirects=3, timeout=5.0) as client: + response = client.get(fragment) + response.raise_for_status() + return response.text + if fragment == "-": + return sys.stdin.read() + path = Path(fragment) + if path.exists(): + return path.read_text(encoding="utf-8") + return fragment + except (httpx.HTTPError, OSError, UnicodeDecodeError) as e: + raise ValueError(f"Failed to load fragment '{fragment}': {e}") from e diff --git a/cratedb_mcp/instructions.md b/cratedb_mcp/prompt/instructions.md similarity index 100% rename from cratedb_mcp/instructions.md rename to cratedb_mcp/prompt/instructions.md diff --git a/pyproject.toml b/pyproject.toml index 2d783fb..8348f57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ dependencies = [ "attrs", "cachetools<7", "click<9", - "cratedb-about==0.0.6", + "cratedb-about==0.0.7", "fastmcp>=2.7,<2.11", "hishel<0.2", "pueblo==0.0.11", @@ -94,11 +94,10 @@ optional-dependencies.test = [ scripts.cratedb-mcp = "cratedb_mcp.cli:cli" -[tool.setuptools] -include-package-data = true - [tool.setuptools.package-data] -cratedb_mcp = [ "*.md" ] +cratedb_mcp = [ + "**/*.md", +] [tool.setuptools.packages.find] namespaces = false diff --git a/tests/test_cli.py b/tests/test_cli.py index ec855fc..7e6bdb1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,9 @@ from unittest import mock +import fastmcp.server.server from click.testing import CliRunner from cratedb_mcp import __version__ -from cratedb_mcp.__main__ import mcp from cratedb_mcp.cli import cli @@ -40,7 +40,27 @@ def test_cli_help(): # Verify the outcome. 
assert result.exit_code == 0, result.output - assert "serve Start MCP server" in result.output + assert "Start MCP server" in result.output + assert "Display the system prompt" in result.output + + +def test_cli_show_prompt(): + """ + Verify `cratedb-mcp show-prompt` works as expected. + """ + + # Invoke the program. + runner = CliRunner() + result = runner.invoke( + cli, + args="show-prompt", + catch_exceptions=False, + ) + + # Verify the outcome. + assert result.exit_code == 0, result.output + assert "Rules for writing SQL queries" in result.output + assert "Tool instructions" in result.output def test_cli_no_command_no_option(): @@ -57,7 +77,8 @@ def test_cli_no_command_no_option(): # Verify the outcome. assert result.exit_code == 2, result.output - assert "serve Start MCP server" in result.output + assert "Start MCP server" in result.output + assert "Display the system prompt" in result.output def test_cli_valid_default(mocker, capsys): @@ -66,7 +87,7 @@ def test_cli_valid_default(mocker, capsys): The test needs to mock `anyio.run`, otherwise the call would block forever. """ - run_mock = mocker.patch.object(mcp, "run_async") + run_mock = mocker.patch.object(fastmcp.server.server.FastMCP, "run_async") # Invoke the program. runner = CliRunner() @@ -90,7 +111,7 @@ def test_cli_valid_custom(mocker, capsys): The test needs to mock `anyio.run`, otherwise the call would block forever. """ - run_mock = mocker.patch.object(mcp, "run_async") + run_mock = mocker.patch.object(fastmcp.server.server.FastMCP, "run_async") # Invoke the program. runner = CliRunner() diff --git a/tests/test_instructions.py b/tests/test_instructions.py deleted file mode 100644 index daec78e..0000000 --- a/tests/test_instructions.py +++ /dev/null @@ -1,13 +0,0 @@ -from cratedb_mcp.__main__ import mcp - - -def test_instructions(): - instructions_text = mcp.instructions - - # MCP instructions. - assert "Tool instructions" in instructions_text - - # General instructions. 
- assert "Things to remember when working with CrateDB" in instructions_text - assert "Rules for writing SQL queries" in instructions_text - assert "Core writing principles" in instructions_text diff --git a/tests/test_prompt.py b/tests/test_prompt.py new file mode 100644 index 0000000..e7ec5a7 --- /dev/null +++ b/tests/test_prompt.py @@ -0,0 +1,78 @@ +import io + +import pytest + +from cratedb_mcp.__main__ import mcp +from cratedb_mcp.prompt import InstructionsPrompt + + +def test_default(): + """ + The default instructions are bundled from general and MCP-related instructions. + """ + instructions_text = mcp.instructions + + # MCP instructions. + assert "Tool instructions" in instructions_text + + # General instructions. + assert "Things to remember when working with CrateDB" in instructions_text + assert "Rules for writing SQL queries" in instructions_text + assert "Core writing principles" in instructions_text + + +def test_custom_instructions(): + """ + Verify custom instructions replace the built-in ones. + """ + instructions = InstructionsPrompt(instructions="custom-instruction") + assert instructions.render() == "custom-instruction" + + +def test_custom_conventions(): + """ + Verify custom conventions are added to the built-in ones. + """ + instructions = InstructionsPrompt(conventions="custom-convention") + prompt = instructions.render() + assert "custom-convention" in prompt + assert "Core writing principles" in prompt + + +def test_fragment_local(tmp_path): + """ + Verify fragments are loaded from local filesystem. + """ + tmp = tmp_path / "test.txt" + tmp.write_text("custom-instruction") + instructions = InstructionsPrompt(instructions=str(tmp)) + assert instructions.render() == "custom-instruction" + + +def test_fragment_stdin(mocker): + """ + Verify fragments are loaded from STDIN. 
+ """ + mocker.patch("sys.stdin", io.StringIO("custom-instruction")) + + instructions = InstructionsPrompt(instructions="-") + assert instructions.render() == "custom-instruction" + + +def test_fragment_remote_success(): + """ + Verify fragments are loaded from HTTP URLs successfully. + """ + instructions = InstructionsPrompt(instructions="https://www.example.org/") + assert "Example Domain" in instructions.render() + + +def test_fragment_remote_failure(): + """ + Verify fragment-loading from HTTP URLs fails correctly. + """ + with pytest.raises(ValueError) as ex: + InstructionsPrompt(instructions="https://httpbin.org/404") + assert ex.match( + "Failed to load fragment 'https://httpbin.org/404': (Client error|Server error)" + )