Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
# `uv publish` does not understand `--skip-existing`.
# https://github.com/astral-sh/uv/issues/7917
# https://github.com/astral-sh/uv/issues/12369
uvx twine upload --non-interactive --repository-url https://test.pypi.org/legacy/ --skip-existing dist/*
uvx twine upload --non-interactive --verbose --repository-url https://test.pypi.org/legacy/ --skip-existing dist/*

- name: Publish package to PyPI
if: startsWith(github.event.ref, 'refs/tags')
Expand Down
21 changes: 12 additions & 9 deletions cratedb_mcp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@
import httpx
from mcp.server.fastmcp import FastMCP

from .knowledge import DOCUMENTATION_INDEX, Queries
from .settings import DOCS_CACHE_TTL, HTTP_URL
from .knowledge import DocumentationIndex, Queries, documentation_url_permitted
from .settings import DOCS_CACHE_TTL, HTTP_TIMEOUT, HTTP_URL

# Configure Hishel, an httpx client with caching.
# Define one hour of caching time.
controller = hishel.Controller(allow_stale=True)
storage = hishel.SQLiteStorage(ttl=DOCS_CACHE_TTL)
client = hishel.CacheClient(controller=controller, storage=storage)

# Load CrateDB documentation outline.
documentation_index = DocumentationIndex()

# Create FastMCP application object.
mcp = FastMCP("cratedb-mcp")


def query_cratedb(query: str) -> list[dict]:
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}).json()
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for the database query.

Similar to the documentation fetching, the database query doesn't include error handling for timeouts or connection failures.

def query_cratedb(query: str) -> list[dict]:
-    return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json()
+    try:
+        response = httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT)
+        response.raise_for_status()  # Raise an exception for 4XX/5XX responses
+        return response.json()
+    except httpx.TimeoutException:
+        raise ValueError(f"Database query timed out after {HTTP_TIMEOUT} seconds")
+    except httpx.HTTPStatusError as e:
+        raise ValueError(f"HTTP error {e.response.status_code} while querying database")
+    except httpx.RequestError as e:
+        raise ValueError(f"Request error while querying database: {str(e)}")
+    except ValueError as e:
+        raise ValueError(f"Invalid JSON response from database: {str(e)}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json()
def query_cratedb(query: str) -> list[dict]:
try:
response = httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT)
response.raise_for_status() # Raise an exception for 4XX/5XX responses
return response.json()
except httpx.TimeoutException:
raise ValueError(f"Database query timed out after {HTTP_TIMEOUT} seconds")
except httpx.HTTPStatusError as e:
raise ValueError(f"HTTP error {e.response.status_code} while querying database")
except httpx.RequestError as e:
raise ValueError(f"Request error while querying database: {str(e)}")
except ValueError as e:
raise ValueError(f"Invalid JSON response from database: {str(e)}")
🤖 Prompt for AI Agents
In cratedb_mcp/__main__.py at line 22, the database query using httpx.post lacks
error handling for timeouts and connection failures. Wrap the httpx.post call in
a try-except block to catch exceptions like httpx.TimeoutException and
httpx.RequestError, and handle them appropriately, such as logging the error and
returning a fallback response or raising a custom exception.



@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that"
Expand All @@ -27,17 +30,17 @@ def query_sql(query: str):
return query_cratedb(query)

@mcp.tool(description='Gets an index with CrateDB documentation links to fetch, should download docs'
' before answering questions. Has documentation name, description and link.')
' before answering questions. Has documentation title, description, and link.')
def get_cratedb_documentation_index():
return DOCUMENTATION_INDEX
return documentation_index.items()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling when accessing documentation items.

If the outline couldn't be loaded or if there's an error during access, the application would crash. Consider adding error handling to gracefully handle potential issues.

def get_cratedb_documentation_index():
-    return documentation_index.items()
+    try:
+        return documentation_index.items()
+    except Exception as e:
+        import logging
+        logging.error(f"Failed to access documentation index: {e}")
+        # Return a minimal fallback
+        return []
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
return documentation_index.items()
def get_cratedb_documentation_index():
try:
return documentation_index.items()
except Exception as e:
import logging
logging.error(f"Failed to access documentation index: {e}")
# Return a minimal fallback
return []
🤖 Prompt for AI Agents
In cratedb_mcp/__main__.py at line 35, the return statement directly accesses
documentation_index.items() without error handling, which can cause the
application to crash if the outline fails to load or an error occurs. Wrap the
access to documentation_index.items() in a try-except block to catch potential
exceptions, and handle them gracefully by logging the error and returning an
empty list or a suitable fallback value.


@mcp.tool(description='Downloads the latest CrateDB documentation piece by link.'
' Only used to download CrateDB docs.')
def fetch_cratedb_docs(link: str):
"""Fetches a CrateDB documentation link from GitHub raw content."""
if 'https://raw.githubusercontent.com/crate/crate/' not in link:
raise ValueError('Only github cratedb links can be fetched.')
return client.get(link).text
"""Fetches a CrateDB documentation link."""
if not documentation_url_permitted(link):
raise ValueError(f'Link is not permitted: {link}')
return client.get(link, timeout=HTTP_TIMEOUT).text
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for HTTP requests.

The HTTP GET request doesn't include error handling for timeouts or connection failures, which could cause the application to crash if the documentation source is unavailable.

-    return client.get(link, timeout=HTTP_TIMEOUT).text
+    try:
+        response = client.get(link, timeout=HTTP_TIMEOUT)
+        response.raise_for_status()  # Raise an exception for 4XX/5XX responses
+        return response.text
+    except httpx.TimeoutException:
+        raise ValueError(f"Request timed out after {HTTP_TIMEOUT} seconds: {link}")
+    except httpx.HTTPStatusError as e:
+        raise ValueError(f"HTTP error {e.response.status_code} while fetching: {link}")
+    except httpx.RequestError as e:
+        raise ValueError(f"Request error while fetching: {link} - {str(e)}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
return client.get(link, timeout=HTTP_TIMEOUT).text
try:
response = client.get(link, timeout=HTTP_TIMEOUT)
response.raise_for_status() # Raise an exception for 4XX/5XX responses
return response.text
except httpx.TimeoutException:
raise ValueError(f"Request timed out after {HTTP_TIMEOUT} seconds: {link}")
except httpx.HTTPStatusError as e:
raise ValueError(f"HTTP error {e.response.status_code} while fetching: {link}")
except httpx.RequestError as e:
raise ValueError(f"Request error while fetching: {link} - {str(e)}")
🤖 Prompt for AI Agents
In cratedb_mcp/__main__.py at line 43, the HTTP GET request lacks error handling
for timeouts and connection failures. Wrap the client.get call in a try-except
block to catch exceptions like httpx.TimeoutException and httpx.RequestError,
then handle these exceptions gracefully by logging an error or returning a
fallback value to prevent the application from crashing.


@mcp.tool(description="Returns an aggregation of all CrateDB's schema, tables and their metadata")
def get_table_metadata() -> list[dict]:
Expand Down
55 changes: 38 additions & 17 deletions cratedb_mcp/knowledge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# ruff: noqa: E501
import cachetools
from cratedb_about import CrateDbKnowledgeOutline


class Queries:
TABLES_METADATA = """
Expand Down Expand Up @@ -83,20 +86,38 @@ class Queries:
ORDER BY severity DESC"""


# 'description' is very important, it gives context to the LLMs to properly decide which one to use.
DOCUMENTATION_INDEX = [
# TODO: Add all there are.
{
"name": "about/overview",
"description": "The most important factual and technical information about CrateDB per medium-sized (~300kB) llms.txt context file.",
"link": "https://cdn.crate.io/about/v1/llms.txt"},
{
"name": "scalar functions",
"description": "documentation about specific scalar/methods/functions for CrateDB SQL",
"link": "https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst"},
{
"name": "optimize query 101",
"description": "documentation about optimizing CrateDB SQL statements",
"link": "https://raw.githubusercontent.com/crate/cratedb-guide/9ab661997d7704ecbb63af9c3ee33535957e24e6/docs/performance/optimization.rst"
}
]
class DocumentationIndex:
    """
    Define documentation sections supplied to the MCP server.
    Load knowledge outline from YAML file and read all items.

    The `description` attribute is very important, it gives context
    to the LLM to properly decide which one to use.

    Canonical source: https://github.com/crate/about/blob/main/src/cratedb_about/outline/cratedb-outline.yaml

    Examples:
    ```yaml
    - title: "CrateDB SQL functions"
      link: https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt
      description: The reference documentation about all SQL functions CrateDB provides.

    - title: "Guide: CrateDB query optimization"
      link: https://cratedb.com/docs/guide/_sources/performance/optimization.rst.txt
      description: Essential principles for optimizing queries in CrateDB while avoiding the most common pitfalls.
    ```
    """

    def __init__(self):
        self.outline = CrateDbKnowledgeOutline.load()
        # Per-instance cache backing `items()`. Keeping it on the instance,
        # instead of in a dictionary shared through the decorator, means the
        # cached data is released together with the instance and no
        # class-level mapping holds a reference to `self`.
        self._items_cache = {}

    @cachetools.cachedmethod(lambda self: self._items_cache)
    def items(self):
        """
        Return all items of the knowledge outline.

        The result of `self.outline.find_items().to_dict()` is computed on
        the first call and served from the per-instance cache afterwards.
        """
        return self.outline.find_items().to_dict()


def documentation_url_permitted(url: str) -> bool:
    """
    Tell whether `url` may be fetched as a documentation resource.

    A URL is permitted when it starts with one of the allow-listed prefixes.

    NOTE(review): The two GitHub prefixes carry no trailing slash, so they
    also match accounts whose name merely starts with `crate` -- confirm
    that this is intended.
    """
    permitted_prefixes = (
        "https://cratedb.com/",
        "https://github.com/crate",
        "https://raw.githubusercontent.com/crate",
    )
    # `str.startswith` accepts a tuple and succeeds if any member matches.
    return url.startswith(permitted_prefixes)
3 changes: 3 additions & 0 deletions cratedb_mcp/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@
# TODO: Add software test after refactoring away from module scope.
warnings.warn(f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. "
f"Using default value: {DOCS_CACHE_TTL}.", category=UserWarning, stacklevel=2)

# Configure the timeout, in seconds, applied to all outbound HTTP requests.
HTTP_TIMEOUT = 10.0
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ dynamic = [
"version",
]
dependencies = [
"cachetools<6",
"cratedb-about==0.0.4",
"hishel<0.2",
"mcp[cli]>=1.5.0",
]
Expand Down
12 changes: 8 additions & 4 deletions tests/test_knowledge.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries
from cratedb_mcp.knowledge import DocumentationIndex, Queries


def test_documentation_index():
assert len(DOCUMENTATION_INDEX) == 3
assert DOCUMENTATION_INDEX[1]["name"] == "scalar functions"
assert DOCUMENTATION_INDEX[2]["name"] == "optimize query 101"
documentation_index = DocumentationIndex()
titles = [item["title"] for item in documentation_index.items()]
assert len(titles) >= 50
assert "CrateDB database" in titles
assert "CrateDB features" in titles
assert "CrateDB SQL reference: Scalar functions" in titles
assert "Guide: CrateDB query optimization" in titles


def test_queries():
Expand Down
11 changes: 8 additions & 3 deletions tests/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,20 @@ def test_get_documentation_index():

def test_fetch_docs_forbidden():
with pytest.raises(ValueError) as ex:
fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt")
assert ex.match("Only github cratedb links can be fetched")
fetch_cratedb_docs("https://example.com")
assert ex.match("Link is not permitted: https://example.com")


def test_fetch_docs_permitted():
def test_fetch_docs_permitted_github():
response = fetch_cratedb_docs("https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst")
assert "initcap" in response


def test_fetch_docs_permitted_cratedb_com():
response = fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt")
assert "initcap" in response


def test_query_sql_forbidden():
with pytest.raises(ValueError) as ex:
assert "RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id"))
Expand Down