Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@ on:
push:
branches: [ main ]
pull_request:

# Allow the job to be triggered manually.
workflow_dispatch:

# Run the job as a nightly recurrent job.
schedule:
- cron: '0 4 * * *'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
Expand Down
42 changes: 28 additions & 14 deletions cratedb_mcp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,52 @@


def query_cratedb(query: str) -> list[dict]:
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json()
return httpx.post(f"{HTTP_URL}/_sql", json={"stmt": query}, timeout=HTTP_TIMEOUT).json()


@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that"
" modify data, columns or are otherwise deemed un-safe are rejected.")
@mcp.tool(
description=(
"Send an SQL query to CrateDB. Only 'SELECT' queries are allowed; "
"queries that modify data, columns, or are otherwise deemed unsafe are rejected."
)
)
def query_sql(query: str):
if not sql_is_permitted(query):
raise ValueError('Only queries that have a SELECT statement are allowed.')
raise ValueError("Only queries that have a SELECT statement are allowed.")
return query_cratedb(query)

@mcp.tool(description='Gets an index with CrateDB documentation links to fetch, should download docs'
' before answering questions. Has documentation title, description, and link.')

@mcp.tool(
description="Get an index with CrateDB documentation links to fetch, should download docs "
"before answering questions. Has documentation title, description, and link."
)
def get_cratedb_documentation_index():
return documentation_index.items()

@mcp.tool(description='Downloads the latest CrateDB documentation piece by link.'
' Only used to download CrateDB docs.')

@mcp.tool(
description="Download the latest CrateDB documentation piece by link. "
"Only used to download CrateDB docs."
)
def fetch_cratedb_docs(link: str):
"""Fetches a CrateDB documentation link."""
"""Fetch a CrateDB documentation link."""
if not documentation_index.url_permitted(link):
raise ValueError(f'Link is not permitted: {link}')
raise ValueError(f"Link is not permitted: {link}")
return documentation_index.client.get(link, timeout=HTTP_TIMEOUT).text

@mcp.tool(description="Returns an aggregation of all CrateDB's schema, tables and their metadata")

@mcp.tool(description="Return an aggregation of all CrateDB's schema, tables and their metadata")
def get_table_metadata() -> list[dict]:
"""Returns an aggregation of schema:tables, e.g: {'doc': [{name:'mytable', ...}, ...]}
"""
Return an aggregation of schema:tables, e.g.: {'doc': [{name:'mytable', ...}, ...]}

The tables have metadata datapoints like replicas, shards, name, version, total_shards, total_records.
The tables have metadata datapoints like replicas, shards,
name, version, total_shards, total_records.
"""
return query_cratedb(Queries.TABLES_METADATA)


@mcp.tool(description="Returns the health of a CrateDB cluster.")
def get_health() -> list[dict]:
"""Queries sys.health ordered by severity."""
"""Query sys.health ordered by severity."""
return query_cratedb(Queries.HEALTH)
1 change: 0 additions & 1 deletion cratedb_mcp/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ class DocumentationIndex:
]

def __init__(self):

# Configure Hishel, an httpx client with caching.
# Define one hour of caching time.
controller = hishel.Controller(allow_stale=True)
Expand Down
30 changes: 21 additions & 9 deletions cratedb_mcp/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,21 @@ def permit_all_statements() -> bool:
try:
permitted = to_bool(os.getenv("CRATEDB_MCP_PERMIT_ALL_STATEMENTS", "false"))
if permitted:
warnings.warn("All types of SQL statements are permitted. "
"This means the LLM agent can write and modify the connected database",
category=UserWarning, stacklevel=2)
warnings.warn(
"All types of SQL statements are permitted. "
"This means the LLM agent can write and modify the connected database",
category=UserWarning,
stacklevel=2,
)
except (ValueError, TypeError) as e:
# If the environment variable is not a valid integer, use the default value, but warn about it.
# If the environment variable is not a valid integer,
# use the default value, but warn about it.
# TODO: Add software test after refactoring away from module scope.
warnings.warn(f"Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid: {e}. ",
category=UserWarning, stacklevel=2)
warnings.warn(
f"Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid: {e}. ",
category=UserWarning,
stacklevel=2,
)
return permitted

@staticmethod
Expand All @@ -41,8 +48,13 @@ def docs_cache_ttl(ttl: int = 3600) -> int:
try:
return int(os.getenv("CRATEDB_MCP_DOCS_CACHE_TTL", ttl))
except ValueError as e: # pragma: no cover
# If the environment variable is not a valid integer, use the default value, but warn about it.
# If the environment variable is not a valid integer,
# use the default value, but warn about it.
# TODO: Add software test after refactoring away from module scope.
warnings.warn(f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. "
f"Using default value: {ttl}.", category=UserWarning, stacklevel=2)
warnings.warn(
f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. "
f"Using default value: {ttl}.",
category=UserWarning,
stacklevel=2,
)
return ttl
9 changes: 6 additions & 3 deletions cratedb_mcp/util/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def sql_is_permitted(expression: str) -> bool:
Issue: https://github.com/crate/cratedb-mcp/issues/10
Question: Does SQLAlchemy provide a solid read-only mode, or any other library?
"""
is_dql = SqlStatementClassifier(expression=expression, permit_all=Settings.permit_all_statements()).is_dql
is_dql = SqlStatementClassifier(
expression=expression, permit_all=Settings.permit_all_statements()
).is_dql
if is_dql:
logger.info(f"Permitted SQL expression: {expression and expression[:50]}...")
else:
Expand All @@ -37,6 +39,7 @@ class SqlStatementClassifier:
Here, most importantly: Provide the `is_dql` property that
signals truthfulness for read-only SQL SELECT statements only.
"""

expression: str
permit_all: bool = False

Expand All @@ -59,7 +62,7 @@ def parse_sqlparse(self) -> t.List[sqlparse.sql.Statement]:
@property
def is_dql(self) -> bool:
"""
Whether the statement is a DQL statement, which effectively invokes read-only operations only.
Is it a DQL statement, which effectively invokes read-only operations only?
"""

if not self.expression:
Expand All @@ -78,7 +81,7 @@ def is_select(self) -> bool:
"""
Whether the expression is an SQL SELECT statement.
"""
return self.operation == 'SELECT'
return self.operation == "SELECT"

@property
def operation(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion docs/backlog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

## Iteration +1
- Naming things: Better names for API entrypoints
- Format code, improve linting
- Docs: Load documentation index from a custom outline file
- Release v0.0.0

Expand All @@ -14,3 +13,4 @@
- SQL: Stronger read-only mode
- Docs: HTTP caching
- Improve documentation
- Format code, improve linting
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ optional-dependencies.test = [
scripts.cratedb-mcp = "cratedb_mcp.cli:main"

[tool.ruff]
line-length = 110
line-length = 100

extend-exclude = [
]
Expand Down Expand Up @@ -191,15 +191,15 @@ check = [
]

format = [
#{ cmd = "ruff format ." },
{ cmd = "ruff format ." },
# Configure Ruff not to auto-fix (remove!):
# unused imports (F401), unused variables (F841), `print` statements (T201), and commented-out code (ERA001).
#{ cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." },
{ cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." },
{ cmd = "pyproject-fmt --keep-full-version pyproject.toml" },
]

lint = [
#{ cmd = "ruff format --check ." },
{ cmd = "ruff format --check ." },
{ cmd = "ruff check ." },
{ cmd = "validate-pyproject pyproject.toml" },
{ cmd = "mypy" },
Expand Down
3 changes: 0 additions & 3 deletions tests/test_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@ def test_documentation_index():


def test_queries():

# Verify basic parts of the query.
assert "information_schema.tables" in Queries.TABLES_METADATA

# Verify other critical parts of the query.
assert "sys.health" in Queries.TABLES_METADATA
assert "WITH partitions_health" in Queries.TABLES_METADATA
assert "LEFT JOIN" in Queries.TABLES_METADATA


12 changes: 9 additions & 3 deletions tests/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@ def test_fetch_docs_forbidden():


def test_fetch_docs_permitted_github():
response = fetch_cratedb_docs("https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst")
response = fetch_cratedb_docs(
"https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst"
)
assert "initcap" in response


def test_fetch_docs_permitted_cratedb_com():
response = fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt")
response = fetch_cratedb_docs(
"https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt"
)
assert "initcap" in response


Expand All @@ -35,7 +39,9 @@ def test_query_sql_permitted():

def test_query_sql_forbidden_easy():
with pytest.raises(ValueError) as ex:
assert "RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id"))
assert "RelationUnknown" in str(
query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id")
)
assert ex.match("Only queries that have a SELECT statement are allowed")


Expand Down
13 changes: 9 additions & 4 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ def test_sql_insert_permit_invalid(mocker):
mocker.patch.dict(os.environ, {"CRATEDB_MCP_PERMIT_ALL_STATEMENTS": "-555"})
with pytest.warns(UserWarning) as record:
assert sql_is_permitted("INSERT INTO foobar") is False
assert "Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid" in record[0].message.args[0]
assert (
"Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid"
in record[0].message.args[0]
)


def test_sql_select_multiple_rejected():
Expand Down Expand Up @@ -81,8 +84,10 @@ def test_sql_multiple_statements_rejected():


def test_sql_with_comments_rejected():
assert sql_is_permitted(
"/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)") is False
assert (
sql_is_permitted("/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)")
is False
)


def test_sql_update_rejected():
Expand Down Expand Up @@ -117,4 +122,4 @@ def test_sql_case_manipulation_rejected():

def test_sql_unicode_evasion_rejected():
"""Statements with unicode characters to evade filters are rejected"""
assert sql_is_permitted("SELECT * FROM users; \uFF1B DROP TABLE users") is False
assert sql_is_permitted("SELECT * FROM users; \uff1b DROP TABLE users") is False