diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 759bfaf..afe3ad4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,8 +4,14 @@ on: push: branches: [ main ] pull_request: + + # Allow the job to be triggered manually. workflow_dispatch: + # Run the job as a nightly recurrent job. + schedule: + - cron: '0 4 * * *' + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true diff --git a/cratedb_mcp/__main__.py b/cratedb_mcp/__main__.py index d14f1e6..93a2bc7 100644 --- a/cratedb_mcp/__main__.py +++ b/cratedb_mcp/__main__.py @@ -13,38 +13,52 @@ def query_cratedb(query: str) -> list[dict]: - return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json() + return httpx.post(f"{HTTP_URL}/_sql", json={"stmt": query}, timeout=HTTP_TIMEOUT).json() -@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that" - " modify data, columns or are otherwise deemed un-safe are rejected.") +@mcp.tool( + description=( + "Send an SQL query to CrateDB. Only 'SELECT' queries are allowed; " + "queries that modify data, columns, or are otherwise deemed unsafe are rejected." + ) +) def query_sql(query: str): if not sql_is_permitted(query): - raise ValueError('Only queries that have a SELECT statement are allowed.') + raise ValueError("Only queries that have a SELECT statement are allowed.") return query_cratedb(query) -@mcp.tool(description='Gets an index with CrateDB documentation links to fetch, should download docs' - ' before answering questions. Has documentation title, description, and link.') + +@mcp.tool( + description="Get an index with CrateDB documentation links to fetch, should download docs " + "before answering questions. Has documentation title, description, and link." +) def get_cratedb_documentation_index(): return documentation_index.items() -@mcp.tool(description='Downloads the latest CrateDB documentation piece by link.' - ' Only used to download CrateDB docs.') + +@mcp.tool( + description="Download the latest CrateDB documentation piece by link. " + "Only used to download CrateDB docs." +) def fetch_cratedb_docs(link: str): - """Fetches a CrateDB documentation link.""" + """Fetch a CrateDB documentation link.""" if not documentation_index.url_permitted(link): - raise ValueError(f'Link is not permitted: {link}') + raise ValueError(f"Link is not permitted: {link}") return documentation_index.client.get(link, timeout=HTTP_TIMEOUT).text -@mcp.tool(description="Returns an aggregation of all CrateDB's schema, tables and their metadata") + +@mcp.tool(description="Return an aggregation of all CrateDB's schema, tables and their metadata") def get_table_metadata() -> list[dict]: - """Returns an aggregation of schema:tables, e.g: {'doc': [{name:'mytable', ...}, ...]} + """ + Return an aggregation of schema:tables, e.g.: {'doc': [{name:'mytable', ...}, ...]} - The tables have metadata datapoints like replicas, shards, name, version, total_shards, total_records. + The tables have metadata datapoints like replicas, shards, + name, version, total_shards, total_records. """ return query_cratedb(Queries.TABLES_METADATA) + @mcp.tool(description="Returns the health of a CrateDB cluster.") def get_health() -> list[dict]: - """Queries sys.health ordered by severity.""" + """Query sys.health ordered by severity.""" return query_cratedb(Queries.HEALTH) diff --git a/cratedb_mcp/knowledge.py b/cratedb_mcp/knowledge.py index ad0a59f..ebde6d5 100644 --- a/cratedb_mcp/knowledge.py +++ b/cratedb_mcp/knowledge.py @@ -123,7 +123,6 @@ class DocumentationIndex: ] def __init__(self): - # Configure Hishel, an httpx client with caching. # Define one hour of caching time. controller = hishel.Controller(allow_stale=True) diff --git a/cratedb_mcp/settings.py b/cratedb_mcp/settings.py index 84c77d1..f7f6827 100644 --- a/cratedb_mcp/settings.py +++ b/cratedb_mcp/settings.py @@ -23,14 +23,21 @@ def permit_all_statements() -> bool: try: permitted = to_bool(os.getenv("CRATEDB_MCP_PERMIT_ALL_STATEMENTS", "false")) if permitted: - warnings.warn("All types of SQL statements are permitted. " - "This means the LLM agent can write and modify the connected database", - category=UserWarning, stacklevel=2) + warnings.warn( + "All types of SQL statements are permitted. " + "This means the LLM agent can write and modify the connected database", + category=UserWarning, + stacklevel=2, + ) except (ValueError, TypeError) as e: - # If the environment variable is not a valid integer, use the default value, but warn about it. + # If the environment variable is not a valid integer, + # use the default value, but warn about it. # TODO: Add software test after refactoring away from module scope. - warnings.warn(f"Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid: {e}. ", - category=UserWarning, stacklevel=2) + warnings.warn( + f"Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid: {e}. ", + category=UserWarning, + stacklevel=2, + ) return permitted @staticmethod @@ -41,8 +48,13 @@ def docs_cache_ttl(ttl: int = 3600) -> int: try: return int(os.getenv("CRATEDB_MCP_DOCS_CACHE_TTL", ttl)) except ValueError as e: # pragma: no cover - # If the environment variable is not a valid integer, use the default value, but warn about it. + # If the environment variable is not a valid integer, + # use the default value, but warn about it. # TODO: Add software test after refactoring away from module scope. - warnings.warn(f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. " - f"Using default value: {ttl}.", category=UserWarning, stacklevel=2) + warnings.warn( + f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. " + f"Using default value: {ttl}.", + category=UserWarning, + stacklevel=2, + ) return ttl diff --git a/cratedb_mcp/util/sql.py b/cratedb_mcp/util/sql.py index c1ea741..0f59908 100644 --- a/cratedb_mcp/util/sql.py +++ b/cratedb_mcp/util/sql.py @@ -21,7 +21,9 @@ def sql_is_permitted(expression: str) -> bool: Issue: https://github.com/crate/cratedb-mcp/issues/10 Question: Does SQLAlchemy provide a solid read-only mode, or any other library? """ - is_dql = SqlStatementClassifier(expression=expression, permit_all=Settings.permit_all_statements()).is_dql + is_dql = SqlStatementClassifier( + expression=expression, permit_all=Settings.permit_all_statements() + ).is_dql if is_dql: logger.info(f"Permitted SQL expression: {expression and expression[:50]}...") else: @@ -37,6 +39,7 @@ class SqlStatementClassifier: Here, most importantly: Provide the `is_dql` property that signals truthfulness for read-only SQL SELECT statements only. """ + expression: str permit_all: bool = False @@ -59,7 +62,7 @@ def parse_sqlparse(self) -> t.List[sqlparse.sql.Statement]: @property def is_dql(self) -> bool: """ - Whether the statement is a DQL statement, which effectively invokes read-only operations only. + Is it a DQL statement, which effectively invokes read-only operations only? """ if not self.expression: @@ -78,7 +81,7 @@ def is_select(self) -> bool: """ Whether the expression is an SQL SELECT statement. """ - return self.operation == 'SELECT' + return self.operation == "SELECT" @property def operation(self) -> str: diff --git a/docs/backlog.md b/docs/backlog.md index 3010761..6a87ae8 100644 --- a/docs/backlog.md +++ b/docs/backlog.md @@ -2,7 +2,6 @@ ## Iteration +1 - Naming things: Better names for API entrypoints -- Format code, improve linting - Docs: Load documentation index from a custom outline file - Release v0.0.0 @@ -14,3 +13,4 @@ - SQL: Stronger read-only mode - Docs: HTTP caching - Improve documentation +- Format code, improve linting diff --git a/pyproject.toml b/pyproject.toml index 87a1d97..5434e9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ optional-dependencies.test = [ scripts.cratedb-mcp = "cratedb_mcp.cli:main" [tool.ruff] -line-length = 110 +line-length = 100 extend-exclude = [ ] @@ -191,15 +191,15 @@ check = [ ] format = [ - #{ cmd = "ruff format ." }, + { cmd = "ruff format ." }, # Configure Ruff not to auto-fix (remove!): # unused imports (F401), unused variables (F841), `print` statements (T201), and commented-out code (ERA001). - #{ cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." }, + { cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." }, { cmd = "pyproject-fmt --keep-full-version pyproject.toml" }, ] lint = [ - #{ cmd = "ruff format --check ." }, + { cmd = "ruff format --check ." }, { cmd = "ruff check ." }, { cmd = "validate-pyproject pyproject.toml" }, { cmd = "mypy" }, diff --git a/tests/test_knowledge.py b/tests/test_knowledge.py index b78ff2b..5e36e17 100644 --- a/tests/test_knowledge.py +++ b/tests/test_knowledge.py @@ -12,7 +12,6 @@ def test_documentation_index(): def test_queries(): - # Verify basic parts of the query. assert "information_schema.tables" in Queries.TABLES_METADATA @@ -20,5 +19,3 @@ def test_queries(): assert "sys.health" in Queries.TABLES_METADATA assert "WITH partitions_health" in Queries.TABLES_METADATA assert "LEFT JOIN" in Queries.TABLES_METADATA - - diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 0faa2fa..1d09cc5 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -20,12 +20,16 @@ def test_fetch_docs_forbidden(): def test_fetch_docs_permitted_github(): - response = fetch_cratedb_docs("https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst") + response = fetch_cratedb_docs( + "https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst" + ) assert "initcap" in response def test_fetch_docs_permitted_cratedb_com(): - response = fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt") + response = fetch_cratedb_docs( + "https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt" + ) assert "initcap" in response @@ -35,7 +39,9 @@ def test_query_sql_permitted(): def test_query_sql_forbidden_easy(): with pytest.raises(ValueError) as ex: - assert "RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id")) + assert "RelationUnknown" in str( + query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id") + ) assert ex.match("Only queries that have a SELECT statement are allowed") diff --git a/tests/test_util.py b/tests/test_util.py index f74e1e9..c93414b 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -30,7 +30,10 @@ def test_sql_insert_permit_invalid(mocker): mocker.patch.dict(os.environ, {"CRATEDB_MCP_PERMIT_ALL_STATEMENTS": "-555"}) with pytest.warns(UserWarning) as record: assert sql_is_permitted("INSERT INTO foobar") is False - assert "Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid" in record[0].message.args[0] + assert ( + "Environment variable `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` invalid" + in record[0].message.args[0] + ) def test_sql_select_multiple_rejected(): @@ -81,8 +84,10 @@ def test_sql_multiple_statements_rejected(): def test_sql_with_comments_rejected(): - assert sql_is_permitted( - "/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)") is False + assert ( + sql_is_permitted("/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)") + is False + ) def test_sql_update_rejected(): @@ -117,4 +122,4 @@ def test_sql_case_manipulation_rejected(): def test_sql_unicode_evasion_rejected(): """Statements with unicode characters to evade filters are rejected""" - assert sql_is_permitted("SELECT * FROM users; \uFF1B DROP TABLE users") is False + assert sql_is_permitted("SELECT * FROM users; \uff1b DROP TABLE users") is False