diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..ac401ef --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml new file mode 100644 index 0000000..d180562 --- /dev/null +++ b/.github/workflows/release-pypi.yml @@ -0,0 +1,68 @@ +# Use Trusted Publishing to stage Python packages through GitHub Actions (GHA) to the Python Package Index (PyPI). +name: "Release: Python package" + +on: + + # Build and publish packages when running a release. + push: + tags: + - '*' + + # Build packages on each pull request for validation purposes. + pull_request: + + # Build packages each night for validation purposes. + schedule: + - cron: '0 4 * * *' + + # Allow the job to be triggered manually. + workflow_dispatch: + +jobs: + build-and-publish: + name: "Build and publish to PyPI" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + python-version: ["3.13"] + env: + OS_TYPE: ${{ matrix.os }} + PYTHON_VERSION: ${{ matrix.python-version }} + + # Trusted publishing. + # Specifying a GitHub environment is optional, but strongly encouraged. + environment: pypi + # IMPORTANT: This permission is mandatory for Trusted Publishing. 
+ permissions: + id-token: write + + steps: + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + pyproject.toml + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" + + - name: Build package + run: | + uv build + + - name: Publish package to TestPyPI + run: | + # `uv publish` does not understand `--skip-existing`. + # https://github.com/astral-sh/uv/issues/7917 + # https://github.com/astral-sh/uv/issues/12369 + uvx twine upload --non-interactive --repository-url https://test.pypi.org/legacy/ --skip-existing dist/* + + - name: Publish package to PyPI + if: startsWith(github.event.ref, 'refs/tags') + run: | + uv publish diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..759bfaf --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,79 @@ +name: "Tests" + +on: + push: + branches: [ main ] + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + test: + name: " + Python ${{ matrix.python-version }} + " + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ['ubuntu-latest'] + python-version: [ + '3.10', + '3.13', + ] + cratedb-version: [ + 'nightly', + ] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + UV_SYSTEM_PYTHON: true + + services: + cratedb: + image: crate/crate:${{ matrix.cratedb-version }} + ports: + - 4200:4200 + - 5432:5432 + env: + CRATE_HEAP_SIZE: 4g + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + pyproject.toml + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" + + - name: Set up project + run: | 
+ uv pip install --editable='.[all,develop,test]' + + - name: Run linter and software tests + run: poe check + + # https://github.com/codecov/codecov-action + - name: Upload coverage results to Codecov + uses: codecov/codecov-action@v5 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + env_vars: OS,PYTHON + fail_ci_if_error: true diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..30fc043 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,13 @@ +# CrateDB MCP changelog + +## Unreleased + +## v0.0.1 - 2025-05-xx +- Project: Established project layout +- Features: First working version +- Packaging: Adjusted package dependencies for interoperability +- Packaging: Added basic CLI entry point and server launcher `cratedb-mcp` +- Documentation: Show a simple Claude Desktop configuration +- MCP documentation: Add reference to medium-sized llms.txt context file +- Boilerplate: Added software tests and CI configuration +- Documentation: Added development sandbox section diff --git a/DEVELOP.md b/DEVELOP.md new file mode 100644 index 0000000..94f8e27 --- /dev/null +++ b/DEVELOP.md @@ -0,0 +1,48 @@ +# Development documentation + +## Sandbox + +Use these commands to set up a development sandbox and install the +project in editable mode. +```shell +git clone https://github.com/crate/cratedb-mcp +cd cratedb-mcp +uv venv --python 3.13 --seed .venv +source .venv/bin/activate +uv pip install --upgrade --editable='.[develop,test]' +``` + +## Software tests + +The project uses the [poethepoet] task runner, which provides convenience entry +points for invoking linters and software tests. The top-level one-shot command +will invoke both and is also used on CI/GHA. +```shell +poe check +``` + +To invoke individual software tests for working on the spot, use a +traditional `pytest` invocation. 
Examples: +```shell +pytest --no-cov tests/test_knowledge.py +``` +```shell +pytest --no-cov -k query +``` + +## Release + +The project uses [versioningit] so you don't need to do any version bumping +within files because the version number will be derived from the Git tag. + +However, you need to designate the new release within the [CHANGES.md](./CHANGES.md) +file, and commit it. The release procedure currently looks like this: +```shell +git commit -m 'Release v0.0.1' +git tag v0.0.1 +git push && git push --tags +``` + + +[poethepoet]: https://pypi.org/project/poethepoet/ +[versioningit]: https://pypi.org/project/versioningit/ diff --git a/README.md b/README.md index 8418313..66d2f72 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # CrateDB MCP Server [![Bluesky][badge-bluesky]][target-bluesky] +[![Release Notes][badge-release-notes]][project-release-notes] + [![Status][badge-status]][target-project] [![License][badge-license]][target-license] +[![CI][badge-ci]][project-ci] +[![Coverage][badge-coverage]][project-coverage] » [Documentation] | [Releases] @@ -43,23 +47,36 @@ our recommendation. # Install ```shell -uv pip install --upgrade git+https://github.com/crate/cratedb-mcp +uv tool install --upgrade git+https://github.com/crate/cratedb-mcp ``` +Note: We recommend using `uv tool install` to install the program "user"-wide +into your environment so you can use it across your terminal or MCP client +sessions like Claude. # Configure + +Configure the `CRATEDB_MCP_HTTP_URL` environment variable to match your CrateDB instance. +For example, when connecting to CrateDB Cloud, use a value like +`https://admin:dZ...6LqB@testdrive.eks1.eu-west-1.aws.cratedb.net:4200/`. +When connecting to CrateDB on localhost, use `http://localhost:4200/`. 
```shell -export CRATEDB_MCP_HTTP_URL=https://example.aks1.westeurope.azure.cratedb.net:4200 +export CRATEDB_MCP_HTTP_URL="http://localhost:4200/" +``` +```shell +export CRATEDB_MCP_HTTP_URL="https://example.aks1.westeurope.azure.cratedb.net:4200" ``` # Usage Start MCP server with `stdio` transport (default). ```shell -CRATEDB_MCP_TRANSPORT=stdio uv run cratedb-mcp +CRATEDB_MCP_TRANSPORT=stdio cratedb-mcp ``` Start MCP server with `sse` transport. ```shell -CRATEDB_MCP_TRANSPORT=sse uv run cratedb-mcp +CRATEDB_MCP_TRANSPORT=sse cratedb-mcp ``` +Note: If you are unable to use `uv tool install`, please use +`uv run cratedb-mcp` to acquire and run the package ephemerally. # Simple Claude configuration To use the MCP version within Claude Desktop, you can use the following configuration: @@ -79,13 +96,10 @@ To use the MCP version within Claude Desktop, you can use the following configur } ``` -You might have to change `CRATEDB_MCP_HTTP_URL` to match your CrateDB instance. For example, when connecting to CrateDB Cloud, use a value like https://admin:dZ...6LqB@testdrive.eks1.eu-west-1.aws.cratedb.net:4200/. - -NB: You can use `uv tool install` to install the program "system"-wide, -so you can use it across your terminal or Claude sessions. In this case, -omit the `uv run` prefix displayed above. +## Development +To learn how to set up a development sandbox, see the [development documentation](./DEVELOP.md). [CrateDB]: https://cratedb.com/database @@ -99,11 +113,15 @@ omit the `uv run` prefix displayed above. 
[Source code]: https://github.com/crate/cratedb-mcp [Releases]: https://github.com/surister/cratedb-mcp/releases +[badge-ci]: https://github.com/crate/cratedb-mcp/actions/workflows/tests.yml/badge.svg +[badge-coverage]: https://codecov.io/gh/crate/cratedb-mcp/branch/main/graph/badge.svg [badge-bluesky]: https://img.shields.io/badge/Bluesky-0285FF?logo=bluesky&logoColor=fff&label=Follow%20%40CrateDB -[badge-issues]: https://img.shields.io/github/issues/crate/cratedb-mcp [badge-license]: https://img.shields.io/github/license/crate/cratedb-mcp -[badge-release-notes]: https://img.shields.io/badge/Release%20Notes-v0.0.0-blue +[badge-release-notes]: https://img.shields.io/github/release/crate/cratedb-mcp?label=Release+Notes [badge-status]: https://img.shields.io/badge/status--alpha-orange +[project-ci]: https://github.com/crate/cratedb-mcp/actions/workflows/tests.yml +[project-coverage]: https://app.codecov.io/gh/crate/cratedb-mcp +[project-release-notes]: https://github.com/crate/cratedb-mcp/releases [target-bluesky]: https://bsky.app/search?q=cratedb [target-license]: https://github.com/crate/cratedb-mcp/blob/main/LICENSE [target-project]: https://github.com/crate/cratedb-mcp diff --git a/cratedb_mcp/__main__.py b/cratedb_mcp/__main__.py index 3ea7ed5..694f1d8 100644 --- a/cratedb_mcp/__main__.py +++ b/cratedb_mcp/__main__.py @@ -1,22 +1,20 @@ -import os - import httpx - from mcp.server.fastmcp import FastMCP -from .constants import Queries, DOCUMENTATION_INDEX +from .knowledge import DOCUMENTATION_INDEX, Queries +from .settings import HTTP_URL mcp = FastMCP("cratedb-mcp") def query_cratedb(query: str) -> list[dict]: - return httpx.post(f'{os.getenv("CRATEDB_MCP_HTTP_URL")}/_sql', json={'stmt': query}).json() + return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}).json() @mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that" " modify data, columns or are otherwise deemed un-safe are rejected.") def query_sql(query: 
str): - if not 'select' in query.lower(): + if 'select' not in query.lower(): raise ValueError('Only queries that have a SELECT statement are allowed.') return query_cratedb(query) @@ -29,7 +27,7 @@ def get_cratedb_documentation_index(): ' Only used to download CrateDB docs.') def fetch_cratedb_docs(link: str): """Fetches a CrateDB documentation link from GitHub raw content.""" - if not 'https://raw.githubusercontent.com/crate/crate/' in link: + if 'https://raw.githubusercontent.com/crate/crate/' not in link: raise ValueError('Only github cratedb links can be fetched.') return httpx.get(link).text diff --git a/cratedb_mcp/cli.py b/cratedb_mcp/cli.py index a0ce45b..5c70d2e 100644 --- a/cratedb_mcp/cli.py +++ b/cratedb_mcp/cli.py @@ -1,5 +1,6 @@ import logging import os +import typing as t from cratedb_mcp.__main__ import mcp @@ -11,4 +12,4 @@ def main(): if transport not in ("stdio", "sse"): raise ValueError(f"Unsupported transport: '{transport}'. Please use one of 'stdio', 'sse'.") logger.info(f"Starting CrateDB MCP server using transport '{transport}'") - mcp.run(transport=transport) + mcp.run(transport=t.cast(t.Literal["stdio", "sse"], transport)) diff --git a/cratedb_mcp/constants.py b/cratedb_mcp/knowledge.py similarity index 99% rename from cratedb_mcp/constants.py rename to cratedb_mcp/knowledge.py index 0edb8e7..f3a4e2e 100644 --- a/cratedb_mcp/constants.py +++ b/cratedb_mcp/knowledge.py @@ -1,3 +1,5 @@ +# ruff: noqa: E501 + class Queries: TABLES_METADATA = """ WITH partitions_health AS (SELECT table_name, diff --git a/cratedb_mcp/settings.py b/cratedb_mcp/settings.py new file mode 100644 index 0000000..5cff8a6 --- /dev/null +++ b/cratedb_mcp/settings.py @@ -0,0 +1,3 @@ +import os + +HTTP_URL: str = os.getenv("CRATEDB_MCP_HTTP_URL", "http://localhost:4200") diff --git a/pyproject.toml b/pyproject.toml index 742423f..9307d61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,158 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = 
[ + "setuptools>=42", # At least v42 of setuptools required. + "versioningit", +] + [project] name = "cratedb-mcp" -version = "0.0.1" description = "MCP server for CrateDB" requires-python = ">=3.10" +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dynamic = [ + "version", +] dependencies = [ - "mcp[cli]>=1.5.0" + "mcp[cli]>=1.5.0", +] +optional-dependencies.develop = [ + "mypy<1.16", + "poethepoet<1", + "pyproject-fmt<3", + "ruff<0.12", + "validate-pyproject<1", +] +optional-dependencies.test = [ + "pytest<9", + "pytest-cov<6", + "pytest-mock<4", +] + +scripts.cratedb-mcp = "cratedb_mcp.cli:main" + +[tool.ruff] +line-length = 110 + +extend-exclude = [ +] + +lint.select = [ + # Builtins + "A", + # Bugbear + "B", + # comprehensions + "C4", + # Pycodestyle + "E", + # eradicate + "ERA", + # Pyflakes + "F", + # isort + "I", + # pandas-vet + "PD", + # return + "RET", + # Bandit + "S", + # print + "T20", + "W", + # flake8-2020 + "YTT", +] + +lint.per-file-ignores."tests/*" = [ + "S101", # Allow use of `assert`. 
+] + +[tool.pytest.ini_options] +addopts = """ + -rfEXs -p pytester --strict-markers --verbosity=3 + --cov --cov-report=term-missing --cov-report=xml + """ +minversion = "2.0" +log_level = "DEBUG" +log_cli_level = "DEBUG" +log_format = "%(asctime)-15s [%(name)-36s] %(levelname)-8s: %(message)s" +pythonpath = [ + "src", +] +xfail_strict = true +markers = [ +] + +[tool.coverage.paths] +source = [ + "src/", +] + +[tool.coverage.run] +branch = false +omit = [ + "tests/*", +] + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + "# pragma: no cover", + "raise NotImplemented", +] + +[tool.mypy] +packages = [ + "cratedb_mcp", +] +check_untyped_defs = true +ignore_missing_imports = true +implicit_optional = true +install_types = true +non_interactive = true + +[tool.versioningit.vcs] +method = "git-archive" +default-tag = "v0.0.0" +describe-subst = "$Format:%(describe:tags,match=v*)$" + +# =================== +# Tasks configuration +# =================== + +[tool.poe.tasks] + +check = [ + "lint", + "test", +] + +format = [ + #{ cmd = "ruff format ." }, + # Configure Ruff not to auto-fix (remove!): + # unused imports (F401), unused variables (F841), `print` statements (T201), and commented-out code (ERA001). + #{ cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 --ignore=ERA001 ." }, + { cmd = "pyproject-fmt --keep-full-version pyproject.toml" }, +] + +lint = [ + #{ cmd = "ruff format --check ." }, + { cmd = "ruff check ." 
}, + { cmd = "validate-pyproject pyproject.toml" }, + { cmd = "mypy" }, +] + +release = [ + { cmd = "python -m build" }, + { cmd = "twine upload --skip-existing dist/*" }, ] -[project.scripts] -cratedb-mcp = "cratedb_mcp.cli:main" +test = { cmd = "pytest" } diff --git a/tests/test_knowledge.py b/tests/test_knowledge.py new file mode 100644 index 0000000..0b4018e --- /dev/null +++ b/tests/test_knowledge.py @@ -0,0 +1,18 @@ +from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries + + +def test_documentation_index(): + assert len(DOCUMENTATION_INDEX) == 3 + assert DOCUMENTATION_INDEX[1]["name"] == "scalar functions" + assert DOCUMENTATION_INDEX[2]["name"] == "optimize query 101" + + +def test_queries(): + + # Verify basic parts of the query. + assert "information_schema.tables" in Queries.TABLES_METADATA + + # Verify other critical parts of the query. + assert "sys.health" in Queries.TABLES_METADATA + assert "WITH partitions_health" in Queries.TABLES_METADATA + assert "LEFT JOIN" in Queries.TABLES_METADATA diff --git a/tests/test_mcp.py b/tests/test_mcp.py new file mode 100644 index 0000000..8ea823a --- /dev/null +++ b/tests/test_mcp.py @@ -0,0 +1,47 @@ +import pytest + +from cratedb_mcp.__main__ import ( + fetch_cratedb_docs, + get_cratedb_documentation_index, + get_health, + get_table_metadata, + query_sql, +) + + +def test_get_documentation_index(): + assert len(get_cratedb_documentation_index()) >= 3 + + +def test_fetch_docs_forbidden(): + with pytest.raises(ValueError) as ex: + fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt") + assert ex.match("Only github cratedb links can be fetched") + + +def test_fetch_docs_permitted(): + response = fetch_cratedb_docs("https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst") + assert "initcap" in response + + +def test_query_sql_forbidden(): + with pytest.raises(ValueError) as ex: + assert 
"RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id")) + assert ex.match("Only queries that have a SELECT statement are allowed") + + +def test_query_sql_permitted(): + assert query_sql("SELECT 42")["rows"] == [[42]] + + +def test_query_sql_permitted_exploit(): + # FIXME: Read-only protection must become stronger. + query_sql("INSERT INTO foobar (operation) VALUES ('select')") + + +def test_get_table_metadata(): + assert "partitions_health" in str(get_table_metadata()) + + +def test_get_health(): + assert "missing_shards" in str(get_health())