diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index c9e74b9b..e7d01afb 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -120,6 +120,16 @@ updates:
     schedule:
       interval: "daily"
 
+  - directory: "/application/open-webui"
+    package-ecosystem: "docker-compose"
+    schedule:
+      interval: "daily"
+
+  - directory: "/application/open-webui/init"
+    package-ecosystem: "docker"
+    schedule:
+      interval: "daily"
+
   - directory: "/application/ingestr"
     package-ecosystem: "pip"
     schedule:
diff --git a/.github/workflows/application-open-webui.yml b/.github/workflows/application-open-webui.yml
new file mode 100644
index 00000000..19a190c9
--- /dev/null
+++ b/.github/workflows/application-open-webui.yml
@@ -0,0 +1,56 @@
+name: "Open WebUI"
+
+on:
+  pull_request:
+    paths:
+      - ".github/workflows/application-open-webui.yml"
+      - "application/open-webui/**"
+  push:
+    branches: ["main"]
+    paths:
+      - ".github/workflows/application-open-webui.yml"
+      - "application/open-webui/**"
+
+  # Permit manual runs from the Actions tab.
+  workflow_dispatch:
+
+  # Nightly run, scheduled after the CrateDB nightly is published (disabled).
+  #schedule:
+  #  - cron: "0 3 * * *"
+
+# A newer push to the same ref cancels any run still in progress.
+concurrency:
+  cancel-in-progress: true
+  group: "${{ github.workflow }}-${{ github.ref }}"
+
+jobs:
+
+  test:
+    runs-on: "${{ matrix.os }}"
+
+    strategy:
+      fail-fast: true
+      matrix:
+        os: ["ubuntu-latest"]
+
+    name: "OS ${{ matrix.os }}"
+
+    env:
+      OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
+
+    steps:
+
+      - name: "Acquire sources"
+        uses: actions/checkout@v4
+
+      - name: "Validate application/open-webui"
+        run: |
+          # TODO: Generalize invocation into `ngr` test runner.
+
+          # Start the software stack in the background.
+          cd application/open-webui
+          docker compose up --detach
+
+          # Run the validation job.
+          # TODO: Currently does not work on GHA.
+          # docker compose run --rm test
diff --git a/application/open-webui/.env b/application/open-webui/.env
new file mode 100644
index 00000000..e3d01049
--- /dev/null
+++ b/application/open-webui/.env
@@ -0,0 +1,9 @@
+# ------------------------------------------
+# User configuration
+# ------------------------------------------
+
+# Define your OpenAI API key here if you want to make it persistent.
+# You can also use other models with Open WebUI; however, that is
+# currently out of scope for this miniature rig.
+
+# OPENAI_API_KEY=your_openai_api_key
diff --git a/application/open-webui/README.md b/application/open-webui/README.md
new file mode 100644
index 00000000..8909b6b3
--- /dev/null
+++ b/application/open-webui/README.md
@@ -0,0 +1,131 @@
+# Use CrateDB with Open WebUI
+
+## About
+
+A complete end-to-end rig comprising CrateDB, CrateDB MCPO, and Open WebUI,
+including a touch of integration tests on CI/GHA.
+
+This stack is intended solely for demonstration purposes and does **not**
+implement any security hardening. Do **not** deploy it to production.
+
+## Introduction
+
+[Open WebUI] is an extensible, feature-rich, and user-friendly self-hosted AI
+platform designed to operate entirely offline. It supports various LLM runners
+like Ollama and OpenAI-compatible APIs, with a built-in inference engine for RAG,
+making it a powerful AI deployment solution.
+
+CrateDB MCPO is an adapter wrapper around the [CrateDB MCP] server. Because
+Open WebUI uses [OpenAPI Tool Servers] to integrate external tooling and data
+sources into LLM agents and workflows, standard MCP servers need to be adapted
+to how [Open WebUI MCP Support] works.
+
+## Usage
+
+### Sources
+
+It is advised to clone the Git repository and run the demo stack from there.
+That way, it will be easy for you to receive updates.
+```shell
+git clone https://github.com/crate/cratedb-examples
+cd cratedb-examples/application/open-webui
+```
+
+### Start services
+
+Configure the API key for OpenAI within the `.env` file next to `compose.yml`
+to make it persistent for unattended service operations.
+```dotenv
+# .env
+OPENAI_API_KEY=your_openai_api_key_here
+```
+Or export it for a one-off run:
+```shell
+export OPENAI_API_KEY=your_openai_api_key_here
+```
+
+Spin up the software stack. The first run will take a while, because the
+OCI images need to be downloaded and Open WebUI needs to bootstrap itself
+before it becomes available.
+```shell
+docker compose up
+```
+
+### User interface
+
+You can access the services at these URLs.
+
+- CrateDB: http://localhost:4200/
+- Open WebUI: http://localhost:6200/
+
+Explore the APIs here.
+
+- CrateDB MCPO:
+  - Swagger: http://localhost:5200/docs
+  - OpenAPI: http://localhost:5200/openapi.json
+- Open WebUI:
+  - Swagger: http://localhost:6200/docs
+  - OpenAPI: http://localhost:6200/openapi.json
+- Jupyter:
+  - http://localhost:7200/
+
+### Configure
+
+To make the ensemble work well, you need to configure a few settings in the Open WebUI
+user interface.
+
+- Make sure to enable the "CrateDB" tool. The toggle switch is located within the
+  flyout menu on the left side of the query prompt, which can be opened using the
+  `More (+)` button.
+
+- In the "Chat Controls" flyout widget, located in the top right corner of the page,
+  - make sure to enable `Function Calling: Native`, see [OPEN-WEBUI-15939],
+  - and dial down to `Temperature: 0.0`.
+
+### Example questions
+
+Enjoy conversations with CrateDB (talk to your data) and its documentation
+(talk to your knowledge base).
+
+- Text-to-SQL: _What is the average value for sensor 1?_
+- Knowledge base: _How do I use CrateDB with SQLAlchemy?_
+
+### Stop services
+Tear down services.
+```shell
+docker compose down
+```
+Delete all volumes.
+```shell
+docker compose down --volumes
+```
+Delete individual volumes.
+```shell
+docker volume rm open-webui_open-webui
+```
+```shell
+docker volume rm open-webui_cratedb
+```
+
+### Jobs
+Invoke individual jobs defined in the Compose file.
+```shell
+export BUILDKIT_PROGRESS=plain
+docker compose run --rm setup
+docker compose run --rm test
+```
+
+## What's inside
+
+- `.env`: The dotenv file defines `OPENAI_API_KEY` for `compose.yml`.
+- `compose.yml`: The service composition file defines four main services:
+  CrateDB, CrateDB MCPO, Open WebUI, and Jupyter. Helper jobs (setup, test, ...)
+  excluded for brevity. Use it with Docker or Podman.
+- `init/`: Runtime configuration snippets.
+
+
+[CrateDB MCP]: https://cratedb.com/docs/guide/integrate/mcp/cratedb-mcp.html
+[OpenAPI Tool Servers]: https://docs.openwebui.com/openapi-servers/
+[Open WebUI]: https://docs.openwebui.com/
+[Open WebUI MCP Support]: https://docs.openwebui.com/openapi-servers/mcp/
+[OPEN-WEBUI-15939]: https://github.com/open-webui/open-webui/issues/15939#issuecomment-3108279768
diff --git a/application/open-webui/compose.yml b/application/open-webui/compose.yml
new file mode 100644
index 00000000..0d395cc0
--- /dev/null
+++ b/application/open-webui/compose.yml
@@ -0,0 +1,179 @@
+# Service composition: CrateDB with Open WebUI
+#
+# https://cratedb.com/docs/
+# https://docs.openwebui.com/getting-started/quick-start
+---
+networks:
+  llm-demo:
+    name: "llm-demo"
+    driver: "bridge"
+
+volumes:
+  cratedb:
+  open-webui:
+  jupyter:
+
+services:
+
+  # -------
+  # CrateDB
+  # -------
+  cratedb:
+    image: docker.io/crate/crate:6.0.0
+    environment:
+      CRATE_HEAP_SIZE: "2g"
+    ports:
+      - "4200:4200"
+      - "5432:5432"
+    command: [
+      "crate",
+      "-Cdiscovery.type=single-node",
+      "-Ccluster.routing.allocation.disk.threshold_enabled=false",
+    ]
+    networks:
+      - "llm-demo"
+    volumes:
+      - "cratedb:/data"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:4200"]
+      start_period: 3s
+      interval: 10s
+
+  # ------------
+  # CrateDB MCPO
+  # ------------
+  cratedb-mcpo:
+    image: ghcr.io/crate/cratedb-mcpo:0.0.7
+    environment:
+      CRATEDB_CLUSTER_URL: "http://crate:crate@cratedb:4200/"
+    ports:
+      - "5200:8000"
+    networks:
+      - "llm-demo"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8000/docs"]
+      start_period: 3s
+      interval: 10s
+    depends_on:
+      cratedb:
+        condition: "service_healthy"
+
+  # ----------
+  # Open WebUI
+  # ----------
+  open-webui:
+    image: ghcr.io/open-webui/open-webui:0.6.18
+    # https://docs.openwebui.com/getting-started/env-configuration
+    # https://docs.openwebui.com/getting-started/api-endpoints/#swagger-documentation-links
+    environment:
+      # Taken from the caller's environment or the `.env` file.
+      OPENAI_API_KEY: "${OPENAI_API_KEY}"
+      # Settings fixed by this composition.
+      ENABLE_SIGNUP: "false"
+      ENABLE_LOGIN_FORM: "false"
+      WEBUI_AUTH: "false"
+      DEFAULT_MODELS: "gpt-4.1"
+      DEFAULT_USER_ROLE: "admin"
+      ENABLE_CHANNELS: "true"
+      RESPONSE_WATERMARK: "This text is AI generated"
+      WEBUI_NAME: "CrateDB LLM Cockpit"
+      BYPASS_MODEL_ACCESS_CONTROL: "true"
+      ENABLE_OLLAMA_API: "false"
+      ENABLE_OPENAI_API: "true"
+      ENABLE_DIRECT_CONNECTIONS: "true"
+      ENV: "dev"
+      # Code execution and code interpreter, both backed by Jupyter.
+      ENABLE_CODE_INTERPRETER: "true"
+      CODE_EXECUTION_ENGINE: "jupyter"
+      CODE_EXECUTION_JUPYTER_URL: "http://jupyter:8888"
+      CODE_EXECUTION_JUPYTER_AUTH: "token"
+      CODE_EXECUTION_JUPYTER_AUTH_TOKEN: "123456"
+      CODE_EXECUTION_JUPYTER_TIMEOUT: "60"
+      CODE_INTERPRETER_ENGINE: "jupyter"
+      CODE_INTERPRETER_JUPYTER_URL: "http://jupyter:8888"
+      CODE_INTERPRETER_JUPYTER_AUTH: "token"
+      CODE_INTERPRETER_JUPYTER_AUTH_TOKEN: "123456"
+      CODE_INTERPRETER_JUPYTER_TIMEOUT: "60"
+    ports:
+      - "6200:8080"
+    networks:
+      - "llm-demo"
+    volumes:
+      - "open-webui:/app/backend/data"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8080"]
+      start_period: 3s
+      interval: 10s
+      retries: 60
+      timeout: 90s
+    depends_on:
+      cratedb-mcpo:
+        condition: "service_healthy"
+      jupyter:
+        condition: "service_healthy"
+
+  # -------
+  # Jupyter
+  # -------
+  jupyter:
+    image: quay.io/jupyter/minimal-notebook:notebook-7.4.4
+    # https://docs.openwebui.com/tutorials/jupyter/
+    environment:
+      JUPYTER_ENABLE_LAB: "yes"
+      JUPYTER_TOKEN: "123456"
+    ports:
+      - "7200:8888"
+    networks:
+      - "llm-demo"
+    volumes:
+      - "jupyter:/home/jovyan/work"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8888"]
+      start_period: 3s
+      interval: 10s
+      retries: 60
+      timeout: 90s
+
+  # -----
+  # Setup
+  # -----
+  setup:
+    build:
+      context: "init"
+    command: "bash /app/setup.sh"
+    networks:
+      - "llm-demo"
+    depends_on:
+      cratedb:
+        condition: "service_healthy"
+      cratedb-mcpo:
+        condition: "service_healthy"
+      open-webui:
+        condition: "service_healthy"
+
+  # ----
+  # Test
+  # ----
+  test:
+    build:
+      context: "init"
+    command: "bash /app/test.sh"
+    networks:
+      - "llm-demo"
+    depends_on:
+      setup:
+        condition: "service_completed_successfully"
+    deploy:
+      replicas: 0
+
+  # -------
+  # Bundler
+  # -------
+  # Block until every service above is fully available, judged by its health
+  # status, even when using `docker compose up --detach`.
+  # https://marcopeg.com/2019/docker-compose-healthcheck/
+  start-dependencies:
+    image: docker.io/dadarek/wait-for-dependencies
+    depends_on:
+      setup:
+        condition: service_completed_successfully
diff --git a/application/open-webui/init/.env b/application/open-webui/init/.env
new file mode 100644
index 00000000..a6977fcc
--- /dev/null
+++ b/application/open-webui/init/.env
@@ -0,0 +1,9 @@
+# ------------------------------------------
+# System configuration
+# ------------------------------------------
+
+# If you run `setup.sh` from within the Docker Compose
+# environment, you will not need to change these settings.
+
+CRATEDB_URL=http://cratedb:4200
+OPEN_WEBUI_URL=http://open-webui:8080
diff --git a/application/open-webui/init/Dockerfile b/application/open-webui/init/Dockerfile
new file mode 100644
index 00000000..22fbea3f
--- /dev/null
+++ b/application/open-webui/init/Dockerfile
@@ -0,0 +1,34 @@
+FROM python:3.13-slim-bookworm
+
+# Configure the operating system for non-interactive package installation.
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TERM=linux
+
+RUN set -e \
+    && apt-get update \
+    && apt-get --yes install --no-install-recommends --no-install-suggests curl jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install and configure `uv`.
+# Guidelines followed:
+# - https://hynek.me/articles/docker-uv/
+
+# Copy the `uv` and `uvx` binaries from the `ghcr.io/astral-sh/uv` image.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uvx /usr/local/bin/uvx
+
+# - Tell uv to byte-compile packages for faster application startups.
+# - Silence uv complaining about not being able to use hard links.
+# - Prevent uv from accidentally downloading isolated Python builds.
+# - Install packages into the system Python environment.
+ENV \
+  UV_COMPILE_BYTECODE=true \
+  UV_LINK_MODE=copy \
+  UV_PYTHON_DOWNLOADS=never \
+  UV_SYSTEM_PYTHON=true
+
+RUN uv pip install crash cratedb-mcp httpie
+
+RUN mkdir /app
+WORKDIR /app
+COPY .env *.sh *.json *.sql /app/
diff --git a/application/open-webui/init/init.sql b/application/open-webui/init/init.sql
new file mode 100644
index 00000000..f38cff8e
--- /dev/null
+++ b/application/open-webui/init/init.sql
@@ -0,0 +1,25 @@
+DROP TABLE IF EXISTS time_series_data;
+
+CREATE TABLE IF NOT EXISTS time_series_data (
+    "timestamp" TIMESTAMP,
+    value DOUBLE,
+    location STRING,
+    sensor_id INT
+);
+
+INSERT INTO time_series_data ("timestamp", value, location, sensor_id)
+VALUES
+    ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1),
+    ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1),
+    ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1),
+    ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2),
+    ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2),
+    ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2),
+    ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1),
+    ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1),
+    ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1),
+    ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2),
+    ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2),
+    ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2);
+
+REFRESH TABLE time_series_data;
diff --git a/application/open-webui/init/setup.sh b/application/open-webui/init/setup.sh
new file mode 100755
index 00000000..96dacf81
--- /dev/null
+++ b/application/open-webui/init/setup.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Provision CrateDB and configure Open WebUI via its HTTP API.
+
+# Runtime options: abort on errors, unset variables, and failed pipelines.
+set -euo pipefail
+
+# Uncomment for local debugging, but **never** in automated runs.
+# set -x
+
+# Load configuration (CRATEDB_URL, OPEN_WEBUI_URL) from the working directory.
+source .env
+
+# -------
+# CrateDB
+# -------
+
+# Provision database by feeding `init.sql` to `crash` (URL quoted on purpose).
+crash --hosts "${CRATEDB_URL}" < init.sql
+
+# ----------
+# Open WebUI
+# ----------
+
+# Sign in to receive JWT token.
+token=$( http --ignore-stdin POST "${OPEN_WEBUI_URL}/api/v1/auths/signin" email= password= | jq -r '.token // empty' )
+if [[ -z "${token}" ]]; then
+  echo "FATAL: Could not obtain JWT token from Open WebUI" >&2
+  exit 1
+fi
+
+# Inquire health.
+http --ignore-stdin "${OPEN_WEBUI_URL}/health"
+
+# List available tools.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/v1/tools/" "Authorization:Bearer ${token}"
+
+# Configure system prompt; request items are quoted so `[...]` is never globbed.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/v1/users/user/settings/update" "Authorization:Bearer ${token}" \
+  "ui[system]=$( cratedb-mcp show-prompt )" \
+  'ui[params][function_calling]=native' \
+  'ui[params][temperature]:=0.0' \
+  'ui[notificationEnabled]=true'
+
+# Configure CrateDB MCPO server, using `tool-servers.json` as request body.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/v1/configs/tool_servers" "Authorization:Bearer ${token}" \
+  "@tool-servers.json"
+
+# Configure chat model.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/v1/configs/models" "Authorization:Bearer ${token}" \
+  DEFAULT_MODELS="gpt-4.1" MODEL_ORDER_LIST="[]"
+
+# Configure embedding model.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/v1/retrieval/embedding/update" "Authorization:Bearer ${token}" \
+  embedding_engine="openai" embedding_model="text-embedding-3-small"
diff --git a/application/open-webui/init/test.sh b/application/open-webui/init/test.sh
new file mode 100644
index 00000000..57124386
--- /dev/null
+++ b/application/open-webui/init/test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# Test Open WebUI configuration via HTTP API.
+
+# Runtime options: abort on errors, unset variables, and failed pipelines.
+set -euo pipefail
+
+# Uncomment for local debugging, but **never** in automated runs.
+# set -x
+
+# Load configuration (OPEN_WEBUI_URL) from the working directory.
+source .env
+
+# Sign in to receive JWT token; `// empty` maps a missing token to "" not "null".
+token=$( http --ignore-stdin POST "${OPEN_WEBUI_URL}/api/v1/auths/signin" email= password= | jq -r '.token // empty' )
+if [[ -z "${token}" ]]; then
+  echo "FATAL: Could not obtain JWT token from Open WebUI" >&2
+  exit 1
+fi
+
+# Check for a canonical available model to validate that Open WebUI configuration worked.
+http --ignore-stdin "${OPEN_WEBUI_URL}/api/models" "Authorization:Bearer ${token}" refresh==true | \
+  grep -F '"id":"gpt-4.1"' >/dev/null 2>&1 || {
+    echo "ERROR: Model gpt-4.1 not available" >&2
+    exit 1
+  }
+
+echo "gpt-4.1 model found"
+echo "Ready."
diff --git a/application/open-webui/init/tool-servers.json b/application/open-webui/init/tool-servers.json
new file mode 100644
index 00000000..ff65f05e
--- /dev/null
+++ b/application/open-webui/init/tool-servers.json
@@ -0,0 +1,18 @@
+{
+  "TOOL_SERVER_CONNECTIONS": [
+    {
+      "url": "http://cratedb-mcpo:8000",
+      "path": "openapi.json",
+      "auth_type": "bearer",
+      "key": "",
+      "config": {
+        "enable": true,
+        "access_control": null
+      },
+      "info": {
+        "name": "CrateDB",
+        "description": "CrateDB Text-to-SQL and documentation inquiry."
+      }
+    }
+  ]
+}