diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2994b608a..605b57323 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,21 +1,45 @@ name: CI on: + # Run on direct pushes to protected branches only push: branches: - main - - dev + # Run CI for PRs targeting these branches pull_request: + branches: + - main + - dev workflow_dispatch: permissions: contents: read concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} + # Ensure push and PR for the same commit share one slot + group: ci-${{ github.event.pull_request.head.sha || github.sha }} + cancel-in-progress: true jobs: + syntax-check: + name: Syntax Check (compileall) + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Compile all Python under tldw_Server_API/app + shell: bash + run: | + python - <<'PY' + import compileall, sys + ok = compileall.compile_dir('tldw_Server_API/app', force=True, quiet=1) + sys.exit(0 if ok else 1) + PY lint: name: Lint & Type Check runs-on: ubuntu-latest @@ -49,7 +73,7 @@ jobs: name: Full Suite (Ubuntu / Python ${{ matrix.python }}) runs-on: ubuntu-latest timeout-minutes: 60 - needs: lint + needs: [lint, syntax-check] strategy: fail-fast: false matrix: @@ -58,8 +82,8 @@ jobs: postgres: image: postgres:18-bookworm env: - POSTGRES_USER: tldw - POSTGRES_PASSWORD: tldw + POSTGRES_USER: tldw_user + POSTGRES_PASSWORD: TestPassword123! POSTGRES_DB: tldw_content ports: - 5432/tcp @@ -78,11 +102,13 @@ jobs: # Expose Postgres service to tests POSTGRES_TEST_HOST: 127.0.0.1 POSTGRES_TEST_DB: tldw_content - POSTGRES_TEST_USER: tldw - POSTGRES_TEST_PASSWORD: tldw + POSTGRES_TEST_USER: tldw_user + POSTGRES_TEST_PASSWORD: TestPassword123! TEST_DB_HOST: 127.0.0.1 - TEST_DB_USER: tldw - TEST_DB_PASSWORD: tldw + TEST_DB_USER: tldw_user + TEST_DB_PASSWORD: TestPassword123! 
+ # Align AuthNZ_Postgres conftest default DB name with tests + TEST_DB_NAME: tldw_test TLDW_TEST_POSTGRES_REQUIRED: '1' steps: - name: Checkout @@ -108,13 +134,29 @@ jobs: with: host: 127.0.0.1 port: ${{ job.services.postgres.ports[5432] }} - user: tldw + user: tldw_user - name: Export PG env vars shell: bash run: | echo "POSTGRES_TEST_PORT=${{ job.services.postgres.ports[5432] }}" >> "$GITHUB_ENV" echo "TEST_DB_PORT=${{ job.services.postgres.ports[5432] }}" >> "$GITHUB_ENV" + # Provide a unified DSN so any test preferring TEST_DATABASE_URL uses the right DB + echo "TEST_DATABASE_URL=postgresql://tldw_user:TestPassword123!@127.0.0.1:${{ job.services.postgres.ports[5432] }}/tldw_content" >> "$GITHUB_ENV" + echo "DATABASE_URL=postgresql://tldw_user:TestPassword123!@127.0.0.1:${{ job.services.postgres.ports[5432] }}/tldw_content" >> "$GITHUB_ENV" + + - name: Ensure base DB exists + shell: bash + env: + # Use the same env vars tests read + PGPASSWORD: ${{ env.POSTGRES_TEST_PASSWORD }} + run: | + PORT="${{ job.services.postgres.ports[5432] }}" + DB_NAME="${POSTGRES_TEST_DB:-tldw_content}" + DB_USER="${POSTGRES_TEST_USER:-tldw_user}" + echo "Ensuring database '$DB_NAME' exists on port $PORT as user $DB_USER" + psql -h 127.0.0.1 -p "$PORT" -U "$DB_USER" -d postgres -tc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" | grep -q 1 || \ + psql -h 127.0.0.1 -p "$PORT" -U "$DB_USER" -d postgres -c "CREATE DATABASE ${DB_NAME}" - name: Install additional deps for PG tests run: | @@ -135,6 +177,19 @@ jobs: raise PY + - name: Verify pytest-benchmark import (Linux matrix) + shell: bash + run: | + python - <<'PY' + try: + import importlib + m = importlib.import_module('pytest_benchmark.plugin') + print('pytest-benchmark OK') + except Exception as e: + print('pytest-benchmark import failed:', e) + raise + PY + - name: Smoke start server (single-user) env: SERVER_LABEL: smoke @@ -198,6 +253,7 @@ jobs: - name: Run full test suite (Linux + PG) - exclude Jobs and E2E run: | pytest -q --maxfail=1 --disable-warnings -p pytest_cov -p pytest_asyncio.plugin -m "not jobs and not e2e" \ + --ignore=tldw_Server_API/tests/Jobs \ --cov=tldw_Server_API --cov-report=xml --cov-report=term-missing \ --junit-xml=test-results-linux-${{ matrix.python }}.xml shell: bash @@ -229,7 +285,7 @@ jobs: name: Full Suite (${{ matrix.os }} / Python 3.12) runs-on: ${{ matrix.os }} timeout-minutes: 45 - needs: lint + needs: [lint, syntax-check] strategy: fail-fast: false matrix: @@ -280,6 +336,19 @@ jobs: raise PY + - name: Verify pytest-benchmark import (OS matrix) + shell: bash + run: | + python - <<'PY' + try: + import importlib + m = importlib.import_module('pytest_benchmark.plugin') + print('pytest-benchmark OK') + except Exception as e: + print('pytest-benchmark import failed:', e) + raise + PY + - name: Smoke start server (single-user) env: SERVER_LABEL: smoke @@ -354,6 +423,7 @@ jobs: - name: Run full test suite - exclude Jobs and E2E run: | pytest -q --maxfail=1 --disable-warnings -p pytest_cov -p pytest_asyncio.plugin -m "not jobs and not e2e" \ + --ignore=tldw_Server_API/tests/Jobs \ --cov=tldw_Server_API --cov-report=xml --cov-report=term-missing \ --junit-xml=test-results-${{ matrix.os }}-3.12.xml shell: bash diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 5241d6ce9..f4554375e 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,12 +1,20 @@ name: CodeQL on: + # Run on direct pushes to protected/default branches only push: - branches: [ main, master, dev ] + 
branches: [ main, master ] + # Run on PRs targeting main/master/dev, not every branch pull_request: - branches: [ '**' ] + branches: [ main, master, dev ] schedule: - cron: '0 6 * * 1' + # Allow manual runs + workflow_dispatch: + +concurrency: + group: codeql-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true jobs: analyze: diff --git a/.github/workflows/jobs-suite.yml b/.github/workflows/jobs-suite.yml index 818ed0dfb..bbc4939d9 100644 --- a/.github/workflows/jobs-suite.yml +++ b/.github/workflows/jobs-suite.yml @@ -28,6 +28,7 @@ jobs: PYTEST_DISABLE_PLUGIN_AUTOLOAD: '1' TEST_MODE: 'true' DISABLE_HEAVY_STARTUP: '1' + RUN_JOBS: '1' steps: - name: Checkout uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 @@ -55,6 +56,14 @@ jobs: print('pytest_asyncio plugin OK') PY + - name: Verify pytest-benchmark import + run: | + python - <<'PY' + import importlib + m = importlib.import_module('pytest_benchmark.plugin') + print('pytest-benchmark plugin OK') + PY + - name: Run Jobs tests (SQLite only) run: | pytest -q --maxfail=1 --disable-warnings -p pytest_cov -p pytest_asyncio.plugin \ @@ -104,7 +113,9 @@ jobs: POSTGRES_TEST_DB: tldw_content POSTGRES_TEST_USER: tldw POSTGRES_TEST_PASSWORD: tldw + TEST_DB_NAME: tldw_test TLDW_TEST_POSTGRES_REQUIRED: '1' + RUN_JOBS: '1' steps: - name: Checkout uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 @@ -131,6 +142,18 @@ jobs: echo "POSTGRES_TEST_PORT=${{ job.services.postgres.ports[5432] }}" >> "$GITHUB_ENV" echo "TEST_DB_PORT=${{ job.services.postgres.ports[5432] }}" >> "$GITHUB_ENV" + - name: Ensure base DB exists + env: + # Use the same env vars tests read + PGPASSWORD: ${{ env.POSTGRES_TEST_PASSWORD }} + run: | + PORT="${{ job.services.postgres.ports[5432] }}" + DB_NAME="${POSTGRES_TEST_DB:-tldw_content}" + DB_USER="${POSTGRES_TEST_USER:-tldw}" + echo "Ensuring database '$DB_NAME' exists on port $PORT as user $DB_USER" + psql -h 127.0.0.1 -p "$PORT" -U "$DB_USER" -d postgres -tc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" | grep -q 1 || \ + psql -h 127.0.0.1 -p "$PORT" -U "$DB_USER" -d postgres -c "CREATE DATABASE ${DB_NAME}" + - name: Install pytest and psycopg run: | python -m pip install --upgrade pip @@ -144,6 +167,14 @@ jobs: print('pytest_asyncio plugin OK') PY + - name: Verify pytest-benchmark import + run: | + python - <<'PY' + import importlib + m = importlib.import_module('pytest_benchmark.plugin') + print('pytest-benchmark plugin OK') + PY + - name: Run Jobs tests (PostgreSQL only) run: | pytest -q --maxfail=1 --disable-warnings -p pytest_cov -p pytest_asyncio.plugin \ diff --git a/.github/workflows/sbom.yml b/.github/workflows/sbom.yml index 0bb258c53..2aa3b90af 100644 --- a/.github/workflows/sbom.yml +++ b/.github/workflows/sbom.yml @@ -21,40 +21,138 @@ jobs: - name: Checkout uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Generate Python SBOM (CycloneDX) + run: | + set -euo pipefail + # Install CycloneDX Python tooling. Newer releases expose the `cyclonedx-py` CLI, + # older ones expose `cyclonedx-bom`. We support both. 
+ python -m pip install -q cyclonedx-bom + + gen_from_requirements() { + local req="$1" + echo "Generating Python SBOM from ${req}" + + # Try modern CLI first: cyclonedx-py requirements -i + if command -v cyclonedx-py >/dev/null 2>&1; then + echo "Using cyclonedx-py" + if cyclonedx-py requirements -i "$req" -o sbom-python.cdx.json; then + return 0 + fi + echo "cyclonedx-py failed; will try fallbacks" + fi + + # Module invocation for modern CLI + if python - <<'PY' +import importlib.util, sys +sys.exit(0 if importlib.util.find_spec('cyclonedx_py') else 1) +PY + then + echo "Using python -m cyclonedx_py" + if python -m cyclonedx_py requirements -i "$req" -o sbom-python.cdx.json; then + return 0 + fi + echo "python -m cyclonedx_py failed; trying legacy CLI" + fi + + # Legacy CLI: cyclonedx-bom -r + if command -v cyclonedx-bom >/dev/null 2>&1; then + echo "Using cyclonedx-bom (legacy)" + if cyclonedx-bom -r "$req" -o sbom-python.cdx.json; then + return 0 + fi + echo "cyclonedx-bom failed; trying legacy module" + fi + + # Legacy module invocation + if python - <<'PY' +import importlib.util, sys +sys.exit(0 if importlib.util.find_spec('cyclonedx_bom') else 1) +PY + then + echo "Using python -m cyclonedx_bom (legacy)" + if python -m cyclonedx_bom -r "$req" -o sbom-python.cdx.json; then + return 0 + fi + fi + + echo "All Python SBOM generation strategies failed" >&2 + return 1 + } + + if [ -f tldw_Server_API/requirements.txt ]; then + gen_from_requirements tldw_Server_API/requirements.txt + elif [ -f requirements.txt ]; then + gen_from_requirements requirements.txt + else + echo "No requirements file found; cannot generate SBOM" >&2 + exit 1 + fi - # No registry login required for public Docker Hub images used below - name: Setup Node - uses: actions/setup-node@v6 + uses: actions/setup-node@v4 with: node-version: '20' - - name: Generate Python SBOM from pyproject (cdxgen) + - name: Generate Node SBOM (CycloneDX NPM) run: | - if [ -f pyproject.toml ]; then \ - npx @appthreat/cdxgen -t python -o sbom-python.cdx.json; \ + if [ -f package-lock.json ]; then \ + npx -y @cyclonedx/cyclonedx-npm --output-file sbom-node.cdx.json; \ + elif [ -f tldw-frontend/package-lock.json ]; then \ + (cd tldw-frontend && npx -y @cyclonedx/cyclonedx-npm --output-file ../sbom-node.cdx.json); \ + else \ + echo "No package-lock.json found; skipping Node SBOM"; \ fi - - name: Generate SBOM (CycloneDX JSON) - uses: anchore/sbom-action@8e94d75ddd33f69f691467e42275782e4bfefe84 # v0.20.9 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Resolve CycloneDX CLI digest + run: | + set -euo pipefail + ref="ghcr.io/cyclonedx/cyclonedx-cli:0.30.0" + echo "Resolving digest for ${ref}" + # Prefer buildx imagetools; fallback to manifest inspect if needed + if docker buildx imagetools inspect "$ref" >/dev/null 2>&1; then \ + digest=$(docker buildx imagetools inspect "$ref" | awk '/^Digest:/ {print $2; exit}'); \ + else \ + digest=$(docker manifest inspect "$ref" | jq -r '.manifests[0].digest' || true); \ + fi + if [ -z "${digest:-}" ] || ! 
echo "$digest" | grep -Eq '^sha256:[0-9a-f]{64}$'; then \ + echo "Failed to resolve digest for $ref"; \ + exit 1; \ + fi + echo "CDX_CLI_DIGEST=$digest" >> "$GITHUB_ENV" + echo "Resolved digest: $digest" + + - name: Merge SBOMs (CycloneDX CLI) + run: | + set -euo pipefail + if [ -f sbom-python.cdx.json ] && [ -f sbom-node.cdx.json ]; then \ + docker run --rm -v "$PWD":/work -w /work ghcr.io/cyclonedx/cyclonedx-cli@${CDX_CLI_DIGEST} \ + merge --input-files sbom-python.cdx.json sbom-node.cdx.json --output-file sbom.cdx.json; \ + elif [ -f sbom-python.cdx.json ]; then \ + cp sbom-python.cdx.json sbom.cdx.json; \ + elif [ -f sbom-node.cdx.json ]; then \ + cp sbom-node.cdx.json sbom.cdx.json; \ + else \ + echo "No SBOMs generated"; \ + exit 1; \ + fi + + - name: Upload SBOM artifact + uses: actions/upload-artifact@v4 with: - path: . - format: cyclonedx-json - output-file: sbom.cdx.json - upload-artifact: true - artifact-name: sbom-cyclonedx - - - name: Validate SBOM (CycloneDX CLI - pinned) - id: validate_cli_pinned + name: sbom-cyclonedx + path: sbom.cdx.json + + - name: Validate SBOM (CycloneDX CLI - pinned digest) if: ${{ hashFiles('sbom.cdx.json') != '' }} continue-on-error: true - uses: docker://cyclonedx/cyclonedx-cli:0.30.0 - with: - args: >- - validate --input-file sbom.cdx.json - - - name: Validate SBOM (CycloneDX CLI - pinned fallback) - if: ${{ hashFiles('sbom.cdx.json') != '' && steps.validate_cli_pinned.outcome == 'failure' }} - uses: docker://cyclonedx/cyclonedx-cli:0.29.1 - with: - args: >- + run: | + docker run --rm -v "$PWD":/work -w /work ghcr.io/cyclonedx/cyclonedx-cli@${CDX_CLI_DIGEST} \ validate --input-file sbom.cdx.json diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..121fd855e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,45 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to Some kind of Versioning + +## [Unreleased (placeholder for copy/paste)] + +### Added +- F + + + +## [0.1.4] - 2025-11-9 +### Fixed +- Numpy requirement in base install +- Default API now respected via config/not just ENV var. 
+ +### Added +- Unified requests module +- Added Resource governance module +- Moved all streaming requests to a unified pipeline (will need to revisit) +- WebUI CSP-related stuff +- Available models loaded/checked from `model_pricing.json` +- Rewrote TTS install/setup scripts (all TTS modules are likely currently broken) + + +## [0.1.3.0] - 2025-X +### Fixed +- Bugfixes +- + +## [0.1.2.0] - 2025-X +### Fixed +- Bugfixes + + +## [0.1.1.0] - 2025-X +### Features +- Version 0.1 +### Fixed +- Use of gradio + + diff --git a/Dockerfiles/Dockerfiles/Dockerfile b/Dockerfiles/Dockerfiles/Dockerfile index ec15d4940..3a2d28f72 100644 --- a/Dockerfiles/Dockerfiles/Dockerfile +++ b/Dockerfiles/Dockerfiles/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ portaudio19-dev \ python3-all-dev \ + python3-pyaudio \ ffmpeg \ && rm -rf /var/lib/apt/lists/* diff --git a/Dockerfiles/Dockerfiles/Dockerfile.Ubuntu b/Dockerfiles/Dockerfiles/Dockerfile.Ubuntu index 5549774f8..90b796ec3 100644 --- a/Dockerfiles/Dockerfiles/Dockerfile.Ubuntu +++ b/Dockerfiles/Dockerfiles/Dockerfile.Ubuntu @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ portaudio19-dev \ python3-all-dev \ + python3-pyaudio \ ffmpeg \ && rm -rf /var/lib/apt/lists/* diff --git a/Dockerfiles/Dockerfiles/Dockerfile.audio_gpu_worker b/Dockerfiles/Dockerfiles/Dockerfile.audio_gpu_worker index 3a533ec70..d346db54e 100644 --- a/Dockerfiles/Dockerfiles/Dockerfile.audio_gpu_worker +++ b/Dockerfiles/Dockerfiles/Dockerfile.audio_gpu_worker @@ -3,6 +3,10 @@ FROM python:3.11-slim # System dependencies (ffmpeg for audio conversion if needed) RUN apt-get update && apt-get install -y --no-install-recommends \ ffmpeg \ + python3-pyaudio \ + build-essential \ + portaudio19-dev \ + python3-dev \ && rm -rf /var/lib/apt/lists/* WORKDIR /app diff --git a/Dockerfiles/Dockerfiles/Dockerfile.prod b/Dockerfiles/Dockerfiles/Dockerfile.prod index 84051dad4..9fcd3b604 100644 --- a/Dockerfiles/Dockerfiles/Dockerfile.prod +++ b/Dockerfiles/Dockerfiles/Dockerfile.prod @@ -9,9 +9,12 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 # System deps: ffmpeg (media), libmagic (python-magic), curl (health/debug) +# Also install python3-pyaudio to avoid runtime failures when audio capture is enabled. RUN apt-get update && apt-get install -y --no-install-recommends \ ffmpeg \ libmagic1 \ + portaudio19-dev \ + python3-pyaudio \ ca-certificates \ curl \ && rm -rf /var/lib/apt/lists/* diff --git a/Dockerfiles/Dockerfiles/docker-compose.dev.yml b/Dockerfiles/Dockerfiles/docker-compose.dev.yml new file mode 100644 index 000000000..0e054e1f6 --- /dev/null +++ b/Dockerfiles/Dockerfiles/docker-compose.dev.yml @@ -0,0 +1,18 @@ +# docker-compose.dev.yml — Development overlay for unified streaming +# +# Usage (from repo root): +# docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/Dockerfiles/docker-compose.dev.yml up -d --build +# +# This overlay enables the unified streaming abstraction (SSE/WS) across +# pilot endpoints via STREAMS_UNIFIED=1. Keep disabled in production until +# you validate behavior and metrics in your environment. 
+ +services: + app: + environment: + # Enable unified streaming in non-prod + STREAMS_UNIFIED: ${STREAMS_UNIFIED:-1} + # Optional: prefer data-mode heartbeats behind reverse proxies/CDNs + # STREAM_HEARTBEAT_MODE: ${STREAM_HEARTBEAT_MODE:-data} + # Optional: shorter heartbeat for local dev + # STREAM_HEARTBEAT_INTERVAL_S: ${STREAM_HEARTBEAT_INTERVAL_S:-10} diff --git a/Dockerfiles/Dockerfiles/docker-compose.embeddings.yml b/Dockerfiles/Dockerfiles/docker-compose.embeddings.yml index 80791ee46..7e6bebda2 100644 --- a/Dockerfiles/Dockerfiles/docker-compose.embeddings.yml +++ b/Dockerfiles/Dockerfiles/docker-compose.embeddings.yml @@ -41,8 +41,9 @@ services: # Chunking Worker Pool chunking-workers: build: - context: . - dockerfile: tldw_Server_API/Dockerfiles/Dockerfile.worker + # Build from repo root so Dockerfile path resolves consistently + context: ../.. + dockerfile: Dockerfiles/Dockerfiles/Dockerfile.worker container_name: tldw-chunking-workers environment: - REDIS_URL=redis://redis:6379 @@ -55,9 +56,9 @@ services: redis: condition: service_healthy volumes: - - ./tldw_Server_API:/app - - ./Config_Files:/app/Config_Files - - ./Databases:/app/Databases + - ../../tldw_Server_API:/app + - ../../Config_Files:/app/Config_Files + - ../../Databases:/app/Databases command: python -m tldw_Server_API.app.core.Embeddings.start_workers --type chunking restart: unless-stopped networks: @@ -66,8 +67,8 @@ services: # Embedding Worker Pool embedding-workers: build: - context: . - dockerfile: tldw_Server_API/Dockerfiles/Dockerfile.worker + context: ../.. + dockerfile: Dockerfiles/Dockerfiles/Dockerfile.worker container_name: tldw-embedding-workers environment: - REDIS_URL=redis://redis:6379 @@ -81,10 +82,10 @@ services: redis: condition: service_healthy volumes: - - ./tldw_Server_API:/app - - ./Config_Files:/app/Config_Files - - ./Databases:/app/Databases - - ./Models:/app/Models # For model caching + - ../../tldw_Server_API:/app + - ../../Config_Files:/app/Config_Files + - ../../Databases:/app/Databases + - ../../models:/app/Models # For model caching command: python -m tldw_Server_API.app.core.Embeddings.start_workers --type embedding restart: unless-stopped networks: @@ -100,8 +101,8 @@ services: # Storage Worker Pool storage-workers: build: - context: . - dockerfile: tldw_Server_API/Dockerfiles/Dockerfile.worker + context: ../.. + dockerfile: Dockerfiles/Dockerfiles/Dockerfile.worker container_name: tldw-storage-workers environment: - REDIS_URL=redis://redis:6379 @@ -114,9 +115,9 @@ services: redis: condition: service_healthy volumes: - - ./tldw_Server_API:/app - - ./Config_Files:/app/Config_Files - - ./Databases:/app/Databases + - ../../tldw_Server_API:/app + - ../../Config_Files:/app/Config_Files + - ../../Databases:/app/Databases command: python -m tldw_Server_API.app.core.Embeddings.start_workers --type storage restart: unless-stopped networks: @@ -125,8 +126,8 @@ services: # Worker Orchestrator worker-orchestrator: build: - context: . - dockerfile: tldw_Server_API/Dockerfiles/Dockerfile.worker + context: ../.. 
+ dockerfile: Dockerfiles/Dockerfiles/Dockerfile.worker container_name: tldw-worker-orchestrator environment: - REDIS_URL=redis://redis:6379 @@ -137,9 +138,9 @@ services: redis: condition: service_healthy volumes: - - ./tldw_Server_API:/app - - ./Config_Files:/app/Config_Files - - ./Databases:/app/Databases + - ../../tldw_Server_API:/app + - ../../Config_Files:/app/Config_Files + - ../../Databases:/app/Databases command: python -m tldw_Server_API.app.core.Embeddings.worker_orchestrator restart: unless-stopped ports: @@ -152,7 +153,7 @@ services: image: prom/prometheus:latest container_name: tldw-prometheus volumes: - - ../Config_Files/prometheus.yml:/etc/prometheus/prometheus.yml + - ../../Config_Files/prometheus.yml:/etc/prometheus/prometheus.yml - prometheus-data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yml' diff --git a/Dockerfiles/Dockerfiles/docker-compose.test.yml b/Dockerfiles/Dockerfiles/docker-compose.test.yml index 6fea48623..5d8f4bc99 100644 --- a/Dockerfiles/Dockerfiles/docker-compose.test.yml +++ b/Dockerfiles/Dockerfiles/docker-compose.test.yml @@ -10,6 +10,7 @@ services: - PYTHONPATH=/app/tldw_Server_API - OPENAI_API_KEY=test-key - ANTHROPIC_API_KEY=test-key + - STREAMS_UNIFIED=1 volumes: - .:/app command: /app/test-workflow/test-workflow.sh @@ -25,6 +26,7 @@ services: - PYTHONPATH=/app/tldw_Server_API - OPENAI_API_KEY=test-key - ANTHROPIC_API_KEY=test-key + - STREAMS_UNIFIED=1 volumes: - .:/app command: bash -c "cd /app && python3.10 -m venv venv && source venv/bin/activate && pip install -e '.[dev]' && pytest -v -m unit --collect-only" @@ -40,6 +42,7 @@ services: - PYTHONPATH=/app/tldw_Server_API - OPENAI_API_KEY=test-key - ANTHROPIC_API_KEY=test-key + - STREAMS_UNIFIED=1 volumes: - .:/app command: bash -c "cd /app && python3.11 -m venv venv && source venv/bin/activate && pip install -e '.[dev]' && pytest -v -m unit --collect-only" @@ -55,6 +58,7 @@ services: - PYTHONPATH=/app/tldw_Server_API - OPENAI_API_KEY=test-key - ANTHROPIC_API_KEY=test-key + - STREAMS_UNIFIED=1 volumes: - .:/app command: bash -c "cd /app && python3.12 -m venv venv && source venv/bin/activate && pip install -e '.[dev]' && pytest -v -m unit --collect-only" diff --git a/Dockerfiles/Monitoring/docker-compose.monitoring.yml b/Dockerfiles/Monitoring/docker-compose.monitoring.yml new file mode 100644 index 000000000..95a2ab2a1 --- /dev/null +++ b/Dockerfiles/Monitoring/docker-compose.monitoring.yml @@ -0,0 +1,34 @@ +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + container_name: tldw_prometheus + extra_hosts: + - "host.docker.internal:host-gateway" # Linux host gateway mapping + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + command: + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - '9090:9090' + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: tldw_grafana + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + ports: + - '3000:3000' + depends_on: + - prometheus + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ../../Docs/Monitoring/Grafana_Dashboards:/var/lib/grafana/dashboards:ro + restart: unless-stopped + +networks: + default: + name: tldw_monitoring diff --git a/Dockerfiles/Monitoring/grafana/provisioning/dashboards/dashboards.yml b/Dockerfiles/Monitoring/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 000000000..2237a6276 --- /dev/null +++ 
b/Dockerfiles/Monitoring/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'tldw dashboards' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/Dockerfiles/Monitoring/grafana/provisioning/datasources/datasource.yml b/Dockerfiles/Monitoring/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 000000000..26c9e32f1 --- /dev/null +++ b/Dockerfiles/Monitoring/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + orgId: 1 + url: http://prometheus:9090 + isDefault: true + editable: true diff --git a/Dockerfiles/Monitoring/prometheus.yml b/Dockerfiles/Monitoring/prometheus.yml new file mode 100644 index 000000000..84ed92424 --- /dev/null +++ b/Dockerfiles/Monitoring/prometheus.yml @@ -0,0 +1,8 @@ +global: + scrape_interval: 5s + +scrape_configs: + - job_name: 'tldw_server' + metrics_path: /metrics + static_configs: + - targets: ['host.docker.internal:8000'] diff --git a/Dockerfiles/README.md b/Dockerfiles/README.md new file mode 100644 index 000000000..cf6dc18b0 --- /dev/null +++ b/Dockerfiles/README.md @@ -0,0 +1,74 @@ +# Docker Compose & Images + +This folder contains the base Compose stack for tldw_server, optional overlays, and worker/infra stacks. All commands assume you run from the repo root. + +## Base Stack + +- File: `Dockerfiles/docker-compose.yml` +- Services: `app` (FastAPI), `postgres`, `redis` +- Start (single-user, SQLite users DB): + - `export SINGLE_USER_API_KEY=$(python -c "import secrets;print(secrets.token_urlsafe(32))")` + - `docker compose -f Dockerfiles/docker-compose.yml up -d --build` +- Start (multi-user, Postgres users DB): + - `export AUTH_MODE=multi_user` + - `export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users` + - `docker compose -f Dockerfiles/docker-compose.yml up -d --build` +- Initialize AuthNZ inside the app container (first run): + - `docker compose -f Dockerfiles/docker-compose.yml exec app python -m tldw_Server_API.app.core.AuthNZ.initialize` +- Logs and status: + - `docker compose -f Dockerfiles/docker-compose.yml ps` + - `docker compose -f Dockerfiles/docker-compose.yml logs -f app` + +## Overlays & Profiles + +- Production overrides: `Dockerfiles/docker-compose.override.yml` + - `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.override.yml up -d --build` + - Sets production flags, disables API key echo, and tightens defaults. + +- Reverse proxy (Caddy): `Dockerfiles/docker-compose.proxy.yml` + - `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.proxy.yml up -d --build` + - Exposes 80/443 via Caddy; unpublish app port on host. + +- Reverse proxy (Nginx): `Dockerfiles/docker-compose.proxy-nginx.yml` + - `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.proxy-nginx.yml up -d --build` + - Mount `Samples/Nginx/nginx.conf` and your certs. + +- Postgres (basic standalone): `Dockerfiles/docker-compose.postgres.yml` + - Start a standalone Postgres you can point `DATABASE_URL` to. 
+ - Example: + - `export DATABASE_URL=postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users` + - `docker compose -f Dockerfiles/docker-compose.postgres.yml up -d` + +- Postgres + pgvector + pgbouncer (dev): `Dockerfiles/docker-compose.pg.yml` + - `docker compose -f Dockerfiles/docker-compose.pg.yml up -d` + +- Dev overlay (unified streaming pilot): `Dockerfiles/docker-compose.dev.yml` + - `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.dev.yml up -d --build` + - Sets `STREAMS_UNIFIED=1` (keep off in production until validated). + +- Embeddings workers + monitoring: `Dockerfiles/docker-compose.embeddings.yml` + - Base workers only: `docker compose -f Dockerfiles/docker-compose.embeddings.yml up -d` + - With monitoring profile (Prometheus + Grafana): + - `docker compose -f Dockerfiles/docker-compose.embeddings.yml --profile monitoring up -d` + - With debug profile (Redis Commander): + - `docker compose -f Dockerfiles/docker-compose.embeddings.yml --profile debug up -d` + - Scale workers: `docker compose -f Dockerfiles/docker-compose.embeddings.yml up -d --scale chunking-workers=3` + +## Images + +- App image: `Dockerfiles/Dockerfile.prod` (built by base compose) +- Worker image: `Dockerfiles/Dockerfile.worker` (used by embeddings compose) + +## Notes + +- Run compose commands from repo root so relative paths resolve correctly. +- For production, pair the app with a reverse proxy and set strong secrets in `.env`. +- GPU for embeddings workers: ensure the host has NVIDIA runtime configured and adjust `CUDA_VISIBLE_DEVICES` as needed in the embeddings compose. +- To avoid publishing the app port on host when using a proxy overlay, do not also map `8000:8000` in `app`. + +## Troubleshooting + +- Health checks: `app` responds on `/ready`; `postgres`/`redis` include health checks. +- If the app fails waiting for DB, verify `DATABASE_URL` and Postgres readiness. +- Initialize AuthNZ after first boot if running multi-user, or set a strong `SINGLE_USER_API_KEY` for single-user. +- View full logs: `docker compose ... logs -f` diff --git a/Dockerfiles/docker-compose.override.yml b/Dockerfiles/docker-compose.override.yml index f4db7cab6..0facf009e 100644 --- a/Dockerfiles/docker-compose.override.yml +++ b/Dockerfiles/docker-compose.override.yml @@ -21,7 +21,7 @@ services: # AuthNZ SINGLE_USER_API_KEY: ${SINGLE_USER_API_KEY:-} JWT_SECRET_KEY: ${JWT_SECRET_KEY:-} - DATABASE_URL: ${DATABASE_URL:-postgresql://tldw_user:${POSTGRES_PASSWORD:-ChangeMeStrong123!}@postgres:5432/${POSTGRES_DB:-tldw_users}} + DATABASE_URL: ${DATABASE_URL:-postgresql://tldw_user:${POSTGRES_PASSWORD:-TestPassword123!}@postgres:5432/${POSTGRES_DB:-tldw_users}} # Networking / CORS ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:-https://your.domain.com} @@ -38,7 +38,7 @@ services: environment: POSTGRES_DB: ${POSTGRES_DB:-tldw_users} POSTGRES_USER: ${POSTGRES_USER:-tldw_user} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-ChangeMeStrong123!} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-TestPassword123!} redis: restart: unless-stopped diff --git a/Dockerfiles/docker-compose.yml b/Dockerfiles/docker-compose.yml index 6d7fa860d..440ccaf53 100644 --- a/Dockerfiles/docker-compose.yml +++ b/Dockerfiles/docker-compose.yml @@ -3,8 +3,9 @@ services: app: build: - context: . - dockerfile: tldw_Server_API/Dockerfiles/Dockerfile.prod + # Build context is the repo root (one level up from this compose file) + context: .. 
+ dockerfile: Dockerfiles/Dockerfiles/Dockerfile.prod image: tldw-server:prod container_name: tldw-app ports: @@ -19,7 +20,7 @@ services: # Database URL: use Postgres in multi_user - DATABASE_URL=${DATABASE_URL:-sqlite:///./Databases/users.db} # Jobs module backend (optional): set to Postgres DSN to use the postgres service - # Example: postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users + # Example: postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users - JOBS_DB_URL=${JOBS_DB_URL:-} # Optional OpenTelemetry envs can be passed through here - UVICORN_WORKERS=${UVICORN_WORKERS:-4} @@ -43,7 +44,7 @@ services: environment: POSTGRES_DB: ${POSTGRES_DB:-tldw_users} POSTGRES_USER: ${POSTGRES_USER:-tldw_user} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-ChangeMeStrong123!} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-TestPassword123!} ports: - "5432:5432" healthcheck: @@ -80,7 +81,7 @@ volumes: # # docker compose up --build # # Multi-user (Postgres): # # export AUTH_MODE=multi_user -# # export DATABASE_URL=postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users +# # export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users # # # Optional: point Jobs to Postgres as well (uses same DB by default) -# # export JOBS_DB_URL=postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users +# # export JOBS_DB_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users # # docker compose up --build diff --git a/Docs/API-related/Chat_API_Documentation.md b/Docs/API-related/Chat_API_Documentation.md index 9bb837d69..1aa77e45f 100644 --- a/Docs/API-related/Chat_API_Documentation.md +++ b/Docs/API-related/Chat_API_Documentation.md @@ -18,7 +18,7 @@ Follows OpenAI-style chat payload with extensions. Key fields: -- `model` (string): Target model. May be prefixed as `provider/model` (e.g., `anthropic/claude-3-5-sonnet`). +- `model` (string): Target model. May be prefixed as `provider/model` (e.g., `anthropic/claude-sonnet-4.5`). - `messages` (array): Conversation turns. Supports roles `system`, `user`, `assistant`, `tool`. - User message `content` may be a string or a list of parts: text and base64 data URI `image_url`. - `stream` (bool): If true, returns Server-Sent Events (SSE) for streaming. @@ -54,7 +54,7 @@ curl -N -X POST http://127.0.0.1:8000/api/v1/chat/completions \ -H "Content-Type: application/json" \ -H "X-API-KEY: $API_KEY" \ -d '{ - "model": "anthropic/claude-3-5-sonnet", + "model": "anthropic/claude-sonnet-4.5", "messages": [{"role":"user","content":"Stream this response."}], "stream": true }' diff --git a/Docs/API-related/Evaluations_API_Reference.md b/Docs/API-related/Evaluations_API_Reference.md index 87f47036c..b2df50735 100644 --- a/Docs/API-related/Evaluations_API_Reference.md +++ b/Docs/API-related/Evaluations_API_Reference.md @@ -923,7 +923,7 @@ openai_model = gpt-4 # Anthropic anthropic_api_key = sk-ant-... -anthropic_model = claude-3-sonnet-20240229 +anthropic_model = claude-sonnet-4.5 # Google google_api_key = ... diff --git a/Docs/API-related/Sandbox_API.md b/Docs/API-related/Sandbox_API.md new file mode 100644 index 000000000..db08bdfa6 --- /dev/null +++ b/Docs/API-related/Sandbox_API.md @@ -0,0 +1,201 @@ +# Sandbox API — Quick Guide (Spec 1.0/1.1) + +This guide summarizes the Sandbox (code interpreter) API with concise examples. The API supports spec 1.0 and 1.1. Version 1.1 is backward‑compatible and adds optional interactivity and resume features. 
+ +Base URL: `/api/v1/sandbox` + +Auth: Standard tldw AuthNZ +- Single user: `X-API-KEY: ` +- Multi user (JWT): `Authorization: Bearer ` + +## Feature discovery +GET `/api/v1/sandbox/runtimes` +Response (example): +``` +{ + "runtimes": [ + { + "name": "docker", + "available": true, + "default_images": ["python:3.11-slim", "node:20-alpine"], + "max_cpu": 4.0, + "max_mem_mb": 8192, + "max_upload_mb": 64, + "max_log_bytes": 10485760, + "queue_max_length": 100, + "queue_ttl_sec": 120, + "workspace_cap_mb": 256, + "artifact_ttl_hours": 24, + "supported_spec_versions": ["1.0", "1.1"], + "interactive_supported": false, + "egress_allowlist_supported": false, + "store_mode": "memory" + } + ] +} +``` + +## Create a session +POST `/api/v1/sandbox/sessions` +Headers: `Idempotency-Key: ` (recommended) +Body (1.0): +``` +{ + "spec_version": "1.0", + "runtime": "docker", + "base_image": "python:3.11-slim", + "timeout_sec": 300 +} +``` +Response: +``` +{ "id": "", "runtime": "docker", "base_image": "python:3.11-slim", "expires_at": null, "policy_hash": "" } +``` + +## Start a run (one‑shot or session) +POST `/api/v1/sandbox/runs` +Headers: `Idempotency-Key: ` (recommended) +Body (1.0): +``` +{ + "spec_version": "1.0", + "runtime": "docker", + "base_image": "python:3.11-slim", + "command": ["python", "-c", "print('hello')"], + "timeout_sec": 60 +} +``` +Body (1.1 additions — optional): +``` +{ + "spec_version": "1.1", + "runtime": "docker", + "base_image": "python:3.11-slim", + "command": ["python", "-c", "input(); print('ok')"], + "timeout_sec": 60, + "interactive": true, + "stdin_max_bytes": 16384, + "stdin_max_frame_bytes": 2048, + "stdin_bps": 4096, + "stdin_idle_timeout_sec": 30, + "resume_from_seq": 100 +} +``` +Response (scaffold example): +``` +{ + "id": "", + "spec_version": "1.1", + "runtime": "docker", + "base_image": "python:3.11-slim", + "phase": "completed", + "exit_code": 0, + "policy_hash": "", + "log_stream_url": "ws://host/api/v1/sandbox/runs//stream?from_seq=100" +} +``` + +## Stream logs (WebSocket) +WS `/api/v1/sandbox/runs/{id}/stream` +- Optional query: `from_seq=` (1.1 resume) +- When signed URLs are enabled, include `token` and `exp` query params. +Frames: +- `{ "type": "event", "event": "start" }` +- `{ "type": "stdout"|"stderr", "encoding": "utf8"|"base64", "data": "...", "seq": 123 }` +- `{ "type": "heartbeat", "seq": 124 }` +- `{ "type": "truncated", "reason": "log_cap", "seq": 125 }` +- `{ "type": "event", "event": "end", "data": {"exit_code": 0}, "seq": 126 }` +- Interactivity (1.1): client→server stdin frames `{ "type": "stdin", "encoding": "utf8"|"base64", "data": "..." }` + +## Artifacts +- List: GET `/api/v1/sandbox/runs/{id}/artifacts` +- Download: GET `/api/v1/sandbox/runs/{id}/artifacts/{path}` + - Supports single HTTP Range only. Use `Range: bytes=start-end` or suffix `bytes=-N`. + - Multiple ranges are not supported; the server returns `416 Range Not Satisfiable` with `Content-Range: bytes */`. + - Responses include `Accept-Ranges: bytes`. A valid partial response includes `206 Partial Content` and `Content-Range: bytes -/`. 
+ +Example: +``` +# First 5 bytes +GET /api/v1/sandbox/runs//artifacts/out.txt +Range: bytes=0-4 + +HTTP/1.1 206 Partial Content +Accept-Ranges: bytes +Content-Range: bytes 0-4/10 +Content-Length: 5 + +01234 + +# Unsupported multi-range +GET /api/v1/sandbox/runs//artifacts/out.txt +Range: bytes=0-1,3-4 + +HTTP/1.1 416 Range Not Satisfiable +Content-Range: bytes */10 +``` + +## Idempotency conflicts +409, example: +``` +{ + "error": { + "code": "idempotency_conflict", + "message": "Idempotency-Key replay with different body", + "details": { "prior_id": "", "key": "", "prior_created_at": "" } + } +} +``` + +## Health +- Authenticated: GET `/api/v1/sandbox/health` (includes store timings and Redis ping) +- Public: GET `/api/v1/sandbox/health/public` (no auth) + +## Egress Policy and DNS Pinning + +Some deployments enforce an egress allowlist for sandboxed runs. The Docker runner supports a deny‑all baseline (network=none) and, when enabled, a granular host‑level allowlist using iptables on the DOCKER-USER chain. + +Utilities exposed in `tldw_Server_API.app.core.Sandbox.network_policy` help you prepare and manage rules: + +- `expand_allowlist_to_targets(raw_allowlist, resolver=..., wildcard_subdomains=("", "www", "api"))` + - Accepts a mix of CIDR (e.g., `10.0.0.0/8`), literal IPs (`8.8.8.8`), hostnames (`example.com`), wildcard prefixes (`*.example.com`), and suffix tokens (`.example.com`). + - Resolves hostnames to A records and promotes to `/32`; returns a de‑duplicated list like `['1.2.3.4/32', '10.0.0.0/8']`. + +- `pin_dns_map(raw_allowlist, resolver=...)` + - Returns a mapping `{ host -> [IPs] }` after resolution for observability/debugging. + +- `refresh_egress_rules(container_ip, raw_allowlist, label, resolver=..., wildcard_subdomains=...)` + - Best‑effort revocation + re‑apply: deletes all rules in DOCKER‑USER containing `label` and applies an updated set of `ACCEPT` rules for resolved targets, followed by a final `DROP` for the container IP. + +Examples: +``` +from tldw_Server_API.app.core.Sandbox.network_policy import ( + expand_allowlist_to_targets, pin_dns_map, refresh_egress_rules +) + +# Allowlist with CIDR, IP, wildcard and suffix tokens +raw = ["10.0.0.0/8", "8.8.8.8", "*.example.com", ".example.org"] +targets = expand_allowlist_to_targets(raw) +# e.g., ['10.0.0.0/8', '8.8.8.8/32', '93.184.216.34/32', ...] + +# Inspect pinned DNS map (for logs/metrics) +pins = pin_dns_map(raw) +# e.g., {'example.com': ['93.184.216.34', ...], 'example.org': ['203.0.113.10', ...]} + +# Apply (or refresh) rules for a given container +apply_specs = refresh_egress_rules( + container_ip="172.18.0.2", + raw_allowlist=raw, + label="tldw-run-", +) +``` + +Notes: +- Suffix tokens (like `.example.com`) behave like wildcards for a few common subdomains plus the apex (configurable). +- If `iptables-restore` is unavailable, the code falls back to iterative `iptables` commands. +- To revoke rules for a finished container, the runner labels and deletes rules by that label. + +## Notes +- Spec versions are validated against server config. Default: `["1.0","1.1"]`. +- Interactivity requires runtime and policy support; fields are ignored otherwise. +- `log_stream_url` may be unsigned; prefer Authorization headers if signed URLs are disabled. 
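## Example Client (Python Sketch)

A minimal end-to-end client sketch for the flow above. This is illustrative only: it assumes a local single-user deployment at `http://127.0.0.1:8000`, the third-party `requests` library, and a run that produced an `out.txt` artifact; adjust the base URL, API key, and artifact path for your environment.

```python
# Illustrative sandbox client sketch (assumptions: local single-user server,
# X-API-KEY auth, `requests` installed, and an `out.txt` artifact to fetch).
import uuid
import requests

BASE = "http://127.0.0.1:8000/api/v1/sandbox"
HEADERS = {"X-API-KEY": "your-api-key"}  # placeholder credential

# Start a one-shot run (spec 1.0); the Idempotency-Key makes retries safe.
run = requests.post(
    f"{BASE}/runs",
    headers={**HEADERS, "Idempotency-Key": str(uuid.uuid4())},
    json={
        "spec_version": "1.0",
        "runtime": "docker",
        "base_image": "python:3.11-slim",
        "command": ["python", "-c", "print('hello')"],
        "timeout_sec": 60,
    },
    timeout=120,
).json()
run_id = run["id"]

# List artifacts, then fetch the first five bytes of one with a single Range.
artifacts = requests.get(f"{BASE}/runs/{run_id}/artifacts", headers=HEADERS, timeout=30)
print(artifacts.json())

partial = requests.get(
    f"{BASE}/runs/{run_id}/artifacts/out.txt",   # hypothetical artifact path
    headers={**HEADERS, "Range": "bytes=0-4"},
    timeout=30,
)
print(partial.status_code, partial.headers.get("Content-Range"), partial.content)
```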
diff --git a/Docs/Audio_Streaming_Protocol.md b/Docs/Audio_Streaming_Protocol.md index 98f83209e..16f5756ad 100644 --- a/Docs/Audio_Streaming_Protocol.md +++ b/Docs/Audio_Streaming_Protocol.md @@ -9,6 +9,31 @@ WebSocket Endpoint - Unified endpoint: `/api/v1/audio/stream/transcribe` (primary; includes auth/quotas/fallback) - Core demo endpoint: `/core/parakeet/stream` (portable router; no auth/quotas) +Server-side handler (observability-enabled) +```python +from tldw_Server_API.app.core.Streaming.streams import WebSocketStream + +async def handle_audio_ws(websocket): + # Use labels to tag metrics with low-cardinality identifiers + stream = WebSocketStream( + websocket, + heartbeat_interval_s=10, + idle_timeout_s=120, + compat_error_type=True, # transitional alias for clients expecting error_type + close_on_done=True, + labels={"component": "audio", "endpoint": "audio_ws"}, + ) + await stream.start() + try: + # domain payloads are sent as-is (no event frames) + await stream.send_json({"type": "status", "state": "ready"}) + # ... process messages, emit partial/final results ... + except Exception as e: + await stream.error("internal_error", str(e)) + finally: + await stream.stop() +``` + Config Frame - Send this JSON as the first message to configure the session. All fields are optional unless noted. diff --git a/Docs/Code_Documentation/Chat_Developer_Guide.md b/Docs/Code_Documentation/Chat_Developer_Guide.md index 239eaa6e2..a1fb0ea6f 100644 --- a/Docs/Code_Documentation/Chat_Developer_Guide.md +++ b/Docs/Code_Documentation/Chat_Developer_Guide.md @@ -64,7 +64,7 @@ Related: - At app startup, `main.py` seeds the `provider_manager` from `provider_config.API_CALL_HANDLERS` to avoid drift with the endpoint mappings. Provider selection notes: -- Requests may specify models with a provider prefix (e.g., `anthropic/claude-3-opus`). The endpoint extracts the provider and model automatically. +- Requests may specify models with a provider prefix (e.g., `anthropic/claude-opus-4.1`). The endpoint extracts the provider and model automatically. - Provider fallback is available via `provider_manager`; controlled by `[Chat-Module].enable_provider_fallback` (disabled by default for stability). ### Adding a Provider (Checklist) @@ -85,7 +85,7 @@ Provider selection notes: - `logprobs/top_logprobs` relationships - Tool definitions size limits - Request size limits (`MAX_REQUEST_SIZE`), see `chat_validators.py` - - Model strings with provider prefixes like `anthropic/claude-3-opus` (provider extracted automatically) + - Model strings with provider prefixes like `anthropic/claude-opus-4.1` (provider extracted automatically) - Image inputs on user messages via `image_url` content parts (expects data URI with base64; validated/sanitized) ## Error Handling @@ -160,6 +160,65 @@ Additional endpoint behavior to note: - Non-stream responses include `tldw_conversation_id` in the JSON body for client-side state tracking. - Streaming responses send a `stream_start` event and normalized `data:` deltas; periodic heartbeats keep connections alive; a `stream_end` event is emitted on success. 
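For reference, a minimal client-side sketch that consumes this stream and prints content deltas. It is a sketch under assumptions, not the canonical client: it presumes the `requests` library, a local single-user server authenticated via `X-API-KEY`, and any provider-prefixed model; non-`data:` control lines (including `stream_start`/`stream_end` events and heartbeats) are simply skipped.

```python
# Client-side sketch: consume the streaming chat endpoint and print deltas.
# Assumptions: `requests` installed, local single-user server, X-API-KEY auth.
import json
import requests

resp = requests.post(
    "http://127.0.0.1:8000/api/v1/chat/completions",
    headers={"X-API-KEY": "your-api-key", "Accept": "text/event-stream"},
    json={
        "model": "openai/gpt-4o-mini",  # any provider-prefixed model works
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": True,
    },
    stream=True,
    timeout=300,
)

for raw in resp.iter_lines(decode_unicode=True):
    if not raw or not raw.startswith("data:"):
        continue  # skip heartbeats, comments, and event/control lines
    payload = raw[len("data:"):].strip()
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    delta = chunk.get("choices", [{}])[0].get("delta", {})
    if "content" in delta:
        print(delta["content"], end="", flush=True)
```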
+### Streaming Example (Unified SSE with Metrics Labels) + +When using the unified streaming abstraction, instantiate `SSEStream` with optional labels to tag emitted metrics (low-cardinality keys like `component` and `endpoint` are recommended): + +```python +from fastapi.responses import StreamingResponse +from tldw_Server_API.app.core.Streaming.streams import SSEStream + +async def chat_stream_endpoint(): + stream = SSEStream( + heartbeat_interval_s=10, + heartbeat_mode="data", + labels={"component": "chat", "endpoint": "chat_stream"}, + ) + + async def gen(): + # feed stream in background (e.g., provider-normalized lines or deltas) + async for line in stream.iter_sse(): + yield line + + headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"} + return StreamingResponse(gen(), media_type="text/event-stream", headers=headers) +``` + +### Provider Control Pass-through (Advanced) + +Some providers emit meaningful SSE control lines (e.g., `event: ...`, `id: ...`, `retry: ...`). By default, normalization drops these. When clients or adapters depend on them, enable pass-through per endpoint and optionally filter/rename controls: + +```python +from fastapi.responses import StreamingResponse +from tldw_Server_API.app.core.Streaming.streams import SSEStream + +def _control_filter(name: str, value: str): + # Example: rename event to a standard value; drop ids + if name.lower() == "event": + return ("event", "provider_event") + if name.lower() == "id": + return None + return (name, value) + +async def chat_stream_passthru(): + stream = SSEStream( + heartbeat_interval_s=10, + provider_control_passthru=True, + control_filter=_control_filter, + labels={"component": "chat", "endpoint": "chat_stream"}, + ) + + async def gen(): + async for line in stream.iter_sse(): + yield line + + return StreamingResponse(gen(), media_type="text/event-stream", headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + }) +``` + + ## Rate Limiting - Global SlowAPI middleware (production) provides coarse IP-based limits. diff --git a/Docs/Code_Documentation/Jobs_Module.md b/Docs/Code_Documentation/Jobs_Module.md index af6dad6bb..73c1cc4e4 100644 --- a/Docs/Code_Documentation/Jobs_Module.md +++ b/Docs/Code_Documentation/Jobs_Module.md @@ -268,17 +268,17 @@ jm.fail_job(job["id"], error="boom", retryable=True, worker_id=worker_id, lease_ - The repository ships a `docker-compose.yml` with a `postgres` service. To run Jobs on Postgres when using Compose: - Set the DSN using the `postgres` service hostname inside the Compose network: - - `export JOBS_DB_URL=postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users` + - `export JOBS_DB_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users` - Start services: - `docker compose up --build` - From your host, you can also connect via the published port: - - `export JOBS_DB_URL=postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users` + - `export JOBS_DB_URL=postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users` - The Jobs manager will auto-provision the schema on first use. 
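Before pointing the test suite at that DSN, a small preflight check can catch connectivity or readiness problems early. This is a sketch under assumptions: it presumes psycopg v3 is installed (the PG test jobs install psycopg) and that `JOBS_DB_URL` or `POSTGRES_TEST_DSN` is exported as described above.

```python
# Preflight sketch: confirm the Jobs Postgres DSN is reachable before tests.
# Assumptions: psycopg v3 installed; JOBS_DB_URL or POSTGRES_TEST_DSN exported.
import os
import sys

import psycopg

dsn = os.environ.get("JOBS_DB_URL") or os.environ.get("POSTGRES_TEST_DSN")
if not dsn:
    sys.exit("Set JOBS_DB_URL or POSTGRES_TEST_DSN before running PG Jobs tests")

try:
    with psycopg.connect(dsn, connect_timeout=5) as conn:
        version = conn.execute("SHOW server_version").fetchone()[0]
        print(f"Postgres reachable (server_version={version})")
except psycopg.OperationalError as exc:
    sys.exit(f"Postgres not reachable with the configured DSN: {exc}")
```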
### Running Postgres Jobs tests - Ensure a Postgres instance is available (e.g., via Compose above) and set one of: - - `export JOBS_DB_URL=postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users` + - `export JOBS_DB_URL=postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users` - or `export POSTGRES_TEST_DSN=postgresql://...` - Run only PG-marked Jobs tests: - `python -m pytest -m "pg_jobs" -v tldw_Server_API/tests/Jobs` diff --git a/Docs/Conventions/README_TEMPLATE.md b/Docs/Conventions/README_TEMPLATE.md new file mode 100644 index 000000000..47e91053a --- /dev/null +++ b/Docs/Conventions/README_TEMPLATE.md @@ -0,0 +1,43 @@ +# + +Note: This is a scaffold template. Replace placeholders and examples with accurate details from the module’s implementation and tests. + +## 1. Descriptive of Current Feature Set + +- Purpose: One sentence explaining what this module does and why it exists. +- Capabilities: Bullet list of current features users can rely on. +- Inputs/Outputs: Key input types, artifacts produced, and any streams. +- Related Endpoints: Link primary API routes and files (e.g., `tldw_Server_API/app/api/v1/endpoints/.py:1`). +- Related Schemas: Link Pydantic models used for requests/responses. + +## 2. Technical Details of Features + +- Architecture & Data Flow: Brief overview of components, control flow, and boundaries. +- Key Classes/Functions: Entry points and where to start reading code. +- Dependencies: Internal modules and external SDKs/services; feature flags if any. +- Data Models & DB: Tables/collections (via `DB_Management`); migrations and indices. +- Configuration: Env vars and config keys, defaults, and precedence. +- Concurrency & Performance: Async/threading, batching, caching, rate limits. +- Error Handling: Custom exceptions, retries/backoff, failure modes. +- Security: AuthNZ, permissions, input validation, safe file handling. + +## 3. Developer-Related/Relevant Information for Contributors + +- Folder Structure: Subpackages and responsibilities. +- Extension Points: How to add a provider/feature safely; registration points. +- Coding Patterns: DI conventions, logging via loguru, rate limiting patterns. +- Tests: Test locations, fixtures to reuse, how to add unit/integration tests. +- Local Dev Tips: Quick start, example invocations, dummy configs. +- Pitfalls & Gotchas: Known edge cases and performance traps. +- Roadmap/TODOs: Short list of near-term improvements. + +--- + +Example Quick Start (optional) + +```python +# Minimal example showing primary entry point +# from tldw_Server_API.app.core. import SomeClass +# svc = SomeClass(...) +# result = svc.run(...) 
+``` diff --git a/Docs/Deployment/First_Time_Production_Setup.md b/Docs/Deployment/First_Time_Production_Setup.md index eefbb83f1..dc5f3f320 100644 --- a/Docs/Deployment/First_Time_Production_Setup.md +++ b/Docs/Deployment/First_Time_Production_Setup.md @@ -52,7 +52,7 @@ cp .env.example .env # Required values (examples) export AUTH_MODE=multi_user export JWT_SECRET_KEY="$(openssl rand -base64 64)" -export DATABASE_URL="postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users" +export DATABASE_URL="postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users" # Strong single-user key if you use single_user mode instead export SINGLE_USER_API_KEY="$(python -c "import secrets;print(secrets.token_urlsafe(32))")" diff --git a/Docs/Deployment/Monitoring/Alerts/README.md b/Docs/Deployment/Monitoring/Alerts/README.md index 95bce721d..0e0a5dd02 100644 --- a/Docs/Deployment/Monitoring/Alerts/README.md +++ b/Docs/Deployment/Monitoring/Alerts/README.md @@ -28,6 +28,12 @@ Recommended PromQL (examples) - RAG reranker budget exhaustions: `sum(rate(rag_reranker_llm_budget_exhausted_total[5m]))` - RAG reranker exceptions: `sum(rate(rag_reranker_llm_exceptions_total[5m]))` +## Redis Failover Alerts + +- Unexpected Redis fallback (any): `sum(rate(infra_redis_fallback_total[5m])) > 0` + - Fire on any non-zero rate to catch silent failover to in-memory stub. + - Investigate connectivity, DNS, ACLs, or cluster health. In `RG_BACKEND=redis` with `RG_REDIS_FAIL_MODE=fail_closed`, the app now fails fast at boot if Redis is unreachable. + ## AuthNZ Security Alerts The AuthNZ scheduler now emits structured security alerts (auth failure spikes, rate-limit storms). To deliver them: diff --git a/Docs/Deployment/Monitoring/README.md b/Docs/Deployment/Monitoring/README.md index 67250e105..8fd2a00db 100644 --- a/Docs/Deployment/Monitoring/README.md +++ b/Docs/Deployment/Monitoring/README.md @@ -11,6 +11,12 @@ Dashboards (JSON): - `security-dashboard.json` - HTTP status, p95 latency, headers, quotas, uploads - `rag-reranker-dashboard.json` - RAG reranker guardrails (timeouts, exceptions, budget, docs scored) - `rag-quality-dashboard.json` - Nightly eval faithfulness/coverage trends (dataset-labeled) +- `streaming-dashboard.json` - Streaming observability (SSE/WS): latencies, idle timeouts, ping failures, SSE queue depth + - `Grafana_Streaming_Basics.json` now also includes an HTTP Client row with: + - Egress denials (5m) by reason: `http_client_egress_denials_total` + - Retries (5m) by reason: `http_client_retries_total` + - Panels are pre-wired for a Prometheus datasource UID `prometheus`. + - Persona WS series appear with labels `{component: persona, endpoint: persona_ws, transport: ws}` and show up in the WS panels (send latency, pings, idle timeouts). Exemplars - Redacted payload exemplars for debugging failed adaptive checks are written to `Databases/observability/rag_payload_exemplars.jsonl` by default. @@ -23,6 +29,16 @@ Notes - See `Metrics_Cheatsheet.md` for metrics catalog, PromQL, and provisioning. - Environment variables reference (telemetry, Prometheus/Grafana): `../../Env_Vars.md` +Tracing quick check (OTLP) +- Enable tracing exporters: + - `export ENABLE_TRACING=true` + - `export OTEL_TRACES_EXPORTER=console,otlp` + - `export OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317` + - Optional: `export OTEL_EXPORTER_OTLP_INSECURE=true` +- Run the server and perform a request that triggers outbound HTTP (e.g., RAG provider call). 
+- Verify traces in your collector/Jaeger; outbound calls use span name `http.client` with attributes `http.method`, `net.host.name`, `url.full`, and `http.status_code`. +- Providers that support `traceparent` will receive the header injected by the HTTP client. + Provisioning - Example provisioning files: `Samples/Grafana/provisioning/*` - Map this directory into `/var/lib/grafana/dashboards` to auto-load all dashboards. @@ -34,3 +50,22 @@ Nightly Quality Evaluations - Enable scheduler: `RAG_QUALITY_EVAL_ENABLED=true` (interval via `RAG_QUALITY_EVAL_INTERVAL_SEC`). - Dataset: `Docs/Deployment/Monitoring/Evals/nightly_rag_eval.jsonl` (override with `RAG_QUALITY_EVAL_DATASET`). - Metrics: `rag_eval_faithfulness_score{dataset=...}`, `rag_eval_coverage_score{dataset=...}`, `rag_eval_last_run_timestamp{dataset=...}`. + +## Reverse Proxy Heartbeats (SSE) + +When running behind reverse proxies/CDNs (NGINX, Caddy, Cloudflare), comment-based SSE heartbeats (`":"`) can be buffered and delay delivery. For more reliable flushing: + +- Prefer data-mode heartbeats in the server: + - `export STREAM_HEARTBEAT_MODE=data` + - Optionally shorten for dev/tests: `export STREAM_HEARTBEAT_INTERVAL_S=5` +- Disable proxy buffering on SSE routes: + - NGINX location example: + ```nginx + location /api/ { + proxy_buffering off; + proxy_http_version 1.1; + chunked_transfer_encoding on; + proxy_set_header Connection ""; # HTTP/2 ignores Connection; harmless in HTTP/1.1 + } + ``` +- For HTTP/2, do not rely on `Connection: keep-alive`; instead ensure buffering is off and the upstream emits periodic data heartbeats. diff --git a/Docs/Design/Browser_Extension.md b/Docs/Design/Browser_Extension.md index 9b9eebafc..5c7a371a1 100644 --- a/Docs/Design/Browser_Extension.md +++ b/Docs/Design/Browser_Extension.md @@ -1,8 +1,542 @@ -# Browser Extension +# Browser Extension — PRD (Compat v0.1) +Status: Active +Owner: Server/API + WebUI +Updated: 2025-11-03 +Purpose +- This PRD defines the product and technical contract for the tldw_server browser extension as it integrates with the current backend. It is not a greenfield extension spec; it codifies compatibility requirements, endpoints, security posture, and UX flows so the existing extension can be brought to parity with the server. -### Link Dump: -https://github.com/josStorer/chatGPTBox -https://github.com/navedmerchant/MyDeviceAI -https://github.com/Aletech-Solutions/XandAI-Extension +Summary +- Provide a light, secure capture-and-interact surface that talks to tldw_server: chat, RAG search, reading capture, media ingest (URL/process), and audio (STT/TTS). Aligns with server AuthNZ (single-user API key and multi-user JWT) and uses a background proxy for all network I/O. + +Goals +- Backend compatibility with current server APIs (Chat, RAG, Media, Reading, Audio, LLM models/providers). +- Minimal-permissions extension with background-only header injection. +- Reliable streaming (SSE) and WS STT handling in MV3 background. +- Basic UX: popup/sidepanel chat, quick capture, context-menu actions. + +Non-Goals (initial) +- Headless JS rendering for JS-heavy sites; authenticated/session scraping. +- Public/social sharing; multi-tenant cloud distribution. +- Complex workflow editing inside the extension. + +Personas +- Researcher/Analyst: search + summarize, capture links for later reading. +- Power User: model selection, quick ingest, audio utilities. +- Casual User: quick save + simple chat. + +Success Metrics +- Connection success rate to configured server; auth error rate. 
+- Chat stream completion rate and cancel latency (<200ms average). +- RAG query success; ingest success vs. validation failures. +- STT/TTS success rates; WS connection stability. + +Scope (MVP → v1) +- MVP: + - Chat: POST /api/v1/chat/completions (non-stream + stream) + - RAG: POST /api/v1/rag/search (+ /search/stream for previews) + - Reading: POST /api/v1/reading/save, GET /api/v1/reading/items + - Media: POST /api/v1/media/add; process-only via /api/v1/media/process-* + - STT: POST /api/v1/audio/transcriptions; WS /api/v1/audio/stream/transcribe + - TTS: POST /api/v1/audio/speech +- v1: + - Models/providers browser (GET /api/v1/llm/{models,models/metadata,providers}) + - Optional Notes/Prompts basic flows; output toasts for ingest/results + +Endpoint Mapping (server truth) +- Diagnostics + - GET / (root info) + - GET /api/v1/health + - GET /api/v1/health/live + - GET /api/v1/health/ready +- Chat + - POST /api/v1/chat/completions +- RAG + - POST /api/v1/rag/search + - POST /api/v1/rag/search/stream +- Items (unified list) + - GET /api/v1/items +- Media (process-only; no DB persistence) + - POST /api/v1/media/process-videos + - POST /api/v1/media/process-audios + - POST /api/v1/media/process-pdfs + - POST /api/v1/media/process-ebooks + - POST /api/v1/media/process-documents + - POST /api/v1/media/process-web-scraping +- Media (persist) + - POST /api/v1/media/add +- Reading + - POST /api/v1/reading/save + - GET /api/v1/reading/items + - PATCH /api/v1/reading/items/{item_id} + - Highlights (v1 optional): + - POST /api/v1/reading/items/{item_id}/highlight + - GET /api/v1/reading/items/{item_id}/highlights + - PATCH /api/v1/reading/highlights/{highlight_id} + - DELETE /api/v1/reading/highlights/{highlight_id} +- Notes (optional v1 scope) + - Notes core + - POST /api/v1/notes/ (create) + - GET /api/v1/notes/ (list; limit/offset) + - GET /api/v1/notes/{note_id} (get) + - PATCH /api/v1/notes/{note_id} (update; requires header expected-version) + - PUT /api/v1/notes/{note_id} (update variant; requires header expected-version) + - DELETE /api/v1/notes/{note_id} (soft delete; requires header expected-version) + - GET /api/v1/notes/search/ (search?query=...) 
+ - Keywords and links + - POST /api/v1/notes/keywords/ (create keyword) + - GET /api/v1/notes/keywords/ (list keywords) + - GET /api/v1/notes/keywords/{keyword_id} (get keyword) + - GET /api/v1/notes/keywords/text/{text} (lookup by text) + - GET /api/v1/notes/keywords/search/ (search keywords) + - POST /api/v1/notes/{note_id}/keywords/{keyword_id} (link) + - DELETE /api/v1/notes/{note_id}/keywords/{keyword_id} (unlink) + - GET /api/v1/notes/{note_id}/keywords/ (list keywords on note) + - GET /api/v1/notes/keywords/{keyword_id}/notes/ (list notes for keyword) +- Prompts (optional v1 scope) + - Core + - GET /api/v1/prompts (list) + - POST /api/v1/prompts (create) + - GET /api/v1/prompts/{prompt_id} (get) + - PUT /api/v1/prompts/{prompt_id} (update) + - DELETE /api/v1/prompts/{prompt_id} (delete) + - POST /api/v1/prompts/search (search) + - GET /api/v1/prompts/export (export) + - Keywords + - POST /api/v1/prompts/keywords/ (add keyword) + - GET /api/v1/prompts/keywords/ (list keywords) + - DELETE /api/v1/prompts/keywords/{keyword_text} (delete keyword) +- Audio + - POST /api/v1/audio/transcriptions + - WS /api/v1/audio/stream/transcribe (token query param) + - POST /api/v1/audio/speech + - GET /api/v1/audio/voices/catalog (voice listing) +- Flashcards (optional v1 scope) + - Decks + - POST /api/v1/flashcards/decks (create deck) + - GET /api/v1/flashcards/decks (list decks; limit/offset) + - Cards + - POST /api/v1/flashcards (create card) + - POST /api/v1/flashcards/bulk (bulk create) + - GET /api/v1/flashcards (list; deck_id/tag/q/due_status filters) + - GET /api/v1/flashcards/id/{uuid} (get by uuid) + - PATCH /api/v1/flashcards/{uuid} (update; expected_version in body) + - DELETE /api/v1/flashcards/{uuid} (delete; expected_version query) + - PUT /api/v1/flashcards/{uuid}/tags (replace tags) + - GET /api/v1/flashcards/{uuid}/tags (list tags) + - Import/Export/Review + - POST /api/v1/flashcards/import (TSV/CSV import; admin caps opt) + - GET /api/v1/flashcards/export (CSV or APKG; deck/tag filters) + - POST /api/v1/flashcards/review (spaced-rep review submission) +- LLM Discovery + - GET /api/v1/llm/models + - GET /api/v1/llm/models/metadata + - GET /api/v1/llm/providers +- Chats (resource model; optional v1 scope) + - /api/v1/chats/* (create/list/get/update/delete sessions; messages CRUD; complete/stream where available) + +Watchlists (v1 optional) +- Sources + - POST /api/v1/watchlists/sources (create) + - GET /api/v1/watchlists/sources (list) + - GET /api/v1/watchlists/sources/export (export OPML) + - POST /api/v1/watchlists/sources/import (import OPML) + - GET /api/v1/watchlists/sources/{id} (get) + - PATCH/DELETE /api/v1/watchlists/sources/{id} (update/delete) +- Tags & Groups + - GET /api/v1/watchlists/tags (list tags) + - POST /api/v1/watchlists/groups (create group) + - GET /api/v1/watchlists/groups (list groups) + - PATCH/DELETE /api/v1/watchlists/groups/{id} (update/delete) +- Jobs + - POST /api/v1/watchlists/jobs (create) + - GET /api/v1/watchlists/jobs (list) + - GET /api/v1/watchlists/jobs/{id} (get) + - PATCH/DELETE /api/v1/watchlists/jobs/{id} (update/delete) + - POST /api/v1/watchlists/jobs/{id}/filters:add (append filters) + - PATCH /api/v1/watchlists/jobs/{id}/filters (replace filters) + - POST /api/v1/watchlists/jobs/{id}/preview (dry-run preview) + - POST /api/v1/watchlists/jobs/{id}/run (trigger run) +- Runs + - GET /api/v1/watchlists/jobs/{id}/runs (list by job) + - GET /api/v1/watchlists/runs (list all) + - GET /api/v1/watchlists/runs/{run_id} (get) + - GET 
/api/v1/watchlists/runs/{run_id}/details (stats + logs) + - GET /api/v1/watchlists/runs/{run_id}/tallies.csv (filter tallies) +- Items & Outputs + - GET /api/v1/watchlists/items (list scraped items; filters) + - GET /api/v1/watchlists/items/{item_id} (get) + - PATCH /api/v1/watchlists/items/{item_id} (update flags) + - POST /api/v1/watchlists/outputs (render output) + - GET /api/v1/watchlists/outputs (list outputs) + - GET /api/v1/watchlists/outputs/{id} (get output metadata) + - GET /api/v1/watchlists/outputs/{id}/download (download) +- Templates + - GET /api/v1/watchlists/templates (list) + - GET /api/v1/watchlists/templates/{name} (get) + - POST /api/v1/watchlists/templates (create/update) + - DELETE /api/v1/watchlists/templates/{name} (delete) + +Schema Notes +- Notes optimistic concurrency + - Update: `PATCH /api/v1/notes/{id}` or `PUT /api/v1/notes/{id}` requires header `expected-version: `. + - Delete: `DELETE /api/v1/notes/{id}` requires header `expected-version: `. + - On version mismatch: returns 409 conflict with details; clients should reload and retry. +- Flashcards import limits + - Environment caps: `FLASHCARDS_IMPORT_MAX_LINES` (default 10000), `FLASHCARDS_IMPORT_MAX_LINE_LENGTH` (default 32768 bytes), `FLASHCARDS_IMPORT_MAX_FIELD_LENGTH` (default 8192 bytes). + - Optional query overrides (admin only): `max_lines`, `max_line_length`, `max_field_length` can lower (not raise) env caps. + - Formats: TSV/CSV (default tab delimiter). Fields include Deck, Front, Back, Notes, Extra, ModelType (basic|basic_reverse|cloze), Reverse (bool), Tags (comma/semicolon separated). + +Example Requests +- Chat (streaming) + - Request: `POST /api/v1/chat/completions` with JSON body including `stream: true`. + - Example body: + `{ "model": "openai/gpt-4o-mini", "stream": true, "messages": [{"role":"user","content":"Summarize https://example.com"}] }` + - Headers: `Accept: text/event-stream` for SSE; server emits NDJSON/SSE lines ending with `[DONE]`. + - Expected response (stream lines): + - `data: {"id":"...","object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant","content":"Hello"}}]}` + - `data: {"choices":[{"delta":{"content":" world"}}]}` + - `data: [DONE]` +- RAG (streaming) + - Request: `POST /api/v1/rag/search/stream` + - Body minimal: `{ "query": "impact of CRISPR on gene therapy", "enable_generation": true, "top_k": 5 }` + - Stream events: `delta` (answer tokens), optional `claims_overlay`, and final summary. Content-type `application/x-ndjson` or SSE with `data:` lines. + - Expected response (ndjson lines): + - `{ "event": "delta", "data": { "content": "Genome editing ..." } }` + - `{ "event": "claims_overlay", "data": { "citations": [{"url":"...","span":[12,34]}] } }` + - `{ "event": "done" }` +- Media add (persist) + - `POST /api/v1/media/add` with JSON `{ "url": "https://example.com/article" }` + - Expected response (shape): + - `{ "results": [ { "status": "Success", "input_ref": "https://...", "media_type": "site", "db_id": 456, "message": "Media added to database.", "summary": "..." } ] }` +- Media process (no DB) + - JSON URL: `POST /api/v1/media/process-pdfs` with `{ "urls": ["https://host/file.pdf"] }` + - File upload: multipart to `/api/v1/media/process-pdfs` with `files=@/path/file.pdf`. + - Expected response (shape): + - `{ "processed_count": 1, "errors_count": 0, "errors": [], "results": [ { "status": "Success", "input_ref": "https://.../file.pdf", "media_type": "pdf", "content": "...", "chunks": [ ... 
] } ] }` +- STT (multipart) + - `POST /api/v1/audio/transcriptions` + - Fields: `file=@/path/audio.wav`, `model=whisper-1`, optional `language=en`, `response_format=json`. + - Example cURL: `curl -X POST "$BASE/api/v1/audio/transcriptions" -H "Authorization: Bearer TOKEN" -F "file=@/abs/audio.wav" -F "model=whisper-1" -F "language=en"` + - Expected response (json): + - `{ "text": "hello world", "language": "en", "segments": [ {"start":0.0,"end":0.8,"text":"hello"}, {"start":0.8,"end":1.5,"text":"world"} ] }` + +- Reading — save & list + - Save current tab: `POST /api/v1/reading/save` + - Body: + `{ + "url": "https://example.com/ai/rag-intro", + "title": "RAG Intro", + "tags": ["ai","rag"], + "status": "saved", + "favorite": false + }` + - Expected response (ReadingItem): + `{ + "id": 1456, + "media_id": 8123, + "title": "RAG Intro", + "url": "https://example.com/ai/rag-intro", + "domain": "example.com", + "summary": null, + "published_at": null, + "status": "saved", + "favorite": false, + "tags": ["ai","rag"], + "created_at": "2025-10-19T08:00:10Z", + "updated_at": "2025-10-19T08:00:10Z" + }` + - List items: `GET /api/v1/reading/items?status=saved&tags=ai&page=1&size=20` + - Expected response (ReadingItemsListResponse): + `{ + "items": [ { "id": 1456, "title": "RAG Intro", "url": "https://example.com/ai/rag-intro", "status": "saved", "favorite": false, "tags": ["ai","rag"], "created_at": "..." } ], + "total": 1, + "page": 1, + "size": 20 + }` + + - Update item: `PATCH /api/v1/reading/items/{item_id}` + - Body: `{ "status": "reading", "favorite": true, "tags": ["ai","rag","priority"] }` + - Expected response (ReadingItem): + `{ "id": 1456, "title": "RAG Intro", "status": "reading", "favorite": true, "tags": ["ai","rag","priority"], "updated_at": "2025-10-19T09:15:00Z" }` + + - List filters (query param variants): + - Multi-filter: `GET /api/v1/reading/items?status=saved&status=reading&tags=ai&tags=ml&favorite=true&q=vector%20search&domain=example.com&page=2&size=50` + - Text search only: `GET /api/v1/reading/items?q=rag&page=1&size=10` + - Tag filter only: `GET /api/v1/reading/items?tags=ai` + - Notes: + - Repeat `status` and `tags` keys to pass multiple values (FastAPI parses as list). + - `favorite` accepts `true|false`. + - `status` allowed values: `saved|reading|read|archived`. 
+ - cURL examples: + - Multi-filter: + `curl -sS -H "Authorization: Bearer $TOKEN" "$BASE/api/v1/reading/items?status=saved&status=reading&tags=ai&tags=ml&favorite=true&q=vector%20search&domain=example.com&page=2&size=50"` + - Text search only: + `curl -sS -H "Authorization: Bearer $TOKEN" "$BASE/api/v1/reading/items?q=rag&page=1&size=10"` + - Tag filter only: + `curl -sS -H "Authorization: Bearer $TOKEN" "$BASE/api/v1/reading/items?tags=ai"` + + - Minimal PATCH examples (single-field updates): + - Toggle favorite: `PATCH /api/v1/reading/items/{id}` body `{ "favorite": true }` + - Update tags only: `PATCH /api/v1/reading/items/{id}` body `{ "tags": ["ai","priority"] }` + - Update status only: `PATCH /api/v1/reading/items/{id}` body `{ "status": "read" }` + - cURL (PATCH): + - Toggle favorite: + `curl -sS -X PATCH "$BASE/api/v1/reading/items/1456" -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"favorite": true}'` + - Update tags: + `curl -sS -X PATCH "$BASE/api/v1/reading/items/1456" -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"tags": ["ai","priority"]}'` + - Update status: + `curl -sS -X PATCH "$BASE/api/v1/reading/items/1456" -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"status": "read"}'` + +- Reading — highlights + - Create highlight: `POST /api/v1/reading/items/{item_id}/highlight` + - Example body: + `{ + "item_id": 456, + "quote": "The mitochondrion is the powerhouse of the cell.", + "start_offset": 128, + "end_offset": 178, + "color": "yellow", + "note": "Key definition", + "anchor_strategy": "fuzzy_quote" + }` + - Expected response (Highlight): + `{ "id": 1001, "item_id": 456, "quote": "The mitochondrion is the powerhouse of the cell.", "start_offset":128, "end_offset":178, "color":"yellow", "note":"Key definition", "created_at":"2025-10-19T08:00:10Z", "anchor_strategy":"fuzzy_quote", "content_hash_ref": "sha256:...", "context_before": "... power...", "context_after": "... cell ...", "state": "active" }` + - List highlights for item: `GET /api/v1/reading/items/{item_id}/highlights` + - Expected response (array of Highlight): + `[{ "id": 1001, "item_id": 456, "quote": "...", "color":"yellow", "note":"Key definition", "created_at":"2025-10-19T08:00:10Z", "anchor_strategy":"fuzzy_quote", "state":"active" }]` + - Update highlight: `PATCH /api/v1/reading/highlights/{highlight_id}` + - Body: `{ "note": "Refined takeaway", "color": "green", "state": "active" }` + - Expected response: updated Highlight object + - Delete highlight: `DELETE /api/v1/reading/highlights/{highlight_id}` → `{ "success": true }` + +- Notes — keyword link/unlink + - Precondition: a note exists (`note_id` is a UUID string) and a keyword exists (`keyword_id` is an integer). Create keyword with `POST /api/v1/notes/keywords/` body `{ "keyword": "biology" }` if needed. + - Link keyword to note: `POST /api/v1/notes/{note_id}/keywords/{keyword_id}` + - Expected response: `{ "success": true, "message": "Note linked to keyword successfully." 
}` + - List keywords on note: `GET /api/v1/notes/{note_id}/keywords/` + - Expected response: + `{ "note_id": "a3f0...", "keywords": [ { "id": 17, "keyword": "biology", "created_at": "2025-10-18T07:01:02Z", "last_modified": "2025-10-18T07:01:02Z", "version": 1, "client_id": "api_client", "deleted": false } ] }` + - List notes for a keyword: `GET /api/v1/notes/keywords/{keyword_id}/notes/?limit=50&offset=0` + - Expected response (shape): `{ "keyword_id": 17, "notes": [ { "id": "a3f0...", "title": "Mitochondria", "version": 3, "deleted": false, "keywords": [ {"id":17, "keyword": "biology", ...} ] } ] }` + - Unlink keyword from note: `DELETE /api/v1/notes/{note_id}/keywords/{keyword_id}` + - Expected response: `{ "success": true, "message": "Note unlinked from keyword successfully." }` + - Errors: `404 { "detail": "Note with ID '...' not found." }`, `404 { "detail": "Keyword with ID '...' not found." }` + +- Prompts — search + - Request: `POST /api/v1/prompts/search?search_query=embedding&search_fields=name&search_fields=details&page=1&results_per_page=10` + - Expected response (PromptSearchResponse): + `{ + "items": [ + { + "id": 12, + "uuid": "c9d3...", + "name": "Dense Retrieval Prompt", + "author": "alice", + "details": "Guidelines for embedding-based retrieval...", + "system_prompt": "You are a helpful...", + "user_prompt": "Given the query ...", + "keywords": ["retrieval","embedding"], + "last_modified": "2025-10-18T10:00:00Z", + "version": 4, + "deleted": false, + "relevance_score": 0.91 + } + ], + "total_matches": 3, + "page": 1, + "per_page": 10 + }` + +- Prompts — export + - Request (CSV): `GET /api/v1/prompts/export?export_format=csv&filter_keywords=retrieval&include_system=true&include_user=true&include_details=false&include_author=true&include_associated_keywords=true` + - Request (Markdown): `GET /api/v1/prompts/export?export_format=markdown&markdown_template_name=Basic%20Template` + - Expected response (ExportResponse): + `{ + "message": "Export successful (2 prompts)", + "file_content_b64": "UE5HLE5hbWUsQXV0aG9yLk4uLi4=" + }` + +- Prompts — keywords export (CSV) + - Request: `GET /api/v1/prompts/keywords/export-csv` + - Expected response (ExportResponse): + `{ + "message": "Successfully exported 12 active prompt keywords", + "file_content_b64": "a2V5d29yZCxwcm9tcHRzX2NvdW50XG5SQUcsMTA..." + }` + +- Watchlists — generate output + - Request: `POST /api/v1/watchlists/outputs` + - Example body: + `{ + "run_id": 123, + "item_ids": [1001, 1002, 1007], + "title": "Daily Tech Briefing", + "type": "briefing_markdown", + "format": "md", + "template_name": "daily_md", + "temporary": true, + "deliveries": { + "email": { + "enabled": true, + "recipients": ["me@example.com"], + "attach_file": true, + "body_format": "auto" + }, + "chatbook": { + "enabled": true, + "title": "Tech Briefing", + "description": "Auto-generated from watchlist run 123" + } + } + }` + - Notes: omit `item_ids` to include all ingested items for the run. + - Expected response (WatchlistOutput): + - `{ "id": 9001, "run_id": 123, "job_id": 77, "type": "briefing_markdown", "format": "md", "title": "Daily Tech Briefing", "content": "# Daily Tech...", "metadata": { "item_count": 3, "template_name": "daily_md" }, "version": 2, "expires_at": "2025-10-20T08:00:00Z", "created_at": "2025-10-19T08:00:10Z" }` + +- Watchlists — list and download outputs + - List: `GET /api/v1/watchlists/outputs?run_id=123&page=1&size=50` + - Get metadata: `GET /api/v1/watchlists/outputs/{output_id}` (returns `format`, `title`, `expires_at`, etc.) 
+ - Download: `GET /api/v1/watchlists/outputs/{output_id}/download` + - Content-Disposition filename uses title and `.{md|html}` based on `format`. + - Expected list response: + - `{ "items": [ { "id": 9001, "run_id": 123, "format": "md", "title": "Daily Tech Briefing", "expired": false, "created_at": "..." } ], "total": 1 }` + +- Flashcards — import TSV/CSV + - Request: `POST /api/v1/flashcards/import` + - Body (JSON): + `{ + "delimiter": "\t", + "has_header": true, + "content": "Deck\tFront\tBack\tTags\tNotes\nDefault\tWhat is RAG?\tRetrieval-Augmented Generation\tAI;RAG\tcore concept\nDefault\tCloze example {{c1::mask}}\t\tcloze;example\t" + }` + - Response: `{ "imported": N, "items": [{"uuid":"...","deck_id":1}, ...], "errors": [...] }` + - Limits: see “Flashcards import limits” in Schema Notes. + - Sample error entries in `errors`: + - `{ "line": null, "error": "Maximum import line limit reached (10000)" }` + - `{ "index": 3, "error": "Field too long: Front (> 8192 bytes)" }` + - `{ "index": 7, "error": "Invalid cloze: Front must contain one or more {{cN::...}} patterns" }` + +- Flashcards — APKG export + - Request (CSV): `GET /api/v1/flashcards/export?deck_id=1&format=csv&include_header=true&delimiter=%09` + - Request (APKG): `GET /api/v1/flashcards/export?deck_id=1&format=apkg` + - Example cURL: `curl -L "$BASE/api/v1/flashcards/export?deck_id=1&format=apkg" -H "Authorization: Bearer $TOKEN" -o deck.apkg` + - Expected response (APKG): + - Binary stream; headers include `Content-Type: application/octet-stream` and `Content-Disposition: attachment; filename=".apkg"`. + +Sample Error Responses +- Watchlists outputs (POST /watchlists/outputs) + - `404 { "detail": "run_not_found" }` + - `404 { "detail": "job_not_found" }` + - `400 { "detail": "items_must_belong_to_run" }` + - `400 { "detail": "no_items_available" }` + - `400 { "detail": "invalid_template_name" }` + - `404 { "detail": "template_not_found" }` + - `400 { "detail": "invalid_format" }` +- Notes update/delete without correct version header + - `409 { "detail": "version_conflict" }` + +- Watchlists — create job + - Request: `POST /api/v1/watchlists/jobs` + - Example body: + `{ + "name": "Tech Daily", + "description": "Top tech headlines", + "scope": {"sources": [1,2], "groups": [10], "tags": ["ai","ml"]}, + "schedule_expr": "0 8 * * *", + "timezone": "UTC+8", + "active": true, + "max_concurrency": 4, + "per_host_delay_ms": 1500, + "output_prefs": {"template": "daily_md", "retention_days": 7}, + "job_filters": { + "filters": [ + {"type": "keyword", "action": "include", "value": {"terms": ["AI","LLM"], "scope": "title"}, "priority": 1}, + {"type": "regex", "action": "exclude", "value": {"pattern": "(?i)rumor|sponsored"} } + ], + "require_include": true + } + }` + +- Watchlists — preview candidates (no ingest) + - Request: `POST /api/v1/watchlists/jobs/{job_id}/preview?limit=20&per_source=10` + - Example response (shape): + `{ + "items": [ + {"source_id": 1, "source_type": "rss", "url": "https://...", "title": "...", "summary": "...", "decision": "ingest", "matched_action": "include"}, + {"source_id": 2, "source_type": "site", "url": "https://...", "title": "...", "summary": "...", "decision": "filtered", "matched_action": "exclude", "matched_filter_key": "regex:rumor"} + ], + "total": 25, + "ingestable": 12, + "filtered": 13 + }` + +- Notes — optimistic concurrency (error) + - Update requires header `expected-version: `; stale version triggers 409. 
+ - Example request: `PATCH /api/v1/notes/{id}` with body `{ "content": "New text" }` and header `expected-version: 3`. + - Example 409 response: + `{ "detail": "version_conflict" }` + - Clients should re-fetch the note, read the current `version`, and retry with the latest value. + + +AuthNZ & Headers +- Modes: single_user (X-API-KEY) and multi_user (Authorization: Bearer ) +- Background-only header injection; never expose tokens to content scripts. +- WS STT: token passed as query param (?token=...) as supported by server. + +Architecture +- MV3 background service worker owns all network I/O (fetch/SSE) and WS STT. +- Content scripts do not call server directly; they message background. +- Streaming: background uses fetch + ReadableStream to parse SSE; forwards frames to UI via ports. +- Drift guard: On startup, background optionally fetches /openapi.json and logs missing required paths (advisory). + +Permissions & CSP (least privilege) +- Chromium: use optional_host_permissions for the configured server origin; do not request broad host globs by default. +- Firefox: minimize host wildcards; no webRequest/webRequestBlocking unless absolutely required. +- Remove unused permissions (e.g., declarativeNetRequest) to ease store review. + +Security & Privacy +- Token storage policy: access tokens in background memory or session storage; refresh tokens optionally persisted in local storage; never store or expose tokens in content scripts; never log tokens. +- No telemetry; local-first. +- Sanitize and re-set auth headers in background before each fetch. + +SSE & WS Behavior +- Headers: Accept: text/event-stream; Cache-Control: no-cache; Connection: keep-alive. +- Idle timeout: default ≥45s; reset on any event/data; abort on idle. +- Cancel: AbortController used to cancel long streams quickly. +- WS STT: binary frames; handle connection errors; fall back to file-based STT when blocked. + +UX Flows (high level) +- Popup/Sidepanel + - Tabs: Chat, RAG, Reading (Save Current Tab), Ingest (Process-only), Audio (STT/TTS) + - Model/provider picker (optional) +- Context Menu + - “Send to tldw_server” → POST /api/v1/media/add { url } + - “Process page (no save)” → POST /api/v1/media/process-* +- Options Page + - Server URL, auth mode, credentials; permissions grant to server origin + - Stream idle timeout; connect tester; show OpenAPI drift warnings + +Error Handling & Observability +- 401 refresh (multi-user) single-flight retry; show actionable messages on 429/402 with backoff. +- Surface friendly errors for size/type validation and unsupported URLs. +- Optional dev toggle for stream debug logging. + +Testing Strategy +- Unit: SSE parser, header injection, request builders, URL→process-* classifier. +- Integration: chat stream with cancel; rag search; reading save; media add/process; STT/TTS. +- Manual: service worker suspend/resume, optional host permission grant/revoke. + +Rollout & Compatibility +- Chrome MV3 first; Firefox MV2 compatibility tracked; Safari after. +- Require server exposing endpoints listed above; use drift guard to warn on mismatches. + +Risks & Mitigations +- MV3 worker suspend: hold streams via long-lived ports; use idle timeout resets. +- Anti-scraping limits: rely on server throttling and robots compliance. +- Content variability: prefer URL submission to server; avoid raw DOM capture. 
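+
+Chat SSE Consumption Sketch (illustrative)
+- The sketch below walks the chat streaming contract from Example Requests and SSE & WS Behavior: `data:` frames, a single `[DONE]` sentinel, an idle timeout, and abort on cancel. It is written in Python for brevity; the extension itself implements the equivalent loop in the MV3 background worker with fetch + ReadableStream. `BASE`, `TOKEN`, and the model name are placeholders, not server defaults.
+
+```python
+# Illustrative client loop for POST /api/v1/chat/completions with stream=true.
+# BASE, TOKEN, and the model name are placeholders (assumptions), not defaults.
+import json
+import requests
+
+BASE = "http://localhost:8000"
+TOKEN = "YOUR_TOKEN_OR_API_KEY"
+
+def stream_chat(prompt: str, idle_timeout: float = 45.0):
+    body = {
+        "model": "openai/gpt-4o-mini",
+        "stream": True,
+        "messages": [{"role": "user", "content": prompt}],
+    }
+    headers = {
+        "Authorization": f"Bearer {TOKEN}",
+        "Accept": "text/event-stream",
+        "Cache-Control": "no-cache",
+    }
+    with requests.post(f"{BASE}/api/v1/chat/completions", json=body,
+                       headers=headers, stream=True, timeout=idle_timeout) as resp:
+        resp.raise_for_status()
+        for raw in resp.iter_lines(decode_unicode=True):
+            if not raw or not raw.startswith("data:"):
+                continue  # skip heartbeats/comments and blank keep-alive lines
+            payload = raw[len("data:"):].strip()
+            if payload == "[DONE]":
+                break  # single terminal sentinel ends the stream
+            delta = json.loads(payload)["choices"][0].get("delta", {})
+            if "content" in delta:
+                yield delta["content"]
+```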
+ +References +- Server APIs: see tldw_Server_API/README.md and Docs/Product/Content_Collections_PRD.md +- Extension implementation plan lives in the separate extension repo’s Extension-Plan-1.md diff --git a/Docs/Design/Code_Interpreter_Sandbox_PRD.md b/Docs/Design/Code_Interpreter_Sandbox_PRD.md index 5123d2eb1..9c0548282 100644 --- a/Docs/Design/Code_Interpreter_Sandbox_PRD.md +++ b/Docs/Design/Code_Interpreter_Sandbox_PRD.md @@ -1,13 +1,13 @@ # PRD: Code Interpreter Sandbox & LSP Owner: tldw_server Core Team -Status: v0.2 -Last updated: 2025-10-28 +Status: v0.3 +Last updated: 2025-11-03 ## Table of Contents - [Revision History](#revision-history) - [1) Summary](#1-summary) - - [Implementation Status (v0.2)](#implementation-status-v02) + - [Implementation Status (v0.3)](#implementation-status-v03) - [1) Summary](#1-summary) - [2) Problem Statement](#2-problem-statement) - [3) Goals and Non-Goals](#3-goals-and-non-goals) @@ -43,7 +43,7 @@ Last updated: 2025-10-28 Build a secure, configurable code execution service that lets users, agents, and workflows run untrusted code snippets and full applications in isolated sandboxes. Provide an IDE-friendly LSP integration to surface diagnostics, logs, and results inline. Support both Docker containers (Linux/macOS/Windows hosts) and Firecracker microVMs (Linux-only) to balance broad compatibility with stronger isolation where available. -## Implementation Status (v0.2) +## Implementation Status (v0.3) Implemented - Endpoints: POST `/sessions` (idempotent), POST `/sessions/{id}/files` (safe extract + caps), POST `/runs` (idempotent; oneOf session vs one‑shot), GET `/runs/{id}` (includes `policy_hash` and `resource_usage`), GET `/runtimes` (caps including queue fields), artifacts list and single‑range download, POST `/runs/{id}/cancel` (TERM→grace→KILL). @@ -55,17 +55,30 @@ Implemented - Admin API: list and details implemented; includes `resource_usage`, `policy_hash`, and `image_digest` when available. - Metrics: counters/histograms with `reason` label (e.g., `startup_timeout`, `execution_timeout`); WS heartbeats/disconnects/log truncations and queue drop metrics. -Not yet (planned v0.3 unless noted) -- Interactive runs over WS stdin and related limits (`stdin_*`). -- Signed WS URLs tokens + `resume_from_seq` behavior; current servers may return unsigned `log_stream_url` (use auth headers). -- Egress allowlist policy (domain/IP/CIDR with DNS pinning). -- Firecracker runner. -- Persistent shared store (Postgres/Redis) and cluster‑wide admin aggregates; current backends: memory (default) or SQLite. -- `/runtimes` capability flags (`interactive_supported`, `egress_allowlist_supported`) and top‑level `store_mode` field. +Spec 1.1 additions now implemented +- Interactive runs over WS stdin (`interactive` + `stdin_*` caps); optional and policy‑gated. +- WS signed URL validation (HMAC token + exp) and resume via `from_seq` query param; `resume_from_seq` hint in POST `/runs`. +- Runtimes discovery now includes `interactive_supported`, `egress_allowlist_supported`, and `store_mode`. +- Persistent store backend: `SANDBOX_STORE_BACKEND=cluster` (Postgres) with `store_mode=cluster` in discovery. +- Optional Redis fan‑out for cross‑worker WS streaming; health endpoint reports Redis status and ping. +- Public and authenticated sandbox health endpoints. +- Error semantics: 409 idempotency returns `prior_id`, `key`, `prior_created_at`; 503 `runtime_unavailable` includes the failing `details.runtime`. 
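+
+Usage sketch: capability discovery
+- A client can gate interactive runs on the discovery flags listed above before submitting a run. A minimal sketch, assuming the Feature Discovery payload shape shown later in this document; `BASE` and the API key value are placeholders.
+
+```python
+# Minimal sketch: pick a runtime using the discovery flags described above.
+# BASE and the X-API-KEY value are placeholders (assumptions), not defaults.
+import requests
+
+BASE = "http://localhost:8000"
+HEADERS = {"X-API-KEY": "YOUR_API_KEY"}
+
+def pick_runtime(prefer_interactive: bool = True) -> dict | None:
+    caps = requests.get(f"{BASE}/api/v1/sandbox/runtimes", headers=HEADERS, timeout=10).json()
+    for rt in caps.get("runtimes", []):
+        if "1.1" not in rt.get("supported_spec_versions", []):
+            continue  # stick to runtimes advertising spec 1.1
+        if prefer_interactive and not rt.get("interactive_supported", False):
+            continue
+        return rt  # also carries store_mode and egress_allowlist_supported
+    return None
+```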
+ +Update: Egress Allowlist & DNS Pinning (v0.3) +- Added helpers to harden allowlist parsing and make DNS pinning explicit: + - `expand_allowlist_to_targets(...)` now supports CIDR, IPs, hostnames, wildcard prefixes (`*.example.com`) and suffix tokens (`.example.com`), promoting resolved A records to `/32` CIDRs. + - `pin_dns_map(...)` returns `{ host -> [IPs] }` for observability. + - `refresh_egress_rules(container_ip, raw_allowlist, label, ...)` revokes labeled rules then reapplies pinned `ACCEPT` targets followed by a `DROP` for the container IP. + - Rules are labeled for later cleanup; falls back from `iptables-restore` to iterative `iptables` if needed. + +Not yet (planned or in progress) +- Egress allowlist enforcement and DNS pinning (capability flag present; enforcement WIP). +- Firecracker runner: real execution parity (scaffold implemented). +- Additional admin aggregates for cluster mode. Clarifications - Artifact downloads: single‑range supported; multi‑range returns 416. -- `supported_spec_versions`: default advertises `["1.0"]`; spec 1.1 fields are documented for v0.3. +- `supported_spec_versions`: servers may advertise `["1.0","1.1"]`; 1.1 is backward‑compatible and adds optional fields/capabilities. Primary use cases: - Validate LLM-generated code safely, before running it locally. @@ -361,11 +374,11 @@ See Timeouts & Defaults under Content Types & Limits for consolidated rules. - Semantics: - Minor (1.x): backward-compatible; server may accept a range (e.g., `1.0`-`1.2`). - Major (2.0): potentially breaking; server rejects unsupported majors with `invalid_spec_version`. -- Discovery: GET `/runtimes` includes `supported_spec_versions` (e.g., `["1.0"]` in v0.2; future versions may add `"1.1"`). +- Discovery: GET `/runtimes` includes `supported_spec_versions` (e.g., `["1.0","1.1"]`). - Validation errors include `details.supported` with accepted versions. - Config: Controlled via `SANDBOX_SUPPORTED_SPEC_VERSIONS` (comma- or JSON-list). The server validates `spec_version` against this list and rejects mismatches with `invalid_spec_version` including `details.supported` and `details.provided`. - v0.3 (Spec 1.1 additions — Finalized) + v0.3 (Spec 1.1 additions — Implemented) - Backward‑compatible: 1.1 only adds optional fields; 1.0 clients remain supported. - POST `/runs` optional fields: - `interactive` (bool; default false) @@ -377,7 +390,10 @@ See Timeouts & Defaults under Content Types & Limits for consolidated rules. - GET `/runtimes` capability flags: - `interactive_supported` (bool) - `egress_allowlist_supported` (bool) - - `store_mode` (string: `memory|sqlite|postgres|redis`) + - `store_mode` (string: `memory|sqlite|cluster`) + - WebSocket: + - Signed URL tokens (HMAC) with `token` and `exp` query params are validated when enabled. + - Resume logs via `?from_seq=`; buffered frames are replayed starting at `N` when available. 
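+  - Illustrative sketch (assumption): one possible HMAC scheme for the `token`/`exp` pair, shown only to clarify the flow; the server's actual message layout, secret handling, and query format are configuration-defined.
+
+```python
+# Illustrative only: assumes token = hex(HMAC-SHA256(secret, f"{run_id}:{exp}"))
+# with exp as a Unix timestamp; the real scheme is defined by server config.
+import hashlib
+import hmac
+import time
+
+def make_ws_query(secret: bytes, run_id: str, ttl_sec: int = 300) -> dict:
+    exp = int(time.time()) + ttl_sec
+    token = hmac.new(secret, f"{run_id}:{exp}".encode(), hashlib.sha256).hexdigest()
+    # Append as ?token=...&exp=... (plus from_seq=N when resuming a log stream)
+    return {"token": token, "exp": exp}
+
+def validate_ws_token(secret: bytes, run_id: str, token: str, exp: int) -> bool:
+    if exp < int(time.time()):
+        return False  # expired link
+    expected = hmac.new(secret, f"{run_id}:{exp}".encode(), hashlib.sha256).hexdigest()
+    return hmac.compare_digest(expected, token)  # constant-time comparison
+```
+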
### Runtime Limits Normalization @@ -1055,7 +1071,6 @@ Feature Discovery Payload (example) ``` GET /api/v1/sandbox/runtimes { - "store_mode": "memory", "runtimes": [ { "name": "docker", @@ -1072,7 +1087,10 @@ GET /api/v1/sandbox/runtimes "queue_ttl_sec": 120, "workspace_cap_mb": 256, "artifact_ttl_hours": 24, - "supported_spec_versions": ["1.0"], + "supported_spec_versions": ["1.0", "1.1"], + "interactive_supported": false, + "egress_allowlist_supported": false, + "store_mode": "memory", "notes": null }, { @@ -1087,16 +1105,20 @@ GET /api/v1/sandbox/runtimes "queue_ttl_sec": 120, "workspace_cap_mb": 256, "artifact_ttl_hours": 24, - "supported_spec_versions": ["1.0"], + "supported_spec_versions": ["1.0", "1.1"], "interactive_supported": false, "egress_allowlist_supported": false, + "store_mode": "memory", "notes": "Direct Firecracker; enable on supported Linux hosts" } ] } ``` -Note: v0.3 may add capability flags like `interactive_supported` and -`egress_allowlist_supported` per runtime when those features are enabled. +Note: Capability flags now include `interactive_supported`, `egress_allowlist_supported`, and a per‑runtime `store_mode` indicating the active store backend. + +Health & Readiness +- Authenticated: `GET /api/v1/sandbox/health` returns store mode + connectivity timing and Redis fan‑out status with ping. +- Public: `GET /api/v1/sandbox/health/public` returns the same payload without requiring authentication (intended for probes). Egress Allowlist (v0.3) - Opt-in policy; deny_all remains default. Applied per run/session. Server-wide policy may narrow but not widen per-run allowlists. diff --git a/Docs/Design/Education.md b/Docs/Design/Education.md index 028c1a5c2..1ef9dd977 100644 --- a/Docs/Design/Education.md +++ b/Docs/Design/Education.md @@ -6,7 +6,7 @@ https://openscilm.allen.ai/ https://arxiv.org/abs/2411.14199 https://arxiv.org/html/2411.14199v1 - +https://github.com/K-Dense-AI/claude-scientific-skills https://github.com/gudvardur/amazon_book_downloader https://github.com/presenton/presenton https://arxiv.org/abs/2412.02035 @@ -14,6 +14,7 @@ https://github.com/andreamust/NEON-GPT https://arxiv.org/abs/2412.02035v1 https://arxiv.org/abs/2411.07407 https://arxiv.org/abs/2412.16429 +https://mqleet.github.io/AutoPage_ProjectPage/ https://huggingface.co/papers/2412.15443 https://github.com/thiswillbeyourgithub/AnkiAIUtils https://news.ycombinator.com/item?id=42534931 diff --git a/Docs/Design/Embeddings.md b/Docs/Design/Embeddings.md index e43308451..67a50c8cc 100644 --- a/Docs/Design/Embeddings.md +++ b/Docs/Design/Embeddings.md @@ -3,6 +3,9 @@ ### Link Dump +https://blog.vectorchord.ai/3-billion-vectors-in-postgresql-to-protect-the-earth +https://www.rudderstack.com/blog/scaling-postgres-queue/ + https://github.com/HITsz-TMG/KaLM-Embedding https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5 https://huggingface.co/blog/static-embeddings diff --git a/Docs/Design/Embeddings_Adapter_Scaffold.md b/Docs/Design/Embeddings_Adapter_Scaffold.md new file mode 100644 index 000000000..f0b04bdc2 --- /dev/null +++ b/Docs/Design/Embeddings_Adapter_Scaffold.md @@ -0,0 +1,18 @@ +# Embeddings Adapter Scaffold (Stage 4) + +This document tracks the initial scaffold for migrating embeddings to the provider adapter architecture. + +What’s included +- `EmbeddingsProvider` interface in `tldw_Server_API/app/core/LLM_Calls/providers/base.py`. +- Embeddings adapter registry in `tldw_Server_API/app/core/LLM_Calls/embeddings_adapter_registry.py`. 
+- OpenAI embeddings adapter (delegate-first) in `tldw_Server_API/app/core/LLM_Calls/providers/openai_embeddings_adapter.py`. + +Behavior +- By default, the OpenAI embeddings adapter delegates to the existing legacy helper `get_openai_embeddings()` for parity and to avoid network during tests. +- Native HTTP can be enabled with `LLM_EMBEDDINGS_NATIVE_HTTP_OPENAI=1` (uses `httpx` at `OPENAI_BASE_URL` or default OpenAI URL). + +Next steps +- Add generic OpenAI-compatible embeddings adapter (local servers) mirroring chat adapters. +- Wire a shim for embeddings to allow endpoint opt-in via env flag without touching the production embeddings service. +- Extend registry defaults as more embeddings providers are adapted. +- Conformance tests: shape of responses, error mapping, batch behavior, and performance smoke. diff --git a/Docs/Design/IMPLEMENTATION_PLAN.md b/Docs/Design/IMPLEMENTATION_PLAN.md new file mode 100644 index 000000000..590a4f8c2 --- /dev/null +++ b/Docs/Design/IMPLEMENTATION_PLAN.md @@ -0,0 +1,95 @@ +## Implementation Plan — Browser Extension + +This document tracks staged implementation with concrete success criteria and test notes. + +--- + +## Stage 1: Connectivity & Auth +**Goal**: Establish server connectivity and both auth modes (API Key and JWT). + +**Success Criteria**: +- Options page captures server URL and credentials; health check returns OK. +- Background proxy injects headers; tokens never exposed to content scripts. +- 401 triggers single‑flight refresh and one retry; no duplicate requests. + +**Tests**: +- Unit: auth storage, header injection, refresh queue. +- Integration: health endpoint, login/logout, API key validation. +- Manual: revoke permission and re‑grant host permission flow. + +**Status**: Not Started + +--- + +## Stage 2: Chat & Models +**Goal**: Streaming chat via `/api/v1/chat/completions` with model selection. + +**Success Criteria**: +- Models/providers fetched and rendered; selection persisted per session. +- Non‑stream and SSE stream both work; cancel stops network within ~200ms. +- Exact path strings (no 307 redirects observed in logs). + +**Tests**: +- Unit: SSE parser, backoff, abort controller. +- Integration: stream across two models; cancel and resume. +- Manual: slow network simulation; ensure UI stays responsive. + +**Status**: Not Started + +--- + +## Stage 3: RAG & Media +**Goal**: RAG search UI and URL ingest with progress notifications. + +**Success Criteria**: +- RAG `/api/v1/rag/search` returns results; snippets insert into chat context. +- URL ingest calls `/api/v1/media/process`; user sees progress and final status. +- Errors are actionable (permission, size limits, server busy). + +**Tests**: +- Unit: request builders, snippet insertion. +- Integration: RAG queries; media process happy path and failure modes. +- Manual: ingest current tab URL; verify server reflects new media. + +**Status**: Not Started + +--- + +## Stage 4: Notes/Prompts & STT +**Goal**: Notes/Prompts basic flows and STT upload/transcribe. + +**Success Criteria**: +- Notes: create/search; export works; selection‑to‑note from content script. +- Prompts: browse/import/export; insert chosen prompt into chat input. +- STT: upload short clip; transcript displayed; non‑supported formats fail clearly. + +**Tests**: +- Unit: notes/prompts stores; MIME/type validation. +- Integration: `/api/v1/notes/*`, `/api/v1/prompts/*`, `/api/v1/audio/transcriptions`. +- Manual: 20s audio clip round‑trip; error message clarity for oversized files. 
+ +**Status**: Not Started + +--- + +## Stage 5: TTS & Polish +**Goal**: TTS synthesis/playback and UX polish. + +**Success Criteria**: +- Voices list loads from `/api/v1/audio/voices/catalog`; selection persisted. +- `/api/v1/audio/speech` returns audio; playback controls functional. +- Accessibility audit passes key checks; performance within budgets. + +**Tests**: +- Unit: audio player controls and error states. +- Integration: voices catalog and synthesis endpoints. +- Manual: latency spot checks; keyboard navigation. + +**Status**: Not Started + +--- + +## Notes +- Centralize route constants and validate against OpenAPI at startup (warn on mismatch). +- Keep tokens in background memory; only persist refresh tokens if strictly necessary. +- Use optional host permissions for user‑configured origins (Chrome/Edge MV3). diff --git a/Docs/Design/LLM_Adapters_Authoring_Guide.md b/Docs/Design/LLM_Adapters_Authoring_Guide.md new file mode 100644 index 000000000..2b2e4447b --- /dev/null +++ b/Docs/Design/LLM_Adapters_Authoring_Guide.md @@ -0,0 +1,103 @@ +# Authoring LLM Provider Adapters + +This guide explains how to add a new LLM provider adapter that plugs into the Chat adapter registry. Adapters encapsulate provider-specific logic and return OpenAI-compatible responses/streams. + +## Directory & Files +- Put adapters under `tldw_Server_API/app/core/LLM_Calls/providers/` +- Recommended file name: `_adapter.py` (e.g., `openai_adapter.py`) +- Implement the `ChatProvider` interface from `providers/base.py` + +## Interface +```python +from tldw_Server_API.app.core.LLM_Calls.providers.base import ChatProvider, apply_tool_choice +from tldw_Server_API.app.core.LLM_Calls.sse import sse_data, sse_done +from tldw_Server_API.app.core.LLM_Calls.streaming import aiter_sse_lines_httpx, iter_sse_lines_requests + +class MyProviderAdapter(ChatProvider): + name = "myprovider" + + def capabilities(self) -> dict: + return { + "supports_streaming": True, + "supports_tools": True, + "default_timeout_seconds": 60, + "max_output_tokens_default": 4096, + } + + def chat(self, request: dict, *, timeout: float | None = None) -> dict: + # 1) Build provider payload from OpenAI-like request + # 2) Call the provider (httpx/requests) + # 3) Normalize JSON to OpenAI-compatible chat.completion + return {"object": "chat.completion", ...} + + def stream(self, request: dict, *, timeout: float | None = None): + # 1) Make streaming request + # 2) Yield normalized SSE frames (use streaming helpers) + # 3) Do NOT yield [DONE]; caller appends via finalize_stream() + yield sse_data({"choices": [{"delta": {"content": "..."}}]}) + + # Optional async variants for native async clients + async def achat(self, request: dict, *, timeout: float | None = None) -> dict: + raise NotImplementedError + + async def astream(self, request: dict, *, timeout: float | None = None): + raise NotImplementedError +``` + +## Request Shaping +- Adapters receive an OpenAI-like request dict. Common keys: `model`, `messages`, `stream`, `tools`, `tool_choice`, `temperature`, `top_p`, `max_tokens`, `stop`, `response_format`. +- Use `apply_tool_choice(payload, tools, tool_choice)` to set `tool_choice` safely only when supported. +- Do not log raw prompts—log sanitized metadata only. + +## Streaming +- Use `iter_sse_lines_requests()` for `requests` streams and `aiter_sse_lines_httpx()` for `httpx` streams to normalize per-line output. +- Do NOT forward provider `[DONE]` frames; the endpoint appends a single final `sse_done()` via `finalize_stream()`. 
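+
+A synchronous `stream()` body might look like the following sketch. This is not the canonical helper contract: it assumes `iter_sse_lines_requests(resp)` yields decoded SSE data payloads and that `self._url`, `self._headers()`, and `self._build_payload()` are adapter-private helpers you define yourself; check `streaming.py`, `sse.py`, and `providers/base.py` for the real signatures.
+
+```python
+# Sketch only. Assumes iter_sse_lines_requests(resp) yields decoded "data" payload
+# strings; _url, _headers(), and _build_payload() are hypothetical adapter helpers.
+import json
+import requests
+
+from tldw_Server_API.app.core.LLM_Calls.providers.base import ChatProvider
+from tldw_Server_API.app.core.LLM_Calls.sse import sse_data
+from tldw_Server_API.app.core.LLM_Calls.streaming import iter_sse_lines_requests
+
+class MyProviderAdapter(ChatProvider):
+    name = "myprovider"
+
+    def stream(self, request: dict, *, timeout: float | None = None):
+        resp = requests.post(
+            self._url,
+            json=self._build_payload(request),
+            headers=self._headers(),
+            stream=True,
+            timeout=timeout or 60,  # fall back to the adapter's default timeout
+        )
+        try:
+            resp.raise_for_status()
+            for line in iter_sse_lines_requests(resp):
+                if not line or line.strip() == "[DONE]":
+                    continue  # never forward provider [DONE]; caller appends sse_done()
+                try:
+                    chunk = json.loads(line)
+                except ValueError:
+                    continue  # skip keep-alives / non-JSON frames
+                yield sse_data(chunk)  # emit a normalized OpenAI-style chunk frame
+        except requests.RequestException as exc:
+            raise self.normalize_error(exc)
+        finally:
+            resp.close()
+```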
+ +## Error Mapping +- Wrap provider exceptions with `self.normalize_error(exc)` which maps to project `Chat*Error` types. +- Return or raise these within adapter methods; the endpoint layer maps them to HTTP codes. + +## Registration +- Register the adapter with the registry (e.g., in initialization): +```python +from tldw_Server_API.app.core.LLM_Calls.adapter_registry import get_registry +get_registry().register_adapter("myprovider", "tldw_Server_API.app.core.LLM_Calls.providers.myprovider_adapter.MyProviderAdapter") +``` + +## Testing +- Unit test adapter methods with mocked HTTP clients. +- Verify non-streaming returns OpenAI-compatible JSON. +- Verify streaming yields normalized SSE frames and omits `[DONE]`. +- Ensure error mapping covers authentication, rate limit, bad request, and 5xx cases. + +## Style & Conventions +- Follow PEP 8 and use type hints. +- Keep provider adapters small and focused; do not introduce provider-specific branching in common modules. +- Keep config resolution clear (env overrides, base URL, API key); never log secrets. + +## Examples +- See TTS adapters under `tldw_Server_API/app/core/TTS/adapters/` for the pattern. +- Reuse `http_client.py` for consistent timeouts, retries, and egress policy when appropriate. + +## Async Examples +- Implement async variants when providers offer native async SDKs or when throughput matters: +```python +class MyProviderAdapter(ChatProvider): + async def achat(self, request: dict, *, timeout: float | None = None) -> dict: + # Async JSON request via httpx.AsyncClient + # Return OpenAI-compatible response + ... + + async def astream(self, request: dict, *, timeout: float | None = None): + # Async SSE stream via httpx.AsyncClient.stream + # Yield normalized SSE lines; do not yield [DONE] + ... +``` +- Wire async shims in `adapter_shims.py` and register in `provider_config.ASYNC_API_CALL_HANDLERS` so the orchestrator can route without blocking threads. + +## Embeddings Adapters +- For embeddings, implement `EmbeddingsProvider` in `providers/base.py` and return an OpenAI-like shape: + `{ "data": [{"index": 0, "embedding": [...]}, ...], "model": "...", "object": "list" }`. +- Register in `embeddings_adapter_registry.DEFAULT_ADAPTERS`. +- The enhanced embeddings endpoint can route to adapters when `LLM_EMBEDDINGS_ADAPTERS_ENABLED=1`. +- Optional: support native HTTP behind flags like `LLM_EMBEDDINGS_NATIVE_HTTP_` to allow mock-friendly tests. diff --git a/Docs/Design/LLM_Provider_Adapter_Split_PRD.md b/Docs/Design/LLM_Provider_Adapter_Split_PRD.md new file mode 100644 index 000000000..d77fa582b --- /dev/null +++ b/Docs/Design/LLM_Provider_Adapter_Split_PRD.md @@ -0,0 +1,346 @@ +# LLM Provider Adapter Split – Developer PRD + +## 1. Background +- Current state: commercial and local providers are implemented in monolithic modules that mix request shaping, streaming, error mapping, and config handling. + - Commercial: `tldw_Server_API/app/core/LLM_Calls/LLM_API_Calls.py:1` + - Local: `tldw_Server_API/app/core/LLM_Calls/LLM_API_Calls_Local.py:1` +- Problems observed: + - Large branching blocks per provider; repeated logic for streaming SSE normalization, tool_choice gating, error normalization, base URL resolution, and timeouts. + - Hard to add/modify providers safely; test surface area is broad and entangled. + - Async/sync paths diverge across functions; reuse of common streaming code is inconsistent. 
+- Precedent: The TTS module already solved this with adapters + registry + - Registry: `tldw_Server_API/app/core/TTS/adapter_registry.py` + - Adapters: `tldw_Server_API/app/core/TTS/adapters/*` + - Centralized resource/circuit breaker helpers and clear capability surfaces. +- Existing reusable building blocks for LLMs: + - SSE normalization utilities: `tldw_Server_API/app/core/LLM_Calls/sse.py` + - Streaming helpers: `tldw_Server_API/app/core/LLM_Calls/streaming.py` + - HTTP client & SSE streaming: `tldw_Server_API/app/core/http_client.py` + - Provider health/fallback shell: `tldw_Server_API/app/core/Chat/provider_manager.py` + - Provider param map & legacy dispatch: `tldw_Server_API/app/core/Chat/provider_config.py` + +## 2. Problem Statement +The monolithic `LLM_API_Calls.py` and its local analog contain hundreds of lines of provider-specific branching and duplicated streaming/error/parameter handling. This raises maintenance cost, increases regression risk, and slows provider onboarding. We need a pluggable provider adapter architecture that mirrors the TTS pattern, with a registry and small, focused provider modules. + +## 3. Objectives & Success Criteria +- Extract provider-specific logic into small adapters under `LLM_Calls/providers/*`, each implementing a unified `ChatProvider` interface. +- Introduce an adapter registry that surfaces: + - Capabilities (streaming, tools, vision, JSON mode, max token hints) + - Base URLs and auth requirements + - Error mapping hooks + - Streaming hooks that reuse `sse.py` and `streaming.py` +- Preserve API compatibility for existing endpoints and orchestrators: + - `POST /api/v1/chat/completions` continues to work + - Legacy function entry points remain as thin wrappers during transition +- Unify error normalization and tool_choice gating in one place. +- Share the centralized HTTP client / SSE helper and circuit-breaker integration. + +Success metrics +- Provider onboarding time reduced to ≤1 day for typical OpenAI-compatible providers. +- Code reduction: ≥30% fewer lines in `LLM_API_Calls.py` and `LLM_API_Calls_Local.py` by removing branching. +- Test coverage ≥80% for new registry + adapters; all existing LLM tests pass. +- Zero API behavior regressions in `/api/v1/chat/completions` happy-path tests, including streaming. + +## 4. Scope +In scope (Phase 1–2) +- Define `ChatProvider` interface and minimal core types (request, response, stream iterators) in `LLM_Calls/providers/base.py`. +- Implement adapter registry in `LLM_Calls/adapter_registry.py` (mirrors TTS): lazy loading by dotted path, capability discovery. +- Extract adapters for top providers used in tests and defaults: OpenAI, Anthropic, Groq, OpenRouter, Google (Gemini), Mistral, HuggingFace, Qwen, DeepSeek, plus a generic OpenAI-compatible adapter used by several custom/local servers. +- Move streaming normalization to a shared path via `sse.py` and `streaming.py`; remove per-provider ad-hoc parsing. +- Centralize error mapping and tool_choice gating utilities. +- Keep legacy dispatch in `provider_config.py` by routing to registry-backed `chat()`/`achat()` wrappers to avoid endpoint changes. +- Update `GET /api/v1/llm/providers` to draw capabilities from the registry (keeping existing metadata shape). + +Out of scope (for initial rollout) +- New fallback selection algorithms or large changes to `provider_manager.py` behavior. +- Changes to public API schemas for chat/embeddings requests. 
+- Provider-specific advanced features not currently supported (vision upload pipelines, files API, advanced JSON schemas). +- End-to-end migration of embeddings to adapters (tracked as a follow-up). + +## 5. Architecture Overview +Components +1. API Layer (unchanged): `tldw_Server_API/app/api/v1/endpoints/chat.py` + - Builds request payloads, rate limits, and streams responses to clients. +2. Orchestrator/Service Layer (unchanged shape): continues to call into provider dispatch, which is refactored to delegate to the adapter registry. +3. Adapter Registry: `tldw_Server_API/app/core/LLM_Calls/adapter_registry.py` + - Registers providers via dotted paths; lazily constructs adapters with merged config; exposes `get_adapter(name)` and `get_capabilities()`. +4. Base Adapter Interface: `tldw_Server_API/app/core/LLM_Calls/providers/base.py` + - Defines `ChatProvider` with `chat()`, `stream()`, and optional `achat()`/`astream()` plus `capabilities()` and `normalize_error()`. +5. Provider Adapters: `tldw_Server_API/app/core/LLM_Calls/providers/*.py` + - Self-contained logic per provider: auth, base URL, payload shaping, error mapping, and streaming using shared helpers. +6. Shared Utilities: reuse existing `sse.py`, `streaming.py`, and `http_client.py`. +7. Circuit Breaker Integration: reuse `provider_manager.py` hooks (record success/failure) and/or leverage existing breaker in Evaluations for future consolidation. + +## 6. Interfaces +ChatProvider (Python Protocol or base class) +```python +class ChatProvider(Protocol): + name: str + + def capabilities(self) -> Dict[str, Any]: + # {"supports_streaming": True, "supports_tools": True, ...} + ... + + def chat(self, request: Dict[str, Any], *, timeout: Optional[float] = None) -> Dict[str, Any]: + # Returns OpenAI-compatible non-streaming chat completion + ... + + def stream(self, request: Dict[str, Any], *, timeout: Optional[float] = None) -> Iterable[str]: + # Yields OpenAI-compatible SSE strings; final [DONE] handled by caller via finalize_stream() + ... + + async def achat(self, request: Dict[str, Any], *, timeout: Optional[float] = None) -> Dict[str, Any]: + ... + + async def astream(self, request: Dict[str, Any], *, timeout: Optional[float] = None) -> AsyncIterator[str]: + ... + + def normalize_error(self, exc: Exception) -> ChatAPIError: + # Map provider exceptions to project Chat*Error types + ... +``` + +Request/Response contracts +- Adapters accept already-normalized, OpenAI-like request dicts from the orchestrator. +- Adapters return OpenAI-compatible `chat.completion` JSON for non-streaming and SSE lines for streaming (using `sse_data(...)` frames and `finalize_stream()` at the end by callers). + +Tool choice gating +- Provide a shared helper `apply_tool_choice(payload, tools, tool_choice)` that safely sets tool choice only when supported. +- Present in a dedicated utility module used by all adapters to avoid drift. + +## 7. Error Mapping +- Central helper converts `requests/httpx` errors and provider JSON error shapes into `ChatAuthenticationError`, `ChatRateLimitError`, `ChatBadRequestError`, `ChatProviderError`, or `ChatAPIError`. +- Adapters call `normalize_error()` when catching provider exceptions; endpoints retain current error-to-HTTP mapping. + +## 8. Streaming Normalization +- All adapters must yield normalized SSE via `normalize_provider_line()` and suppress provider-sent `[DONE]` frames. 
+- Streaming over `httpx` leverages `aiter_sse_lines_httpx()` and `astream_sse()` from `http_client.py` when available; sync paths use `iter_sse_lines_requests()` where applicable. +- A single final `sse_done()` is appended by the orchestrator using `finalize_stream()` to avoid duplicates. + +## 9. Configuration +- Adapters resolve config from `load_and_log_configs()`/env, mirroring current semantics (API keys, base URLs, defaults). +- Registry exposes `get_all_capabilities()` for `GET /api/v1/llm/providers`, merging static metadata and adapter-reported capabilities. +- Preserve existing env var overrides (e.g., `OPENAI_API_BASE`, `MOCK_OPENAI_BASE_URL`). + +## 10. Migration Plan +Phase 0: Scaffolding +- Add `providers/base.py` with `ChatProvider` interface and small common utils (tool_choice helper). +- Add `adapter_registry.py` with lazy import, status cache, and capability discovery (modeled on TTS registry). + +Phase 1: First adapter + shim +- Implement `openai_adapter.py`; route `provider_config.API_CALL_HANDLERS['openai']` to the registry-backed adapter. +- Keep legacy functions (`chat_with_openai`, etc.) as thin wrappers, delegating to adapters. +- Ensure streaming parity by reusing `sse.py` and `streaming.py`. + +Phase 2: Core providers +- Port Anthropic, Groq, OpenRouter, Google (Gemini), Mistral. +- Update `llm_providers.py` endpoint to use registry for capability flags. + +Phase 3: Remaining providers + cleanup +- Port Qwen, DeepSeek, HuggingFace, and generic OpenAI-compatible adapter used by local/custom servers. +- Remove large branching from `LLM_API_Calls.py` and `LLM_API_Calls_Local.py`; leave compatibility wrappers that call the registry. + +Phase 4: Embeddings (optional follow-up) +- Consider moving embeddings to provider adapters (or parallel `EmbeddingsProvider`) while preserving current endpoints. + +Status (initiated) +- Added `EmbeddingsProvider` interface, registry, and an OpenAI embeddings adapter (delegate-first). +- Native HTTP is opt-in via `LLM_EMBEDDINGS_NATIVE_HTTP_OPENAI`. +- Endpoint wiring remains unchanged; migration will be opt-in via shim in a subsequent PR. + +Current Status (Nov 2025) +- Adapters & shims + - Chat adapters implemented: OpenAI, Anthropic, Groq, OpenRouter, Google (Gemini), Mistral, Qwen, DeepSeek, HuggingFace, Custom OpenAI (v1/v2). + - Async adapter routing wired for OpenAI, Anthropic, Groq, OpenRouter plus Stage 3 providers (Qwen/DeepSeek/HF/Custom OpenAI). + - Endpoint providers capability merge uses adapter registry; shape validated by unit test. +- Native HTTP + - Feature-flagged native httpx paths for OpenAI/Anthropic/Groq/OpenRouter/Google/Mistral; default remains delegate-first. +- Tests (local runs) + - Adapters unit: 44 passed (STREAMS_UNIFIED=1, LLM_ADAPTERS_ENABLED=1). + - OpenAI async streaming via orchestrator now passes (fixed in async shim by honoring monkeypatched legacy during streaming; verified on test slice `tldw_Server_API/tests/LLM_Adapters/integration/test_async_adapters_orchestrator.py::test_chat_api_call_async_streaming`). + - Embeddings adapters: OpenAI/HF/Google wired with unit coverage; endpoint adapter path tested (multi-input + optional L2). +- CI (new jobs added) + - llm-adapters-suites: runs unit + subset of integration adapter tests with adapters enabled. + - llm-adapters-native-matrix: per‑provider native-http unit slices with feature flags. 
+ +Latest Changes (Nov 04, 2025) +- Fixed OpenAI async streaming route: async shim now yields SSE lines when legacy is monkeypatched in tests (no network), resolving the prior failure. +- Began monolith cleanup: added deprecation banner to `LLM_API_Calls.py` and preserved thin wrappers; deeper branch pruning staged post-CI stability. +- Action item: re-enable the previously skipped async streaming test in CI after a broader adapter integration run. + +Remaining Work +- Incrementally flip native HTTP flags per provider in CI as suites remain green; then prune provider-specific branches in legacy modules. +- Broaden async tests for Stage 3 providers when native AsyncClient paths are introduced (optional). +- Expand embeddings adapters to more providers as needed and add error‑path tests. + +## 11. Backward Compatibility +- Public FastAPI endpoints unchanged; request/response schema remains OpenAI-compatible. +- Legacy `provider_config.API_CALL_HANDLERS` continue to exist, delegating to the registry, so orchestrators and tests remain intact. +- Keep current config keys and env var precedence; deprecate only internal call paths. + +## 12. Testing Strategy +- Unit tests + - Registry init, capability discovery, and adapter lazy loading. + - Adapter error mapping: map representative provider error JSON/statuses to Chat*Error types. + - Streaming: ensure `[DONE]` handling, `normalize_provider_line()` behavior, and SSE frame structure. + - Tool choice gating correctness. +- Integration tests (httpx/requests mocked) + - `POST /api/v1/chat/completions` non-streaming and streaming across at least OpenAI, Anthropic, Groq, OpenRouter. + - Ensure legacy tests under `tldw_Server_API/tests/LLM_Calls/` continue to pass. + - Mock server parity via `mock_openai_server/` where applicable. +- Performance smoke + - Compare latency and CPU utilization against baseline for streaming and non-streaming requests. + +## 13. Metrics & Observability +- Log provider selection and timing at DEBUG without leaking prompt content (continue using `_sanitize_payload_for_logging`). +- Optional adapter-level counters: calls, failures by error class, average response duration. +- Reuse http_client metrics; integrate provider health with `provider_manager.record_success/record_failure`. + +## 14. Risks & Mitigations +- Regression in streaming edge cases across providers + - Mitigation: shared streaming helpers + adapter conformance tests and property tests for SSE framing. +- Hidden coupling to legacy function signatures + - Mitigation: keep wrappers and use provider_config param map for argument translation during transition. +- Config drift between adapters + - Mitigation: unify base URL and auth key resolution in shared helpers; document required keys per adapter. +- Test brittleness (network) + - Mitigation: rely on `httpx` mocking and `mock_openai_server`; ensure CI network-off safe. + +## 15. Rollout Plan & Timeline (estimate) +- Week 1: Scaffolding + OpenAI adapter + shim routing, green tests. +- Week 2: Anthropic, Groq, OpenRouter, registry capabilities wiring, providers endpoint updates. +- Week 3: Google, Mistral, Qwen, HuggingFace, DeepSeek; delete major branching in legacy files, keep wrappers. +- Week 4: Stabilization, docs, performance baseline comparison; decide on embeddings adapter follow-up. + +## 16. Acceptance Criteria +- Registry and base adapter modules exist; adapters for OpenAI, Anthropic, Groq, OpenRouter are implemented and covered by tests. 
+- `/api/v1/chat/completions` works for streaming and non-streaming paths with no behavioral regressions in existing tests. +- `GET /api/v1/llm/providers` returns capability info sourced from the registry. +- Code reduction achieved in monolithic files; obvious duplicated streaming/error logic removed. +- Documentation updated: this PRD, adapter authoring guide, and migration notes. + +## 17. Deliverables +- Code: `LLM_Calls/providers/*`, `LLM_Calls/adapter_registry.py`, updated legacy wrappers. +- Tests: unit + integration under `tldw_Server_API/tests/LLM_Calls/` following existing markers. +- Docs: this PRD plus a short "Adding a new LLM adapter" guide in `Docs/Design/`. + +## 18. Deletions & Cleanup (after Phase 3) +- Remove provider-specific branching from `LLM_API_Calls.py` and `LLM_API_Calls_Local.py`. +- Consolidate tool_choice handling and error normalization into shared helpers; delete scattered duplicates. +- Keep thin compatibility wrappers only where needed by imported call sites. + - Status: initial pass started (deprecation banner added; wrappers preserved); deeper branch pruning pending CI stability. + +## 19. Open Questions +- Should embeddings be part of the same adapter registry or a sibling `EmbeddingsProvider` with shared config? +- Do we want provider-level retry policies configurable via registry (override http_client defaults)? +- Unify circuit breaker implementation across Chat/TTS/Evals into a single shared component? +- Any providers requiring non-HTTP transport (e.g., gRPC) in near term? + +## 20. Implementation Guide & Checklist + +This guide breaks implementation into clear, verifiable stages. Use checklists to track progress and ensure parity with existing behavior and tests. + +Stage 0: Scaffolding (foundation) +- [x] Add adapter base and helpers: `LLM_Calls/providers/base.py` (ChatProvider, error mapping, tool_choice helper) +- [x] Add adapter registry: `LLM_Calls/adapter_registry.py` with lazy loading, capability discovery, singleton accessor +- [x] Authoring guide in `Docs/Design/LLM_Adapters_Authoring_Guide.md` +- [ ] Import sanity check: registry import causes no cycles in API layers +- [ ] CI green with no behavior changes + +Verification +- [ ] `python -m pytest -m "unit or integration" -q` passes +- [ ] Lint/formatters (if configured) show no new warnings + +Stage 1: OpenAI adapter + shim +- [ ] Implement `providers/openai_adapter.py` with: + - [ ] Base URL resolution precedence (config/env -> default `https://api.openai.com/v1`) + - [ ] Auth header handling and safe header redaction in logs + - [ ] Non-streaming `chat()` returning OpenAI-compatible `chat.completion` + - [ ] Streaming `stream()` using `iter_sse_lines_requests`/`aiter_sse_lines_httpx` and `sse.py` + - [ ] Error mapping: auth (401/403), rate limit (429), bad request (400/404/422), provider 5xx + - [ ] Tool choice gating via shared helper + - [ ] Sanitized payload logging using existing `_sanitize_payload_for_logging` where applicable +- [ ] Wire shim: make `provider_config.API_CALL_HANDLERS['openai']` delegate to registry-backed adapter; preserve function signature +- [ ] Tests + - [ ] Unit: adapter non-streaming success, error cases + - [ ] Unit: streaming yields valid SSE chunks and omits provider `[DONE]` + - [ ] Integration: `/api/v1/chat/completions` for OpenAI non-streaming/streaming (httpx mocked or `mock_openai_server`) +- [ ] Docs: update PRD status and add adapter-specific notes if needed + - [x] Async shim fix: honor monkeypatched legacy during streaming (yields SSE lines); 
passes orchestrator async streaming test slice + +Stage 2: Core providers (Anthropic, Groq, OpenRouter, Google, Mistral) +- [ ] Implement adapters with provider-specific payload shaping and streaming + - Anthropic: messages/parts conversion, `stop_sequences`, tool_use mapping + - Groq: OpenAI-compatible; ensure base URL/config and logit_bias/logprobs mapping + - OpenRouter: top_p/top_k/min_p mapping, per-model routing if needed + - Google (Gemini): `generationConfig`, parts, `stopSequences`, images/files where minimally necessary + - Mistral: `random_seed`, `top_k`, tools +- [ ] Add registry registrations (by init or a central bootstrap) +- [ ] Tests per provider (unit + endpoint-level integration with mocks) +- [ ] Providers endpoint: aggregate capabilities from registry and merge with existing `MODEL_METADATA` where applicable + +Stage 3: Remaining providers + monolith cleanup +- [ ] Implement Qwen, DeepSeek, HuggingFace, generic OpenAI-compatible (for local/custom servers) +- [ ] Route `provider_config` handlers to adapters for all migrated providers +- [ ] Remove provider-specific branching from `LLM_API_Calls.py` and `LLM_API_Calls_Local.py`, keeping thin wrappers only +- [ ] Centralize tool_choice and error normalization (delete duplicates in monolith) +- [ ] Re-run entire LLM test suite including `tests/LLM_Calls/test_async_streaming_dedup.py` and strict filter tests + +Stage 4: Embeddings adapters (scaffold → endpoint wiring) +- [x] Add `EmbeddingsProvider` to base interface (`providers/base.py`). +- [x] Create `embeddings_adapter_registry.py` with `get_embeddings_registry()`. +- [x] Implement `providers/openai_embeddings_adapter.py` (delegate-first; optional native HTTP behind `LLM_EMBEDDINGS_NATIVE_HTTP_OPENAI`). +- [x] Wire adapter path into `POST /api/v1/embeddings` (enhanced v5 endpoint) behind feature flag `LLM_EMBEDDINGS_ADAPTERS_ENABLED=1`. + - When enabled, route via registry adapter for supported providers and map response to OpenAI-compatible shape. + - Preserve existing behavior (circuit breaker, batching, caching) when flag is disabled. +- [x] Add minimal unit test that exercises the adapter-backed endpoint with a stub adapter. +- [x] Extend registry with HF/Google embeddings adapters: `providers/huggingface_embeddings_adapter.py`, `providers/google_embeddings_adapter.py`. +- [x] Add native HTTP unit tests for HuggingFace and Google embeddings (mocked httpx). +- [x] Add endpoint unit test for multiple inputs and optional L2 normalization under `LLM_EMBEDDINGS_L2_NORMALIZE=1`. + +Current Status (Nov 2025) +- Adapters & registry + - Chat adapters implemented for OpenAI, Anthropic, Groq, OpenRouter, Google (Gemini), Mistral, Qwen, DeepSeek, HuggingFace, and two Custom OpenAI-compatible variants. Native HTTP paths are feature-flagged per provider and aligned to `httpx` for testability. + - Async adapter routing is wired for OpenAI/Anthropic/Groq/OpenRouter; extended now to Qwen/DeepSeek/HuggingFace/Custom OpenAI via new async shims and dispatch in `provider_config.ASYNC_API_CALL_HANDLERS`. + - Error normalization is consolidated with provider-specific overrides added for Google, Mistral, Groq, OpenRouter, OpenAI, Anthropic; and now also HuggingFace and Custom OpenAI. +- Endpoints & tests + - Chat integration suites run green with adapters enabled in this environment for core modules; remaining slices pass locally/CI. +- Embeddings endpoint supports an adapter-backed path for OpenAI, HuggingFace, and Google when `LLM_EMBEDDINGS_ADAPTERS_ENABLED=1`. 
Keys are resolved from settings, and optional L2 normalization can be enabled via `LLM_EMBEDDINGS_L2_NORMALIZE=1`. +- Native HTTP is feature-flagged per provider: `LLM_EMBEDDINGS_NATIVE_HTTP_OPENAI`, `LLM_EMBEDDINGS_NATIVE_HTTP_HUGGINGFACE`, `LLM_EMBEDDINGS_NATIVE_HTTP_GOOGLE` (mock-friendly in tests). +- Cleanup & next steps + - Post-parity monolith pruning is staged: provider-specific branches retained as `legacy_*` and wrappers route through shims. Remove branches once CI stays green with adapters (including native HTTP paths) across providers. + - Consider native async (`httpx.AsyncClient`) where high traffic warrants it and add async tests (achat/astream) accordingly. + +Stage 4: Optional Embeddings follow-up +- [ ] Define `EmbeddingsProvider` or extend ChatProvider where appropriate +- [ ] Port OpenAI embeddings and batch embeddings to adapter(s) +- [ ] Tests and endpoint parity + +Observability, Health, and Operations +- [ ] Integrate `provider_manager.record_success/record_failure` in orchestrator paths that call adapters +- [ ] Ensure http_client metrics emit for adapter calls; add optional adapter-level counters +- [ ] Keep prompt-safe logs using existing sanitization utilities + +Rollout & Safety +- [ ] Add feature flag (e.g., `LLM_ADAPTERS_ENABLED=1`) to switch routing to registry on a per-provider basis +- [ ] Canary enable providers (OpenAI first) in non-prod, then prod +- [ ] Rollback plan: flip flag to revert routing to legacy functions + +Compatibility & Parity Checks +- [ ] Streaming: exactly one final `[DONE]` from the endpoint (no duplicates) +- [ ] Tool calling: identical behavior for `tool_choice` and `tools` presence +- [ ] Error taxonomy: same HTTP status mapping at FastAPI layer +- [ ] Environment precedence for base URLs and keys matches legacy behavior + +Definition of Done (Phase 1–3) +- [ ] Registry and base adapter in place with docs +- [ ] OpenAI, Anthropic, Groq, OpenRouter, Google, Mistral adapters implemented and covered by tests +- [ ] `/api/v1/chat/completions` streaming and non-streaming regression tests pass +- [ ] Providers endpoint reports registry-backed capabilities +- [ ] Monolith branching removed; wrappers remain for compatibility; duplicated helpers deleted + +Reference Artifacts +- Base/Registry: `tldw_Server_API/app/core/LLM_Calls/providers/base.py`, `tldw_Server_API/app/core/LLM_Calls/adapter_registry.py` +- Shared Streaming: `tldw_Server_API/app/core/LLM_Calls/sse.py`, `tldw_Server_API/app/core/LLM_Calls/streaming.py`, `tldw_Server_API/app/core/http_client.py` +- Legacy Dispatch: `tldw_Server_API/app/core/Chat/provider_config.py` (to be updated to delegate) +- Health/Fallback: `tldw_Server_API/app/core/Chat/provider_manager.py` diff --git a/Docs/Design/Prompt_Studio_MCTS_Sequence_Optimization_PRD.md b/Docs/Design/Prompt_Studio_MCTS_Sequence_Optimization_PRD.md new file mode 100644 index 000000000..f604cdd51 --- /dev/null +++ b/Docs/Design/Prompt_Studio_MCTS_Sequence_Optimization_PRD.md @@ -0,0 +1,283 @@ +# Prompt Studio - MCTS Sequence Optimization (MCTS-OPS Inspired) PRD + +- Version: v1.0 (MVP without sandboxed code execution) +- Owner: Prompt Studio +- Stakeholders: API team, WebUI team, DB team +- Target Release: 1-2 sprints for MVP, +1 sprint for code evaluator + +## Overview + +Add a new optimization strategy ("mcts") that treats prompt design as sequential planning over multi-step prompt sequences with Monte Carlo Tree Search (MCTS). 
Leverage low-cost LLM scoring and reward backpropagation to explore, evaluate, and refine prompt sequences; optionally apply a feedback revision loop to low-reward candidates. Integrates with existing Prompt Studio endpoints, job queue, TestRunner, PromptExecutor, and WebSocket events. + +## Implementation Status (Rolling) + +- Status: In Progress +- Last Updated: [auto] + +Completed (MVP + MCTS core): +- API/schema: endpoint validation for `optimizer_type="mcts"` + `strategy_params` (range checks; includes `mcts_simulations`, `mcts_max_depth`, `mcts_exploration_c`, `prompt_candidates_per_node`, `score_dedup_bin`, `early_stop_no_improve`, `token_budget`, `feedback_*`, model overrides). +- Engine: `MCTSOptimizer` integrated with `OptimizationEngine` under strategy `"mcts"`. +- MCTS core algorithm: full tree search with Node(Q, N, parent/children, score_bin), UCT selection (`mcts_exploration_c`), expansion with `prompt_candidates_per_node` and sibling dedup using `score_dedup_bin`, simulation over multi-segment sequences (via `PromptDecomposer`), and backpropagation of rewards. +- Contextual generation: carries accumulated system context across segments for candidate creation; user content kept stable for evaluation. +- Optional feedback/refinement: honors `feedback_enabled`, `feedback_threshold`, `feedback_max_retries` by delegating to `IterativeRefinementOptimizer` and re-evaluating improved variants. +- Optimization MVP: iterative candidate variant generator with evaluation via existing `TestRunner`/`PromptExecutor`; early stop on no-improve. +- ProgramEvaluator Phase 2 (sandbox): feature-gated per project and env; extracts Python from LLM output, executes under isolated subprocess with import whitelist and no file/network, evaluates objective/constraints, and maps to reward [-1..10]; wired into `TestRunner` for `runner="python"` cases. +- PromptQualityScorer upgraded: optional cheap LLM scoring fallback (configurable `scorer_model`) blended with heuristics; in-memory TTL cache to reduce token usage; explicit `score_to_bin` helper for consistent dedup bins. +- Cost controls: MCTS tracks cumulative tokens for scorer/rephrase calls via `PromptExecutor` and enforces `token_budget` with early stop; `_call_llm` adds simple backoff/retry for 429/rate limits; in-memory caching for segment rephrases and evaluation results to avoid duplicate rollouts; optional DB-backed cache (sync_log) for scorer/rephrase/eval with TTL. +- Metrics + instrumentation: Records `sims_total`, `tree_nodes`, `avg_branching`, `best_reward`, `tokens_spent`, `duration_ms` via `prompt_studio_metrics.record_mcts_summary`. Error counters added (`prune_low_quality`, `prune_dedup`, `scorer_failure`, `evaluator_timeout`). +- WS lifecycle + cancellation: Broadcasts `OPTIMIZATION_STARTED` and `OPTIMIZATION_COMPLETED`; periodic cancellation checks exit long loops promptly. +- WebSocket: lifecycle events (started/completed) and throttled per-simulation progress broadcasts (iteration, current score, best score) via shared `EventBroadcaster`. Throttle interval configurable via `ws_throttle_every` (defaults ~ n_sims/50). +- Persistence (trace): Each throttled iteration is persisted via `record_optimization_iteration` with compact variant metadata (prompt_id, system_hash, preview). Final compact search trace (best path + top-K) included in `final_metrics.trace` for the optimization row. +- Feature gating: MCTS strategy is disabled by default; enabled in development via canary or explicitly with `PROMPT_STUDIO_ENABLE_MCTS=true`. 
Debug decision dumps controlled by `PROMPT_STUDIO_MCTS_DEBUG_DECISIONS=true`. +- Docs & Guides: See `Docs/Guides/Prompt_Studio_MCTS_Guide.md`, `Docs/Guides/Prompt_Studio_Program_Evaluator.md`, and `Docs/Guides/Prompt_Studio_Ablations.md`. +- Quality/Decomposition helpers: heuristic `PromptQualityScorer` (0..10) and `PromptDecomposer` (naive segment split); pruning via `min_quality` strategy param. +- Program Evaluator (Phase 2 groundwork): feature-flagged `ProgramEvaluator` stub (no code exec) wired into `TestRunner` for runner="python" cases; maps heuristic reward to aggregate score when enabled. +- OpenAPI example: added an `mcts` example payload to `/optimizations/create` for discoverability. + +In Progress / Planned next: +- ProgramEvaluator sandbox (actual execution) behind flag; per-project controls and resource limits. +- Docs: examples, UI notes, and ablation scripts; README WS payload samples and advanced usage. + - Tests: expand unit/integration/perf coverage; throttle WS for large n_sims. + +## Goals + +- Improve robustness on "hard" tasks by exploring prompt sequences, not just single prompts. +- Provide token-aware, budget-bounded optimization with early stops and deduplication. +- Stream real-time progress via existing Prompt Studio WebSocket (WS). + +## Non-Goals + +- No new public endpoints (use existing `/api/v1/prompt-studio/optimizations/create`). +- No WebUI redesign (rely on current WS channel and optimization views). +- No mandatory sandboxed code execution in MVP (added in v2 behind a feature flag). + +## Personas & Use Cases + +- Prompt engineers: Optimize prompts for difficult tasks with structured, multi-step sequences. +- QA/researchers: Run controlled experiments comparing strategies (iterative vs mcts) on the same test set. +- Developers: Tune performance/cost knobs; introspect search traces and best candidate path. + +## Functional Requirements + +### Strategy: "mcts" + +Inputs (via `optimization_config.strategy_params`): + +- `mcts_simulations` (int, default 20, 1-200) +- `mcts_max_depth` (int, default 4, 1-10) +- `mcts_exploration_c` (float, default 1.4, 0.1-5.0) +- `prompt_candidates_per_node` (int, default 3, 1-10) +- `score_dedup_bin` (float, default 0.1, 0.05-0.5) +- `feedback_enabled` (bool, default true) +- `feedback_threshold` (float 0-10, default 6.0) +- `feedback_max_retries` (int, default 2) +- `token_budget` (int, default 50_000) +- `early_stop_no_improve` (int, default 5) +- `scorer_model` (string, default small/cheap model) +- `rollout_model` (string, default configured model) +- `min_quality` (float 0-10, default 0.0) - prune low-quality variants pre-evaluation using heuristic scorer (implemented in MVP). + +MCTS loop: + +- Selection: UCT selects children by `Q/N + c * sqrt(log(Np)/N)`. +- Expansion: Generate K prompt variants for current segment, score each; bin scores by `score_dedup_bin` and reuse siblings with same bin to cap branching. +- Simulation: Build a candidate sequence; call PromptExecutor/TestRunner to get a numeric reward (0-10). Failures score -1. +- Backpropagation: Update Q, N along the path; track best-so-far. +- Optional feedback: If reward < threshold, apply one self-refine iteration and re-evaluate; use `max(reward, refined_reward)`. + +Decomposition & context: + +- Decompose task/goal into segments (context, instruction, constraints, examples). Keep 3-6 segments. +- Each next generation receives "context so far" to maintain coherence. 
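+
+Before the integration details below, a minimal sketch of the selection and backpropagation arithmetic from the MCTS loop above. The `Node` fields mirror the Q/N/score_bin description in this PRD; class and function names are illustrative and not the `MctsOptimizer` implementation.
+
+```python
+# Sketch of UCT selection (Q/N + c * sqrt(log(Np)/N)) and reward backpropagation.
+import math
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+@dataclass
+class Node:
+    score_bin: float = 0.0
+    Q: float = 0.0                      # accumulated reward
+    N: int = 0                          # visit count
+    parent: Optional["Node"] = None
+    children: List["Node"] = field(default_factory=list)
+
+
+def uct_select(parent: Node, c: float = 1.4) -> Node:
+    """Pick the child maximizing Q/N + c * sqrt(log(Np)/N); unvisited children first."""
+    def uct(child: Node) -> float:
+        if child.N == 0:
+            return float("inf")
+        return child.Q / child.N + c * math.sqrt(math.log(parent.N) / child.N)
+    return max(parent.children, key=uct)
+
+
+def backpropagate(leaf: Node, reward: float) -> None:
+    """Add the simulation reward (or -1 on failure) along the path back to the root."""
+    node: Optional[Node] = leaf
+    while node is not None:
+        node.N += 1
+        node.Q += reward
+        node = node.parent
+```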
+ +### Job Integration + +- Create via existing POST `/api/v1/prompt-studio/optimizations/create` with `optimizer_type="mcts"`. +- Run under job processor; stream progress via WS (per simulation and on best update). + +### Storage + +- Use existing optimization row + `record_optimization_iteration(...)` to persist per-simulation/iteration metrics; no schema change in v1. + +### Observability + +- Emit metrics: `sims_total`, `best_reward`, `avg_branching`, `nodes_expanded`, `token_spend`. +- WS events include current best reward, simulation index, and optional short trace summary. + - Implemented: WS progress broadcasts per simulation (current and best scores; pruned events). Metrics pending. + +## Non-Functional Requirements + +- Token/cost control: + - Token budget hard-cap; early stop on `early_stop_no_improve`. + - Use cheap model for PromptQualityScorer; reserve better model for final rollouts. +- Performance: + - Default simulations (20) complete within typical job SLAs; concurrency capped; backpressure via queue. +- Reliability: + - Fail closed; if scorer/LLM unavailable, job aborts gracefully with error message. +- Compatibility: + - Backwards compatible with existing API and storage. + +Implemented so far: +- Input validation and safe defaults; optional WS path used only when WS endpoints are loaded (no hard dependency). + +## Security & Privacy + +- MVP: No arbitrary code execution. +- v2 (optional): ProgramEvaluator behind feature flag + - Sandboxed execution (timeout, memory, no network/files); whitelist imports; capture stdout/stderr; scrub logs. + - Never log user secrets; redact inputs in traces. +- MVP: non-executing `ProgramEvaluator` stub wired under flag - no code runs; returns heuristic reward only when enabled. +- Rate limiting: + - Reuse existing Prompt Studio limits in endpoint deps. + +## User Experience + +- API flow: + - Client submits optimization with `optimizer_type="mcts"` and strategy params. + - Poll via GET optimization status or subscribe to WS for progress. + - On completion, response includes `optimized_prompt_id`, metrics, and summary. +- WebSocket: + - Broadcast simulation updates: `{optimization_id, sim_index, depth, reward, best_reward, token_spend_so_far}`. + - Final “completed” event with summary. + +## Architecture + +New components (under `tldw_Server_API/app/core/Prompt_Management/prompt_studio/`): + +- `MctsOptimizer`: Orchestrates tree search and reward loop; plugs into `OptimizationEngine`. +- `PromptDecomposer`: Simple LLM/heuristic splitter into 3-6 segments. +- `PromptQualityScorer`: Cheap LLM/heuristic scorer, returns 0-10 and a `score_bin`. +- `MctsTree` / `UctPolicy`: Node structs with Q, N, score_bin, prompt fragment; selection/expansion/backprop. +- `ContextualGenerator`: Uses `PromptExecutor._call_llm` directly to include “context so far”. +- `ProgramEvaluator` (v2): Optional sandboxed code runner. + +Implemented so far: +- `MctsOptimizer` (MVP iterative best-of-N search, early stop, WS broadcasts) +- `PromptQualityScorer` (heuristic) +- `PromptDecomposer` (heuristic) +- `ProgramEvaluator` (non-executing stub, feature-flagged) + `TestRunner` wiring + +Integration points: + +- `optimization_engine.py`: add routing for `optimizer_type == "mcts"`. +- `optimization_strategies.py`: house helper classes if shared across strategies. +- `api/v1/schemas/prompt_studio_optimization.py`: schema validation for mcts params. +- `api/v1/endpoints/prompt_studio_optimization.py` (create): validation guard rails. 
+- `job_processor.py`: status broadcasts compatible with WS `EventBroadcaster`. + +## API & Schemas + +Request example (POST `/api/v1/prompt-studio/optimizations/create`): + +```json +{ + "project_id": 1, + "initial_prompt_id": 12, + "test_case_ids": [1, 2, 3], + "optimization_config": { + "optimizer_type": "mcts", + "max_iterations": 20, + "target_metric": "accuracy", + "strategy_params": { + "mcts_simulations": 20, + "mcts_max_depth": 4, + "mcts_exploration_c": 1.4, + "prompt_candidates_per_node": 3, + "score_dedup_bin": 0.1, + "feedback_enabled": true, + "feedback_threshold": 6.0, + "feedback_max_retries": 2, + "token_budget": 50000, + "early_stop_no_improve": 5 + } + } +} +``` + +Validation: + +- Enforce numeric ranges; ensure non-negative budgets; cap candidates per node. + - Implemented in `/optimizations/create` strategy validation. + +## Scoring & Evaluation + +- MVP reward: + - Use `TestRunner.run_single_test` aggregate score (0-1) directly for optimization decisions and final payloads. + - Internal thresholds may use scaled values, but API responses and metrics remain 0-1. Failures (exceptions) contribute 0 unless otherwise specified. +- v2 reward (optional): + - For test cases marked “program” (runner="python"), run `ProgramEvaluator` with its internal reward mapping; normalize to 0-1 when aggregating for optimization results. + +## Metrics & Logging + +- Metrics (expose via `monitoring.py`): + - `prompt_studio.mcts.sims_total`, `prompt_studio.mcts.best_reward`, `prompt_studio.mcts.tree_nodes`, `prompt_studio.mcts.avg_branching`, `prompt_studio.mcts.tokens_spent`, `prompt_studio.mcts.duration_ms`. + - Error counters: `prompt_studio.mcts.errors_total{error=prune_low_quality|prune_dedup|scorer_failure|evaluator_timeout}`. +- Logs: + - Per simulation decision, reward, and improvement, throttled to avoid PII leakage. + - Implemented: metrics collection, lifecycle + throttled WS broadcasts, per-iteration DB traces. + +## Rollout Plan + +- Phase 1 (MVP): + - Implement `MctsOptimizer` with scorer and contextual generator; no code execution. + - Endpoint validation, WS progress events, metrics, docs. +- Current status: `MctsOptimizer` MVP, heuristic scorer/decomposer, WS progress, validation and docs completed. +- Phase 2 (Optional): + - Add `ProgramEvaluator` with secure sandbox; feature flag + config; basic code tasks. +- Current status: non-executing `ProgramEvaluator` stub and wiring added; sandbox execution pending. +- Phase 3: + - UI polish (use existing WS payloads), docs/examples, ablation scripts. + +## Acceptance Criteria + +- Can create an optimization with `optimizer_type="mcts"` that: + - Runs to completion within token budget and iterations. + - Emits WS updates and persists per-simulation iterations via `record_optimization_iteration`. + - Returns `optimized_prompt_id` with final metrics ≥ initial metrics on a seeded sample test set. +- Input validation rejects invalid strategy params with clear errors. +- No breaking changes to other strategies or endpoints. +- Metrics exposed without errors; logs do not include secrets. + - Current: WS progress is live; metrics to be added. + +## Test Plan + +- Unit (marker: `unit`): + - UCT selection favors higher UCT child; tie-breaking stable. + - Score binning deduplicates siblings correctly. + - Early stop triggers on no-improvement and budget exhaustion. +- Integration (marker: `integration`): + - Create → run → complete MCTS optimization against 3-5 toy test cases; best_reward improves vs baseline prompt. 
+ - WS: receive progress and completion events. + - Endpoint validation rejects out-of-range params. +- (Phase 2) Security: + - ProgramEvaluator timeouts; no file/network; unsafe imports blocked. + +## Risks & Mitigations + +- Token overuse: enforce `token_budget`, use cheap scorer, early stop. +- Noisy scorer: smooth via averaging across 2-3 low-cost calls or use heuristics (length, variable coverage). +- Latency: cap simulations; stream partial progress; allow cancellation via existing cancel endpoint. +- Sandboxing complexity (v2): keep optional; ship MVP without code exec. + +## Open Questions + +- Persist full MCTS tree for UI? MVP: store compact summaries in optimization `result` and per-iteration records. +- Preferred small model for scoring (OpenAI mini vs local)? Default to configured “fast” provider; make configurable per project. +- Decomposer LLM-based vs heuristic rule-based? MVP: heuristic with optional LLM assist when budget allows. + +## Dependencies + +- Reuse existing infra: PromptExecutor, TestRunner, JobManager, EventBroadcaster, DB methods. +- No new external libs for MVP; (optional) sandbox may need OS-level constraints if implemented. + +## Documentation + +- This PRD (Docs/Design/Prompt_Studio_MCTS_Sequence_Optimization_PRD.md). +- API docs: extend Prompt Studio Optimization section with mcts strategy params and examples. +- Add examples under `Docs/Examples/PromptStudio/mcts/` (follow-up task). + +## Milestones + +- M1 (Week 1): Schema validation, `MctsOptimizer` skeleton, heuristic scorer/decomposer, integration with `OptimizationEngine`, docs. (Done) +- M2 (Week 2): WS streaming (Done), metrics, cost controls, integration tests. Ship MVP. +- M3 (Week 3, optional): `ProgramEvaluator` behind feature flag, sandbox, tests. diff --git a/Docs/Design/RSS_Ranking.md b/Docs/Design/RSS_Ranking.md index da4de2675..b2670eb80 100644 --- a/Docs/Design/RSS_Ranking.md +++ b/Docs/Design/RSS_Ranking.md @@ -20,7 +20,7 @@ https://www.memeorandum.com/m/ https://github.com/CrociDB/bulletty https://feed-me-up-scotty.vincenttunru.com/ https://gitlab.com/vincenttunru/feed-me-up-scotty/ - +https://news.ycombinator.com/item?id=45825733 https://github.com/FreshRSS/FreshRSS https://github.com/prof18/feed-flow diff --git a/Docs/Design/Resource_Governor_PRD.md b/Docs/Design/Resource_Governor_PRD.md new file mode 100644 index 000000000..5df743b57 --- /dev/null +++ b/Docs/Design/Resource_Governor_PRD.md @@ -0,0 +1,758 @@ +# Resource Governance PRD (v1) + +## Summary + +Multiple independent rate limiters and quota mechanisms exist across the codebase with overlapping logic and inconsistent semantics (burst behavior, refunding, test bypass, metrics, persistence). This PRD proposes a unified ResourceGovernor capable of governing per-entity resource limits for requests, tokens, streams, jobs, and minutes using a shared interface and pluggable backends (in-memory and Redis) with consistent test-mode behavior, metrics tags, and refund semantics. 
+ +## Problem & Symptoms + +- Fragmented rate limiting/quota implementations per feature lead to duplication, drift, and inconsistent outcomes: + - Chat token bucket + per-conversation limits: `tldw_Server_API/app/core/Chat/rate_limiter.py:1` + - MCP in-memory/Redis limiter + category limiters: `tldw_Server_API/app/core/MCP_unified/auth/rate_limiter.py:1` + - Embeddings sliding window limiter: `tldw_Server_API/app/core/Embeddings/rate_limiter.py:1` + - Global SlowAPI limiter: `tldw_Server_API/app/api/v1/API_Deps/rate_limiting.py:1` + - Audio quotas (daily minutes, concurrent streams/jobs): `tldw_Server_API/app/core/Usage/audio_quota.py:1` +- Additional duplications not originally listed but present: + - AuthNZ DB/Redis limiter: `tldw_Server_API/app/core/AuthNZ/rate_limiter.py:1` + - Evaluations per-user limiter and usage ledger: `tldw_Server_API/app/core/Evaluations/user_rate_limiter.py:1` + - Character Chat limiter (Redis + memory): `tldw_Server_API/app/core/Character_Chat/character_rate_limiter.py:1` + - Web scraping rate limiters: `tldw_Server_API/app/core/Web_Scraping/enhanced_web_scraping.py:125` + - Embeddings server token-bucket decorator: `tldw_Server_API/app/core/Embeddings/Embeddings_Server/Embeddings_Create.py:1030` + +Symptoms: +- Inconsistent burst multipliers and windows; different interpretations of “per minute”. +- Hard-to-reason interactions between limiters (e.g., SlowAPI + per-module meters). +- Divergent test bypass logic (varied env flags, ad-hoc behavior). +- Inconsistent metrics (names, labels, presence) and poor cross-feature visibility. +- Code complexity and maintenance overhead; bugs from drift and duplicated env parsing. + +## Goals + +- One unified ResourceGovernor module to manage “per-entity resource limits” across categories: + - Categories: `requests`, `tokens`, `streams`, `jobs`, `minutes`. +- Pluggable backends: in-memory (single-instance) and Redis (multi-instance), chosen by configuration. +- Consistent API supporting reserve/commit/refund and query, with atomic composite reservations across categories when possible. +- First-class test-mode behavior (deterministic bypass or fixed limits) without per-feature custom parsing. +- Standardized metrics and tracing for allow/deny/wait/refund with consistent label sets. +- Compatibility shims for existing modules; incremental migration plan. + +Non-goals (v1): +- Redesigning pricing/billing or tier models. +- Replacing durable ledgers where they make sense (e.g., daily minutes table for audio). +- Removing SlowAPI entirely; it can remain as an ingress façade backed by the governor. + +## Personas & Entities + +- Persona: API user (API key/JWT user id), service client (MCP client id), conversation id (Chat), IP address (ingress fallback), system services. +- Entity key format: `scope:value` where scope ∈ {`user`, `api_key`, `client`, `ip`, `conversation`, `tenant`, `service`}. +- Effective entity: per endpoint determines which entity keys apply. Examples: + - Chat: `user:{id}`, optionally `conversation:{id}`; tokens reserved under `tokens` and request under `requests`. + - Audio stream: `user:{id}` governing `streams` semaphore and `minutes` ledger. + - MCP: `client:{id}` or `user:{id}` with `requests` in categories `ingestion` or `read` via tags. + +## Functional Requirements + +- Core interface: + - check(spec) → decision: Returns allow/deny with retry_after and metadata. + - reserve(spec, op_id) → handle: Reserves resources atomically across categories (best-effort rollback on partial failures). 
`op_id` is an idempotency key. + - commit(handle, actual_usage, op_id) → None: Finalizes reservation and records usage (e.g., minutes consumed, tokens used). Idempotent per `op_id`. + - refund(handle or delta, op_id) → None: Returns unused capacity (e.g., estimated vs actual tokens; failure paths). Idempotent per `op_id`. + - renew(handle, ttl_s) → None: Renews concurrency leases (streams/jobs) heartbeat before TTL expiry. + - release(handle) → None: Explicitly releases concurrency leases (streams/jobs) when finished. + - peek(query) → usage: Returns current usage and remaining headroom per category/entity. + - reset(entity/category) → None: Administrative reset. + +- Categories & semantics: + - `requests`: token-bucket or sliding-window RPM/RPS limits; burst configured. + - `tokens`: token-bucket for budgeted tokens per window (e.g., per minute). + - `streams`: semaphore-like concurrency limit (bounded integer counter) with lease TTL/heartbeat. + - `jobs`: semaphore-like concurrency limit with queue-aware labeling; optional per-queue limits. + - `minutes`: durable, per-day (UTC) ledger; supports add on commit and check before reserve. + +- Default algorithms and formulas: + - `requests`: token-bucket by default. Capacity `C = burst * rate * window`; refill at `rate` per second. Sliding-window may be selected per policy for very small-window accuracy. + - `tokens`: token-bucket by default. Units are model tokens when available; otherwise generic estimated tokens as a stand-in. + - `streams/jobs`: bounded counters with per-lease TTL; requires `renew` heartbeat to keep leases alive. + - `minutes`: durable daily cap; see Minutes Ledger Semantics. + +## Time Sources + +- All time calculations for windows, TTLs, and expirations use monotonic clocks via a `TimeSource` abstraction to avoid wall-clock jumps. +- `ResourceGovernor` accepts a `time_source` parameter (defaults to a monotonic provider). Tests inject a fake time source for deterministic control. + +- Composite reservation: Reserve in deterministic order to minimize deadlock; on failure, release prior reserves. + +- Test mode: + - Prefer a single project-wide flag `TLDW_TEST_MODE=true`; `RG_TEST_BYPASS` may override governor behavior for tests. + - In test mode: no burst (`burst=1.0`), deterministic timing, optional fixed limits via `RG_TEST_*` envs. + - Zero reliance on request headers for bypass. + +- Metrics & tracing: + - Metrics emitted on every decision: allow/deny, reserve/commit/refund, with labels: `category`, `scope`, `backend`, `result`, `reason`, `endpoint`, `service`, `policy_id`. Entity is excluded by default; optionally include a hashed entity label when `RG_METRICS_ENTITY_LABEL=true`. + - Gauges for concurrency (`streams_active`, `jobs_active`); counters for denials and refunds. + - Optional exemplars and trace IDs if tracing enabled. + +- Configuration: + - Policy source of truth: + - Production precedence (high→low): AuthNZ DB policy store → env overrides → YAML policy file → defaults. + - Development/Test precedence (high→low): env overrides → YAML policy file → defaults. + - Shared env var prefix `RG_*` (examples below) with legacy alias mapping for backward compatibility. + +## Non-Functional Requirements + +- Correctness under concurrency; atomicity across categories best-effort with rollback. +- Performance suitable for hot paths; constant-time checks and minimal allocations. +- Minimal lock contention; per-entity locks, monotonic time usage. 
+- Clean resource cleanup (idle entry GC) and Redis TTLs to prevent leaks. +- Backwards compatible rollout with shims and metrics parity. + +## Architecture & API + +- Module location: `tldw_Server_API/app/core/Resource_Governance/` + - `ResourceGovernor` (facade) — processes rules, composes category managers, handles composite reservations. +- Backends: + - `InMemoryBackend` — dicts + locks; token buckets, sliding windows, semaphores. + - `RedisBackend` — ZSET sliding windows, token buckets, and robust semaphore leases with TTL. + - Categories: + - `RequestsLimiter` (token bucket or sliding window per rule). + - `TokensLimiter` (token bucket with refund support). + - `ConcurrencyLimiter` (streams/jobs using counters with TTL + heartbeat). + - `MinutesLedger` (durable DB-backed; reuses audio minutes schema for v1 with abstract interface). + - Types: + - `EntityKey(scope: str, value: str)` + - `Category(str)`; `LimitSpec` (rate, window, burst, max_concurrent, daily_cap, etc.) + - `ReservationHandle(id, items, metadata, ttl, expires_at)` with implicit expiry tracking. + - `TimeSource` interface providing monotonic `now()`; default binds to `time.monotonic()`; tests can inject a fake time source. + +- Proposed Python signature (simplified): + +```python +@dataclass +class RGRequest: + entity: EntityKey + # Units: requests → 1 per HTTP call; tokens → model tokens (preferred) or estimated generic tokens. + categories: Dict[str, Dict[str, int]] # e.g., {"requests": {"units": 1}, "tokens": {"units": 1200}} + tags: Dict[str, str] = field(default_factory=dict) # endpoint, service, policy_id, etc. + +@dataclass +class RGDecision: + allowed: bool + retry_after: int | None + # details contains: { + # "policy_id": str, + # "categories": { + # "requests": {"allowed": bool, "limit": int, "used": int, "remaining": int, "retry_after": int | None}, + # "tokens": {"allowed": bool, "limit": int, "used": int, "remaining": int, "retry_after": int | None}, + # ... + # } + # } + details: Dict[str, Any] + +class ResourceGovernor: + async def check(self, req: RGRequest) -> RGDecision: ... + async def reserve(self, req: RGRequest, op_id: str | None = None) -> tuple[RGDecision, str]: ... # returns (decision, handle_id) + async def commit(self, handle_id: str, actuals: Dict[str, int] | None = None, op_id: str | None = None) -> None: ... + async def refund(self, handle_id: str, deltas: Dict[str, int] | None = None, op_id: str | None = None) -> None: ... + async def renew(self, handle_id: str, ttl_s: int) -> None: ... # concurrency lease heartbeat + async def release(self, handle_id: str) -> None: ... # explicit release for concurrency leases + async def peek(self, entity: EntityKey, categories: list[str]) -> Dict[str, Any]: ... + async def query(self, entity: EntityKey, category: str) -> Dict[str, Any]: ... # normalized diagnostics view + async def reset(self, entity: EntityKey, category: str | None = None) -> None: ... +``` + +- Atomicity strategy: + - For Redis: use Lua scripts or MULTI/EXEC to reserve multiple categories; on partial failure, rollback prior reservations. + - For memory: acquire category locks in stable order; on failure, release acquired reservations. + +- Redis concurrency lease design: + - Use a ZSET per entity/category (e.g., `rg:lease:::`) containing `member=lease_id`, `score=expiry_ts`. + - Acquire via Lua: purge expired (ZREMRANGEBYSCORE), check `ZCARD < limit`, `ZADD` new lease with expiry. Return `lease_id` as handle. + - Renew via `ZADD` with updated expiry for `lease_id`. 
Release via `ZREM` on `lease_id`. + - Periodic GC sweeps ensure eventual cleanup; avoid pure INCR/DECR to eliminate race hazards. + +- Refund semantics: + - Chat: reserve estimated tokens; on completion, commit actual tokens used and refund the difference. + - Failures: refund all prior reservations; log reason and emit refund metrics. + - Time-bounded reservations: auto-expire stale handles; periodic cleanup task. + - Safety: cap refunds by prior reservation per category to avoid negative usage; validate `actuals <= reserved` unless policy explicitly enables overage handling. + +- Handle lifecycle: + - `ReservationHandle` includes `expires_at` and `op_id`. Background sweeper reclaims expired handles across backends. + - All state transitions (reserve, commit, refund, renew, release, expire) include a `reason` for audit and metrics. + +- Policy composition semantics (strictest wins): + - For each category, compute remaining headroom per applicable scope (global, tenant, user, conversation, etc.). Effective headroom is the minimum across scopes (strictest constraint). + - Allow if the effective headroom ≥ requested units; otherwise deny for that category. + - Compute per-scope `retry_after`; the category’s `retry_after` is the maximum across denying scopes. Overall `retry_after` is the maximum across denied categories. + +## Configuration + +- New standardized env vars (legacy aliases maintained via mapping during migration): + - `RG_BACKEND`: `memory` | `redis` + - `RG_REDIS_URL`: Redis URL + - `REDIS_URL`: Redis URL (alias; used across infrastructure helpers) + - `RG_TEST_BYPASS`: `true|false` (defaults to honoring `TEST_MODE`) + - `RG_REDIS_FAIL_MODE`: `fail_closed` | `fail_open` | `fallback_memory` (defaults to `fallback_memory`). Controls behavior on Redis outages. + - Default `fallback_memory` favors availability for non-critical categories; consider `fail_closed` for strict write paths or global-coordination categories. + - `RG_CLIENT_IP_HEADER`: Header to trust for client IP when behind trusted proxies (e.g., `X-Forwarded-For`, `CF-Connecting-IP`). + - `RG_TRUSTED_PROXIES`: Comma-separated CIDRs for trusted reverse proxies; when unset, IP scope uses the direct remote address only. + - `RG_METRICS_ENTITY_LABEL`: `true|false` (default `false`). If true, include hashed entity label in metrics; otherwise exclude to avoid high cardinality. + - `RG_POLICY_STORE`: `file` | `db` (default `file`). In production, prefer `db` and use AuthNZ DB as SoT; in dev, `file` + env overrides. + - Test‑harness flags (diagnostics only): + - `RG_TEST_FORCE_STUB_RATE`: `true|false` forces in‑process sliding‑window logic for requests/tokens in Redis backend. Useful to make burst/steady tests deterministic when real Redis timing or clock skew affects retry_after near window boundaries. + - `RG_TEST_PURGE_LEASES_BEFORE_RESERVE`: `true|false` best‑effort purge of expired leases before reserve in tests to reduce flakiness. + +### Acceptance‑Window Fallback (Requests) + +Real Redis can occasionally report window counts near boundaries that admit a request even when a prior denial suggested a small retry_after. To keep behavior deterministic (especially in CI), the Redis backend maintains a per‑(policy, entity) “acceptance‑window” tracker for requests: + +- When the tracker observes that `limit` requests were accepted within the current window, further requests are denied until the window end (floor). This is an additive guard over ZSET counts, not a replacement. 
+- On denial, the guard sets a deny‑until floor to the end of the window to avoid early admits caused by rounding/drift. +- In test contexts, you can prefer the acceptance‑window path by setting `RG_TEST_FORCE_STUB_RATE=1`. + +### Policy Composition & Retry‑After + +- Composition (strictest wins): for each category, compute headroom per applicable scope (global, tenant, user, conversation); the effective headroom is the minimum across scopes. +- Deny when effective headroom < requested units. +- Retry‑After aggregation: per category, compute the maximum retry_after across denying scopes; the overall decision retry_after is the maximum across denied categories. This prevents premature retries when multiple scopes deny with different windows. + +### Metrics Labels & Cardinality + +- Counters/gauges: + - `rg_decisions_total{category,scope,backend,result,policy_id}` + - `rg_denials_total{category,scope,reason,policy_id}` + - `rg_refunds_total{category,scope,reason,policy_id}` + - `rg_concurrency_active{category,scope,policy_id}` +- Entity labels are excluded by default to avoid high cardinality; enable only for targeted debugging with `RG_METRICS_ENTITY_LABEL=true` and prefer sampled logs for per‑entity traces. + - `RG_POLICY_DB_CACHE_TTL_SEC`: TTL for DB policy cache (default 10s) when `RG_POLICY_STORE=db`. + +### Middleware Options (opt-in) + +- `RG_ENABLE_SIMPLE_MIDDLEWARE`: enable minimal pre-check middleware (requests category) using `route_map` resolution. +- `RG_MIDDLEWARE_ENFORCE_TOKENS`: when true, include `tokens` in middleware reserve/deny path and expose precise success headers + per-minute deny headers. +- `RG_MIDDLEWARE_ENFORCE_STREAMS`: when true, include `streams` in middleware reserve/deny path; on deny, return 429 with `Retry-After`. + +### Testing (integration) + +- `RG_REAL_REDIS_URL`: optional real Redis URL used by integration tests to validate multi-key Lua path; if absent or unreachable, those tests are skipped. `REDIS_URL` is also honored. + - Category defaults (fallbacks applied per module if unspecified): + - `RG_REQUESTS_RPM_DEFAULT`, `RG_REQUESTS_BURST` + - `RG_TOKENS_PER_MIN_DEFAULT`, `RG_TOKENS_BURST` + - `RG_STREAMS_MAX_CONCURRENT_DEFAULT`, `RG_STREAMS_TTL_SEC` + - `RG_JOBS_MAX_CONCURRENT_DEFAULT` + - `RG_MINUTES_DAILY_CAP_DEFAULT` (still enforced via durable ledger) + +- Back-compat mapping examples: + - `MCP_RATE_LIMIT_*` → RequestsLimiter rules for service `mcp`. + - Chat `TEST_CHAT_*` → test-mode overrides for chat-specific rules. + - Audio quotas envs (`AUDIO_*`) remain for `minutes` and concurrency defaults. + +- Test mode semantics: + - Prefer a single project-wide flag `TLDW_TEST_MODE=true`. + - `RG_TEST_BYPASS` overrides only the governor’s behavior; precedence: `RG_TEST_BYPASS` if set, else `TLDW_TEST_MODE`. + - In test mode, defaults: no burst (`burst=1.0`), deterministic timing, and optional fixed limits via `RG_TEST_*` envs. + +## Ingress Scoping & IP Derivation + +- Derive the effective entity for ingress using auth scopes when available (`user`, `api_key`, `client`). +- For `ip` scope behind proxies, require explicit configuration: + - Only trust `RG_CLIENT_IP_HEADER` when the immediate peer IP is within `RG_TRUSTED_PROXIES`. + - Otherwise, use the direct remote address. + - If both auth and IP are available, prefer auth scopes for rate limits; use IP as fallback. 
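+
+A minimal sketch of the IP-derivation rule above. The `RG_CLIENT_IP_HEADER`/`RG_TRUSTED_PROXIES` env names follow this PRD; the helper names and the single-hop forwarded-header parsing are illustrative assumptions, not the shipped code.
+
+```python
+# Sketch: trust the forwarded header only when the direct peer is a trusted proxy.
+import ipaddress
+import os
+from typing import Mapping, Optional, Tuple
+
+
+def derive_client_ip(peer_ip: str, headers: Mapping[str, str]) -> str:
+    header_name = os.getenv("RG_CLIENT_IP_HEADER", "")
+    cidrs = [c.strip() for c in os.getenv("RG_TRUSTED_PROXIES", "").split(",") if c.strip()]
+    if header_name and cidrs:
+        peer = ipaddress.ip_address(peer_ip)
+        if any(peer in ipaddress.ip_network(cidr, strict=False) for cidr in cidrs):
+            forwarded = headers.get(header_name, "")
+            if forwarded:
+                # Take the left-most hop; production parsing may need to be stricter.
+                return forwarded.split(",")[0].strip()
+    return peer_ip  # otherwise use the direct remote address only
+
+
+def derive_entity(user_id: Optional[str], peer_ip: str, headers: Mapping[str, str]) -> Tuple[str, str]:
+    """Prefer auth scopes when available; fall back to the ip scope."""
+    if user_id:
+        return ("user", user_id)
+    return ("ip", derive_client_ip(peer_ip, headers))
+```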
+ +## Policy DSL & Route Mapping + +- Central policy file in YAML (hot-reloadable) declares limits per category and scope with identifiers: + +```yaml +policies: + chat.default: + requests: { rpm: 120, burst: 2.0 } + tokens: { per_min: 60000, burst: 1.5 } + scopes: [global, user, conversation] + fail_mode: fail_closed + mcp.ingestion: + requests: { rpm: 60, burst: 1.0 } + scopes: [global, client] + fail_mode: fallback_memory +``` + +- Routes attach `policy_id` via FastAPI route tags or decorators. An ASGI middleware reads the tag and consults the governor. SlowAPI decorators remain as config carriers only. +- Policy reload: file watcher or periodic TTL check; swap policies atomically. Invalid updates are rejected with clear logs. +- Per-category overrides: policy `fail_mode` may override `RG_REDIS_FAIL_MODE` for that policy/category. +- Stub location: `tldw_Server_API/Config_Files/resource_governor_policies.yaml` provides default examples and hot-reload settings. +- Source of Truth in production: policies stored in AuthNZ DB (e.g., `rg_policies`) with JSON payloads and `updated_at` timestamps. + - Cache layer with TTL and/or change feed; hot-reload applies atomically across workers. + - Env vars remain as development overrides; DB wins in production when present. + +### Admin API (Minimal) + +- Read-only snapshot: + - `GET /api/v1/resource-governor/policy` → metadata (version, store, count); `?include=ids|full` for IDs or full payloads. +- Admin (requires `admin` role; single-user treated as admin): + - `GET /api/v1/resource-governor/policies` → list `{id, version, updated_at}` + - `GET /api/v1/resource-governor/policy/{policy_id}` → `{id, version, updated_at, payload}` + - `PUT /api/v1/resource-governor/policy/{policy_id}` → upsert JSON payload; optional explicit `version` (auto-increments if omitted) + - `DELETE /api/v1/resource-governor/policy/{policy_id}` → delete policy +- Behavior: + - When `RG_POLICY_STORE=db`, successful writes trigger best-effort PolicyLoader refresh; file store remains read-only. + - All responses include `{status: ok|error}` and details on errors; avoid logging PII. + +## Integration Plan (Phased Migration) + +Phase 0 — Ship ResourceGovernor (no integrations yet) +- Implement `ResourceGovernor` module with memory + Redis backends and category primitives. +- Add metrics emission via existing registry (labels: category, scope, backend, result, policy_id). +- Provide test-mode handling in one place. + +Phase 1 — MCP +- Replace `tldw_Server_API/app/core/MCP_unified/auth/rate_limiter.py` internals with a thin façade over ResourceGovernor categories `requests` with tags `category=ingestion|read`. +- Preserve public API (`get_rate_limiter`, `RateLimitExceeded`) to avoid breaking imports. + +Phase 2 — Chat +- Replace `ConversationRateLimiter` with `requests` + `tokens` categories. +- Keep per-conversation policy by composing the entity key `conversation:{id}` in addition to `user:{id}`. +- Maintain `initialize_rate_limiter` signature; under the hood, use ResourceGovernor. + +Phase 3 — SlowAPI façade +- Configure `API_Deps/rate_limiting.py` to use `limiter.key_func` for ingress scoping (`ip`/`user`) and delegate allow/deny to ResourceGovernor `requests` category before handlers. +- Keep decorator usage (`@limiter.limit(...)`) as a config carrier only. Map decorator strings to RG policies using route tags (e.g., `tags={"policy_id": "chat.default"}`) and an ASGI middleware that consults the governor. No in-SlowAPI counters. 
+- Policy resolution reads from the YAML policy file (see Policy DSL & Route Mapping) with hot-reload support. + +Phase 4 — Embeddings +- Replace `UserRateLimiter` with ResourceGovernor `requests` limits; for large-cost ops, optionally also a `tokens` category if desired. +- Remove ad-hoc env parsing; map legacy envs to `RG_*`. + +Phase 5 — Audio quotas +- Keep durable minutes ledger DB exactly as-is but implement limits via `minutes` category interface. +- Replace in-process concurrent `streams`/`jobs` counters with `ConcurrencyLimiter` (with Redis TTL heartbeat support). + +Phase 6 — Evaluations, AuthNZ, Character Chat, Web Scraping, Embeddings Server +- Gradually replace each with governor-backed categories; preserve public APIs during deprecation window. + +Phase 7 — Cleanup & removal +- Delete/retire old limiter implementations once their consumers are migrated. +- Keep minimal façade shims that import ResourceGovernor and raise deprecation warnings. + +## Deletions / Consolidation Targets + +- Replace and then delete (or shim): + - `tldw_Server_API/app/core/Chat/rate_limiter.py` + - `tldw_Server_API/app/core/MCP_unified/auth/rate_limiter.py` + - `tldw_Server_API/app/core/Embeddings/rate_limiter.py` + - `tldw_Server_API/app/api/v1/API_Deps/rate_limiting.py` (convert to façade) + - `tldw_Server_API/app/core/Usage/audio_quota.py` (concurrency + check plumbing via governor; keep minutes DB ledger implementation) + - Plus: `AuthNZ` limiter, `Evaluations` limiter, `Character_Chat` limiter, `Web_Scraping` limiters, and Embeddings server decorator limiter + +- Remove custom per-file env parsing once policy merges into shared config. + +## Metrics & Observability + +- Counters: + - `rg_decisions_total{category,scope,backend,result,policy_id}` (entity excluded by default; optionally include hashed entity when `RG_METRICS_ENTITY_LABEL=true`). + - `rg_refunds_total{category,scope,reason,policy_id}` + - `rg_denials_total{category,scope,reason,policy_id}` + - `rg_shadow_decision_mismatch_total{route,policy_id,legacy,rg}` (shadow-mode only; counts divergences between legacy limiter and RG decisions) +- Gauges: + - `rg_concurrency_active{category,scope,policy_id}` (for streams/jobs) +- Histograms: + - `rg_wait_seconds{category,scope,policy_id}` when wait/retry paths are used +- Logs: + - Structured with category, decision, retry_after, reason, policy_id; include `handle_id` and `op_id` where applicable. + - Never log raw `api_key`; mask or include only an HMAC/hashed form for diagnostics. Do not emit PII in logs. + +### HTTP Headers + +- For HTTP endpoints governed by the `requests` category, emit standard headers for compatibility during migration: + - `Retry-After: ` on 429 responses based on the overall decision’s `retry_after`. + - `X-RateLimit-Limit: ` reflects the strictest applicable limit for the `requests` category. + - `X-RateLimit-Remaining: ` reflects the remaining headroom under that strictest scope after the decision. + - `X-RateLimit-Reset: ` or `` until reset, aligned to the governing window. +- For concurrency denials (e.g., `streams`), return `429` with `Retry-After` set from the category decision; do not emit misleading `X-RateLimit-*` unless the route is also governed by `requests`. +- Maintain SlowAPI-compatible behavior on migrated routes to avoid client regressions. 
+ +- Tokens and per-minute headers (when applicable): + - When a `tokens` policy is active for a route and the middleware/enforcement layer peeks token usage, include: + - `X-RateLimit-Tokens-Remaining: ` + - If policy defines `tokens.per_min`, also include `X-RateLimit-PerMinute-Limit: ` and `X-RateLimit-PerMinute-Remaining: `. + - Success-path headers use a precise governor `peek` (strictest scope) to populate Remaining/Reset. Reset is computed as the maximum across governed categories to avoid premature retries. + +### Diagnostics + +- Capability probe (admin-only): `GET /api/v1/resource-governor/diag/capabilities` + - Returns a compact diagnostic payload indicating backend and code paths in use: + - `backend`: `memory` or `redis` + - `real_redis`: boolean indicating whether a real Redis client is connected (vs. an in-memory stub) + - `tokens_lua_loaded`, `multi_lua_loaded`: booleans for loaded scripts (Redis backend) + - `last_used_tokens_lua`, `last_used_multi_lua`: booleans indicating whether those code paths were exercised recently + - Use this endpoint to verify Lua/script capabilities and troubleshoot fallbacks in production. + +## Security & Privacy + +- Redaction: + - Treat API keys, user identifiers, and IPs as sensitive; never log raw values. Use hashed/HMAC forms with a server-secret salt for correlation when necessary. + - Metrics must not include high-cardinality PII. Do not emit raw entity values; optional hashed entity is gated behind `RG_METRICS_ENTITY_LABEL=true`. +- Tenant scope: + - Include `tenant:{id}` as a first-class scope from the outset, even if initial policies are no-op. This avoids retrofit costs and enables future isolation. The tenant id may be derived from a trusted header or JWT claim. +- Data minimization: + - Expose only aggregated counters/gauges/histograms. Keep detailed per-entity diagnostics in sampled logs with redaction. + +## Minutes Ledger Semantics + +- Daily accounting is based on UTC. When a usage period overlaps midnight UTC, split minutes across the two UTC days on `commit`. +- Retroactive commits are disallowed by default; optionally allow with an explicit `occurred_at` timestamp and policy gates. If allowed, minutes accrue to the UTC day of `occurred_at`. +- Rounding: track internal usage at sub-minute resolution; charge per policy rounding rules (e.g., ceil to nearest minute on commit) consistently. + +### Generic Daily Ledger (v1.1) + +- Plan: Introduce a generic `DailyLedger` abstraction to extend beyond `minutes` (e.g., `tokens_per_day`). +- Interface (concept): `add(entity, category, units, occurred_at_utc)`, `remaining(entity, category, day)`, `peek(entity, category)`, `reset(...)`. +- Storage: reuse existing DB with a generalized schema (`day_utc`, `entity`, `category`, `units`), plus indexes; migrate audio minutes to this ledger. +- Semantics: UTC-based partitioning; consistent rounding per policy; idempotent commits via `op_id`. +- Rollout: shadow existing minutes ledger first; then cut over with migration script. Target version: v1.1. 
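+
+To make the midnight-UTC split described under Minutes Ledger Semantics concrete, a small sketch of a hypothetical commit-time helper (not the ledger implementation); charging ceil per day segment is one possible reading of the per-policy rounding rule.
+
+```python
+# Sketch: split a usage interval at UTC midnight and charge whole minutes per day.
+import math
+from datetime import datetime, timedelta, timezone
+from typing import Dict
+
+
+def split_minutes_by_utc_day(start: datetime, end: datetime) -> Dict[str, int]:
+    start, end = start.astimezone(timezone.utc), end.astimezone(timezone.utc)
+    charges: Dict[str, int] = {}
+    cursor = start
+    while cursor < end:
+        next_midnight = datetime.combine(
+            cursor.date() + timedelta(days=1), datetime.min.time(), tzinfo=timezone.utc
+        )
+        segment_end = min(end, next_midnight)
+        seconds = (segment_end - cursor).total_seconds()
+        charges[cursor.date().isoformat()] = math.ceil(seconds / 60)  # ceil per policy
+        cursor = segment_end
+    return charges
+
+
+# Example: 23:30 -> 00:45 UTC charges 30 minutes to day one and 45 to day two.
+print(split_minutes_by_utc_day(
+    datetime(2025, 11, 3, 23, 30, tzinfo=timezone.utc),
+    datetime(2025, 11, 4, 0, 45, tzinfo=timezone.utc),
+))
+```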
+ +## Database Schemas + +### Policy Store (AuthNZ DB) + +- PostgreSQL + +```sql +CREATE TABLE IF NOT EXISTS rg_policies ( + id TEXT PRIMARY KEY, -- policy_id, e.g., 'chat.default' + payload JSONB NOT NULL, -- full policy object + version INTEGER NOT NULL DEFAULT 1, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- Optional index for updated_at for fast latest reads +CREATE INDEX IF NOT EXISTS idx_rg_policies_updated_at ON rg_policies (updated_at DESC); +``` + +- SQLite + +```sql +CREATE TABLE IF NOT EXISTS rg_policies ( + id TEXT PRIMARY KEY, + payload TEXT NOT NULL, -- JSON-encoded + version INTEGER NOT NULL DEFAULT 1, + updated_at TEXT NOT NULL -- ISO8601 UTC +); + +CREATE INDEX IF NOT EXISTS idx_rg_policies_updated_at ON rg_policies (updated_at); +``` + +Notes: +- The server constructs a merged snapshot from all rows keyed by `id` with the latest `updated_at`. +- In production, the AuthNZ subsystem owns read/write APIs for this table. + +### Generic Daily Ledger (v1.1) + +- PostgreSQL + +```sql +CREATE TABLE IF NOT EXISTS resource_daily_ledger ( + id BIGSERIAL PRIMARY KEY, + day_utc DATE NOT NULL, + entity_scope TEXT NOT NULL, -- e.g., 'user', 'client', 'tenant' + entity_value TEXT NOT NULL, -- identifier for the scope (PII handling at app layer) + category TEXT NOT NULL, -- e.g., 'minutes', 'tokens_per_day' + units BIGINT NOT NULL CHECK (units >= 0), + op_id TEXT NOT NULL, -- idempotency key + occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE UNIQUE INDEX IF NOT EXISTS uq_ledger_op ON resource_daily_ledger (day_utc, entity_scope, entity_value, category, op_id); +CREATE INDEX IF NOT EXISTS idx_ledger_lookup ON resource_daily_ledger (entity_scope, entity_value, category, day_utc); +``` + +- SQLite + +```sql +CREATE TABLE IF NOT EXISTS resource_daily_ledger ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + day_utc TEXT NOT NULL, -- 'YYYY-MM-DD' + entity_scope TEXT NOT NULL, + entity_value TEXT NOT NULL, + category TEXT NOT NULL, + units INTEGER NOT NULL, + op_id TEXT NOT NULL, + occurred_at TEXT NOT NULL, -- ISO8601 UTC + created_at TEXT NOT NULL -- ISO8601 UTC +); + +CREATE UNIQUE INDEX IF NOT EXISTS uq_ledger_op ON resource_daily_ledger (day_utc, entity_scope, entity_value, category, op_id); +CREATE INDEX IF NOT EXISTS idx_ledger_lookup ON resource_daily_ledger (entity_scope, entity_value, category, day_utc); +``` + +Notes: +- App layer enforces `units >= 0` and splits usage across UTC day boundaries at commit time. +- Over-aggregation (e.g., totals table) can be added later if needed for performance. + +### Cross-Category Budgets (Modeling) + +- Future concept: define a `cost_unit` conversion map in policy (e.g., 1 token = 0.001 CU, 1 request = 1 CU) to track budget consumption uniformly across categories without changing enforcement semantics. +- Implement later (post v1.1) to avoid scope creep; used for analytics and optional budget caps. + +## Test Strategy + +- Unit tests (memory backend): + - Token bucket and sliding window correctness for `requests` and `tokens`. + - Concurrency limiter (acquire/release/heartbeat/TTL expiry). + - Minutes ledger adapter (mock DB) correctness across day boundaries (UTC). + - Composite reservation rollback and idempotent refunding. + - Test-mode bypass and deterministic burst behavior. + - Mockable `TimeSource` injection to drive time-dependent behavior deterministically. 
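+
+A minimal sketch of the fake `TimeSource` referenced in the last bullet above; only the monotonic `now()` contract comes from this PRD, and the remaining class and method names are assumptions.
+
+```python
+# Sketch: injectable time sources so window/TTL math is deterministic in tests.
+import time
+from typing import Protocol
+
+
+class TimeSource(Protocol):
+    def now(self) -> float: ...
+
+
+class MonotonicTimeSource:
+    """Default provider bound to time.monotonic()."""
+    def now(self) -> float:
+        return time.monotonic()
+
+
+class FakeTimeSource:
+    """Test double: time advances only when the test calls advance()."""
+    def __init__(self, start: float = 0.0):
+        self._now = start
+
+    def now(self) -> float:
+        return self._now
+
+    def advance(self, seconds: float) -> None:
+        self._now += seconds
+
+
+# Example: step a 60-second window deterministically inside a unit test.
+clock = FakeTimeSource()
+clock.advance(59.9)   # still inside the window
+clock.advance(0.2)    # crosses the 60s boundary
+assert clock.now() > 60.0
+```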
+ +- Unit tests (Redis backend): + - Lua script operations for sliding window and token bucket; atomic composite reservations. + - Redis TTL behavior and cleanup. + +- Integration tests: + - Replace MCP limiter via façade; verify 429 and retry headers remain correct. + - Chat path: estimated token reservation and refund with actual usage from provider responses. + - Audio streaming: enforce `streams` concurrency and daily `minutes` cap, including heartbeat. + - SlowAPI façade routes: verify ingress keys map to governor and rate limits apply consistently. + - Failover modes: verify `fail_closed`, `fail_open`, and `fallback_memory` behaviors under Redis outage simulation. + +- Chaos tests: + - Induce Redis outages and network partitions; assert behavior per `RG_REDIS_FAIL_MODE`. Validate metrics emit `backend=fallback` and decisions match expectations. + - Simulate wall-clock drift vs monotonic time; ensure window math uses monotonic source and remains stable. + +- Property-based tests: + - Verify token-bucket vs sliding-window equivalence under selected parameter sets (e.g., large windows, steady inter-arrival, low burst). Use Hypothesis to generate arrival patterns; assert admitted counts converge within tolerance. + +- Concurrency stress tests: + - High-contention acquire/release with lease TTL expiry, overlapping `renew` and `release`. Validate no leaks, no double-release, and correct ZSET membership behavior under churn. + +- Shadow-mode validation: + - Run legacy limiter and RG in parallel; emit delta metric when decisions differ; fail test on sustained mismatches. Cover requests/tokens and concurrency categories. + +- Coverage targets: ≥ 80% for the new module with both backends; keep existing suites green. + +## Rollout & Compatibility + +- Feature flags: `RG_ENABLED=true|false` (default true in dev; off-by-default can be considered for safety in production). +- Legacy env compatibility layer logs a warning once per process on use. +- Shadow mode (optional): evaluate decisions with RG and existing limiter in parallel, emit delta metrics, and compare before cutover. + +### Per-Module Feature Flags + +- In addition to the global toggle, each integration can be enabled/disabled independently during migration: + - `RG_ENABLE_MCP` + - `RG_ENABLE_CHAT` + - `RG_ENABLE_SLOWAPI` + - `RG_ENABLE_AUDIO` + - `RG_ENABLE_EMBEDDINGS` + - `RG_ENABLE_EVALUATIONS` + - `RG_ENABLE_AUTHNZ` + - `RG_ENABLE_CHARACTER_CHAT` + - `RG_ENABLE_WEB_SCRAPING` + - `RG_ENABLE_EMBEDDINGS_SERVER` +- Convention: any unset module flag inherits from `RG_ENABLED`. + +### Compat Map (Legacy → RG) + +- General rules: + - When both legacy and RG envs are set, RG envs take precedence. + - On process start, detect legacy envs in use and log a once-per-process deprecation warning with the mapped `RG_*` equivalent and a removal target version. + - Where applicable, legacy decorator parameters (e.g., SlowAPI) are ignored once RG integration is enabled; their presence is logged as informational with the resolved `policy_id`. 
+ +- MCP (examples): + - `MCP_RATE_LIMIT_RPM` → policy `mcp.ingestion.requests.rpm` + - `MCP_RATE_LIMIT_BURST` → policy `mcp.ingestion.requests.burst` + - `MCP_REDIS_URL` → `RG_REDIS_URL` (alias) + - `MCP_RATE_LIMIT_TEST_BYPASS` → `RG_TEST_BYPASS` + +- Chat (examples): + - `CHAT_GLOBAL_RPM` → policy `chat.default.requests.rpm` (scope `global`) + - `CHAT_PER_USER_RPM` → policy `chat.default.requests.rpm` (scope `user`) + - `CHAT_PER_CONVERSATION_RPM` → policy `chat.default.requests.rpm` (scope `conversation`) + - `CHAT_PER_USER_TOKENS_PER_MINUTE` → policy `chat.default.tokens.per_min` + - `TEST_CHAT_*` → `RG_TEST_*` or policy test overrides + +- SlowAPI (examples): + - `SLOWAPI_GLOBAL_RPM` → policy `ingress.default.requests.rpm` + - `SLOWAPI_GLOBAL_BURST` → policy `ingress.default.requests.burst` + - Decorator strings remain as config carriers; actual enforcement is via RG when `RG_ENABLE_SLOWAPI=true`. + +- Audio (examples): + - `AUDIO_DAILY_MINUTES_CAP` → policy `audio.default.minutes.daily_cap` + - `AUDIO_MAX_CONCURRENT_STREAMS` → policy `audio.default.streams.max_concurrent` + - `AUDIO_STREAM_TTL_SEC` → `RG_STREAMS_TTL_SEC` + +- Embeddings (examples): + - `EMBEDDINGS_RPM` → policy `embeddings.default.requests.rpm` + - `EMBEDDINGS_BURST` → policy `embeddings.default.requests.burst` + +- Evaluations/AuthNZ/Character Chat/Web Scraping (examples): + - `EVALS_RPM` → policy `evals.default.requests.rpm` + - `AUTHNZ_RPM` → policy `authnz.default.requests.rpm` + - `CHARACTER_CHAT_RPM` → policy `character_chat.default.requests.rpm` + - `WEB_SCRAPING_RPM` → policy `web_scraping.default.requests.rpm` + +### SlowAPI ASGI Middleware + +- Provide an ASGI middleware adapter (e.g., `RGSlowAPIMiddleware`) that: + - Extracts `policy_id` from route tags/decorators. + - Derives the effective entity (auth scopes preferred; IP fallback with trusted-proxy rules). + - Calls RG `check/reserve` before handler; on deny, returns 429 with headers; on allow, sets `X-RateLimit-*` headers and proceeds. + - On completion, performs `commit/refund` as applicable; handles streaming by renewing/releasing leases. + - When `RG_ENABLE_SLOWAPI=false`, middleware is disabled and legacy SlowAPI behavior remains. + +## Risks & Mitigations + +- Partial failures across categories → perform deterministic order, rollback on failure, log anomalies. +- Redis outages → auto-fallback to in-memory with warning; emit `backend=fallback` metric tag. +- Behavior drift from legacy implementations → shadow mode comparisons and golden tests. +- Test flakiness with time windows → use monotonic time and deterministic burst in `TLDW_TEST_MODE`. +- Metrics cardinality → exclude `entity` from metric labels by default; optionally include hashed entity via `RG_METRICS_ENTITY_LABEL`; sample per-entity logs for diagnostics. +- Concurrency lease management → provide explicit `renew` and `release`; use per-lease IDs and TTLs; GC expired leases. +- IP scoping behind proxies → require `RG_TRUSTED_PROXIES` and `RG_CLIENT_IP_HEADER` to trust forwarded addresses; prefer auth scopes over IP when available. +- Policy composition ambiguity → define strictest-wins semantics (min headroom across applicable scopes) per category; compute `retry_after` as max across denying scopes and categories. +- Fallback-to-memory over-admission → make behavior configurable via `RG_REDIS_FAIL_MODE` (default `fallback_memory`); emit metrics on failover; consider per-category overrides. 
+- Idempotency on retries → require `op_id` for reserve/commit/refund; operations are idempotent per `op_id` and handle. +- Minutes ledger edge cases → split usage across UTC day boundaries; define rounding rules; restrict retroactive commits or require `occurred_at`. +- Env flag drift → standardize on `TLDW_TEST_MODE`; `RG_TEST_BYPASS` only overrides governor behavior with documented precedence. + +## Open Questions + +- Minutes generalization: planned for v1.1 via a generic DailyLedger (see Minutes Ledger Semantics). For v1, reuse audio minutes ledger only. +- Cross-category budgets: do we want a global “cost units” budget that maps tokens/requests into a unified spend? +- Tier/source of truth: adopt AuthNZ DB as the policy SoT in production with cache + hot-reload; keep env+YAML as dev overrides. +- Multi-tenant isolation: do we introduce `tenant:{id}` as a first-class scope now? + + +## Acceptance Criteria + +- New `ResourceGovernor` module with memory + Redis backends and the specified API. +- MCP, Chat, and SlowAPI ingress paths migrated to the unified governor with no regression in public API or tests. +- Audio streams concurrency and minutes cap enforced via the governor, with durable minutes persisted as before. +- Embeddings limiter replaced; Evaluations/AuthNZ/Character Chat/Web Scraping scheduled for follow-on. +- Consistent test-mode bypass and refund semantics demonstrated in tests. +- Metrics emitted with the standardized label set; basic dashboards updated. +- Compat map documented and implemented with deprecation warnings for legacy envs. +- Per-module feature flags available and honored during phased rollout. +- Roadmap captured: v1.1 generic DailyLedger plan documented; cross-category budget model noted for future. + +## Appendix — Mapping table (initial examples) + +- Chat + - Before: `ConversationRateLimiter` with `global_rpm`, `per_user_rpm`, `per_conversation_rpm`, `per_user_tokens_per_minute`. + - After: `requests` for global/user/conversation via policy rules; `tokens` per user with burst; refund on completion. + +- MCP + - Before: in-memory/Redis with `ingestion` and `read` categories. + - After: `requests` with tag `category=ingestion|read`; same RPMs, Redis kept via backend. + +- Audio + - Before: DB-backed daily minutes + in-process/Redis counters for streams/jobs. + - After: `minutes` via durable ledger adapter; `streams`/`jobs` via `ConcurrencyLimiter` with TTL heartbeat. + +- SlowAPI + - Before: global limiter with key_func sentinel for TEST_MODE. + - After: façade that derives entity key and delegates to `requests` governor, retaining decorators for route config. + +- Embeddings + - Before: sliding window per user. + - After: `requests` for per-user RPM with burst support via governor rules. + +- Evaluations/AuthNZ/Character Chat/Web Scraping + - Before: bespoke. + - After: move to governor with appropriate categories; keep per-feature knobs as policy inputs. + +## Implementation Plan (v1 Roadmap) + +Stage 0 — Spec Alignment & Stubs +- Goal: Lock semantics and prepare scaffolding for incremental delivery. +- Deliverables: + - Clarify policy composition (strictest-wins per category; retry_after = max across denying scopes/categories) and default algorithms (token bucket first, sliding window where appropriate). + - Guard metrics cardinality: exclude `entity` by default; gate hashed entity behind `RG_METRICS_ENTITY_LABEL=true`. 
+ - Add stub policy YAML at `tldw_Server_API/Config_Files/resource_governor_policies.yaml` with examples from “Policy DSL & Route Mapping”. + - Finalize envs: `RG_POLICY_STORE`, `RG_REDIS_FAIL_MODE`, `RG_METRICS_ENTITY_LABEL`, `RG_CLIENT_IP_HEADER`, `RG_TRUSTED_PROXIES`. +- Success Criteria: + - YAML stub loads; envs documented; PRD clarifications merged. +- Tests: + - YAML schema/load test (file store) and basic validation of policy fields. + +Stage 1 — Core ResourceGovernor Library +- Goal: Implement core API and in-memory backend with deterministic tests. +- Deliverables: + - `ResourceGovernor` with `check/reserve/commit/refund/renew/release/peek/query/reset` and idempotency via `op_id`. + - Memory backend implementations: token bucket + sliding window for `requests/tokens`; semaphore for `streams/jobs` with lease TTL; thin adapter for existing minutes ledger. + - Handle lifecycle with `expires_at`, background sweeper, refund safety (cap by prior reservation). + - `TimeSource` (monotonic) injectable for tests. + - Metrics: `rg_decisions_total{category,scope,backend,result,policy_id}`, `rg_denials_total{...}`, `rg_refunds_total{...}`, gauges for `rg_concurrency_active{...}`. +- Success Criteria: + - ≥80% coverage for core module; stable unit tests; deterministic behavior in `TLDW_TEST_MODE`. +- Tests: + - Unit tests for token bucket/sliding window, composite reservations, idempotent commit/refund, concurrency leases (memory), and handle expiry using mock time. + +Stage 2 — Redis Backend & Concurrency Leases +- Goal: Ship Redis path with safe lease management and fail modes. +- Deliverables: + - Lua/MULTI-EXEC operations for windows and atomic multi-category reservations. + - ZSET-based leases per entity/category with acquire/renew/release + GC; TTL heartbeat. + - `RG_REDIS_FAIL_MODE=fail_closed|fail_open|fallback_memory` honored; per-policy overrides respected. +- Success Criteria: + - Concurrency stress tests show no leaks/double-release; failover behavior observable via `backend=fallback` metrics. +- Tests: + - Redis unit/integration tests for leases/TTL/renew; chaos tests simulating Redis outage and clock skew; property tests for windows under selected parameters. + +Stage 3 — Policy Layer (Store/Loader) & Health +- Goal: Centralize policies and expose observability. +- Deliverables: + - `PolicyLoader` with `file` and `db` stores; cache TTL (`RG_POLICY_DB_CACHE_TTL_SEC`); hot-reload. + - Wire selection via `RG_POLICY_STORE` in settings/config; env overrides in dev. + - AuthNZ-backed `PolicyStore` (read-only) reading `rg_policies` (Postgres/SQLite variants) + sample seed helper. + - Health endpoint: `GET /api/v1/resource-governor/health` → `{store, snapshot_version, policy_count, updated_at}`. +- Success Criteria: + - Health endpoint returns live snapshot data; DB store works with AuthNZ Postgres fixture. +- Tests: + - SQLite unit test for `AuthNZPolicyStore` and seed helper. + - Postgres-based test using existing Postgres fixtures (if available) for both `PolicyStore` and `DailyLedger` plumbing readiness. + - Integration test verifying `/health` reports policy snapshot metadata. + +Stage 4 — Ingress Middleware & Header Compatibility +- Goal: Replace ingress counting with a thin governor façade. +- Deliverables: + - ASGI middleware (SlowAPI façade) reading route tags/decorators to resolve `policy_id` and derive entity (auth scopes preferred; IP fallback with trusted-proxy rules). + - Enforce via `check/reserve` pre-handler; `commit/refund` post-handler; support streaming renew/release. 
+  - Standard headers mapping: `Retry-After`, `X-RateLimit-*` for `requests` where applicable.
+  - Logging: mask/HMAC sensitive fields; include `handle_id`, `op_id`, `policy_id`, `denial_reason`.
+- Success Criteria:
+  - No double-counting; header compatibility verified; decorator strings map to policies via tags.
+- Tests:
+  - Integration tests covering allowed/denied paths, header values, proxy scoping with `RG_TRUSTED_PROXIES` and `RG_CLIENT_IP_HEADER`.
+
+Stage 5 — Module Integrations (MCP, Chat, Embeddings, Audio)
+- Goal: Migrate high-impact modules with feature flags and parity tests.
+- Deliverables:
+  - MCP: replace limiter with RG `requests` and tags `category=ingestion|read`.
+  - Chat: combine `requests` + `tokens`; idempotent reserve→commit(actuals)→refund(delta) flow.
+  - Embeddings: unify to RG `requests`; property tests for window equivalence under steady load.
+  - Audio: `streams` semaphore with TTL heartbeat; continue durable `minutes` via existing ledger; add minimal `DailyLedger` DAL wrapper with `remaining(daily_cap)` and `peek_range` (SQLite + Postgres paths) to prep v1.1.
+  - Per-module flags (`RG_ENABLE_*`) inherit from `RG_ENABLED`.
+- Success Criteria:
+  - MCP/Chat/Embeddings parity (HTTP behavior, headers); audio streams enforce concurrency; minutes charging unchanged.
+- Tests:
+  - Module-specific integration tests; Postgres tests for `DailyLedger.peek_range` using `test_db_pool` fixture where available.
+
+Stage 6 — Admin API, Observability & Rollout
+- Goal: Manage policies safely and cut over with guardrails.
+- Deliverables:
+  - Admin policy endpoints (PUT/DELETE/GET) gated by admin auth; file store remains read-only.
+  - Postgres seeder for `rg_policies` and example seed data.
+  - Shadow-mode decision delta metric (legacy vs RG) and basic dashboards for `rg_*` metrics.
+  - Compat map + deprecation warnings; per-module rollout plan (enable MCP/Chat first, then Embeddings/SlowAPI, then Audio).
+- Success Criteria:
+  - Admin endpoints tested; dashboards populated; shadow-mode shows near-zero drift pre-cutover; staged flags allow safe rollback.
+- Tests:
+  - Admin API integration test for `/api/v1/resource-governor/policy` endpoints.
+  - Shadow-mode drift alert test (delta metric non-zero on injected mismatch).
+
+Post v1.0 (Planned v1.1)
+- Generic `DailyLedger` for tokens-per-day and future categories; migration of audio minutes to generic ledger.
+- Cross-category “cost unit” modeling for analytics and optional budgets (no enforcement changes).
+- Additional providers/integrations as needed.
diff --git a/Docs/Design/Search.md b/Docs/Design/Search.md
index 90b43366b..a50490e2b 100644
--- a/Docs/Design/Search.md
+++ b/Docs/Design/Search.md
@@ -9,7 +9,7 @@ https://arxiv.org/abs/2501.05366
 https://github.com/ItsArnavSh/gitfindr
 https://exa.ai/
 https://huggingface.co/Menlo/Lucy-gguf
-
+https://github.com/glacier-creative-git/knowledge-graph-traversal-semantic-rag-research
 https://ii.inc/web/blog/post/ii-search
 https://ii.inc/web/blog/post/ii-researcher
 https://github.com/Intelligent-Internet/ii-researcher
diff --git a/Docs/Design/Stream_Abstraction_PRD.md b/Docs/Design/Stream_Abstraction_PRD.md
new file mode 100644
index 000000000..3851c8d7c
--- /dev/null
+++ b/Docs/Design/Stream_Abstraction_PRD.md
@@ -0,0 +1,655 @@
+# Stream Abstraction — PRD
+
+- Status: Pilot Rollout (under STREAMS_UNIFIED)
+- Last Updated: 2025-11-04
+- Authors: Codex (coding agent)
+- Stakeholders: API (Chat/Embeddings), Audio, MCP, WebUI, Docs
+
+---
+
+## 1.
Overview + +### 1.1 Summary +Unify streaming across Server‑Sent Events (SSE) and WebSockets under a single abstraction so features share consistent framing, normalization, heartbeat, and completion semantics. Introduce an `AsyncStream` interface with transport‑specific implementations (`SSEStream`, `WebSocketStream`) that route all provider data through a single normalization path and standardized DONE/error frames (with canonical error codes). + +### 1.2 Motivation & Background +- Symptom: repeated, inconsistent SSE/WebSocket line formatting, normalization, and completion handling across endpoints and modules. +- Duplicates/examples today: + - Endpoint‑local SSE line builder: `tldw_Server_API/app/api/v1/endpoints/character_chat_sessions.py:1..120` (`_extract_sse_data_lines`). + - Central SSE helpers already exist: `tldw_Server_API/app/core/LLM_Calls/sse.py`. + - Provider line normalization scattered: `tldw_Server_API/app/core/LLM_Calls/streaming.py`. + - SSE emitters in embeddings orchestrator: `tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py:3500+`. + - WebSockets in Audio and MCP with similar framing/heartbeat/error behavior: + - `tldw_Server_API/app/core/Ingestion_Media_Processing/Audio/Audio_Streaming_Unified.py`. + - `tldw_Server_API/app/core/MCP_unified/server.py`. + +Unifying principle: All outputs are streams — just different transports. + +### 1.3 Goals +1. Single, composable interface for streaming outputs across transports. +2. One normalization path for provider outputs (OpenAI‑compatible SSE chunks; consistent WS frames). +3. Standard DONE and error semantics (code + message); no duplicate `[DONE]` emission. +4. Consistent heartbeat/keepalive policy for SSE and WS. +5. Reduce code duplication and simplify endpoint logic. +6. Provide clear backpressure behavior for SSE (bounded queue) and consistent WS close code mapping. + +### 1.4 Non‑Goals +- Changing wire payload shapes for domain data (e.g., audio partials, MCP JSON‑RPC responses). The abstraction standardizes framing/lifecycle, not domain schemas. +- Introducing a new message bus or queueing layer. + +### 1.5 Current Status + +- Abstractions implemented with metrics: SSEStream and WebSocketStream (complete). +- Provider control pass‑through + SSE idle/max enforcement (complete). +- Chat SSE pilots behind STREAMS_UNIFIED (complete): + - Character chat SSE, main chat completions SSE, and document‑generation SSE paths unified; duplicate [DONE] suppressed; metrics flowing. +- Embeddings orchestrator SSE behind flag (complete): + - Preserves `event: summary`; emits heartbeats and standardized non‑fatal error frames when configured. +- Evaluations SSE (abtest events) unified (complete): + - Uses SSEStream with labels; standardized heartbeats; DONE semantics. +- Jobs Admin SSE (events outbox) unified (complete): + - Uses SSEStream; preserves `id:` and `event:` lines for clients using Last‑Event‑ID. +- Prompt Studio SSE fallback unified behind flag (new): + - Uses SSEStream when STREAMS_UNIFIED=1; retains legacy generator when flag is off. +- Audio WS lifecycle standardized with WebSocketStream (complete): + - Compat alias `error_type` present; close‑code mapping in place; metrics emitting. +- MCP WS lifecycle standardized with WebSocketStream (complete): + - JSON‑RPC payloads unchanged; ping/idle metrics emitting. + +Next operational step +- Flip STREAMS_UNIFIED=1 in non‑prod (dev/staging), validate WebUI with two providers, and monitor streaming dashboards. Maintain rollback by toggling the flag. 
+
+---
+
+## 2. User Stories
+
+| Story | Persona | Description |
+| --- | --- | --- |
+| US1 | API consumer (Chat) | “When I stream chat completions, I want consistent SSE framing and a single `[DONE]` sentinel across providers.” |
+| US2 | WebUI engineer | “I want identical heartbeat and error semantics whether a feature uses SSE or WebSockets.” |
+| US3 | Backend dev | “I want to implement streaming without re‑writing `[DONE]` and error handling for each endpoint.” |
+| US4 | Maintainer | “I want to delete endpoint‑local SSE helpers and rely on a central abstraction with tests.” |
+
+---
+
+## 3. Requirements
+
+### 3.1 Functional Requirements
+1. Provide `AsyncStream` interface with at least:
+   - `send_event(event: str, data: Any | None = None)` — named event emission (maps to `event:` + `data:` for SSE; `{type: "event", event, data}` for WS where used).
+   - `send_json(payload: dict)` — structured data (maps to `data:` for SSE; JSON frame over WS).
+   - `done()` — emit end‑of‑stream (SSE: `data: [DONE]`; WS: `{type: "done"}`) and close if appropriate.
+   - `error(code: str, message: str, *, data: dict | None = None)` — emit structured error frame and close when transport requires.
+2. Implement `SSEStream` for FastAPI `StreamingResponse` generators:
+   - Internals: async queue‑backed emitter; `iter_sse()` async generator yields lines to the response.
+   - Use `sse.ensure_sse_line`, `sse.sse_data`, `sse.sse_done`, `sse.normalize_provider_line`.
+   - Suppress provider `[DONE]`; ensure exactly one terminal `[DONE]` from our layer.
+   - Optional `heartbeat_interval_s` emitting `":"` comment lines (default); support `data` heartbeat mode.
+   - Provide `send_raw_sse_line(line: str)` as SSE‑specific helper for hot paths; not part of `AsyncStream`.
+   - Bounded queue (`queue_maxsize`) with documented backpressure policy.
+3. Implement `WebSocketStream` over Starlette/FastAPI WS:
+   - Lifecycle frames: `{type: "error", code, message, data?}`; `{type: "done"}`; `{type: "ping"}`/`{type: "pong"}`.
+   - Optional pings via `{type: "ping"}` at `heartbeat_interval_s`; reply to `{type: "pong"}`.
+   - Map application error codes to WS close reasons consistently.
+   - Event frames `{type: "event", event, data}` are optional; domain payloads remain unchanged for Audio/MCP.
+4. Centralize provider stream normalization:
+   - Reuse `app/core/LLM_Calls/streaming.py` for `requests` and `httpx` SSE iteration.
+   - Route all chat provider streams through this module before transport emission.
+5. Backward compatible payloads:
+   - Chat/OpenAI SSE: preserve `choices[].delta.content` shapes.
+   - Embeddings orchestrator: keep `event: summary` structure; move emission to `SSEStream.send_event("summary", payload)`.
+   - Audio and MCP WS: keep domain JSON schemas; only standardize lifecycle (error/done/heartbeat).
+6. Observability:
+   - Consistent log messages for start/stop/error with `stream_id`/`connection_id`.
+   - Metrics (labels: include `transport`, `kind` where applicable, and optional stream `labels` from constructors like `{"component":"chat"}`):
+     - `sse_enqueue_to_yield_ms` (histogram, ms): time from call to enqueue to iterator yield/write.
+     - `ws_send_latency_ms` (histogram, ms): time to complete `send_json` writes; `kind` in {event,json,error,done,ping}.
+     - `sse_queue_high_watermark` (gauge): max queue depth observed.
+     - `ws_pings_total` (counter): ping frames sent.
+     - `ws_ping_failures_total` (counter): ping send errors.
+     - `ws_idle_timeouts_total` (counter): WS connections closed due to idle timeout.
+   - Drop counters are emitted only when drop‑oldest mode is enabled.
+
+### 3.2 Non‑Functional Requirements
+- No measurable latency regression vs current code paths.
+- Memory footprint stable under long‑lived streams.
+- High availability under intermittent network conditions (graceful error frames).
+
+### 3.3 Canonical Error Codes
+- `quota_exceeded` — request exceeds quotas or limits
+- `idle_timeout` — idle timeout reached
+- `transport_error` — network/stream transport failure
+- `provider_error` — upstream LLM/provider signaled an error
+- `validation_error` — bad client input
+- `internal_error` — server-side error
+
+### 3.4 WebSocket Close Code Mapping
+- 1000 — normal closure (e.g., `{type: "done"}`)
+- 1001 — going away/idle timeout
+- 1008 — policy violation (e.g., auth/rate-limit failures)
+- 1011 — internal server error
+
+Usage guidance:
+- `quota_exceeded`: send `{type:"error", code:"quota_exceeded", ...}` then close with 1008.
+- `idle_timeout`: close with 1001 (a preceding error frame is optional and generally omitted for simplicity).
+- `internal_error`: send `{type:"error", code:"internal_error", ...}` then close with 1011.
+- `transport_error`: often cannot send an error reliably; close with 1011 if possible.
+
+---
+
+## 4. UX & API Design
+
+### 4.1 Transport Semantics
+- SSE
+  - Media type: `text/event-stream`.
+  - Heartbeat: `":\n\n"` comments at configurable interval (default 10s). Configurable mode to send `data: {"heartbeat": true}` if needed.
+  - Termination: single `data: [DONE]\n\n`.
+  - Errors: `data: {"error": {"code", "message", "data"}}`.
+  - Closure policy: configurable (default `close_on_error=True`). Per-call override available on `SSEStream.error(..., close=bool)`.
+  - Example (non-fatal error that keeps the stream open):
+    ```python
+    await stream.error("transient_provider_issue", "upstream timeout; continuing", close=False)
+    await stream.send_json({"status": "retrying"})
+    ```
+- WebSocket
+  - Heartbeat: `{type: "ping"}` at configurable interval (default 10s) with optional client `{type: "pong"}`.
+  - Termination: `{type: "done"}` followed by close (default 1000; configurable).
+  - Errors: `{type: "error", code, message, data}` then close as needed (mapping in 3.4).
+  - Transitional compatibility (Audio/WebUI): include `error_type` alias mirroring `code` during rollout.
+
+### 4.2 Developer Interface (Illustrative)
+```python
+from typing import Any, Protocol
+
+class AsyncStream(Protocol):
+    async def send_event(self, event: str, data: Any | None = None) -> None: ...
+    async def send_json(self, payload: dict) -> None: ...
+    async def done(self) -> None: ...
+    async def error(self, code: str, message: str, *, data: dict | None = None) -> None: ...
+
+class SSEStream(AsyncStream):
+    # queue-backed; exposes iter_sse() to yield SSE lines; supports optional send_raw_sse_line();
+    # note: send_raw_sse_line is SSE-only (not on AsyncStream) to aid hot-path migrations; prefer structured send_json over time
+    # configurable error closure policy via close_on_error (default True; per-call override)
+    # constructors accept optional labels: Dict[str,str] to tag metrics (e.g., {"component":"chat"})
+    ...
+
+class WebSocketStream(AsyncStream):
+    # wraps WebSocket send_json / close with standard frames & optional ping loop
+    # constructors accept optional labels: Dict[str,str] to tag metrics (e.g., {"component":"audio"})
+    ...
+``` + +### 4.4 SSE Endpoint Example + +```python +from fastapi import APIRouter +from fastapi.responses import StreamingResponse +from tldw_Server_API.app.core.Streaming.streams import SSEStream + +router = APIRouter() + +@router.get("/chat/stream") +async def chat_stream(): + stream = SSEStream( + heartbeat_interval_s=10, + heartbeat_mode="data", + labels={"component": "chat", "endpoint": "chat_stream"}, + ) + + async def generator(): + # In a real endpoint, start a background task to feed the stream + # await stream.send_json({...}) / await stream.send_event("summary", {...}) / await stream.done() + async for line in stream.iter_sse(): + yield line + + headers = { + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + } + return StreamingResponse(generator(), media_type="text/event-stream", headers=headers) +``` + +### 4.3 Backward Compatibility +- Existing SSE clients continue to receive identical `data:` frames (including OpenAI style deltas and a final `[DONE]`). +- Existing WS clients continue to receive domain JSON; only lifecycle frames become standardized (`error`, `done`, `ping`). + +--- + +## 5. Technical Approach + +1. Abstraction + - New module: `tldw_Server_API/app/core/Streaming/streams.py`, containing `AsyncStream`, `SSEStream`, `WebSocketStream`. + - Import `sse.ensure_sse_line`, `sse.normalize_provider_line`, `sse.sse_data`, `sse.sse_done`. +2. Normalization + - Keep a single normalization path in `LLM_Calls/streaming.py` for iterating provider SSE and suppressing provider `[DONE]`. + - Endpoints compose: + - Option A (hot path): `for line in iter_sse_lines_requests(...): await sse_stream.send_raw_sse_line(line)`. + - Option B (structured): build OpenAI‑compatible deltas and `await stream.send_json(openai_delta)`. + - Recommendation: prefer Option B for new endpoints; use Option A only to minimize churn during migration. +3. Heartbeats + - Shared config: `STREAM_HEARTBEAT_INTERVAL_S` (default 10), `STREAM_IDLE_TIMEOUT_S`, `STREAM_MAX_DURATION_S` — overridable per endpoint. + - SSE: comment line `":"` (or `data: {"heartbeat": true}` when configured). + - WS: `{type: "ping"}`; optional `{type: "pong"}` handling; idle timeout closes with 1001. +4. Error Handling + - Convert transport/iteration errors into structured frames via `stream.error(code, message, ...)`. + - Ensure exactly one terminal `done()` is emitted per stream on normal completion; no double‐DONE. +5. Refactors (per‑module) + - Chat/Characters: replace `_extract_sse_data_lines` and local builders with `SSEStream`. + - Embeddings orchestrator: replace custom `yield f"event: ..."` with `SSEStream.send_event("summary", payload)` and heartbeat via abstraction. + - Audio WS: replace bespoke status/error frames where possible with `WebSocketStream.error/done/ping`; retain domain payloads. + - MCP WS: reuse `WebSocketStream` for ping loop and standardized error/done; keep JSON‑RPC responses intact. + +### 5.1 SSE Response Headers +- Recommend headers to avoid buffering through proxies: + - `Cache-Control: no-cache` + - `Connection: keep-alive` (HTTP/1.1 only; HTTP/2 ignores this header) + - `X-Accel-Buffering: no` (for NGINX) + - Notes: + - Under HTTP/2, `Connection` is not meaningful and may be stripped; focus on disabling proxy buffering and keeping the response streaming (e.g., NGINX `proxy_buffering off;`, Caddy `encode`/`buffer` tuning). + - In reverse‑proxy/CDN environments (NGINX, Caddy, Cloudflare), prefer data heartbeats (`STREAM_HEARTBEAT_MODE=data`) to encourage flushes and reduce buffering. 
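+
+The two composition options from item 2 of the technical approach above can be sketched as follows; `provider_lines` and `delta_texts` stand in for whatever the normalization layer yields, so the exact helper names are illustrative rather than confirmed API:
+
+```python
+from tldw_Server_API.app.core.Streaming.streams import SSEStream
+
+async def forward_raw(stream: SSEStream, provider_lines) -> None:
+    # Option A (hot path): provider_lines are already-normalized SSE lines with
+    # the provider's own [DONE] suppressed by the normalization layer.
+    for line in provider_lines:
+        await stream.send_raw_sse_line(line)
+    await stream.done()  # exactly one terminal [DONE] from our layer
+
+async def forward_structured(stream: SSEStream, delta_texts) -> None:
+    # Option B (structured, preferred for new endpoints): build OpenAI-compatible
+    # deltas and let SSEStream handle framing and the terminal [DONE].
+    async for text in delta_texts:
+        await stream.send_json({"choices": [{"index": 0, "delta": {"content": text}}]})
+    await stream.done()
+```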
+ +### 5.2 Provider Control/Event Pass-through +- Normalization ignores `event:`/`id:`/`retry:` and comment lines by default. +- Provide a provider-specific pass-through mode to preserve control fields when needed. +- Emit debug logs when dropping unknown control lines during normalization to aid troubleshooting. +- Global toggle: `STREAM_PROVIDER_CONTROL_PASSTHRU=1` enables pass-through (default off). +- Per-endpoint flag: endpoints may request pass-through (e.g., `SSEStream(..., provider_control_passthru=True)`), which overrides the global default. + +- Transparent mode: + - When pass-through is enabled, preserve `event:`/`id:`/`retry:` lines and forward them unchanged alongside `data:` payloads. + - Add an optional hook for custom filtering/mapping (e.g., `control_filter(name: str, value: str) -> tuple[str, str] | None`) to rename/whitelist provider events. + - Intended for providers whose clients rely on SSE event names; default remains normalized mode. + +Example (provider control pass-through) +```python +# Preserve provider control fields as-is +stream = SSEStream(provider_control_passthru=True) +# Or whitelist specific controls +stream = SSEStream(provider_control_passthru=True, + control_filter=lambda n, v: (n, v) if n in {"event", "id", "retry"} else None) +``` + +### 5.3 WS Event Frames Guardrails +- Explicitly forbid wrapping domain WS payloads for MCP and Audio in `{type: "event"}` frames. +- Only use event frames on endpoints designed for them; lifecycle frames (`ping`, `error`, `done`) remain standardized everywhere. +- Add helper naming guidance and code review checklist item to reduce misuse. + +MCP JSON-RPC done semantics: +- `done` is session-level only for MCP WebSockets. It must never be emitted as a JSON‑RPC message. +- JSON‑RPC results/errors are sent as specified by JSON‑RPC; lifecycle frames (`ping`, `error`, session‑level `done`) are separate from JSON‑RPC content. + +### 5.4 Endpoint Examples (Rollout-friendly) + +```python +# Audio WebSocket handler example with transitional error alias +from tldw_Server_API.app.core.Streaming.streams import WebSocketStream + +async def audio_ws_handler(websocket): + stream = WebSocketStream( + websocket, + heartbeat_interval_s=10, + compat_error_type=True, # include error_type alias during rollout + close_on_done=True, + labels={"component": "audio", "endpoint": "audio_ws"}, + ) + await stream.start() + try: + # Emit domain payloads directly (no event frames) + await stream.send_json({"type": "partial", "text": "..."}) + # ... + except QuotaExceeded as e: + await stream.error("quota_exceeded", str(e), data={"limit": e.limit}) + await stream.done() + finally: + await stream.stop() + +# MCP WebSocket: lifecycle frames standardized, JSON-RPC payloads unchanged +async def mcp_ws_handler(websocket): + stream = WebSocketStream( + websocket, + heartbeat_interval_s=10, + compat_error_type=True, # temporary alias for clients expecting error_type + close_on_done=False, # MCP may manage session lifetime explicitly + labels={"component": "mcp", "endpoint": "mcp_ws"}, + ) + await stream.start() + try: + # Send JSON-RPC results as-is + await stream.send_json({"jsonrpc": "2.0", "result": {...}, "id": 1}) + # ... + except Exception as e: + await stream.error("internal_error", f"{e}") + finally: + await stream.stop() +``` + +### 5.5 Backpressure Policy + +- Default: block on full SSE queue (no drops). Producers back off until the consumer drains the queue. +- Optional mode: drop‑oldest (advanced; disabled by default). 
When enabled, the oldest queued item is dropped to make room for new items, and a counter increments for observability. +- Recommendation: keep default blocking mode for correctness; only enable drop‑oldest for non‑critical high‑throughput streams with tolerant clients. + - Queue sizing guidance: Use a conservative default (e.g., 256 frames) and tune per endpoint based on typical payload size and client consumption rate. Track queue high‑water marks to inform tuning. + +### 5.7 Reverse Proxy & HTTP/2 Considerations + +- Heartbeats: + - Prefer `STREAM_HEARTBEAT_MODE=data` behind reverse proxies/CDNs to reduce buffering and encourage periodic flushes. + - Ensure proxy timeouts (read/idle) exceed heartbeat intervals. +- Proxy buffering: + - Disable buffering on the reverse proxy (`proxy_buffering off;` for NGINX, appropriate Caddy/Envoy settings). + - For NGINX, keep `X-Accel-Buffering: no` on responses. +- HTTP/1.1 vs HTTP/2: + - `Connection: keep-alive` applies to HTTP/1.1; HTTP/2 handles persistence differently and may strip the header. + - Do not rely on connection headers under HTTP/2; rely on correct streaming semantics and disabled buffering. + +### 5.6 SSE Idle/Max Duration Enforcement + +- Idle timeout (`STREAM_IDLE_TIMEOUT_S`): + - Behavior: emit error frame `{"error": {"code": "idle_timeout", "message": "idle timeout"}}` followed by `[DONE]`, then close. + - Client expectation: treat as terminal condition; retry logic is client‑specific. +- Max duration (`STREAM_MAX_DURATION_S`): + - Behavior: emit error frame `{"error": {"code": "max_duration_exceeded", "message": "stream exceeded maximum duration"}}` followed by `[DONE]`, then close. + - Client expectation: treat as terminal condition; consider resuming in a new stream. + +--- + +## 6. Dependencies & Impact + +- Reuse: `app/core/LLM_Calls/sse.py`, `app/core/LLM_Calls/streaming.py`. +- Touchpoints: Chat endpoints, Character chat, Embeddings orchestrator SSE, Audio WS, MCP WS. +- Docs: Update streaming sections in API docs and Audio/MCP protocol notes to mention standardized lifecycle frames. + +--- + +## 7. Deletions & Cleanups + +- Remove endpoint‑local SSE helpers and duplicate DONE handling: + - `character_chat_sessions._extract_sse_data_lines`. + - Custom SSE yields in `embeddings_v5_production_enhanced.orchestrator_events` generator. +- Replace bespoke heartbeat/error patterns in: + - `Audio_Streaming_Unified.handle_unified_websocket` (use `WebSocketStream` ping/error/done). + - `MCP_unified/server` ping loop and error frames where compatible with JSON‑RPC lifecycle. + +--- + +## 8. Metrics & Success Criteria + +| Metric | Target | +| --- | --- | +| Duplicate DONE frames in Chat SSE | 0 across providers | +| Stream error frames include `code` + `message` | 100% | +| Heartbeat parity (SSE/WS) | Enabled by default, configurable | +| Lines of duplicate streaming code removed | > 60% in affected files | +| Server-side latency regression (enqueue→yield or send_json) | ≤ ±1% vs baseline | + +Note: Keep metrics labels low-cardinality (e.g., `component`, `endpoint`); avoid user/session IDs. + +--- + +## 9. Rollout Plan + +1. Phase 0 — Design (this document) + - Align on interface and semantics. +2. Phase 1 — Abstraction + Chat pilot + - Implement `AsyncStream`, `SSEStream`, `WebSocketStream`. + - Migrate one Chat streaming endpoint; add unit tests for DONE/error/heartbeat. +3. Phase 2 — Embeddings SSE + - Switch orchestrator SSE to `SSEStream`; keep `event: summary`. +4. 
Phase 3 — Audio WS + - Integrate `WebSocketStream` for heartbeat/error/done; retain domain payloads. +5. Phase 4 — MCP WS + - Use `WebSocketStream` ping/error where compatible; respect JSON‑RPC requirements. +6. Phase 5 — Cleanup + - Delete endpoint‑local helpers; update docs/tests; enable by default. + +Feature flag: `STREAMS_UNIFIED` (default off for one release; then on by default). + +--- + +## 9.1 Client Migration Checklist & Shims + +- WebUI and client libraries + - Update to consume `code` + `message` error shape; during rollout, accept `error_type` alias where present. + - Ignore `{type:"ping"}` frames; treat `{type:"done"}` as terminal. + - For SSE, handle `{"error": {"code", "message"}}` followed by `[DONE]` as terminal. +- Audio/MCP integrations + - Keep domain payloads unchanged; enable `compat_error_type=True` on `WebSocketStream` during migration window. + - Standardize lifecycle handling: single source of pings; `done` where appropriate (avoid for JSON‑RPC content itself). +- Observability + - Add dashboards for stream starts/stops/errors, WS close codes, SSE queue high‑water marks. + - Enable logs at `debug` for dropped control lines (when pass-through disabled) during the first release. +- Feature flag playbook + - Roll out per‑endpoint; enable in pre‑prod/staging first. + - In case of regression, disable `STREAMS_UNIFIED` to revert to legacy code paths. + - Keep compatibility shims (`error_type`) until clients confirm migration. + +--- + +## 10. Testing Strategy + +- Unit tests + - `SSEStream`: ensures normalization, exact one DONE, error payload shape, heartbeat interval. + - `WebSocketStream`: ping scheduling, error/done frames, close behavior. +- Integration tests + - Chat SSE end‑to‑end with mock provider streams including provider `[DONE]` and malformed lines. + - Embeddings orchestrator SSE: event and heartbeat cadence. + - Audio WS: partial/final frames + standardized error/done in shutdown sequences. + - MCP WS: ping/idle timeout behavior with new helper. +- Backward‑compat checks + - Snapshot tests for representative SSE/WS payload sequences before/after migration. +- Latency measurement + - Instrument server-side latency: measure `enqueue→yield` for SSE (time from `send_*` to generator yield), and `send_json` call completion latency for WS. Compare distributions to baseline; target ≤ ±1%. +- Backpressure tests + - SSE queue bounded behavior (block vs drop policy) with counters asserted. + - Heartbeats and backpressure: document that heartbeats share the same queue and may be delayed under heavy backpressure. Acceptance: without payload backpressure, observed heartbeat intervals stay within 2× configured; under saturation, heartbeats may be delayed but resume within 2× after backlog drains. + +--- + +## 11. Risks & Mitigations + +- Risk: Subtle changes in timing/heartbeats can affect clients. + - Mitigation: feature flag; document intervals; snapshot test WebUI behavior. +- Risk: Double DONE due to legacy code paths not removed. + - Mitigation: centralized suppression + unit tests; code search to remove duplicates. +- Risk: MCP JSON‑RPC framing constraints. + - Mitigation: scope `WebSocketStream` usage to ping/error/done helpers; do not wrap JSON‑RPC result payloads. + +--- + +## 12. Open Questions + +None at this time. + +--- + +## 13. Acceptance Criteria + +- Chat SSE pilot endpoint emits standardized frames with no duplicate `[DONE]` across at least two providers. 
+- Embeddings orchestrator emits `event: summary` via `SSEStream` with heartbeats controlled by config. +- Audio WS adopts standardized `error` (code/message) and `done` frames and a single ping source; existing domain messages unchanged. +- MCP WS uses shared ping/idle handling and `error/done` helpers where compatible. +- Endpoint‑local SSE helpers removed; tests cover new abstraction; docs updated. + +--- + +## 14. Configuration + +- `STREAMS_UNIFIED`: feature flag (off for one release; then default on) +- `STREAM_HEARTBEAT_INTERVAL_S`: default 10 +- `STREAM_IDLE_TIMEOUT_S`: default disabled +- `STREAM_MAX_DURATION_S`: default disabled +- `STREAM_HEARTBEAT_MODE`: `comment` or `data` (default `comment`) +- `STREAM_PROVIDER_CONTROL_PASSTHRU`: `0|1` (default `0`), preserves provider SSE control fields when `1` +- `STREAM_QUEUE_MAXSIZE`: default 256 (bounded SSE queue size) + +Label guidance: Use low-cardinality labels (e.g., `component`, `endpoint`); avoid user/session IDs. Default suggested: `STREAM_HEARTBEAT_INTERVAL_S=10` with per-endpoint overrides. + +--- + +## 15. Implementation Plan + +Stage 0 — Finalize Design and Defaults +- Goal: Lock interface, defaults, metrics, and headers guidance. +- Deliverables: + - Error semantics (code + message), heartbeat modes, close code mapping confirmed. + - Defaults: `STREAM_HEARTBEAT_INTERVAL_S=10`, `STREAM_HEARTBEAT_MODE=comment` (use `data` behind reverse proxies), SSE queue size target (~256), `STREAM_PROVIDER_CONTROL_PASSTHRU=0`. + - Metrics catalog confirmed; labels policy (low-cardinality: component, endpoint) approved. +- Success: PRD approved; tracking issue created for each stage. + +Stage 1 — Core Abstractions + Metrics (this PR/commit) +- Status: Complete +- Goal: Implement `SSEStream` and `WebSocketStream` with metrics hooks and labels. +- Code: + - `tldw_Server_API/app/core/Streaming/streams.py` — abstractions, heartbeats, error/done, WS pings, metrics (`sse_enqueue_to_yield_ms`, `ws_send_latency_ms`, `sse_queue_high_watermark`, `ws_pings_total`, `ws_ping_failures_total`, `ws_idle_timeouts_total`). + - `tldw_Server_API/app/core/LLM_Calls/sse.py` — debug logs for dropped control/comment lines. +- Tests: + - `tldw_Server_API/tests/Streaming/test_streams.py` — basic SSE/WS behavior; expand to cover labels presence later. +- Docs: + - This PRD, Chat/Audio code docs examples, Metrics README (+ Grafana JSON). +- Success: Unit tests pass; example code compiles; metrics exported without errors when registry is enabled. + +Stage 2 — Add Provider Control Pass-through + SSE Idle/Max Enforcement +- Status: Complete +- Goal: Implement optional pass-through and SSE timers per PRD. +- Code: + - Add `provider_control_passthru: bool` and optional `control_filter` hook to `SSEStream`; thread env `STREAM_PROVIDER_CONTROL_PASSTHRU`. + - Add optional idle/max duration timers to `SSEStream`; on trigger, emit error per 5.6 then `[DONE]` and close. + - Consider adjusting default `queue_maxsize` to 256 (as per 5.5 guidance). +- Tests: + - Pass-through on/off snapshots; control filter mapping. + - Idle and max duration enforcement cases (timeouts emit error + DONE). +- Success: Behavior matches PRD; no regressions in Chat SSE snapshots. + +Stage 3 — Chat SSE Pilot Integration +- Status: Complete +- Goal: Migrate one Chat streaming endpoint to `SSEStream` behind `STREAMS_UNIFIED` flag. +- Code: + - Replace endpoint-local SSE emission for a pilot endpoint (character chat streaming) with `SSEStream` gated by `STREAMS_UNIFIED`. 
+ - Replace local normalization with provider iterator output (`LLM_Calls/LLM_API_Calls.*iter_sse_lines_*`) and `normalize_provider_line` fallback for non-string chunks. Suppress provider `[DONE]`; call `stream.done()` once. + - Route provider lines via `send_raw_sse_line` for minimal change. + - Validate under flag with two providers (e.g., OpenAI + Groq) and with the WebUI client; verify metrics populate and no duplicate `[DONE]`. + - If validation passes, flip `STREAMS_UNIFIED=1` in non-prod environments and stage a second chat endpoint migration. + - Rollback: set `STREAMS_UNIFIED=0` and restart the app to revert to legacy code paths (no code changes required). + +### Validation Checklist (non‑prod) + +Environment +- Use dev/staging with unified streams enabled: + - Compose overlay: `-f Dockerfiles/docker-compose.yml -f Dockerfiles/Dockerfiles/docker-compose.dev.yml` + - or export `STREAMS_UNIFIED=1` in the environment prior to starting the API. +- Ensure provider keys are set for at least two providers (e.g., OpenAI and Groq). +- Optional: behind reverse proxies/CDNs, set `STREAM_HEARTBEAT_MODE=data`. + +Functional +- Chat SSE (main): stream completion; assert only one `data: [DONE]` and proper OpenAI deltas. +- Character chat SSE: stream conversation; validate heartbeat presence during idle and single `DONE`. +- Chat document-generation SSE: stream doc; validate heartbeat and final `DONE` without duplicates. +- Embeddings orchestrator SSE (if used): confirm `event: summary` frames appear periodically. +- Prompt Studio SSE fallback (if used): connect and observe initial state + heartbeats. + +WebSockets +- Audio WS: open a session; observe `{type:"ping"}` frames; trigger an error path and confirm error frame + close code mapping. +- MCP WS: open a session; confirm lifecycle frames (`ping`, `done` when closed) and that JSON‑RPC responses are unchanged. + +Metrics & Dashboards +- Import `Docs/Deployment/Monitoring/Grafana_Streaming_Basics.json`. +- Confirm: + - `sse_enqueue_to_yield_ms` histogram shows activity during SSE streams. + - `sse_queue_high_watermark` increases during bursts. + - `ws_send_latency_ms` histogram increments on WS sends. + - `ws_pings_total` increments for WS endpoints; `ws_ping_failures_total` remains 0. + +Rollback +- Toggle `STREAMS_UNIFIED=0` and restart app to revert to legacy streaming. +- Tests: + - End-to-end chat SSE with at least two providers; no duplicate `[DONE]`. + - Snapshot payloads pre/post match (except standardized error/heartbeat cadence). +- Success: Feature-flagged pilot works with WebUI; latency within server-side target. + +- Stage 4 — Embeddings SSE Migration +- Status: Complete +- Goal: Move orchestrator events to `SSEStream` while preserving `event: summary`. +- Code: + - Replace custom `yield f"event: ..."` with `send_event("summary", payload)`; heartbeats via abstraction. + - Implemented behind `STREAMS_UNIFIED` in `embeddings_v5_production_enhanced.orchestrator_events`. +- Tests: + - Event cadence and heartbeats; summary payload unchanged; pass-through remains disabled unless explicitly needed. +- Success: No client changes required; metrics visible in dashboard. + +Stage 5 — Audio WS Standardization +- Status: Complete +- Goal: Adopt `WebSocketStream` for lifecycle (ping, error, done) without changing domain payloads. +- Code: + - Unified handler uses `WebSocketStream(..., compat_error_type=True)` and labels `{component: audio, endpoint: audio_unified_ws}`. 
+ - Standardized error/done semantics; retained legacy quota close (4003) and `error_type` for client compatibility. + - Routed status/summary frames via `stream.send_json` for metrics coverage; domain payloads unchanged. +- Tests: + - Quota/concurrency WS tests pass; streaming unit tests cover WS metrics and error/done; additional ping/idle tests can be added if needed. +- Success: Clients unaffected; improved observability in streaming dashboard. + +Stage 6 — MCP WS Lifecycle Adoption +- Status: Complete +- Goal: Use `WebSocketStream` for ping/idle/error; never wrap JSON‑RPC content or emit `done` as JSON‑RPC. +- Code: + - MCP server uses `WebSocketStream` with labels `{component: mcp, endpoint: mcp_ws}`; origin/IP/auth guards in place. + - Standardized close-code mapping; JSON‑RPC payloads unchanged; lifecycle metrics emitted. +- Tests: + - Full MCP WS/HTTP test suite passes (JSON-RPC, security, rate limits, etc.). + - Unified WS lifecycle verified by tests; metrics available for dashboards. +- Success: MCP dashboard unchanged for content; lifecycle metrics added. + +Stage 7 — Cleanup, Docs, and Flip Default +- Status: In Progress +- Goal: Remove endpoint‑local helpers, update docs, and flip `STREAMS_UNIFIED` default after non‑prod validation. +- Code (in progress): + - Prompt Studio SSE fallback now uses SSEStream behind the flag. + - Embeddings orchestrator, Evaluations SSE, Jobs Admin SSE, Chat SSE paths already unified. + - Plan removal of legacy local SSE helpers after one release window. + - Prepare default flip of `STREAMS_UNIFIED` in non‑prod configs (compose.test already sets it). +- Docs (in progress): + - API docs and protocol notes reflect standardized lifecycle and close‑code mapping. + - Monitoring README includes labels guidance and references the Grafana Streaming Basics dashboard. +- Success criteria for this stage: + - Non‑prod flip validated with WebUI + two providers; no duplicate [DONE]; dashboards show healthy SSE/WS metrics. + - Clear rollback documented (toggle `STREAMS_UNIFIED=0`). + +Risk Mitigation & Rollback +- Feature flag per endpoint; can revert to legacy implementation immediately if regressions occur. +- Keep `error_type` alias during rollout; remove after clients confirm. +- Monitor dashboards: p95 WS send latency, SSE enqueue→yield p95, idle timeouts, ping failures; react to anomalies. + +Ownership & Tracking +- Create issues per stage with checklists: + - Code changes with file paths + - Tests added/updated + - Docs touched + - Rollout/flag steps + - Validation (dashboards/alerts) + +--- + +## 16. Compatibility Follow-ups + +Audio WS legacy quota close code +- Current behavior: For client compatibility, the Audio WS handler emits an `error` frame with `error_type: "quota_exceeded"` and closes with code `4003` when quotas are exceeded. +- Target behavior: Migrate to standardized close code `1008` (Policy Violation) with structured `{type: "error", code: "quota_exceeded", message, data?}` and without the legacy `error_type` field once downstream clients have updated. +- Migration plan: + - Phase 1 (current): Keep `4003` and include `error_type` alias (compat_error_type=True) in `WebSocketStream` for Audio. Documented in API/SDK release notes. + - Phase 2 (flagged pilot): Expose an opt‑in environment toggle (ops only) to switch close code to `1008` while still including `error_type` for a release. Target: next minor release (v0.1.1). + - Phase 3 (default switch): Change default to `1008` and keep `error_type` for one additional release. 
Target: following minor (v0.1.2). + - Phase 4 (cleanup): Remove `error_type` alias for Audio WS and rely solely on `code` + `message`. Target: subsequent minor (v0.1.3). + - Acceptance: No client breakages reported in non‑prod → prod flips; tests updated to assert `1008`. + - Tracking: See Docs/Issues/STREAMS_UNIFIED_Rollout_Tracking.md (Audio `error_type` deprecation task). + +Endpoint audit and duplicate closes +- WebSockets + - Workflows WS, Sandbox WS, Prompt Studio WS, MCP Unified WS, and Persona WS are wrapped with `WebSocketStream` and emit standardized lifecycle metrics/frames. Domain payloads remain untouched where required. + - Audio WS: outer endpoint still performs some direct `send_json/close` for auth/quota compatibility; the inner unified handler uses `WebSocketStream`. Double‑close risks are minimized (idempotent close), but a follow‑up refactor will consolidate closing into the unified layer after the quota close migration (above) to simplify logic. + - Parakeet Core demo WS (`/core/parakeet/stream`) is a portable minimal router not mounted in the main app; it intentionally does not use `WebSocketStream` (kept as a standalone sample core). +- SSE + - Chat: pilot paths (character chat, chat completions, document‑generation) are unified behind `STREAMS_UNIFIED`. + - Embeddings orchestrator: unified to `SSEStream` behind `STREAMS_UNIFIED` while preserving `event: summary`. + - Evaluations SSE (`evaluations_unified.py`) currently uses a bespoke `StreamingResponse` generator; a low‑risk follow‑up item will migrate it to `SSEStream` to standardize heartbeats/metrics. + +Monitoring/dashboard validation +- Import `Docs/Deployment/Monitoring/Grafana_Streaming_Basics.json` in Grafana (Prometheus datasource UID `prometheus`). +- Confirm Persona WS series appear with labels `{component: persona, endpoint: persona_ws, transport: ws}` in the WS panels. diff --git a/Docs/Design/UX.md b/Docs/Design/UX.md index 0f8b398cd..24e086b68 100644 --- a/Docs/Design/UX.md +++ b/Docs/Design/UX.md @@ -65,6 +65,14 @@ https://blikket.co/ux-vs-cro-how-harmonizing-design-and-strategy-can-skyrocket-y https://copycoder.ai/ https://medium.com/@ryan.almeida86/10-tiny-ui-fixes-that-make-a-big-difference-951b1c98d4ec https://www.grug.design/know +https://uxplanet.org/14-logic-driven-ui-design-tips-145ee08ea5a5?gi=31c1a5e9d721 +https://medium.com/ui-for-ai/welcome-to-ui-for-ai-eb22aef8d26c +https://medium.com/ui-for-ai/ui-for-ai-initial-concepts-82b40dc2998c +https://blog.vaexperience.com/ep12-design-for-ai-with-dan-saffer/ +https://medium.com/ui-for-ai/design-principles-for-ai-21b6fac23b04 +https://medium.com/ui-for-ai/diving-deep-into-ai-use-cases-77f36bfb7d47 +https://www.nngroup.com/articles/ai-work-study-guide/ +https://www.lukew.com/ff/entry.asp?2132?ref=sidebar https://uxdesign.cc/building-better-logins-a-ux-and-accessibility-guide-for-developers-9bb356f0a132 https://ieeexplore.ieee.org/document/5387632 diff --git a/Docs/Development/Browser-Plugin-Improvements.md b/Docs/Development/Browser-Plugin-Improvements.md deleted file mode 100644 index 19b2e4a20..000000000 --- a/Docs/Development/Browser-Plugin-Improvements.md +++ /dev/null @@ -1,511 +0,0 @@ -# Browser Plugin Improvements Analysis - -## ✅ IMPLEMENTATION PROGRESS - -**Status**: ALL 20 CRITICAL IMPROVEMENTS SUCCESSFULLY IMPLEMENTED! 
🎉 - -### 🚀 **COMPLETE TRANSFORMATION ACHIEVED** - -The TLDW Browser Extension has been completely transformed from a functional prototype into a **production-ready, enterprise-grade browser extension** with comprehensive testing, security, and user experience enhancements. - -## 🏆 **ALL IMPROVEMENTS COMPLETED** - -### **Phase 1: Critical UX Fixes** ✅ **COMPLETED** - -1. **Toast Notification System** ✅ - - Replaced all alert() calls with professional toast notifications - - Added success, error, warning, and info toast types with animations - - Implemented loading spinner for long operations - - CSS animations with slide-in effects - -2. **Prompt Creation Functionality** ✅ - - Implemented complete prompt creation modal dialog - - Form validation and error handling - - Integration with API for saving prompts - - Automatic refresh of prompt list after creation - -3. **Enhanced Connection Status** ✅ - - Intelligent retry logic with exponential backoff - - Detailed connection status with timestamps and failure counts - - Click-to-retry functionality on connection status - - Background monitoring with adaptive intervals - -4. **Enhanced Keyboard Shortcuts** ✅ - - Added 5 new keyboard shortcuts (up from 2) - - Quick summarize: Ctrl+Shift+S - - Save as prompt: Ctrl+Shift+P - - Process page: Ctrl+Shift+M - - Better error handling for shortcuts - -### **Phase 2: Performance & Reliability** ✅ **COMPLETED** - -5. **API Client Caching & Optimization** ✅ - - Request deduplication to prevent duplicate API calls - - 5-minute cache for GET requests on prompts, characters, media - - Automatic cache invalidation on mutations - - Cache statistics and management - - Pending request tracking - -6. **Content Script Performance Optimization** ✅ - - Throttled text selection monitoring (300ms) - - Reduced CPU usage on text selection events - - Added keyboard selection support - - Optimized event handling with debouncing - -### **Phase 3: Advanced Features** ✅ **COMPLETED** - -7. **Memory Leaks & Cleanup** ✅ - - Comprehensive event listener tracking system - - Automatic cleanup on content script unload - - Prevention of orphaned event listeners - - Memory management optimization - -8. **Smart Context Detection** ✅ - - Intelligent content type detection (video, audio, articles, documents, code) - - Auto-suggested actions based on content type - - Confidence scoring and smart recommendations - - Support for 50+ content types and platforms - -9. **Batch Operations** ✅ - - "Process All Tabs" functionality with progress tracking - - "Save All Bookmarks" capability - - "Process Selected Tabs" with modal selection interface - - Smart rate limiting and error handling - -10. **Enhanced Search System** ✅ - - Advanced filters and sorting options - - Recent searches with persistent storage - - Intelligent search suggestions - - Debounced search with caching for performance - - Search statistics and result highlighting - -11. **Progress Indicators** ✅ - - Real-time progress tracking for all long operations - - File upload progress with speed monitoring - - ETA calculations and cancellable operations - - Global progress notification system - -### **Phase 4: Enterprise Architecture** ✅ **COMPLETED** - -12. **Configuration Management System** ✅ - - Centralized ConfigManager with environment detection - - User settings persistence with Chrome storage - - Configuration validation and health monitoring - - Presets system (performance, security, development, minimal) - - Export/import capabilities with migration support - -13. 
**CORS & Security Headers** ✅ - - Comprehensive security headers (User-Agent, Request-ID, CORS) - - CORS preflight handling for complex HTTP methods - - Enhanced error categorization with user-friendly messages - - Request timeout management with AbortController - - Smart retry logic with exponential backoff - -14. **Extension Update Management** ✅ - - Complete update lifecycle handling (install, update, Chrome update) - - Data migration system with version-specific migrations - - Automatic backup & recovery with rollback capabilities - - User-friendly notifications for installs and updates - - Compatibility checking and cache cleanup - -### **Phase 5: Testing & Quality Assurance** ✅ **COMPLETED** - -15. **Comprehensive Test Suite** ✅ - - **Unit Tests**: 125+ test cases with property-based testing - - **Integration Tests**: End-to-end workflows and cross-component testing - - **Property-based Tests**: Mathematical properties verification - - **Coverage**: 70%+ across branches, functions, lines, statements - - **Cross-browser Testing**: Chrome, Firefox, Edge compatibility - -16. **Advanced Features** ✅ - - Event system for configuration changes - - Request deduplication and intelligent caching - - Cross-browser compatibility layer - - Performance monitoring and metrics - - Debug mode and development tools - -## 📊 **TRANSFORMATION SUMMARY** - -### **Before vs. After Comparison** - -| Aspect | Before | After | -|--------|--------|-------| -| **User Experience** | Basic alerts, placeholder UI | Professional toast notifications, smart context detection | -| **Performance** | Unoptimized, memory leaks | Throttled events, intelligent caching, cleanup systems | -| **Features** | Limited functionality | Batch operations, advanced search, progress tracking | -| **Architecture** | Hard-coded values | Centralized configuration, environment detection | -| **Security** | Basic implementation | CORS handling, security headers, request validation | -| **Updates** | No migration support | Complete update lifecycle with data migration | -| **Testing** | No test coverage | 125+ tests with 70%+ coverage | -| **Browser Support** | Chrome only | Chrome, Firefox, Edge compatibility | - -### **Current Architecture Overview** - -The TLDW Browser Extension now features: - -- **Enterprise-grade extension** (Chrome V2/V3, Firefox, Edge) with comprehensive feature set -- **Smart Context Detection** supporting 50+ content types and platforms -- **Advanced Configuration Management** with environment-specific settings -- **Comprehensive Security** with CORS, security headers, and request validation -- **Performance Optimization** with intelligent caching and memory management -- **Robust Update System** with data migration and rollback capabilities -- **Extensive Testing** with unit, integration, and property-based tests - -## 🚀 **NEXT STEPS & DEPLOYMENT** - -### **1. 
Quality Assurance & Testing** - -#### **Run Comprehensive Test Suite** -```bash -# Navigate to extension directory -cd chrome-extension/ - -# Install test dependencies -npm install - -# Run all tests -npm test - -# Run with coverage -npm run test:coverage - -# Run specific test suites -npm run test:unit -npm run test:integration -``` - -**Expected Results:** -- ✅ All 125+ tests passing -- ✅ 70%+ code coverage across all metrics -- ✅ Cross-browser compatibility verified -- ✅ Property-based tests passing - -#### **Manual Testing Checklist** -- [ ] Extension loads without errors in Chrome/Firefox/Edge -- [ ] Smart context detection works on various websites -- [ ] Batch operations process multiple tabs correctly -- [ ] Configuration management saves/loads settings -- [ ] Toast notifications display properly -- [ ] Progress indicators show for long operations -- [ ] Memory cleanup prevents leaks -- [ ] Update system handles version changes - -### **2. Pre-Deployment Configuration** - -#### **Environment Configuration** -```bash -# Set production environment variables -export NODE_ENV=production -export EXTENSION_ENV=production -``` - -#### **Update Configuration Files** -1. **Manifest Version Selection**: - - For Chrome: Use `manifest.json` (Manifest V3) - - For Firefox: Use `manifest-v2.json` - - For legacy Chrome: Use `manifest-v2.json` - -2. **Server URL Configuration**: - ```javascript - // Update default server URL in js/utils/config.js - production: { - serverUrl: 'https://your-production-server.com', - debug: false, - logLevel: 'warn' - } - ``` - -3. **Security Settings**: - ```javascript - // Verify allowed origins in config.js - allowedOrigins: [ - 'https://your-production-server.com', - 'https://api.your-domain.com' - ] - ``` - -### **3. Extension Packaging & Distribution** - -#### **Build Process** -```bash -# Create production builds for all browsers -npm run build:chrome-v3 # Chrome Manifest V3 -npm run build:chrome-v2 # Chrome Manifest V2 (legacy) -npm run build:firefox # Firefox -``` - -#### **Manual Packaging Steps** - -**For Chrome Web Store:** -1. **Prepare Chrome Package**: - ```bash - # Create clean directory - mkdir -p dist/chrome-v3 - - # Copy essential files - cp manifest.json dist/chrome-v3/ - cp -r js/ dist/chrome-v3/ - cp -r html/ dist/chrome-v3/ - cp -r css/ dist/chrome-v3/ - cp -r icons/ dist/chrome-v3/ - - # Create ZIP package - cd dist/chrome-v3 - zip -r ../tldw-extension-chrome.zip . - ``` - -2. **Chrome Web Store Submission**: - - Upload `tldw-extension-chrome.zip` to [Chrome Web Store Developer Dashboard](https://chrome.google.com/webstore/devconsole/) - - Fill out store listing with screenshots and descriptions - - Submit for review (typically 1-3 business days) - -**For Firefox Add-ons:** -1. **Prepare Firefox Package**: - ```bash - # Create Firefox-specific build - mkdir -p dist/firefox - cp manifest-v2.json dist/firefox/manifest.json - cp -r js/ dist/firefox/ - cp -r html/ dist/firefox/ - cp -r css/ dist/firefox/ - cp -r icons/ dist/firefox/ - - # Create XPI package - cd dist/firefox - zip -r ../tldw-extension-firefox.xpi . - ``` - -2. **Firefox Add-ons Submission**: - - Upload to [Firefox Add-on Developer Hub](https://addons.mozilla.org/en-US/developers/) - - Complete compatibility testing - - Submit for review - -**For Edge Add-ons:** -1. **Prepare Edge Package** (same as Chrome V3): - ```bash - cp dist/tldw-extension-chrome.zip dist/tldw-extension-edge.zip - ``` - -2. 
**Edge Add-ons Submission**: - - Upload to [Microsoft Edge Add-ons](https://partner.microsoft.com/en-US/dashboard/microsoftedge/) - -#### **Version Management** -```bash -# Update version in all manifest files -# Update package.json version -# Create git tag -git tag v1.0.0 -git push origin v1.0.0 -``` - -### **4. Production Deployment Checklist** - -#### **Pre-Launch Verification** -- [ ] **Security Audit Completed** - - [ ] All security headers implemented - - [ ] CORS configuration verified - - [ ] No sensitive data in extension package - - [ ] Permissions minimized to required only - -- [ ] **Performance Testing** - - [ ] Extension memory usage under 50MB - - [ ] API response times under 5 seconds - - [ ] Cache hit ratio above 80% - - [ ] No memory leaks detected - -- [ ] **Cross-Browser Testing** - - [ ] Chrome 88+ compatibility verified - - [ ] Firefox 89+ compatibility verified - - [ ] Edge 88+ compatibility verified - - [ ] All features work consistently - -- [ ] **User Experience Testing** - - [ ] Toast notifications work properly - - [ ] Progress indicators show accurately - - [ ] Smart context detection works on 10+ sites - - [ ] Batch operations handle 50+ tabs - - [ ] Configuration export/import functions - -#### **Launch Preparation** -- [ ] **Documentation Updated** - - [ ] User guide created - - [ ] Installation instructions written - - [ ] API documentation updated - - [ ] Troubleshooting guide prepared - -- [ ] **Support Infrastructure** - - [ ] Issue tracking system configured - - [ ] User feedback collection setup - - [ ] Analytics/telemetry implemented - - [ ] Update notification system tested - -#### **Post-Launch Monitoring** -- [ ] **Error Tracking** - - Monitor browser console errors - - Track API request failures - - Monitor memory usage patterns - - Watch for update migration issues - -- [ ] **User Feedback** - - Monitor store reviews and ratings - - Track support ticket themes - - Analyze user behavior patterns - - Collect feature requests - -### **5. Future Enhancements (Optional)** - -The following features could be considered for future releases: - -#### **Advanced Customization** -- **Custom Themes**: Dark/light mode with custom color schemes -- **Layout Customization**: Rearrangeable UI components -- **Keyboard Shortcut Customization**: User-configurable shortcuts -- **Advanced Filters**: More granular search and filtering options - -#### **AI & Machine Learning** -- **Content Categorization**: ML-powered content classification -- **Smart Recommendations**: AI-suggested actions based on usage patterns -- **Predictive Caching**: Anticipatory content loading -- **Usage Analytics**: Advanced user behavior insights - -#### **Enterprise Features** -- **Team Management**: Multi-user configurations and sharing -- **Admin Dashboard**: Central management for organization deployments -- **Compliance Features**: Enhanced security and audit logging -- **API Rate Limiting**: Advanced quota management - -#### **Integration Expansions** -- **Third-party Services**: Integration with popular productivity tools -- **Cloud Storage**: Direct integration with Google Drive, Dropbox, etc. 
-- **Social Sharing**: Enhanced sharing capabilities -- **Webhook Support**: Real-time notifications and integrations - -## 📋 **TESTING & QUALITY ASSURANCE** - -### **Automated Testing Coverage** - -#### **Unit Tests (125+ test cases)** -- **Configuration Management**: 50+ tests covering initialization, validation, presets -- **API Security**: 40+ tests for CORS, headers, error handling, retry logic -- **Update Management**: 35+ tests for migrations, backups, version comparison -- **Property-based Tests**: Mathematical properties verification using fast-check - -#### **Integration Tests** -- **Configuration Lifecycle**: End-to-end config with storage persistence -- **Security Integration**: Full request lifecycle with CORS and error handling -- **Update Integration**: Complete update scenarios with real-world data migration -- **Cross-browser Compatibility**: Chrome, Firefox, Edge testing - -#### **Test Execution Commands** -```bash -# Run all tests -npm test - -# Run with coverage reporting -npm run test:coverage - -# Run only unit tests -npm run test:unit - -# Run only integration tests -npm run test:integration - -# Watch mode for development -npm run test:watch -``` - -#### **Coverage Targets** -- **Branches**: 70%+ coverage -- **Functions**: 70%+ coverage -- **Lines**: 70%+ coverage -- **Statements**: 70%+ coverage - -### **Manual Testing Scenarios** - -#### **Core Functionality Testing** -1. **Installation & First Run** - - Install extension in fresh browser profile - - Verify welcome notification and options page - - Test initial configuration setup - -2. **Smart Context Detection** - - Test on YouTube, Medium, GitHub, Stack Overflow - - Verify appropriate action suggestions - - Test confidence scoring accuracy - -3. **Batch Operations** - - Open 20+ tabs with various content types - - Test "Process All Tabs" functionality - - Verify progress tracking and cancellation - -4. **Configuration Management** - - Test environment detection (dev/staging/prod) - - Verify settings export/import - - Test configuration health checks - -5. 
**Update Scenarios** - - Test extension update with data migration - - Verify backup creation and rollback - - Test Chrome/Firefox browser updates - -#### **Performance Testing** -- **Memory Usage**: Monitor extension memory consumption -- **CPU Impact**: Measure CPU usage during operations -- **Network Efficiency**: Track API request optimization -- **Cache Performance**: Verify cache hit ratios - -#### **Security Testing** -- **CORS Validation**: Test cross-origin request handling -- **Input Sanitization**: Verify XSS prevention -- **Permission Audit**: Confirm minimal permission usage -- **Token Security**: Test API token handling - -## 🏆 **SUCCESS METRICS & KPIs** - -### **Technical Metrics** -- ✅ **Zero critical bugs** in production -- ✅ **70%+ test coverage** across all code -- ✅ **<2 second response times** for all operations -- ✅ **<50MB memory usage** under normal operation -- ✅ **99%+ uptime** for core functionality - -### **User Experience Metrics** -- ✅ **Professional UI/UX** with toast notifications and progress indicators -- ✅ **Smart automation** with context detection and batch operations -- ✅ **Comprehensive search** with filters and suggestions -- ✅ **Reliable updates** with automatic data migration -- ✅ **Cross-browser support** for Chrome, Firefox, Edge - -### **Security & Compliance** -- ✅ **CORS compliance** with proper security headers -- ✅ **Minimal permissions** following principle of least privilege -- ✅ **Secure token handling** with encrypted storage -- ✅ **Input validation** preventing XSS and injection attacks -- ✅ **Update security** with backup and rollback capabilities - -## 🎯 **CONCLUSION** - -The TLDW Browser Extension has been **completely transformed** from a basic prototype into a **production-ready, enterprise-grade extension** with: - -### **🚀 Major Achievements** -- **16 Core Improvements**: All critical UX, performance, and security issues resolved -- **5 Advanced Features**: Smart context detection, batch operations, enhanced search, progress indicators, and configuration management -- **Enterprise Architecture**: Centralized configuration, security headers, update management -- **Comprehensive Testing**: 125+ tests with 70%+ coverage across unit, integration, and property-based testing -- **Cross-Browser Support**: Chrome, Firefox, and Edge compatibility - -### **📈 Impact Summary** -- **User Experience**: Professional interface with intelligent automation -- **Performance**: Optimized caching, memory management, and throttled operations -- **Security**: CORS compliance, security headers, and minimal permissions -- **Reliability**: Robust error handling, retry logic, and update management -- **Maintainability**: Centralized configuration and comprehensive test coverage - -### **🔧 Ready for Production** -The extension is now **ready for immediate deployment** to browser stores with: -- Complete packaging instructions for Chrome, Firefox, and Edge -- Comprehensive testing and quality assurance procedures -- Production deployment checklist and monitoring guidelines -- Future enhancement roadmap for continued improvement - -This transformation represents a **complete evolution** from prototype to professional-grade software, establishing a solid foundation for long-term success and user adoption. 
diff --git a/Docs/Development/PYTHON_FASTAPI_BEST_PRACTICES.md b/Docs/Development/PYTHON_FASTAPI_BEST_PRACTICES.md index 12fe78484..5175078d0 100644 --- a/Docs/Development/PYTHON_FASTAPI_BEST_PRACTICES.md +++ b/Docs/Development/PYTHON_FASTAPI_BEST_PRACTICES.md @@ -848,7 +848,7 @@ encoding_gpt4 = tiktoken.encoding_for_model("gpt-4") # cl100k_base # Anthropic (use their API) async def count_anthropic_tokens(text: str): response = await anthropic_client.count_tokens( - model="claude-3-opus", + model="claude-opus-4.1", messages=[{"role": "user", "content": text}] ) return response.usage.input_tokens @@ -1857,7 +1857,7 @@ class MultiModelOrchestrator: temperature=0 ) self.smart_model = ChatAnthropic( - model="claude-3-opus", + model="claude-opus-4.1", temperature=0 ) self.code_model = ChatOpenAI( diff --git a/Docs/Development/Streaming_Code_Review_Checklist.md b/Docs/Development/Streaming_Code_Review_Checklist.md new file mode 100644 index 000000000..69bcef166 --- /dev/null +++ b/Docs/Development/Streaming_Code_Review_Checklist.md @@ -0,0 +1,13 @@ +# Code Review — Streaming + +Short, high-signal items for PR reviewers touching SSE/WS streaming code. + +- Prefer structured sends using `SSEStream.send_json` / `SSEStream.send_event` and `WebSocketStream.send_json`. +- Raw SSE lines via `SSEStream.send_raw_sse_line` are allowed only for legacy provider pass-through during migration; add a brief code comment (e.g., "legacy pass-through; to be removed after rollout"). +- Do not wrap domain WS payloads in event frames for MCP/Audio — keep JSON‑RPC (MCP) and audio partials as-is. Use standardized lifecycle only: `ping`, `error`, `done`. +- Labels must be low-cardinality (e.g., `component`, `endpoint`) — never user/session IDs. +- Close codes: map errors per PRD (e.g., `quota_exceeded` → frame + close `1008`; idle timeout → `1001`). + +References +- PRD: `Docs/Design/Stream_Abstraction_PRD.md` +- Streams API: `tldw_Server_API/app/core/Streaming/streams.py` diff --git a/Docs/Evals/Evaluations_Quick_Start.md b/Docs/Evals/Evaluations_Quick_Start.md index cf928c1dc..dbba57e6f 100644 --- a/Docs/Evals/Evaluations_Quick_Start.md +++ b/Docs/Evals/Evaluations_Quick_Start.md @@ -174,7 +174,7 @@ groq_api_key = gsk_your_groq_key_here # Optional: Configure default models openai_model = gpt-4 -anthropic_model = claude-3-sonnet-20240229 +anthropic_model = claude-sonnet-4-5 ``` ### 3. Summarization Quality Evaluation (G-Eval) diff --git a/Docs/Getting-Started-STT_and_TTS.md b/Docs/Getting-Started-STT_and_TTS.md new file mode 100644 index 000000000..46c5ba4ea --- /dev/null +++ b/Docs/Getting-Started-STT_and_TTS.md @@ -0,0 +1,382 @@ +# Getting Started — STT (Speech-to-Text) and TTS (Text-to-Speech) + +This guide helps first-time users set up and test speech features with tldw_server. +It covers quick paths for both cloud-hosted and local backends, plus verification steps and troubleshooting. + +## TL;DR Choices +- Fastest TTS (hosted): OpenAI TTS — requires `OPENAI_API_KEY`. +- Local TTS (offline): Kokoro ONNX — requires model files + eSpeak library. +- Local STT (offline): faster-whisper — requires FFmpeg; optional GPU. +- Advanced STT (optional): NeMo Parakeet/Canary, Qwen2Audio — larger setup, GPU recommended. 
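+
+For a quick look at which of these prerequisites are already in place on your machine, here is a minimal illustrative check. It only inspects PATH and environment variables named in this guide (the OpenAI key for hosted TTS and the optional eSpeak library override used by Kokoro); it is not a substitute for the per-option steps below.
+
+```python
+# Illustrative prerequisite check: FFmpeg plus optional keys/paths from this guide.
+import os
+import shutil
+
+checks = {
+    "ffmpeg on PATH (required for audio I/O)": shutil.which("ffmpeg") is not None,
+    "OPENAI_API_KEY set (hosted TTS)": bool(os.environ.get("OPENAI_API_KEY")),
+    "PHONEMIZER_ESPEAK_LIBRARY set (Kokoro; only if auto-detect fails)": bool(
+        os.environ.get("PHONEMIZER_ESPEAK_LIBRARY")
+    ),
+}
+for label, ok in checks.items():
+    print(f"{'OK  ' if ok else '--  '}{label}")
+```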
+ +## Prerequisites +- Python environment with project installed + - From repo root: `pip install -e .` +- FFmpeg (required for audio I/O) + - macOS: `brew install ffmpeg` + - Ubuntu/Debian: `sudo apt-get install -y ffmpeg` + - Windows: install from ffmpeg.org and ensure it’s in PATH +- Start the server + - `python -m uvicorn tldw_Server_API.app.main:app --reload` + - API: + - WebUI: + +Auth quick note +- Single-user mode: server prints an API key on startup; or set `SINGLE_USER_API_KEY`. +- Use header: `X-API-KEY: ` for all calls (or Bearer JWT in multi-user setups). + +--- + +## Option A — OpenAI TTS (Hosted) +Best for immediate results; no local model setup. + +1) Provide API key +- Export `OPENAI_API_KEY` in your shell or add it to `Config_Files/config.txt` (OpenAI section). + +2) Verify TTS provider is enabled (optional) +- OpenAI TTS is enabled by default. To confirm or customize, see `tldw_Server_API/app/core/TTS/tts_providers_config.yaml` under `providers.openai`. + +3) Test voice catalog +```bash +curl -s http://127.0.0.1:8000/api/v1/audio/voices/catalog \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" | jq +``` + +4) Generate speech +```bash +curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "tts-1", + "voice": "alloy", + "input": "Hello from tldw_server", + "response_format": "mp3" + }' \ + --output out.mp3 +``` +- Play `out.mp3` in your player. + +Troubleshooting +- 401/403: ensure `OPENAI_API_KEY` is set and valid, and you’re passing `X-API-KEY` (single-user) or Bearer token (multi-user). +- 429: OpenAI rate limit; retry after `retry-after` seconds. + +--- + +## Option B — Kokoro TTS (Local, ONNX) +Offline TTS using Kokoro ONNX. Good quality and fast on CPU; optional GPU via ONNX Runtime. + +1) Install (one command) +```bash +python Helper_Scripts/TTS_Installers/install_tts_kokoro.py +``` +If you prefer manual steps, install dependencies instead: +```bash +# Python packages (CPU) +pip install onnxruntime kokoro-onnx phonemizer espeak-phonemizer huggingface-hub + +# Optional: GPU acceleration (replace onnxruntime above) +pip install onnxruntime-gpu + +# System package for phonemizer (required): +# macOS (Homebrew): +brew install espeak-ng +# Ubuntu/Debian: +sudo apt-get update && sudo apt-get install -y espeak-ng +# Windows (PowerShell, example): +# - Install eSpeak NG (from https://github.com/espeak-ng/espeak-ng/releases) +# - Set PHONEMIZER_ESPEAK_LIBRARY to libespeak-ng.dll path + +# eSpeak NG is auto-detected on most systems. Point the phonemizer to the library only if needed +# macOS (adjust if your Homebrew prefix differs) +export PHONEMIZER_ESPEAK_LIBRARY=/opt/homebrew/lib/libespeak-ng.dylib +# Linux example +export PHONEMIZER_ESPEAK_LIBRARY=/usr/lib/x86_64-linux-gnu/libespeak-ng.so.1 +# Windows example (only if auto-detect fails) +# set PHONEMIZER_ESPEAK_LIBRARY=C:\\Program Files\\eSpeak NG\\libespeak-ng.dll +``` + +2) Download model files (skipped if you use the installer) +- Place files under a `models/` folder at the repo root (example paths below). 
+- Recommended sources: + - ONNX: `onnx-community/Kokoro-82M-v1.0-ONNX-timestamped` (contains `onnx/model.onnx` and a `voices/` directory of voice styles) + - PyTorch (optional): `hexgrad/Kokoro-82M` (contains `kokoro-v1_0.pth`, `config.json`, and `voices/`) + +Examples +```bash +# Create a local directory +mkdir -p models/kokoro + +# Option A: huggingface-cli (ONNX v1.0) +pip install huggingface-hub +huggingface-cli download onnx-community/Kokoro-82M-v1.0-ONNX-timestamped onnx/model.onnx --local-dir models/kokoro/ +huggingface-cli download onnx-community/Kokoro-82M-v1.0-ONNX-timestamped voices --local-dir models/kokoro/ + +# Option B: direct URLs for ONNX (if CLI unavailable) +wget https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX-timestamped/resolve/main/onnx/model.onnx -O models/kokoro/onnx/model.onnx +# Then download the voices/ directory assets from the same repo (or use huggingface-cli above) +``` + +3) Enable and point config to your files (the installer writes defaults under models/kokoro/) +- Edit `tldw_Server_API/app/core/TTS/tts_providers_config.yaml`: +```yaml +providers: + kokoro: + enabled: true + use_onnx: true + model_path: "models/kokoro/onnx/model.onnx" + voices_json: "models/kokoro/voices" # use voices directory for v1.0 ONNX + device: "cpu" # or "cuda" if using onnxruntime-gpu +``` +- Optional: move Kokoro earlier in `provider_priority` to prefer it. + +4) Restart server and verify +```bash +python -m uvicorn tldw_Server_API.app.main:app --reload +curl -s http://127.0.0.1:8000/api/v1/audio/voices/catalog \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" | jq '.kokoro' +``` + +5) Generate speech with Kokoro +```bash +curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "kokoro", + "voice": "af_bella", + "input": "Testing local Kokoro TTS", + "response_format": "mp3" + }' \ + --output kokoro.mp3 +``` + +Troubleshooting +- Missing dependencies + - kokoro_onnx: `pip install kokoro-onnx` + - onnxruntime: `pip install onnxruntime` (or `onnxruntime-gpu`) + - phonemizer / espeak-phonemizer: `pip install phonemizer espeak-phonemizer` +- `voices assets not found` or `model not found`: fix `voices` directory or model path in YAML. +- `eSpeak lib not found`: install `espeak-ng` and set `PHONEMIZER_ESPEAK_LIBRARY` to the library path. +- Adapter previously failed and won’t retry: we enable retry by default (`performance.adapter_failure_retry_seconds: 300`). Or restart the server after fixing assets. + +Notes +- PyTorch variant (hexgrad/Kokoro-82M): set `use_onnx: false`, set `model_path: models/kokoro/kokoro-v1_0.pth`, ensure `config.json` sits alongside it, and set `voice_dir: models/kokoro/voices`. Requires `torch` and a compatible Kokoro PyTorch package. Set `device` to `cuda` or `mps` if available. + +--- + +## Option C — faster-whisper STT (Local) +Fast, local transcription compatible with the OpenAI `/audio/transcriptions` API. + +1) Install dependencies +```bash +pip install faster-whisper +# Optional (GPU): pip install torch --index-url https://download.pytorch.org/whl/cu121 +``` +- FFmpeg must be installed (see prerequisites). 
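+
+Optionally, confirm the faster-whisper install works on its own before calling the API. This is a minimal sketch: the `tiny` model and `sample.wav` are placeholders, and the server may be configured to use a different model or backend.
+
+```python
+# Standalone faster-whisper check (illustrative); replace sample.wav with your file.
+from faster_whisper import WhisperModel
+
+# "tiny" keeps the first model download small; pick a larger model for real use.
+model = WhisperModel("tiny", device="cpu", compute_type="int8")
+segments, info = model.transcribe("sample.wav")
+print(f"Detected language: {info.language} (p={info.language_probability:.2f})")
+for segment in segments:
+    print(f"[{segment.start:6.2f}s -> {segment.end:6.2f}s] {segment.text.strip()}")
+```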
+ +2) Transcribe an audio file +```bash +# Replace sample.wav with your file +curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/transcriptions \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Accept: application/json" \ + -F "file=@sample.wav" \ + -F "model=whisper-large-v3" \ + -F "language=en" | jq +``` +- The `model` value is OpenAI-compatible; the server maps to your configured local backend. +- For simple text response, set `-H "Accept: text/plain"`. + +3) Real-time streaming STT (WebSocket) +- Endpoint: `WS /api/v1/audio/stream/transcribe` +- Example (with `wscat`): +```bash +wscat -c ws://127.0.0.1:8000/api/v1/audio/stream/transcribe \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" +# Then send base64-encoded audio chunks per the server protocol +``` + +Troubleshooting +- Long files: prefer shorter clips or chunk client-side. +- Out-of-memory: try a smaller model (e.g., `whisper-medium`), or run on GPU. + +--- + +## Verifying Setup via WebUI +- Open +- Tabs: + - Audio → Transcription (STT): upload a short clip and transcribe + - Audio → TTS: enter text, pick a voice/model, and synthesize +- The WebUI auto-detects single-user mode and populates the API key. + +--- + +## Common Errors & Fixes +- 401/403 Unauthorized + - Use `X-API-KEY` (single-user) or Bearer JWT (multi-user). Check server logs on startup. +- 404 / Model or voice not found + - Verify provider is enabled and files exist; check YAML paths and voice IDs. +- `kokoro_onnx` or `kokoro` missing + - `pip install kokoro-onnx` (ONNX) or install the PyTorch package for Kokoro. +- eSpeak library missing (Kokoro ONNX) + - Install `espeak-ng` and set `PHONEMIZER_ESPEAK_LIBRARY` to the library path. +- FFmpeg not found + - Install FFmpeg and ensure it’s accessible in PATH. +- Network/API errors with OpenAI + - Verify `OPENAI_API_KEY`. Check rate limits; proxy/corporate networks may block. + +--- + +## Tips & Configuration +- Provider priority + - `tldw_Server_API/app/core/TTS/tts_providers_config.yaml` → `provider_priority` + - Put your preferred provider first (e.g., `kokoro` before `openai`). +- Adapter retry + - `performance.adapter_failure_retry_seconds: 300` allows periodic re-init after failures. +- Streaming errors as audio vs HTTP errors + - `performance.stream_errors_as_audio: false` (recommended for production APIs). +- GPU acceleration + - For PyTorch-based backends (Kokoro PT, NeMo), install appropriate CUDA builds and set `device: cuda`. + +--- + +## Privacy & Security +- tldw_server is designed for local/self-hosted use. Audio data stays local unless you call hosted APIs (e.g., OpenAI). +- Never commit API keys; prefer environment variables or `.env`. + +--- + +## Appendix — Sample Kokoro YAML Snippet +```yaml +provider_priority: + - kokoro + - openai +providers: + kokoro: + enabled: true + use_onnx: true + model_path: "models/kokoro/onnx/model.onnx" + voices_json: "models/kokoro/voices" + device: "cpu" +performance: + adapter_failure_retry_seconds: 300 + stream_errors_as_audio: false +``` + +If you would like, we can configure a setup checker that validates models, voices, FFmpeg, and environment keys, and reports fixes before you run your first request. + +--- + +## Additional TTS Backends (Advanced/Optional) + +These providers are supported via adapters. Many require large model downloads and work best with a GPU. + +### ElevenLabs (Hosted) +- Enable in YAML and set `ELEVENLABS_API_KEY`. 
+```yaml +providers: + elevenlabs: + enabled: true + api_key: ${ELEVENLABS_API_KEY} + model: "eleven_monolingual_v1" +``` +- Test: `model: eleven_monolingual_v1`, `voice: rachel` (or a voice from your catalog). + +### Higgs Audio V2 (Local) +- Deps: `pip install torch torchaudio soundfile huggingface_hub`; `pip install git+https://github.com/boson-ai/higgs-audio.git` +- YAML: +```yaml +providers: + higgs: + enabled: true + model_path: "bosonai/higgs-audio-v2-generation-3B-base" + tokenizer_path: "bosonai/higgs-audio-v2-tokenizer" + device: "cuda" +``` +- Test: `model: higgs`, `voice: narrator`. + +### Dia (Local, dialogue specialist) +- Deps: `pip install torch transformers accelerate safetensors sentencepiece soundfile huggingface_hub` +- YAML: +```yaml +providers: + dia: + enabled: true + model_path: "nari-labs/dia" + device: "cuda" +``` +- Test: `model: dia`, `voice: speaker1`. + +### VibeVoice (Local, expressive multi-speaker) +- Deps: `pip install torch torchaudio sentencepiece soundfile huggingface_hub` +- Install (official): + ```bash + git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice + cd libs/VibeVoice && pip install -e . + cd ../.. + ``` +- YAML: +```yaml +providers: + vibevoice: + enabled: true + auto_download: true + device: "cuda" # or mps/cpu +``` +- Test: `model: vibevoice`, `voice: 1` (speaker index). + +### NeuTTS Air (Local, voice cloning) +- Deps: `pip install neucodec>=0.0.4 librosa phonemizer transformers` (optional streaming: `pip install llama-cpp-python`) +- YAML: +```yaml +providers: + neutts: + enabled: true + backbone_repo: "neuphonic/neutts-air" + backbone_device: "cpu" + codec_repo: "neuphonic/neucodec" + codec_device: "cpu" +``` +- Test: `model: neutts` and provide a base64 `voice_reference` in the JSON body. + +### IndexTTS2 (Local, expressive zero-shot) +- Place checkpoints under `checkpoints/index_tts2/`. +- YAML: +```yaml +providers: + index_tts: + enabled: true + model_dir: "checkpoints/index_tts2" + cfg_path: "checkpoints/index_tts2/config.yaml" + device: "cuda" +``` +- Test: `model: index_tts` (some voices require reference audio). + +--- + +## Additional STT Backends (Advanced/Optional) + +### NVIDIA NeMo — Parakeet and Canary +- Deps (standard backend): `pip install 'nemo_toolkit[asr]'>=1.23.0` +- Alternative backends (optional): + - ONNX: `pip install onnxruntime>=1.16.0 huggingface_hub soundfile librosa numpy` + - MLX (Apple Silicon): `pip install mlx mlx-lm` +- Usage with `/api/v1/audio/transcriptions`: + - `model=nemo-parakeet-1.1b` or `model=nemo-canary` + - Language: set `language=en` (or appropriate code) when known. + +### Qwen2Audio (Local) +- Deps: `pip install torch transformers accelerate soundfile sentencepiece` +- Optional: use the setup installer to prefetch assets. +- Usage with `/api/v1/audio/transcriptions`: + - `model=qwen2audio` + +Notes +- Some media endpoints expose more granular backend choices (e.g., Parakeet backends); for `/audio/transcriptions` the `model` is typically sufficient. + +--- + +## Model Hints (At-a-Glance) +- TTS models: `tts-1` (OpenAI), `kokoro`, `eleven_monolingual_v1`, `higgs`, `dia`, `vibevoice`, `neutts`, `index_tts`. +- STT models: `whisper-1` (faster-whisper), `whisper-large-v3` and `*-ct2` variants, `nemo-canary`, `nemo-parakeet-1.1b`, `qwen2audio`. 
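+
+As a final sanity check, the sketch below round-trips the two endpoints used in this guide: it synthesizes a short clip via `/audio/speech`, then transcribes it back via `/audio/transcriptions`. It assumes single-user mode with `SINGLE_USER_API_KEY` exported, the default server address, and the default OpenAI TTS / faster-whisper models; adjust model and voice names for other backends.
+
+```python
+# Round-trip smoke test (illustrative): TTS -> file -> STT, single-user auth.
+import os
+import requests
+
+BASE = "http://127.0.0.1:8000/api/v1"
+HEADERS = {"X-API-KEY": os.environ["SINGLE_USER_API_KEY"]}
+
+# 1) Synthesize speech (OpenAI TTS shown; swap model/voice for Kokoro, etc.).
+tts = requests.post(
+    f"{BASE}/audio/speech",
+    headers=HEADERS,
+    json={"model": "tts-1", "voice": "alloy",
+          "input": "Round trip test from tldw_server", "response_format": "mp3"},
+    timeout=120,
+)
+tts.raise_for_status()
+with open("roundtrip.mp3", "wb") as f:
+    f.write(tts.content)
+
+# 2) Transcribe the generated clip back to text.
+with open("roundtrip.mp3", "rb") as f:
+    stt = requests.post(
+        f"{BASE}/audio/transcriptions",
+        headers=HEADERS,
+        files={"file": ("roundtrip.mp3", f, "audio/mpeg")},
+        data={"model": "whisper-1"},
+        timeout=300,
+    )
+stt.raise_for_status()
+print(stt.json())
+```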
diff --git a/Docs/Issues/STREAMS_UNIFIED_Rollout_Tracking.md b/Docs/Issues/STREAMS_UNIFIED_Rollout_Tracking.md new file mode 100644 index 000000000..c2385d88d --- /dev/null +++ b/Docs/Issues/STREAMS_UNIFIED_Rollout_Tracking.md @@ -0,0 +1,54 @@ +# Tracking Issue — STREAMS_UNIFIED Flip (Dev → Staging → Prod) + +Status: Open +Owner: Streaming/Platform +Created: 2025-11-04 + +Goal +- Validate unified SSE/WS streams behind `STREAMS_UNIFIED` and flip the flag ON in staging, then plan production. + +References +- PRD: `Docs/Design/Stream_Abstraction_PRD.md` (Status: Pilot Rollout) +- Dev Overlay: `Dockerfiles/Dockerfiles/docker-compose.dev.yml` +- Metrics Dashboard: `Docs/Deployment/Monitoring/Grafana_Streaming_Basics.json` + +Checklist + +Phase A — Dev validation +- [ ] Start API with dev overlay or `STREAMS_UNIFIED=1` env +- [ ] Configure two providers (e.g., OpenAI + Groq) +- [ ] Chat SSE (main): single `[DONE]`, OpenAI deltas present +- [ ] Character chat SSE: heartbeat under idle; single `[DONE]` +- [ ] Chat document-generation SSE: heartbeat; no duplicate `[DONE]` +- [ ] Embeddings orchestrator SSE: `event: summary` frames periodic +- [ ] Prompt Studio SSE fallback: initial state + heartbeats +- [ ] Audio WS: pings observed; quota or validation error emits error frame and closes with correct code +- [ ] MCP WS: JSON-RPC responses unchanged; lifecycle frames present; idle close works +- [ ] Metrics present: `sse_enqueue_to_yield_ms`, `sse_queue_high_watermark`, `ws_send_latency_ms`, `ws_pings_total` + +Phase B — Staging flip +- [ ] Enable `STREAMS_UNIFIED=1` in staging +- [ ] Use dev overlay in non‑prod: `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/Dockerfiles/docker-compose.dev.yml up -d --build` +- [ ] Import Grafana dashboard and confirm labels for key endpoints +- [ ] Soak for 48h; watch idle timeouts and ping failures +- [ ] Document any client compatibility issues (Audio `error_type` alias still on) +- [ ] If regressions: toggle back to `STREAMS_UNIFIED=0` (rollback) and file follow-ups + +Phase C — Production plan +- [ ] Announce window; confirm client compatibility (Audio/MCP consumers) +- [ ] Flip `STREAMS_UNIFIED=1` progressively (canary) +- [ ] Verify metrics; no duplicate `[DONE]`; latency within ±1% server-side target +- [ ] Keep rollback knob in runbook + +Notes +- Prefer `STREAM_HEARTBEAT_MODE=data` behind reverse proxies/CDNs. +- For provider control lines (`event/id/retry`), keep `STREAM_PROVIDER_CONTROL_PASSTHRU=0` unless a specific integration requires it. + +Follow-ups + +- [x] Remove legacy SSE helpers no longer used by pilot endpoints + - Removed `_extract_sse_data_lines` from `tldw_Server_API/app/api/v1/endpoints/character_chat_sessions.py`. + - Remaining legacy fallbacks guarded by `STREAMS_UNIFIED` will be removed after the default flip. +- [ ] Confirm Audio `error_type` deprecation timeline with owners (PRD phases target v0.1.1 → v0.1.3) + - Align release notes and client notices; keep `compat_error_type=True` until v0.1.3. +- [ ] Monitor dashboards after staging flip; record p95 WS send latency and SSE enqueue→yield p95 snapshots pre/post flip. diff --git a/Docs/Monitoring/Grafana_Dashboards/README.md b/Docs/Monitoring/Grafana_Dashboards/README.md new file mode 100644 index 000000000..17fda4f9d --- /dev/null +++ b/Docs/Monitoring/Grafana_Dashboards/README.md @@ -0,0 +1,30 @@ +Grafana Dashboards for tldw_server + +Overview +- This folder contains an example Grafana dashboard JSON for visualizing the LLM Gateway metrics exposed at `/metrics`. 
+- The dashboard targets the internal Prometheus-style metrics emitted by `tldw_Server_API.app.core.Metrics.metrics_manager` and the HTTP middleware. + +Prometheus Scrape (example) +Add a scrape job pointing to your server (adjust host/port): + + scrape_configs: + - job_name: 'tldw_server' + metrics_path: /metrics + static_configs: + - targets: ['127.0.0.1:8000'] + +Importing the Dashboard +1) In Grafana: Dashboards -> New -> Import. +2) Upload `llm_gateway_dashboard.json`. +3) Set the Prometheus datasource when prompted. + +Variables +- DS_PROMETHEUS: Prometheus datasource selector (choose your Prometheus instance). +- endpoint: HTTP endpoint label (defaults to `/api/v1/chat/completions`). +- method: HTTP method label (defaults to `POST`). +- provider: LLM provider label (e.g., `openai`). +- model: LLM model label (e.g., `gpt-4o-mini`). + +Notes +- If you run the server in mock mode for benchmarking (`CHAT_FORCE_MOCK=1`), the upstream LLM panels still work since metrics are recorded by the gateway (decorators and usage tracker). +- The HTTP latency panels are driven by `http_request_duration_seconds_bucket`. LLM latency panels are driven by `llm_request_duration_seconds_bucket`. diff --git a/Docs/Monitoring/http_client_alerts_prometheus.yaml b/Docs/Monitoring/http_client_alerts_prometheus.yaml new file mode 100644 index 000000000..30b2515ca --- /dev/null +++ b/Docs/Monitoring/http_client_alerts_prometheus.yaml @@ -0,0 +1,34 @@ +groups: + - name: tldw_server_http_client + rules: + - alert: HighHTTPClientRetryRate + expr: sum(rate(http_client_retries_total[5m])) / clamp_min(sum(rate(http_client_requests_total[5m])), 1) > 0.2 + for: 10m + labels: + severity: warning + annotations: + summary: "High HTTP client retry rate" + description: > + More than 20% of outbound HTTP requests are retried over 10 minutes. + Investigate upstream availability or egress policy configuration. + + - alert: EgressDenialsDetected + expr: increase(http_client_egress_denials_total[10m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Egress policy denials detected" + description: > + One or more outbound egress denials occurred in the last 10 minutes. + Check EGRESS_ALLOWLIST/PROXY_ALLOWLIST and redirect chains. + + - alert: HighHTTPClientLatencyP99 + expr: histogram_quantile(0.99, sum by (le) (rate(http_client_request_duration_seconds_bucket[10m]))) > 2 + for: 15m + labels: + severity: warning + annotations: + summary: "High p99 outbound HTTP latency" + description: > + The 99th percentile outbound HTTP latency is above 2s for 15 minutes. diff --git a/Docs/Operations/Env_Vars.md b/Docs/Operations/Env_Vars.md index e5b56b2b8..9e6d7f901 100644 --- a/Docs/Operations/Env_Vars.md +++ b/Docs/Operations/Env_Vars.md @@ -29,6 +29,14 @@ Note: Secrets should be set via environment or `.env`. `config.txt` is supported - `ALLOW_NLTK_DOWNLOADS`: Force-enable NLTK downloads even when running tests (`1|true|yes`). - Overrides `TEST_MODE`/`DISABLE_NLTK_DOWNLOADS`/pytest auto-detection to allow downloads for development scenarios that require full NLTK resources. +### Jobs Postgres (Test-only Helpers) +- `RUN_PG_JOBS_TESTS`: Enable Jobs outbox Postgres tests (`1|true|yes`). Disabled by default due to environment variability. +- `TLDW_TEST_NO_DOCKER`: When set (`1|true|yes`), disables auto-start of a local Postgres Docker container during Jobs tests. +- `TLDW_TEST_PG_IMAGE`: Docker image for the optional local Postgres used by Jobs tests (default `postgres:15`). 
+- `TLDW_TEST_PG_CONTAINER_NAME`: Container name for the optional local Postgres (default `tldw_jobs_postgres_test`). + - The Jobs tests/fixtures first try a TCP probe to the configured DSN; when unreachable and the host is local, they attempt to start this container unless `TLDW_TEST_NO_DOCKER` is set. + - You can also set `POSTGRES_TEST_*` vars or `JOBS_DB_URL` explicitly to point at an existing cluster. + ## RAG Module - `tldw_production`: When `true`, RAG retrievers disable raw SQL fallbacks and require adapters (MediaDatabase/ChaChaNotesDB). Unified endpoints already pass adapters; direct pipeline usage must supply them. - `RAG_LLM_RERANK_TIMEOUT_SEC`: Per-document LLM rerank timeout (seconds). Default `10`. @@ -198,6 +206,10 @@ Runtime overrides (non-persistent) are available via API: export PRICING_OVERRIDES='{"openai":{"gpt-4o":{"prompt":0.005,"completion":0.015}}}' `` File-based overrides are also supported at `tldw_Server_API/Config_Files/model_pricing.json`. + In addition to cost tracking, this catalog now seeds the available models list for commercial providers + surfaced by `GET /api/v1/llm/providers`. Add a model here to have it appear in the WebUI model selectors + (you can still list models in `config.txt`; both sources are merged, with `model_pricing.json` acting as + the primary reference). ## Embeddings - `EMBEDDINGS_DEDUPE_TTL_SECONDS`: Dedupe window for worker replay suppression. Defaults to `3600` seconds. Workers compute a stage-specific dedupe key (or use `dedupe_key`/`idempotency_key` if provided) and suppress processing if the same key was seen within this TTL. @@ -306,6 +318,25 @@ Notes | `OTEL_EXPORTER_OTLP_PROTOCOL` | `grpc` | `grpc` or `http/protobuf` | | `OTEL_EXPORTER_OTLP_HEADERS` | (empty) | Optional headers string | | `OTEL_EXPORTER_OTLP_INSECURE` | `true` | Allow insecure transport | +| `STREAMS_UNIFIED` | `0` | Feature flag: unified SSE/WS streams in pilot endpoints. Recommended `1` in non‑prod. Use the dev overlay: `Dockerfiles/Dockerfiles/docker-compose.dev.yml`. | + +Quick rollback + +- To disable unified streaming quickly, set `STREAMS_UNIFIED=0` and restart the app (or `docker compose up -d` to re‑create with the new env). This reverts pilot endpoints to legacy streaming code paths. + +Non‑prod defaults + +- `Dockerfiles/Dockerfiles/docker-compose.dev.yml` exports `STREAMS_UNIFIED=1` for dev/staging overlays. +- `Dockerfiles/Dockerfiles/docker-compose.test.yml` also sets `STREAMS_UNIFIED=1` for test environments. + In production, keep the flag unset or `0` until you’re ready to flip more broadly. 
+| `STREAM_HEARTBEAT_INTERVAL_S` | `10` | Default heartbeat interval for streams (seconds) | +| `STREAM_HEARTBEAT_MODE` | `comment` | `comment` or `data` heartbeats (prefer `data` behind reverse proxies) | +| `STREAM_IDLE_TIMEOUT_S` | (disabled) | Idle timeout for SSE streams (seconds) | +| `AUDIO_WS_IDLE_TIMEOUT_S` | (disabled) | Optional idle timeout for Audio WebSocket (seconds); overrides `STREAM_IDLE_TIMEOUT_S` for audio handler | +| `AUDIO_WS_QUOTA_CLOSE_1008` | `0` | When `1`, Audio WS closes with 1008 for quota/rate-limit instead of legacy 4003 | +| `STREAM_MAX_DURATION_S` | (disabled) | Maximum duration for SSE streams (seconds) | +| `STREAM_QUEUE_MAXSIZE` | `256` | Default bounded queue size for SSE streams | +| `STREAM_PROVIDER_CONTROL_PASSTHRU` | `0` | Preserve provider SSE control lines (`event/id/retry`) when `1` | ## Prometheus & Grafana (deployment) diff --git a/Docs/Operations/monitoring/README.md b/Docs/Operations/monitoring/README.md index 27c7be6e3..8b0c0a0c9 100644 --- a/Docs/Operations/monitoring/README.md +++ b/Docs/Operations/monitoring/README.md @@ -49,6 +49,32 @@ Import the provided dashboards: - Stage processed/s and failed/s - Stage flags (paused/drain) +Additionally, for streaming (SSE/WS) metrics, import `Docs/Deployment/Monitoring/Grafana_Streaming_Basics.json` which includes: +- SSE enqueue→yield latency (ms) histogram +- SSE queue high-watermark gauge +- WS send latency (ms) histogram +- WS pings sent (counter) + +Streaming metrics labels +- The stream helpers accept optional low-cardinality labels to facet metrics by component/endpoint. +- Example (SSE): + +```python +from tldw_Server_API.app.core.Streaming.streams import SSEStream +stream = SSEStream(labels={"component": "chat", "endpoint": "chat_stream"}) +``` + +- Example (WS): + +```python +from tldw_Server_API.app.core.Streaming.streams import WebSocketStream +ws_stream = WebSocketStream(websocket, labels={"component": "audio", "endpoint": "audio_unified_ws"}) +``` + +Template variables +- component: derived from metric labels; filter panels by component +- endpoint: derived from metric labels; filter panels and drive a repeated row that facets metrics per endpoint + In Grafana: 1. Dashboards → New → Import 2. Upload `grafana_embeddings_orchestrator.json`, `grafana_workflows.json`, `grafana_service_overview.json`, or `grafana_tenant_overview.json` diff --git a/Docs/Plans/core_readme_refresh.md b/Docs/Plans/core_readme_refresh.md new file mode 100644 index 000000000..752dde6b6 --- /dev/null +++ b/Docs/Plans/core_readme_refresh.md @@ -0,0 +1,56 @@ +# Core Module README Refresh Tracker + +Purpose: Track standardization of README files across `tldw_Server_API/app/core/` using a common template. Status values: Scaffolded | Existing (Review/Update) | Complete. 
+ +| Module | Path | Status | Owner | Notes | +|---|---|---|---|---| +| Audit | tldw_Server_API/app/core/Audit | Complete | | Standardized to 3-section format | +| AuthNZ | tldw_Server_API/app/core/AuthNZ | Complete | | Standardized to 3-section format | +| Character_Chat | tldw_Server_API/app/core/Character_Chat | Complete | | Standardized to 3-section format | +| Chat | tldw_Server_API/app/core/Chat | Complete | | Standardized to 3-section format | +| Chatbooks | tldw_Server_API/app/core/Chatbooks | Complete | | Standardized to 3-section format | +| Chunking | tldw_Server_API/app/core/Chunking | Complete | | Standardized to 3-section format | +| Claims_Extraction | tldw_Server_API/app/core/Claims_Extraction | Complete | | Aligned to 3-section format | +| Collections | tldw_Server_API/app/core/Collections | Complete | | Aligned to 3-section format | +| DB_Management | tldw_Server_API/app/core/DB_Management | Complete | | Standardized to 3-section format | +| Embeddings | tldw_Server_API/app/core/Embeddings | Complete | | Standardized to 3-section format | +| Evaluations | tldw_Server_API/app/core/Evaluations | Complete | | Standardized to 3-section format | +| External_Sources | tldw_Server_API/app/core/External_Sources | Complete | | Aligned to 3-section format | +| Flashcards | tldw_Server_API/app/core/Flashcards | Complete | | Aligned to 3-section format | +| Infrastructure | tldw_Server_API/app/core/Infrastructure | Complete | | Standardized to 3-section format | +| Ingestion_Media_Processing | tldw_Server_API/app/core/Ingestion_Media_Processing | Complete | | Standardized to 3-section format | +| Jobs | tldw_Server_API/app/core/Jobs | Complete | | Standardized to 3-section format | +| LLM_Calls | tldw_Server_API/app/core/LLM_Calls | Complete | | Standardized to 3-section format | +| Local_LLM | tldw_Server_API/app/core/Local_LLM | Complete | | Aligned to 3-section format | +| Logging | tldw_Server_API/app/core/Logging | Complete | | Standardized to 3-section format | +| MCP_unified | tldw_Server_API/app/core/MCP_unified | Complete | | Standardized to 3-section format | +| Metrics | tldw_Server_API/app/core/Metrics | Complete | | Standardized to 3-section format | +| Moderation | tldw_Server_API/app/core/Moderation | Complete | | Aligned to 3-section format | +| Monitoring | tldw_Server_API/app/core/Monitoring | Complete | | Standardized to 3-section format | +| Notes | tldw_Server_API/app/core/Notes | Complete | | Standardized to 3-section format | +| Notifications | tldw_Server_API/app/core/Notifications | Complete | | Standardized to 3-section format | +| Persona | tldw_Server_API/app/core/Persona | Complete | | Aligned to 3-section format | +| PrivilegeMaps | tldw_Server_API/app/core/PrivilegeMaps | Complete | | Aligned to 3-section format | +| Prompt_Management | tldw_Server_API/app/core/Prompt_Management | Complete | | Standardized to 3-section format | +| RAG | tldw_Server_API/app/core/RAG | Complete | | Standardized to 3-section format | +| RateLimiting | tldw_Server_API/app/core/RateLimiting | Complete | | Standardized to 3-section format | +| Sandbox | tldw_Server_API/app/core/Sandbox | Complete | | Aligned to 3-section format | +| Scheduler | tldw_Server_API/app/core/Scheduler | Complete | | Standardized to 3-section format | +| Search_and_Research | tldw_Server_API/app/core/Search_and_Research | Complete | | Standardized to 3-section format | +| Security | tldw_Server_API/app/core/Security | Complete | | Standardized to 3-section format | +| Setup | 
tldw_Server_API/app/core/Setup | Complete | | Aligned to 3-section format | +| Sync | tldw_Server_API/app/core/Sync | Complete | | Standardized to 3-section format | +| Third_Party | tldw_Server_API/app/core/Third_Party | Complete | | Standardized to 3-section format | +| Tools | tldw_Server_API/app/core/Tools | Complete | | Standardized to 3-section format | +| TTS | tldw_Server_API/app/core/TTS | Complete | | Standardized to 3-section format | +| Usage | tldw_Server_API/app/core/Usage | Complete | | Aligned to 3-section format | +| Utils | tldw_Server_API/app/core/Utils | Complete | | Aligned to 3-section format | +| Watchlists | tldw_Server_API/app/core/Watchlists | Complete | | Standardized to 3-section format | +| Web_Scraping | tldw_Server_API/app/core/Web_Scraping | Complete | | Standardized to 3-section format | +| WebSearch | tldw_Server_API/app/core/WebSearch | Complete | | Standardized to 3-section format | +| Workflows | tldw_Server_API/app/core/Workflows | Complete | | Standardized to 3-section format | +| Writing | tldw_Server_API/app/core/Writing | Scaffolded | | | + +Notes: +- “Existing (Review/Update)” indicates a README is present and should be aligned with the template for consistency. +- “Scaffolded” indicates a README.md has been created from the template and needs content filled in by a contributor. diff --git a/Docs/Product/Circuit_Breaker_Unification_PRD.md b/Docs/Product/Circuit_Breaker_Unification_PRD.md new file mode 100644 index 000000000..d660aaaef --- /dev/null +++ b/Docs/Product/Circuit_Breaker_Unification_PRD.md @@ -0,0 +1,213 @@ +Circuit Breaker Unification PRD + + - Title: Circuit Breaker Unification + - Author: [your name] + - Status: Draft + - Owners: Core (Infrastructure), Embeddings, Evaluations, RAG, MCP + - Related Code: tldw_Server_API/app/core/Embeddings/circuit_breaker.py:1, tldw_Server_API/app/core/Evaluations/circuit_breaker.py:1, tldw_Server_API/app/core/RAG/rag_service/unified_pipeline.py:505, + tldw_Server_API/app/core/RAG/rag_service/resilience.py:1, tldw_Server_API/app/core/MCP_unified/modules/base.py:242, tldw_Server_API/app/core/Chat/provider_manager.py:1 + + Overview + + - Problem: Multiple, duplicative circuit breaker (CB) implementations diverge in behavior and metrics, increasing maintenance risk. + - Unifying Principle: All are the same CircuitBreaker with different labels. + - Goal: One unified CB in Infrastructure with per-category config, consistent metrics, and sync/async decorators. Modules inject names/labels only. + + Problem Statement + + - Duplicates and drift: + - Embeddings CB with Prometheus metrics: tldw_Server_API/app/core/Embeddings/circuit_breaker.py:1 + - Evaluations CB with async locks, timeouts, and per-provider configs: tldw_Server_API/app/core/Evaluations/circuit_breaker.py:1 + - RAG resilience’s own CB and coordinator: tldw_Server_API/app/core/RAG/rag_service/unified_pipeline.py:505, tldw_Server_API/app/core/RAG/rag_service/resilience.py:1 + - MCP base embeds CB/backoff semantics: tldw_Server_API/app/core/MCP_unified/modules/base.py:242 + - Additional duplication (noted): Chat provider CB: tldw_Server_API/app/core/Chat/provider_manager.py:1 + - Symptoms: Inconsistent states, thresholds, timeouts, backoff, and metrics across domains; redundant tests and config. + + Goals + + - Single CB implementation under Infrastructure used by Embeddings, Evaluations, RAG, MCP (and optionally Chat). + - Consistent behavior: CLOSED/OPEN/HALF_OPEN, failure thresholds, half-open probe limits, recovery timeouts. 
+ - Optional modes: count threshold and rolling-window failure-rate (RAG). + - First-class async/sync usage with decorators and call wrappers (with optional per-call timeout). + - Unified metrics (Prometheus) with consistent labels: category, service/name, operation/outcome. + - Backward-compatible shims and non-breaking migration of tests/config. + + Non-Goals + + - Rewriting retry/fallback/health-monitor logic (keep in their modules; integrate only via consistent CB hooks). + - Overhauling provider selection logic or load balancing. + - Adding new external dependencies. + + Users And Stakeholders + + - Embeddings team (provider reliability, metrics). + - Evaluations/LLM Calls (per-provider CB configs, timeouts). + - RAG (resilience coordinator; rolling-window option). + - MCP Unified (module backoff semantics, concurrency guard). + - Observability/Infra (unified metrics). + + In Scope + + - New tldw_Server_API/app/core/Infrastructure/circuit_breaker.py. + - Config unification and adapter: Embeddings, Evaluations, RAG, MCP (and optional Chat). + - Metrics standardization and registry. + - Back-compat shims in legacy module paths. + - Tests and docs updates. + + Out Of Scope + + - Changing existing retry/fallback APIs and semantics. + - Replacing health-monitoring subsystems. + + Functional Requirements + + - Provide CircuitBreaker with: + - States: CLOSED, OPEN, HALF_OPEN with success threshold and half-open max concurrent probes. + - Failure policy: count threshold and optional rolling window (size + failure_rate_threshold). + - Recovery policy: recovery timeout with optional exponential backoff (factor, max_timeout). + - Error classification: expected_exceptions (count toward CB), unexpected errors pass through. + - Optional per-call timeout enforcement for both sync/async calls. + - Provide CircuitBreakerConfig with superset of fields: + - failure_threshold, success_threshold, recovery_timeout, half_open_max_calls, expected_exceptions, timeout_seconds (per-call), window_size, failure_rate_threshold, backoff_factor, max_recovery_timeout. + - Provide simple APIs: + - call(func, *args, **kwargs) and call_async(func, *args, **kwargs). + - Decorator @circuit_breaker(name=..., category=..., config=...) auto-detects sync/async. + - Registry: get_or_create(name, category, config_overrides); status(); reset(). + - Metrics: + - Prometheus counters/gauges: state, trips, failures, successes, rejections, timeouts. + - Labels: category, service (name), operation (optional). + - Safe re-registration across processes/tests. + + Non-Functional Requirements + + - Thread/async safety: locks around state transitions; no deadlocks; low contention. + - Performance: O(1) hot-path operations; rolling-window operations amortized. + - Observability: metrics exposed; structured state in get_status(). + - Compatibility: no breaking changes to public endpoints; shims for legacy imports. + - Testing: >80% coverage for new module; integration tests continue to pass. + + Design Overview + + - File: tldw_Server_API/app/core/Infrastructure/circuit_breaker.py + - Core types: + - CircuitState (Enum) + - CircuitBreakerConfig (dataclass) + - CircuitBreaker (class) with state machine and optional rolling-window and backoff. + - CircuitBreakerRegistry with thread-safe access. + - Decorator factory circuit_breaker(...) (sync/async support). 
+ - Configuration resolution: + - Accept explicit config from call site; otherwise resolve via per-category sources: + - Embeddings: tldw_Server_API/Config_Files/embeddings_production_config.yaml (circuit_breaker block) + - Evaluations: tldw_Server_API/Config_Files/evaluations_config.yaml (circuit_breakers.providers) + - MCP: tldw_Server_API/Config_Files/mcp_modules.yaml (circuit_breaker_* keys) + - RAG: defaults from RAG resilience, mapped to unified config + - Override order: kwargs > env vars > category config > sensible defaults. + - Key mapping table: + - circuit_breaker_threshold -> failure_threshold + - circuit_breaker_timeout -> recovery_timeout + - circuit_breaker_backoff_factor -> backoff_factor + - circuit_breaker_max_timeout -> max_recovery_timeout + - half_open_requests -> half_open_max_calls + - timeout/timeout_seconds -> timeout_seconds + - Metrics: + - Gauges: circuit_breaker_state{category,service} (0=closed,1=open,2=half_open) + - Counters: circuit_breaker_trips_total, circuit_breaker_failures_total, circuit_breaker_successes_total, circuit_breaker_rejections_total, circuit_breaker_timeouts_total + - Backward compatibility: + - Keep modules exporting shims that import the Infrastructure CB and emit a deprecation warning: + - tldw_Server_API/app/core/Embeddings/circuit_breaker.py + - tldw_Server_API/app/core/Evaluations/circuit_breaker.py + - tldw_Server_API/app/core/RAG/rag_service/resilience.py (CB only; keep retry/fallback/health) + - tldw_Server_API/app/core/Chat/provider_manager.py (optional shim or direct call migration) + - Replace MCP base’s inline logic with unified CB calls; keep its semaphore guard local. + + API Sketch + + - CircuitBreakerConfig(...) + - CircuitBreaker(name, category, config) + - await call_async(func, *args, **kwargs) + - call(func, *args, **kwargs) + - get_status() -> Dict[str, Any] + - reset() + - get_or_create_breaker(name, category, config_overrides=None) + - @circuit_breaker(name, category, config_overrides=None) + + Module Integration Plan + + - Embeddings: Replace direct CircuitBreaker usage with Infrastructure CB; map config; keep Prometheus metrics through unified hooks. Update tests that import tldw_Server_API.app.core.Embeddings.circuit_breaker + to work via shim. + - Evaluations: Replace LLMCircuitBreaker with per-provider get_or_create_breaker(name=f"llm:{provider}", category="evaluations"); keep timeouts via per-call timeout_seconds. Preserve closed-state concurrency + semaphore out of CB if truly needed, or enable opt-in through config. + - RAG: Update unified_pipeline.py:505 and resilience.py to use Infrastructure CB; keep RetryPolicy/FallbackChain/HealthMonitor as-is. + - MCP: Replace base’s internal CB counters with Infrastructure CB; map backoff fields; keep module semaphore; preserve metrics via unified labels category="mcp". + - Chat (optional): Replace provider_manager.CircuitBreaker with unified CB or shim. + + Migration And Deletions + + - Deletions after migration (or convert to shims for 1 release): + - tldw_Server_API/app/core/Embeddings/circuit_breaker.py + - tldw_Server_API/app/core/Evaluations/circuit_breaker.py + - CB portions of tldw_Server_API/app/core/RAG/rag_service/resilience.py + - Inline CB logic in tldw_Server_API/app/core/MCP_unified/modules/base.py + - Optional: CB in tldw_Server_API/app/core/Chat/provider_manager.py + - Update config docs and examples to reference unified fields and mappings. + + Testing + + - Unit tests (new): + - State transitions: thresholds, half-open probes, reset. 
+ - Rolling-window failure rate mode (RAG parity). + - Backoff open-window growth and cap (MCP parity). + - Timeout handling (Evaluations parity) for sync/async. + - Metrics: state transitions increment expected counters/gauges. + - Registry: idempotent get_or_create, concurrent access safety. + - Integration tests (existing): + - Embeddings production and unit test paths must pass unchanged (import via shim). + - Evaluations unified tests must pass; provider configs honored. + - RAG unified pipeline resiliency path preserved. + - MCP module operations respect open/half-open and backoff. + + Risks And Mitigations + + - Behavior drift due to policy differences (count vs window): expose both modes; default per-category to match prior behavior; add explicit mappings. + - Metric cardinality growth (labels): constrain label set to category, service, optional operation. + - Backoff interaction with timeouts: document mapping and defaults; add tests mirroring MCP behavior. + - Concurrency limits baked into CB: keep concurrency guards outside CB unless explicitly configured. + + Rollout Plan + + - Phase 1: Implement Infrastructure CB + metrics + registry; add adapters/shims; land tests and docs; no module behavior change. + - Phase 2: Migrate modules sequentially (Embeddings → Evaluations → RAG → MCP → Chat). Update config mapping and tests per module. + - Phase 3: Remove duplicate implementations; keep import shims for one release cycle; announce deprecation in release notes. + + Acceptance Criteria + + - Single, shared CB used by Embeddings, Evaluations, RAG, MCP (and optionally Chat). + - All tests pass: python -m pytest -v and coverage unchanged or improved. + - Metrics exported under unified names with expected labels; no duplicate metric registration errors. + - Config overrides resolve correctly from each category’s existing config files. + - No API regressions; same error semantics for open/rejected calls. + + Open Questions + + - Should CB own per-call timeout universally, or leave it to call sites with a helper? (Current plan: optional timeout_seconds in CB wrapper to preserve Evaluations/MCP behavior.) + - Do we migrate Chat provider CB now or backlog it? + - Do we want per-category defaults in code, or only in config files? + + Timeline + + - Phase 1: 1–2 days (Infra CB, metrics, basic tests, shims). + - Phase 2: 2–4 days (module migrations + tests). + - Phase 3: 0.5 day (cleanup, docs, deprecations). 
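+
+ Illustrative Usage (Non-Normative)
+
+ - A short sketch of how call sites would use the decorator and registry from the API Sketch above once Phase 1 lands. The import path is the planned Infrastructure module; signatures follow this PRD and may shift during implementation.
+
+```python
+# Sketch of the proposed API; this module does not exist yet.
+from tldw_Server_API.app.core.Infrastructure.circuit_breaker import (
+    circuit_breaker,
+    get_or_create_breaker,
+)
+
+
+# Decorator form: failures of expected exception types count toward the breaker,
+# and calls are rejected while it is OPEN.
+@circuit_breaker(
+    name="llm:openai",
+    category="evaluations",
+    config_overrides={"failure_threshold": 5, "timeout_seconds": 30},
+)
+async def call_provider(payload: dict) -> dict:
+    ...
+
+
+# Registry form: explicit breaker for ad-hoc call sites.
+breaker = get_or_create_breaker(
+    name="embeddings:primary",
+    category="embeddings",
+    config_overrides={"recovery_timeout": 30, "half_open_max_calls": 2},
+)
+
+
+async def embed(texts: list[str]) -> list[list[float]]:
+    # _do_embed is a stand-in for the real provider call.
+    return await breaker.call_async(_do_embed, texts)
+```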
+ + Appendix: File References + + - Embeddings CB: tldw_Server_API/app/core/Embeddings/circuit_breaker.py:1 + - Evaluations CB: tldw_Server_API/app/core/Evaluations/circuit_breaker.py:1 + - RAG use: tldw_Server_API/app/core/RAG/rag_service/unified_pipeline.py:505 + - RAG CB/coordinator: tldw_Server_API/app/core/RAG/rag_service/resilience.py:1 + - MCP inline CB: tldw_Server_API/app/core/MCP_unified/modules/base.py:242 + - Chat CB (optional): tldw_Server_API/app/core/Chat/provider_manager.py:1 + - Configs: + - tldw_Server_API/Config_Files/embeddings_production_config.yaml:150 + - tldw_Server_API/Config_Files/evaluations_config.yaml:80 + - tldw_Server_API/Config_Files/mcp_modules.yaml:12 diff --git a/Docs/Product/Completed/HTTP-Stream-PRD.md b/Docs/Product/Completed/HTTP-Stream-PRD.md new file mode 100644 index 000000000..a8ab9ba01 --- /dev/null +++ b/Docs/Product/Completed/HTTP-Stream-PRD.md @@ -0,0 +1,427 @@ +PRD: HTTP Client Consolidation + + - Owner: Platform / Core + - Version: 1.0 + - Status: Completed (Stage 7) + + Summary of Outcomes + + - Centralization: 100% of outbound HTTP in app/core and app/services now uses centralized helpers/factories (documented exceptions appear only in documentation examples). + - Security: Egress enforced per hop and on proxies (deny-by-default allowlist). Optional TLS minimum version and env-driven leaf-cert pinning supported and tested. + - Reliability: Unified retries with decorrelated jitter and Retry-After support; no auto-retry after first body byte for streaming. + - Streaming: Standardized SSE helper with deterministic cancellation and final [DONE] ordering; added stress tests for high-chunk scenarios. + - Downloads: Atomic rename, checksum and Content-Length validation, resume support; strict Content-Type enabled at call sites where required (audio path enabled). + - Observability: Structured outbound logs; metrics exposed (http_client_requests_total, http_client_request_duration_seconds_bucket, http_client_retries_total, http_client_egress_denials_total); optional traceparent injection for OTel. + - Monitoring: Grafana dashboard JSON and Prometheus alert rules added (Docs/Monitoring/http_client_grafana_dashboard.json, Docs/Monitoring/http_client_alerts_prometheus.yaml). + - Developer experience: Config and .env examples updated (PROXY_ALLOWLIST, TLS flags, HTTP_CERT_PINS); comprehensive MockTransport-based tests for JSON helpers, redirects, proxies, downloads, SSE parsing, TLS, and perf microbenches (PERF=1). + - CI enforcement: HTTP usage guard is blocking and passing; prevents direct httpx/requests usage outside approved core files. + + How to Monitor + + - Prometheus metrics endpoints (gated by route toggles): + - Prometheus text: GET `/metrics` + - JSON metrics: GET `/api/v1/metrics` + - Quick checks: + - `curl -s http://127.0.0.1:8000/metrics | head` + - `curl -s http://127.0.0.1:8000/api/v1/metrics` + - OpenTelemetry (optional): + - Install exporters (see `tldw_Server_API/app/core/Metrics/README.md`). + - Example env: + - `OTEL_SERVICE_NAME=tldw_server` + - `OTEL_SERVICE_VERSION=1.0.0` + - `OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317` + - `OTEL_EXPORTER_OTLP_PROTOCOL=grpc` + - `OTEL_METRICS_EXPORTER=prometheus,otlp` + - `OTEL_TRACES_EXPORTER=otlp` + - Server logs indicate OTEL availability on startup. + - Dashboards & Alerts: + - Grafana: import `Docs/Monitoring/http_client_grafana_dashboard.json`. + - Prometheus: load alert rules from `Docs/Monitoring/http_client_alerts_prometheus.yaml`. 
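+
+ Example: Centralized Helper Usage (Illustrative)
+
+ - A minimal call-site sketch using the helpers and RetryPolicy documented under API Additions below. Parameter names follow that section; the exact positional/keyword shapes are assumptions, and target hosts must be permitted by the egress policy.
+
+```python
+# Illustrative only; names follow the API Additions section of this PRD.
+import asyncio
+
+from tldw_Server_API.app.core.http_client import RetryPolicy, adownload, afetch_json
+
+
+async def main() -> None:
+    retry = RetryPolicy(attempts=3, backoff_base_ms=250, backoff_cap_s=30)
+
+    # JSON GET with Content-Type validation and a decode size guard.
+    data = await afetch_json(
+        "GET",
+        "https://api.example.com/v1/models",  # must pass the egress allowlist
+        retry=retry,
+        max_bytes=1_000_000,
+    )
+
+    # Streaming download with checksum validation and atomic rename.
+    artifact = await adownload(
+        "https://example.com/artifacts/model.bin",
+        checksum="<expected-sha256-hex>",
+        retry=retry,
+    )
+    print(len(data), artifact)
+
+
+asyncio.run(main())
+```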
+ + Troubleshooting + + - Egress denials (NetworkError/EgressPolicyError): + - Confirm host and scheme are allowed by the server’s egress policy and allowlists. + - Redirects are re‑validated per hop; check each `Location` host in the chain. + - Proxies are deny‑by‑default; set `PROXY_ALLOWLIST` (hosts or URLs) if a proxy is required. + - Metrics: `http_client_egress_denials_total{reason}` increments with the reason label. + - Proxy blocked or ignored: + - Central client validates proxies against `PROXY_ALLOWLIST`. Dict form (`{"http": "...", "https": "..."}`) is supported. + - When `HTTP_TRUST_ENV=false` (default), system proxies are ignored. + - Redirect loops or missing Location: + - Loops surface as `RetryExhaustedError` or `NetworkError("Invalid/without Location")` depending on hop. + - Cap is `HTTP_MAX_REDIRECTS` (default 5). Validate final URL/content‑type matches expectations. + - HTTP/2 disabled unexpectedly: + - If `h2` is not installed, factories automatically downgrade to HTTP/1.1. + - Install `httpx[h2]` to re‑enable HTTP/2; no code change needed. + - JSON decode errors: + - Helpers validate `Content-Type: application/json`. Pass `require_json_ct=False` (or `accept_mismatch=True` at call sites that permit it) to allow decoding regardless of header. + - Large payloads: enforce or raise `HTTP_JSON_MAX_BYTES` at call sites using `max_bytes`. + - Streaming stalls/DONE ordering: + - SSE helper never retries after first body byte; cancellation propagates via `CancelledError`. + - Unified path emits a single final `[DONE]`; for issues check provider adapters and heartbeat intervals. + - TLS pinning/min-version failures: + - Pinning uses leaf cert SHA‑256 hashes from `HTTP_CERT_PINS` (`host=pinA|pinB,...`). + - Enforce min version via `TLS_ENFORCE_MIN_VERSION=true` and `TLS_MIN_VERSION=1.2|1.3`. + - Downloads resume anomalies: + - If server ignores `Range` and returns 200, downloader overwrites the partial file with full content. + - Use `checksum`/`Content-Length` validation and optional `require_content_type` for strictness. + + Overview + + - Unifying principle: Every outbound call is the same thing — an egress-validated HTTP request with retries. + - Objective: Consolidate all outbound HTTP across the codebase onto a single, secure, configurable client layer with consistent retry/backoff, timeouts, and egress enforcement. + + Problem + + - Duplication and inconsistency: + - Central client underused: tldw_Server_API/app/core/http_client.py:1 + - Local LLM utils async client + custom retries: tldw_Server_API/app/core/Local_LLM/http_utils.py:41 + - TTS allocates raw httpx.AsyncClient pools: tldw_Server_API/app/core/TTS/tts_resource_manager.py:200 + - Summarization uses requests + urllib3.Retry: tldw_Server_API/app/core/LLM_Calls/Summarization_General_Lib.py:629 + - Streaming helpers mix requests and httpx directly: tldw_Server_API/app/core/LLM_Calls/streaming.py:18 + - Impact: + - Inconsistent timeouts, retries, proxy handling. + - Partial/uneven enforcement of egress/SSRF policy (policy engine exists at tldw_Server_API/app/core/Security/egress.py:146). + - Hard to audit and monitor egress uniformly. + + Goals + + - One canonical way to: + - Create HTTP clients (create_client / create_async_client) + - Perform requests (fetch/afetch, JSON helpers, streaming, downloads) + - Always enforce egress policy for every outbound call. + - Centralize retry/backoff with sensible defaults and per-call overrides. 
+ - Standardize timeouts, proxy handling (trust_env=False by default), HTTP/2 preference. + - Preserve or improve performance (keep-alive, pooling). + - Provide minimal, consistent logging and metrics for egress. + + Non‑Goals + + - Rewriting provider-specific business logic. + - Changing public API contracts beyond consistent network behavior. + - Introducing new network dependencies by default (curl backend remains optional). + - Global concurrency/rate limiting helper; tracked separately and out of scope for this PRD. + + Stakeholders + + - Platform/Core, Security, LLM Integrations, Media Ingestion, TTS, RAG/Search. + + Current State + + - Central client fully implemented with egress enforcement, retries, SSE/bytes streaming, and downloads: tldw_Server_API/app/core/http_client.py + - Egress policy engine: tldw_Server_API/app/core/Security/egress.py + - Broad migration complete across core/services: + - LLM providers (non‑streaming + streaming): OpenAI, Anthropic, Cohere, Groq, OpenRouter, HuggingFace, DeepSeek, Mistral, Google. + - WebSearch, Third_Party sources, Evaluations loaders, and OCR backends centralized to helpers. + - Audio/document downloads consolidated via download/adownload with checksum/length validation; audio path enforces MIME. + - Observability: + - Per‑request structured logs; http_client_* metrics registered (requests_total, duration histogram, retries_total, egress_denials_total). + - Optional OpenTelemetry spans and traceparent injection in place. + - Grafana dashboard and Prometheus alerts provided under Docs/Monitoring/. + - Security: + - TLS minimum version enforcement (optional) and env‑driven leaf‑cert pinning map (HTTP_CERT_PINS) supported and tested. + - CI enforcement: + - HTTP usage guard is blocking; direct requests/httpx usage outside approved core files is prevented. + + Proposed Solution + + - Expand http_client with unified, secure primitives and require all modules to use them: + - Factories: create_client(...), create_async_client(...) (timeouts, limits, base_url, proxies, trust_env default false) + - Request helpers: + - Sync: fetch(...), fetch_json(...), stream(...) + - Async: afetch(...), afetch_json(...), astream(...) + - Download: download(...), adownload(...) (streaming, atomic rename) + - Retry/backoff: centralized policy with exponential backoff + jitter, Retry-After support, idempotency-aware retry by default. + - Egress: mandatory evaluate_url_policy(url) check inside all helpers prior to network I/O. + - Observability: log retries with redacted headers; optional metrics hooks. + + Functional Requirements + + - Client factories + - Accept: timeout, limits (httpx.Limits), base_url, trust_env (default false), proxies, http2=True, http3=False. + - Return httpx.Client / httpx.AsyncClient (or optional curl backend for sync fetch path already supported by fetch). + - Defaults: + - Timeout: connect=5s, read=30s, write=30s, pool=30s. + - Limits: max_connections=100, max_keepalive_connections=20. + - Requests + - fetch/afetch: method, url, headers, params, json, data, files, timeout, allow_redirects, proxies, retry. + - fetch_json/afetch_json: JSON parse with clear errors on non-JSON or invalid payloads; validate Content-Type is application/json unless accept_mismatch=True; optional max_bytes guard. + - Streaming helpers: + - astream_bytes(...): async iterator of raw bytes/chunks. + - astream_sse(...): async iterator of parsed SSE events with fields (event, data, id, retry). 
+ - download/adownload: stream to temp path and atomic rename, clean partial on failure. + - Headers/UA: standardize User-Agent as "tldw_server/ ()" with per-call override; auto-inject X-Request-Id when present in context. + - Cookies: no first-class cookie jar helpers; callers may attach cookies via client configuration if needed. + - Egress policy + - Call evaluate_url_policy(url) first; deny with clear error when disallowed. + - Honor env-based allow/deny lists, scheme/port rules, and private/reserved IP blocking. + - Enforce at all phases: evaluate original URL, each redirect hop (see redirect policy), and the resolved IP post-DNS; deny on scheme/host/IP violations. + - Apply policy to proxies as well; only allow explicitly allowlisted proxies. + - Redirect policy + - Limit redirects to 5; re-check egress policy for each hop and validate the final URL and (optionally) expected Content-Type. + - Retry/backoff + - Defaults: attempts=3, exponential backoff with decorrelated jitter; base 250ms, cap 30s. + - Retry on: 408, 429, 500, 502, 503, 504, and connect/read timeouts. + - Respect Retry-After and provider-specific backoff headers; do not retry unsafe methods unless retry_on_unsafe=True. + - Streams: never auto-retry once any response body bytes have been consumed; allow optional user callback to opt in for segmented protocols. + - Observability + - Structured logs: request_id, method, scheme, host, path, status_code, duration_ms, attempt, retry_delay_ms, exception_class; redact sensitive headers and query params by default. + - Metrics (Prometheus style): http_client_requests_total{method,host,status}, http_client_request_duration_seconds_bucket, http_client_retries_total{reason}, http_client_egress_denials_total{reason}. + - Optional OpenTelemetry: inject/extract trace context (traceparent) and emit spans for requests and retries. + - JSON helpers + - Enforce Content-Type validation by default; configurable via accept_mismatch flag; optional max_bytes limit for decode. + - Download safety + - Optional checksum validation (sha256, configurable algorithm), Content-Length validation, and disk quota guard. + - Optional Range-resume capability behind a feature flag when server supports Range requests. + + Non‑Functional Requirements + + - Security by default: fail closed on egress evaluation errors; trust_env=False default. + - Performance: reuse pooled connections; support HTTP/2; ensure no regression in TTS/LLM throughput. + - Testability: functions accept injected clients and are easily mockable. + - Lifecycle: document safe client usage patterns (e.g., one AsyncClient per event loop for long‑lived services); provide context managers and a shared‑pool accessor for high‑QPS modules (TTS/LLM). + - Transport/TLS: + - HTTP/2 enabled by default; HTTP/3 (QUIC) supported behind a flag and only where the stack supports it. + - TLS minimum version enforcement is optional (disabled by default) and configurable (e.g., TLS 1.2+). + - Optional certificate pinning (SPKI SHA‑256 fingerprints) supported but off by default. + + API Additions (in http_client) + + - Types + - RetryPolicy: attempts, backoff_base_ms, backoff_cap_s, retry_on_status, retry_on_methods, respect_retry_after. + - TLSOptions (optional): enforce_min_version: bool, min_version: {"1.2","1.3"}, cert_pins_spki_sha256: Optional[Set[str]]. 
+ - Sync + - def fetch(..., retry: Optional[RetryPolicy] = None) -> HttpResponse + - def fetch_json(..., retry: Optional[RetryPolicy] = None, *, require_json_ct: bool = True, max_bytes: Optional[int] = None) -> Dict[str, Any] + - def stream_bytes(..., retry: Optional[RetryPolicy] = None) -> Iterator[bytes] + - def download(..., *, checksum: Optional[str] = None, checksum_alg: str = "sha256", resume: bool = False, retry: Optional[RetryPolicy] = None) -> Path + - Async + - async def afetch(..., retry: Optional[RetryPolicy] = None) -> HttpResponse + - async def afetch_json(..., retry: Optional[RetryPolicy] = None, *, require_json_ct: bool = True, max_bytes: Optional[int] = None) -> Dict[str, Any] + - async def astream_bytes(..., retry: Optional[RetryPolicy] = None) -> AsyncIterator[bytes] + - async def astream_sse(..., retry: Optional[RetryPolicy] = None) -> AsyncIterator[SSEEvent] + - async def adownload(..., *, checksum: Optional[str] = None, checksum_alg: str = "sha256", resume: bool = False, retry: Optional[RetryPolicy] = None) -> Path + - Exceptions + - EgressPolicyError, NetworkError, RetryExhaustedError, JSONDecodeError, StreamingProtocolError, DownloadError. Wrap underlying httpx errors while preserving safe context (no secrets). + + Configuration + + - Env defaults (override per-call) + - HTTP_CONNECT_TIMEOUT (float, default 5.0) + - HTTP_READ_TIMEOUT (float, default 30.0) + - HTTP_WRITE_TIMEOUT (float, default 30.0) + - HTTP_POOL_TIMEOUT (float, default 30.0) + - HTTP_MAX_CONNECTIONS (int, default 100) + - HTTP_MAX_KEEPALIVE_CONNECTIONS (int, default 20) + - HTTP_RETRY_ATTEMPTS (int, default 3) + - HTTP_BACKOFF_BASE_MS (int, default 250) + - HTTP_BACKOFF_CAP_S (int, default 30) + - HTTP_MAX_REDIRECTS (int, default 5) + - PROXY_ALLOWLIST (comma-separated URLs/hosts) + - HTTP_JSON_MAX_BYTES (int, optional; disable by default) + - HTTP_TRUST_ENV (bool, default false) + - HTTP_DEFAULT_USER_AGENT (string, default “tldw_server/ httpx”) + - HTTP3_ENABLED (bool, default false) + - TLS_ENFORCE_MIN_VERSION (bool, default false) + - TLS_MIN_VERSION (str, default "1.2") + - TLS_CERT_PINS_SPKI_SHA256 (comma-separated pins; optional) + + Security & Egress + + - Centralized guard: evaluate_url_policy in every helper prior to I/O (tldw_Server_API/app/core/Security/egress.py:146). + - Deny unsupported schemes, disallowed ports, denylisted hosts, and private/reserved IPs unless env allows. + - Maintain SSRF-safe defaults; proxies only when explicitly configured. + + Observability & Metrics + + - Metrics (labels include method, status, backend): + - egress_requests_total + - egress_request_duration_ms + - egress_retries_total + - egress_policy_denied_total + - Logging: INFO on final failure, DEBUG on retries, with redacted headers. + + Migration Plan + + - Phase 1: Foundations + - Implement afetch/astream/fetch_json and retry policy in http_client. + - Add env/config plumbing; unit tests (retry matrix, egress deny, JSON errors, streaming close, downloads). + - Phase 2: Early Adopters + - Local LLM: replace request_json and client factory with create_async_client + afetch_json (tldw_Server_API/app/core/Local_LLM/http_utils.py:41). + - TTS: construct clients via create_async_client(limits=...) in pool (tldw_Server_API/app/core/TTS/tts_resource_manager.py:200). + - HuggingFace local API calls: move to afetch (tldw_Server_API/app/core/LLM_Calls/huggingface_api.py:105). 
+ - Phase 3: Broad Replacement + - Summarization lib: replace requests.Session + Retry usages with fetch/afetch (tldw_Server_API/app/core/LLM_Calls/Summarization_General_Lib.py:629). + - Ingestion/OCR/Audio downloads: use download/adownload (tldw_Server_API/app/core/Ingestion_Media_Processing/Audio/Audio_Files.py:222, OCR/backends/*). + - Streaming: standardize on astream + existing SSE normalizers (tldw_Server_API/app/core/LLM_Calls/streaming.py:18). + - Phase 4: Cleanup + - Remove deprecated helpers and ad‑hoc clients. + - Update docs; add integration tests for rate limits and egress denials. + + What Will Be Removed + + - Local retry/backoff and session code (non-exhaustive): + - tldw_Server_API/app/core/Local_LLM/http_utils.py:47 + - tldw_Server_API/app/core/TTS/tts_resource_manager.py:200 + - tldw_Server_API/app/core/LLM_Calls/Summarization_General_Lib.py:629 + - Other scattered HTTPAdapter(Retry(...)) blocks and raw httpx instantiations in core/services. + + Testing Strategy + + - Unit tests + - Egress: allowed/denied schemes, ports, hosts, private/reserved IP; DNS resolution IP checks; per-redirect hop enforcement; proxy allowlist. + - Retry/backoff: attempts, decorrelated jitter bounds, Retry-After (delta-seconds and HTTP-date) behavior, status code matrix, idempotency. + + Status Update (current) + + - Summarization providers migrated to centralized helpers: + - OpenAI, Anthropic (previously), now Cohere, Groq, OpenRouter, HuggingFace, DeepSeek, Mistral, Google. + - Streaming paths use centralized client streams with no auto-retry after first byte. + - Workflows + notifications: + - Webhook DLQ and replay paths now use create_client/create_async_client and afetch/fetch for egress enforcement and retries. + - Notification webhook sender switched to fetch. + - Ingestion/audio: + - External transcription provider now uses afetch with create_async_client; downloads previously consolidated to download/adownload. + - Audio downloads now enforce strict content‑type; document handlers keep HEAD‑time MIME checks. + - Docs updated: + - README and Config_Files/README document streaming (astream_sse) and download (download/adownload) usage examples. + - JSON: success, bad JSON, wrong content-type, max_bytes enforcement. + - Streaming: normal end, mid-stream error surfaced, cancellation propagation (CancelledError), proper close; SSE parsing. + - Download: atomic rename, partial cleanup, checksum and Content-Length validation, basic Range-resume (when enabled). + - Observability: metrics counters/labels update; structured logs redact secrets; optional OTel spans emitted when enabled. + - Monitoring: Grafana dashboard JSON and Prometheus alert rules for http_client_* metrics added. + - What Changed (recent): + - Added TLS minimum-version enforcement in client factories with unit tests; optional leaf-cert pinning map via HTTP_CERT_PINS and tests. + - Added SSE stress test to validate final [DONE] ordering and cancellation under high-chunk conditions; improved unified SSE stability. + - Added performance checks (optional, PERF=1) for non‑streaming, streaming, and download hot paths using httpx MockTransport. + - Provided Grafana dashboard JSON and Prometheus alert rules for http_client_* metrics (requests_total, duration histogram, retries_total, egress_denials_total). + - Integration tests + - Swap target modules to central helpers; validate same behavior via mock servers and test markers already used in repo. 
+ - Redirect chains with mixed hosts; ensure egress rechecks and final content-type validation. + + Risks & Mitigations + + - Behavior drift on retries for non-idempotent methods + - Default: do not retry unsafe methods; require explicit opt-in. + - Throughput regressions (TTS/LLM) + - Preserve Limits and keep-alive; validate with benchmarks. + - Over-enforcement blocking legitimate calls + - Ensure env allowlists; provide clear error messages and tests. + + Dependencies + + - httpx (existing), optional curl_cffi for sync impersonation path. + - Loguru and metrics registry for observability (already present). + - Optional cryptography for SPKI SHA‑256 certificate pinning utilities (only when pinning is enabled). + + Acceptance Criteria + + - 100% of outbound HTTP in app/core and app/services uses http_client helpers or factories (documented exceptions only). + - All requests evaluate egress policy prior to I/O and fail closed when denied. + - Consistent retry/backoff observed across modules; tests cover 429/5xx and network failures. + - TTS/Local LLM throughput and latency not degraded. + - Duplicated retry/session code removed or shimmed with deprecation warnings. + + Milestones & Timeline + + - Week 1: Implement APIs + unit tests in http_client; land without consumers. + - Weeks 2–3: Early adopters and broad replacement (module-by-module PRs). + - Week 4: Cleanup, docs, final integration tests. + + Open Questions + + - Circuit breaker per host? Config hints exist; defer unless needed by SLOs. + - Dev ergonomics: rely on egress.py profile selection (permissive vs strict) or add a dedicated dev override? + - curl_cffi impersonation defaults: remain opt-in at call sites? + + Appendix: Code References + + - Central client (to expand): tldw_Server_API/app/core/http_client.py:1 + - Egress policy: tldw_Server_API/app/core/Security/egress.py:146 + - Duplicates to consolidate: + - tldw_Server_API/app/core/Local_LLM/http_utils.py:41 + - tldw_Server_API/app/core/TTS/tts_resource_manager.py:200 + - tldw_Server_API/app/core/LLM_Calls/Summarization_General_Lib.py:629 + - tldw_Server_API/app/core/LLM_Calls/streaming.py:18 + + Implementation Plan (Detailed) + + - Stage 0: Spec Finalization + - Confirm PRD decisions for TLS min version (optional), HTTP/3 flag, proxy allowlist, streaming contracts, and exception taxonomy. + - Document configuration keys and defaults; align README and Config_Files/README.md. + - Success: PRD updated; config keys listed; stakeholders sign-off. + + - Stage 1: Core API Foundations + - Implement unified helpers in http_client: + - Factories: create_client, create_async_client (timeouts, limits, headers, http2, trust_env, proxies validation). + - Requests: fetch/afetch with manual redirect handling; egress enforced per hop and on proxies. + - JSON: fetch_json/afetch_json with content-type validation and max_bytes guard. + - Streaming: astream_bytes and astream_sse with cancellation propagation; no auto-retry post-first byte. + - Downloads: download/adownload with atomic rename, checksum/length validation, optional resume. + - Exceptions: EgressPolicyError, NetworkError, RetryExhaustedError, JSONDecodeError, StreamingProtocolError, DownloadError. + - Observability: + - Structured retry logs (redacted headers) and basic request duration metrics. + - Optional traceparent injection from active span. + - Security: + - Enforce egress on original URL, redirect hops, and post-DNS IP; proxy allowlist (deny-by-default). 
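+   - Backoff sketch (illustrative): one common formulation of decorrelated jitter plus delta-seconds Retry-After handling, matching the base 250ms / cap 30s defaults above. The exact formula and helper names are assumptions, not the implementation.
+
+```python
+# Sketch only: decorrelated jitter (sleep = min(cap, uniform(base, prev * 3)))
+# and Retry-After handling for the delta-seconds form; HTTP-date parsing omitted.
+import random
+
+
+def next_backoff_s(previous_s: float, base_s: float = 0.25, cap_s: float = 30.0) -> float:
+    return min(cap_s, random.uniform(base_s, max(base_s, previous_s * 3)))
+
+
+def retry_after_s(header_value: str | None) -> float | None:
+    if header_value is None:
+        return None
+    try:
+        return max(0.0, float(header_value))
+    except ValueError:
+        return None  # an HTTP-date value would be parsed and diffed against now()
+```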
+ - Success: Helpers compile with tests; metrics registered; defaults respected via env. + + - Stage 2: Unit Tests and Validation + - Add httpx.MockTransport tests covering: retry/backoff, egress deny, JSON validation, streaming SSE parse, download checksum/length, cancellation propagation. + - Add negative cases: redirect loops, redirect without Location, private/reserved IPs, proxy not allowlisted. + - Add metrics smoke tests to ensure counters/histograms increment and redact secrets in logs. + - Success: >90% coverage of http_client; green in CI across supported Python/httpx versions. + + - Stage 3: Early Adopters Integration + - Replace direct HTTP calls in: + - Local LLM utilities: `tldw_Server_API/app/core/Local_LLM/http_utils.py` → create_async_client + afetch_json. + - TTS resource manager: `tldw_Server_API/app/core/TTS/tts_resource_manager.py` → pooled create_async_client with limits. + - HuggingFace/local API callers: `tldw_Server_API/app/core/LLM_Calls/huggingface_api.py` → afetch. + - Add adapters/shims where needed; keep behavior parity for timeouts and headers. + - Success: Modules work under new helpers; basic perf checks show no regressions. + + - Stage 4: Broad Migration + - Summarization: migrate `tldw_Server_API/app/core/LLM_Calls/Summarization_General_Lib.py` from requests+Retry to fetch/afetch. + - Ingestion/Audio/OCR downloads: consolidate on download/adownload across ingestion backends and audio pipelines. + - Streaming call sites: standardize on astream_sse + existing SSE normalizers in `tldw_Server_API/app/core/LLM_Calls/streaming.py`. + - Success: Majority (>80%) of outbound HTTP uses helpers; regression tests pass. + + - Stage 5: Observability & Security Hardening — Completed + - Ensure per-request structured logs include request_id, method, host, status, duration. + - Wire optional OpenTelemetry spans for client calls and retries; confirm traceparent propagation to providers that support it. + - Verify egress denials produce clear errors and increment `http_client_egress_denials_total` with reason. + - Success: Dashboards reflect client metrics; SLO alerts (if any) unaffected. + + What Changed (Stage 5) + + - Added per-request outbound log lines in `http_client` on success and terminal failures with fields: `request_id`, `method`, `scheme`, `host`, `path`, `status_code`, `duration_ms`, `attempt`, `retry_delay_ms`, `exception_class`. + - Trace context: `traceparent` injection already present; retry events (`http.retry`) annotated on spans. + - Egress denials: now increment `http_client_egress_denials_total` with a reason label; tests assert message clarity and counter increments. + - TLS security: optional minimum TLS version enforcement and per-host leaf-cert SHA-256 pinning supported by factories and enforced pre-I/O when configured. + + - Stage 6: Documentation & Examples — Completed + - Update developer docs with examples for fetch_json, SSE streaming, and downloads with checksum. + - Document configuration keys in Config_Files/README.md and .env templates; add migration tips for requests→httpx. + - Success: Docs merged; example snippets validated. + + - Stage 7: Cleanup & Enforcement — Completed + - Deprecated local retry/session code and ad‑hoc clients removed or refactored to use centralized helpers. + - CI guard to block direct `requests`/`httpx` usage outside approved core files is active and passing in CI. + - Success: 100% of outbound HTTP in app/core and app/services uses centralized helpers/factories (documented exceptions are examples in docs only). 
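+
+  - Illustrative test sketch (Stage 2 pattern): a self-contained example of the httpx.MockTransport approach used by the unit tests. It drives raw httpx for brevity; the real tests exercise the centralized helpers with an injected client (the injection mechanism is an assumption of this sketch).
+
+```python
+# Runs against httpx.MockTransport; no network access is made.
+import httpx
+
+
+def test_throttle_then_success_pattern() -> None:
+    calls = {"n": 0}
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        calls["n"] += 1
+        if calls["n"] == 1:
+            # First attempt is throttled; Retry-After advertises a zero-second delay.
+            return httpx.Response(429, headers={"Retry-After": "0"})
+        return httpx.Response(200, json={"ok": True})
+
+    transport = httpx.MockTransport(handler)
+    with httpx.Client(transport=transport) as client:
+        # Stand-in retry loop; the real assertion targets the helpers' RetryPolicy.
+        for _ in range(2):
+            resp = client.get("https://api.example.com/v1/ping")
+            if resp.status_code != 429:
+                break
+
+    assert resp.status_code == 200
+    assert resp.json() == {"ok": True}
+    assert calls["n"] == 2
+```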
+ + - Rollout & Risk Mitigation + - Canary: enable helpers per-module behind lightweight toggles if needed; default to safe timeouts and trust_env=False. + - Fallback: ability to reduce http2 to http1 automatically if `h2` unavailable; keep curl backend opt-in. + - Rollback: revert module migrations individually (PR-by-PR) if regressions observed. + + - Deliverables + - Code: unified http_client helpers + exceptions; module migrations; metrics wiring. + - Tests: unit tests for helpers; integration tests for migrated modules using mock servers. + - Docs: PRD updated; developer docs; migration notes. + + - Acceptance Gates (per stage) + - Stage 1–2: Unit tests green; helpers stable across py/httpx versions; no secret leakage in logs. + - Stage 3–4: Early adopters and summarization/ingestion migrated with parity; perf smoke OK. + - Stage 5: Metrics visible and accurate; egress denials clear and tested. + - Stage 7: CI guard active; legacy code removed or wrapped with deprecation warnings. diff --git a/Docs/Product/Config_Normalization.md b/Docs/Product/Config_Normalization.md new file mode 100644 index 000000000..24d645ae8 --- /dev/null +++ b/Docs/Product/Config_Normalization.md @@ -0,0 +1,158 @@ +# Config Normalization PRD (Targeted) + +Status: Proposal ready for implementation +Owner: Core Maintainers +Target Release: 0.2.x + +## 1. Summary +Normalize configuration across rate limiting, embeddings, and audio quota by introducing one typed settings object per domain. Replace ad-hoc env/config parsing with a Pydantic Settings façade layered over `tldw_Server_API/app/core/config.py`. Standardize testing via a single `TEST_MODE` switch and unified defaults while retaining backward compatibility for legacy keys. + +## 2. Problem Statement +Multiple modules parse environment variables and `config.txt` independently with custom fallbacks and test overrides, creating drift and brittleness. +- Duplicated logic exists at: + - `tldw_Server_API/app/core/Chat/rate_limiter.py:270` + - `tldw_Server_API/app/core/Embeddings/rate_limiter.py:246` + - `tldw_Server_API/app/core/Usage/audio_quota.py:281` +- A central adapter exists (`tldw_Server_API/app/core/config.py:1`) but is not the single source of truth. + +Consequences: inconsistent precedence rules, scattered defaults, harder testing, and noisy diffs when adding new options. + +## 3. Goals & Success Criteria +- One typed settings object per domain (RateLimits, Embeddings, AudioQuota, Common). +- Single precedence order everywhere: environment → config file → hardcoded defaults. +- Standardize test behavior with `TLDW_TEST_MODE=1` and domain‑specific test defaults. +- Backward compatibility for existing env names and config keys via aliases. +- Reduce code duplication and improve readability, validation, and startup diagnostics. + +**Success Metrics** +- Reduced config-related test flakiness and fewer env mutations in tests. +- Removal of duplicated parsing blocks in the three target modules. +- Clear startup logs showing effective settings and sources (env/config/default). + +## 4. Out of Scope (v1) +- Global refactor of all configuration domains (LLM providers, RAG, MCP, TTS globals). +- Changing default values beyond achieving current behavior parity (except test flag normalization). +- Introducing new external configuration stores or secret managers. + +## 5. Personas & Use Cases +- Developer: Instantiates one settings object per domain; never re‑implements parsing. +- QA/CI: Sets `TLDW_TEST_MODE=1` and receives stable, test‑friendly defaults. 
+- Operator: Configures env or `config.txt` once and observes consistent behavior with clear startup logs. + +## 6. Scope +### In Scope +- New settings façade package: `tldw_Server_API/app/core/settings/` +- Integration changes within: + - `tldw_Server_API/app/core/Chat/rate_limiter.py` + - `tldw_Server_API/app/core/Embeddings/rate_limiter.py` + - `tldw_Server_API/app/core/Usage/audio_quota.py` +- Minimal adapter updates in `tldw_Server_API/app/core/config.py` to support lookups. + +### Out of Scope (follow‑ups) +- LLM provider settings, RAG, MCP, TTS global settings. + +## 7. Functional Requirements +- Common settings + - `CommonSettings`: `test_mode` from `TLDW_TEST_MODE`, `environment` from `TLDW_ENV`. +- Rate limits + - `RateLimitSettings`: `chat_rpm`, `chat_tpm`, `chat_burst`, `chat_concurrency`, `enabled`. + - Preferred env keys: `TLDW_RATE_CHAT_RPM`, `TLDW_RATE_CHAT_TPM`, `TLDW_RATE_CHAT_BURST`, `TLDW_RATE_CHAT_CONCURRENCY`, `TLDW_RATE_ENABLED`. + - Legacy aliases for any existing `TEST_*` and current names used in the code. +- Embeddings + - `EmbeddingSettings`: `provider`, `model`, `rpm`, `max_batch`, `concurrency`, `dims`. + - Env keys: `TLDW_EMB_PROVIDER`, `TLDW_EMB_MODEL`, `TLDW_EMB_RPM`, `TLDW_EMB_MAX_BATCH`, `TLDW_EMB_CONCURRENCY`, `TLDW_EMB_DIMS`. +- Audio quota + - `AudioQuotaSettings`: `max_seconds_per_day`, `window_days`, `per_user`, `enabled`. + - Env keys: `TLDW_AUDIO_QUOTA_SECONDS_DAILY`, `TLDW_AUDIO_QUOTA_WINDOW_DAYS`, `TLDW_AUDIO_QUOTA_PER_USER`, `TLDW_AUDIO_QUOTA_ENABLED`. +- Precedence + - Environment → `config.py` adapter (reads `config.txt`) → hardcoded defaults. +- Validation + - Reject invalid ranges (negative RPM/TPM, zero window); return clear errors. +- Test mode + - If `test_mode` and a value is unspecified, apply current test‑friendly defaults per domain. +- Dependency Injection + - FastAPI providers: `get_rate_limit_settings()`, `get_embedding_settings()`, `get_audio_quota_settings()`. + - Optional constructor injection for unit tests to avoid env mutation. +- Observability + - Log effective settings at startup (redacted secrets), including source markers `[env|config|default]`. + +## 8. Non‑Functional Requirements +- Backward-compatible defaults; no material behavior changes for existing deployments. +- Minimal overhead; settings load once and are cached for reuse. +- Consistent error messages and Loguru logging. + +## 9. Design Overview +- Package layout + - `tldw_Server_API/app/core/settings/base.py` – shared mixins; source tagging; adapter to `config.py`. + - `tldw_Server_API/app/core/settings/common.py` – `CommonSettings`. + - `tldw_Server_API/app/core/settings/rate_limits.py` – `RateLimitSettings`. + - `tldw_Server_API/app/core/settings/embeddings.py` – `EmbeddingSettings`. + - `tldw_Server_API/app/core/settings/audio_quota.py` – `AudioQuotaSettings`. +- Façade behavior + - Pydantic `BaseSettings` classes read env with aliases; fallback to a `config.py` adapter for `[RateLimits]`, `[Embeddings]`, `[Audio-Quota]` sections; otherwise defaults. + - Merge logic applies precedence and captures source for logging. +- Dependency injection + - Singleton instances resolved at app startup; overridable in tests via fixtures. + +## 10. Data Model +- In-memory Pydantic models; no new DB schema. +- Helper: `ConfigSourceAdapter` for section/key access via `config.py`. +- Merge function to compute final effective settings per domain with per‑field source metadata. + +## 11. APIs & Interfaces +- FastAPI dependency providers returning domain settings singletons. 
+- Optional (debug): authenticated endpoint to inspect effective config: `/api/v1/config/effective` (redacted). + +## 12. Implementation Phases +1. Scaffold settings package and `config.py` adapter; add DI providers. Optional feature flag `TLDW_SETTINGS_V1=1`. +2. Integrate three target modules to consume settings via DI/constructor args; remove local parsing blocks. +3. Cleanup: delete dead code and finalize aliases; update docs/examples. + +## 13. Migration & Rollout +- Default to new settings; retain legacy env names via field aliases. +- During soak, log effective values clearly; if needed, temporarily gate via `TLDW_SETTINGS_V1`. +- Later minor release removes deprecated env names and parsing remnants. + +## 14. Risks & Mitigations +- Silent behavior drift from defaults → add parity tests; dual logging during rollout. +- Env name collisions → use `TLDW_*` namespace; keep explicit legacy aliases. +- Test brittleness from env reliance → prefer injected settings fixtures; minimize env mutation. + +## 15. Dependencies & Assumptions +- Pydantic available in the project environment. +- `tldw_Server_API/app/core/config.py` remains the adapter for `config.txt`. +- Existing defaults in the three modules are the source of truth for parity. + +## 16. Acceptance Criteria +- Target modules fetch all configuration via typed settings; no ad‑hoc env parsing remains. +- `TLDW_TEST_MODE=1` yields consistent test defaults across domains. +- Precedence (env → config → default) verified by tests. +- Startup logs show effective settings with sources; sensitive values redacted. +- Unit and integration tests pass with no behavioral regressions. + +## 17. Testing Plan +- Unit tests (per settings class): precedence resolution, alias handling, validation errors, test‑mode defaults. +- Integration tests: ensure Chat limiter, Embeddings limiter, and Audio quota behavior is unchanged under representative env/config permutations. +- Fixtures: `settings_override` to inject domain instances in tests without env pollution. +- Coverage: include in `python -m pytest --cov=tldw_Server_API --cov-report=term-missing`. + +## 18. Timeline (Estimate) +- Design + scaffolding: 0.5 day +- Implement settings + adapters: 0.5 day +- Integrate 3 modules and remove duplication: 0.5–1 day +- Tests + docs: 0.5–1 day +- Total: 2–3 days + +## 19. Open Questions +- Enumerate all legacy env keys in use for alias mapping (audit required). +- Confirm test‑mode default semantics (unlimited vs large but finite rates) with QA. +- Need per‑provider embeddings rate limits now, or defer? +- Include an authenticated endpoint to expose effective config, or keep logs only? + +## 20. 
References +- Central config adapter: `tldw_Server_API/app/core/config.py` +- Duplicated parsing locations: + - `tldw_Server_API/app/core/Chat/rate_limiter.py:270` + - `tldw_Server_API/app/core/Embeddings/rate_limiter.py:246` + - `tldw_Server_API/app/core/Usage/audio_quota.py:281` +- Related design doc: `Docs/Design/Resource_Governor_PRD.md` diff --git a/Curl-Scraping-PRD.md b/Docs/Product/Curl-Scraping-PRD.md similarity index 100% rename from Curl-Scraping-PRD.md rename to Docs/Product/Curl-Scraping-PRD.md diff --git a/Docs/Product/IMPLEMENTATION_PLAN.md b/Docs/Product/IMPLEMENTATION_PLAN.md new file mode 100644 index 000000000..a3373a85a --- /dev/null +++ b/Docs/Product/IMPLEMENTATION_PLAN.md @@ -0,0 +1,102 @@ +## Stage 1: STT Turn Detection (VAD & Commit) +**Goal**: Add Silero VAD–driven turn detection to unified streaming STT and finalize transcripts at end‑of‑speech for lower final latency. +**Success Criteria**: Final transcript latency p50 ≤ 600ms on reference setup; server defaults applied; client tunables accepted; no regression in quotas/auth. +**Tests**: +- Unit: VAD threshold/stop‑secs/mute edge cases; buffering → commit behavior; JSON message handling in WS path. +- Integration: WS stream with synthetic audio pauses triggers timely “final” messages; latency assertions with mocked clock. +**Reference Setup**: +- Hardware/OS: 8‑core CPU, optional NVIDIA GPU (if Parakeet GPU path enabled); macOS 14 or Ubuntu 22.04. +- Runtime: Python 3.11, ffmpeg ≥ 6.0, av ≥ 11.0.0. +- Network: Localhost loopback; no WAN hops. +- Input fixture: 10 s 16 kHz float32 speech with 250 ms trailing silence; single speaker. +**Implementation Notes**: +- VAD engine: Silero VAD. +- Integration point: Unified WS loop (tldw_Server_API/app/core/Ingestion_Media_Processing/Audio/Audio_Streaming_Unified.py:1200) before forwarding to `transcriber.process_audio_chunk`. +- Tunables and bounds (server‑validated): + - `vad_threshold` [0.1..0.9], default 0.5 + - `min_silence_ms` [150..1500], default 250 + - `turn_stop_secs` [0.1..0.75], default 0.2 (guard minimum utterance length 0.4 s) +- Commit mapping: VAD end‑of‑speech triggers a server‑side finalize that emits `{type:"full_transcript"}` equivalent to receiving a client `commit` (see Audio_Streaming_Unified.py:1585). +**Status**: Not Started + +## Stage 2: Latency Metrics (STT/TTS + Voice‑to‑Voice) +**Goal**: Instrument STT end‑of‑speech → final transcript, TTS request → first audio chunk (TTFB), and voice‑to‑voice (EOS → first audio on wire). +**Success Criteria**: New histograms (`stt_final_latency_seconds`, `tts_ttfb_seconds`, `voice_to_voice_seconds`) exported with labels; sampling overhead negligible; visible in metrics registry. +**Tests**: +- Unit: Timer guards, labels, and error‑safe recording; metrics manager registration idempotence. +- Integration: Synthetic pipeline run records non‑zero latencies; counters for stream errors/underruns increment on fault injection. +**Reference Setup**: +- Same as Stage 1. +**Implementation Notes**: +- Metrics registration: add histograms to MetricsRegistry with buckets `[0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5]`. +- Label schema: + - `stt_final_latency_seconds{model,variant,endpoint="audio_unified_ws"}` + - `tts_ttfb_seconds{provider,voice,format,endpoint="audio.speech"}` + - `voice_to_voice_seconds{provider,route}` +- Correlation: propagate `X-Request-Id` if present or generate UUIDv4 on entry to WS/REST; include in logs and internal spans to correlate metrics. 
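+- Registration sketch (illustrative): shown with `prometheus_client` directly for concreteness; the MetricsRegistry wrapper is assumed to expose an equivalent histogram registration, and the label values below are examples only.
+```python
+from prometheus_client import Histogram
+
+LATENCY_BUCKETS = (0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5)
+
+STT_FINAL_LATENCY = Histogram(
+    "stt_final_latency_seconds",
+    "End-of-speech to final transcript latency",
+    labelnames=("model", "variant", "endpoint"),
+    buckets=LATENCY_BUCKETS,
+)
+TTS_TTFB = Histogram(
+    "tts_ttfb_seconds",
+    "TTS request to first audio chunk (TTFB)",
+    labelnames=("provider", "voice", "format", "endpoint"),
+    buckets=LATENCY_BUCKETS,
+)
+VOICE_TO_VOICE = Histogram(
+    "voice_to_voice_seconds",
+    "End-of-speech to first audio byte on the wire",
+    labelnames=("provider", "route"),
+    buckets=LATENCY_BUCKETS,
+)
+
+# At the STT finalize point (example label values):
+# STT_FINAL_LATENCY.labels(model="parakeet", variant="stream",
+#                          endpoint="audio_unified_ws").observe(elapsed_s)
+```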
+**Status**: Not Started + +## Stage 3: TTS PCM Streaming Path +**Goal**: Support `response_format=pcm` end‑to‑end for lowest overhead; document and validate output shape/sample rate. +**Success Criteria**: `/api/v1/audio/speech` streams PCM16 with steady throughput; clients can play without encoder; existing formats unaffected. +**Tests**: +- Unit: PCM branch bypasses container remux; chunk framing stable; samplerate/channels honored. +- Integration: Client consumes PCM stream with no underruns; backpressure respected. +**Reference Setup**: +- Same as Stage 1. +**Implementation Notes**: +- Content‑Type: `audio/L16; rate=; channels=`; default `rate=24000`, `channels=1`. +- Headers: include `X-Audio-Sample-Rate: ` for clarity. +- Negotiation: Default to provider/sample pipeline rate; optional `target_sample_rate` accepted when supported by adapter. +**Status**: Not Started + +## Stage 4: Phoneme/Lexicon Overrides (Kokoro) +**Goal**: Add configurable phoneme mapping for consistent pronunciation of brand/technical terms. +**Success Criteria**: Config file loaded; mapping applied safely (word boundaries, case handling); feature can be toggled per‑request/provider. +**Tests**: +- Unit: Regex/word‑boundary correctness; idempotence on repeated runs; fallback when map missing. +- Integration: Sample prompts produce expected pronunciations without affecting latency materially. +**Reference Setup**: +- Same as Stage 1. +**Implementation Notes**: +- Schema: YAML or JSON file with entries: `{ term: "OpenAI", phonemes: "oʊ p ən aɪ", lang?: "en", boundary?: true }`. +- Tokenization: apply on word boundaries by default (`boundary: true`), case‑insensitive match with preserve‑case replacement. +- Precedence: per‑request > provider‑level > global; if no match, fall back to provider defaults. +**Status**: Not Started + +## Stage 5: Docs & Perf Harness +**Goal**: Update docs and add a simple harness to measure voice‑to‑voice latency on a reference setup. +**Success Criteria**: Docs updated (API, config, tuning); harness outputs p50/p90 and basic plots; optional diarization workflow documented. +**Tests**: +- Doc lint/check links; harness dry‑run with synthetic audio; CI smoke job (optional) executes harness in short mode. +**Reference Setup**: +- Same as Stage 1. +**Implementation Notes**: +- Harness location: `Helper_Scripts/voice_latency_harness/` (or `tldw_Server_API/tests/perf/`). +- Outputs: JSON summary (p50/p90 for STT final, TTS TTFB, voice‑to‑voice); optional Prometheus text for CI scrape. +- Fixtures: include the 10 s 16 kHz float32 speech sample and scripts to generate variants (noise/silence). +**Status**: Not Started + +## Stage 6: WebSocket TTS (Optional) +**Goal**: `/api/v1/audio/stream/tts` PCM16 streaming with backpressure and auth/rate‑limit parity with STT WS. +**Success Criteria**: p50 TTFB ≤ 200 ms on reference; zero underruns on happy path; output parity with REST TTS. +**Tests**: +- Slow reader simulation; disconnects mid‑stream; bounded queue/backpressure behavior; quota enforcement and auth parity. +**Reference Setup**: +- Same as Stage 1. +**Implementation Notes**: +- Auth & quotas: mirror STT WS (API key/JWT, endpoint allowlist, quotas with standardized close codes). +- Frames: client `{type:"prompt", text, voice?, speed?, format?:"pcm"}`; server: binary PCM16 frames (20–40 ms) + `{type:"error", message}`. +- Backpressure: bounded queue; if consumer is slow, throttle generation or drop oldest with metric `audio_stream_underruns_total`. 
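+- Frame-handling sketch (illustrative): a minimal shape for the endpoint described above; auth, quotas, rate limiting, and backpressure are omitted, and `synthesize_pcm16` is a placeholder standing in for the real TTS adapter.
+```python
+import asyncio
+
+from fastapi import APIRouter, WebSocket, WebSocketDisconnect
+
+router = APIRouter()
+
+
+async def synthesize_pcm16(text: str, voice: str | None = None):
+    # Placeholder generator: 20 ms of silence at 24 kHz mono PCM16 per frame.
+    frame = b"\x00\x00" * 480
+    for _ in range(10):
+        yield frame
+        await asyncio.sleep(0)
+
+
+@router.websocket("/api/v1/audio/stream/tts")
+async def tts_ws(websocket: WebSocket) -> None:
+    await websocket.accept()
+    try:
+        while True:
+            msg = await websocket.receive_json()
+            if msg.get("type") != "prompt":
+                await websocket.send_json({"type": "error", "message": "expected prompt frame"})
+                continue
+            async for frame in synthesize_pcm16(msg["text"], voice=msg.get("voice")):
+                await websocket.send_bytes(frame)  # binary PCM16 frames (20-40 ms)
+    except WebSocketDisconnect:
+        pass  # client disconnected; the real handler would clean up the pipeline
+```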
+**Status**: Not Started + +--- + +References: +- PRD: `Docs/Product/Realtime_Voice_Latency_PRD.md` +- STT WS: `tldw_Server_API/app/api/v1/endpoints/audio.py:1209` +- Unified STT: `tldw_Server_API/app/core/Ingestion_Media_Processing/Audio/Audio_Streaming_Unified.py` +- TTS: `tldw_Server_API/app/api/v1/endpoints/audio.py:268`, `tldw_Server_API/app/core/TTS/adapters/kokoro_adapter.py`, `tldw_Server_API/app/core/TTS/streaming_audio_writer.py` + +Global Negative‑Path Tests: +- Underruns (slow reader), client disconnects, silent input segments, high noise segments, invalid PCM chunk sizes, malformed WS config frames, exceeded quotas → standardized errors and metrics. diff --git a/Docs/Product/Infrastructure_Module_PRD.md b/Docs/Product/Infrastructure_Module_PRD.md index 395bfe74a..06281776f 100644 --- a/Docs/Product/Infrastructure_Module_PRD.md +++ b/Docs/Product/Infrastructure_Module_PRD.md @@ -24,7 +24,7 @@ ## 4. Current Scope | Capability | Details | | --- | --- | -| Redis URL resolution | Reads `EMBEDDINGS_REDIS_URL` → `REDIS_URL` → default `redis://localhost:6379`. Settings layer overrides Env when available. | +| Redis URL resolution | Reads `EMBEDDINGS_REDIS_URL` → `REDIS_URL` → default `redis://127.0.0.1:6379`. Settings layer overrides Env when available. | | Async + sync clients | `create_async_redis_client` and `create_sync_redis_client` return redis-py instances or the stub. Both accept `preferred_url`, `decode_responses`, `fallback_to_fake`, `context`, and `redis_kwargs`. | | In-memory stub | `InMemoryAsyncRedis` / `InMemorySyncRedis` share `_InMemoryRedisCore`. Supported commands: `ping`, `close`, strings (`get`, `set`, `delete`, expiry), sets (`sadd`, `srem`, `smembers`), sorted sets (`zadd`, `zrange`, `zrem`, `zscore`, `zincrby`), hashes (`hset`, `hget`, `hgetall`, `hincrby`), basic stream usage (`xadd`, `xlen`, `xrange`, `xreadgroup`, consumer groups), Lua script caching (`script_load`, `evalsha`, fallback to `eval`), simple pattern matching for `scan`. Expiry logic is time-based. | | Observability | Metrics registered in `MetricsRegistry`: `infra_redis_connection_attempts_total`, `infra_redis_connection_duration_seconds`, `infra_redis_connection_errors_total`, and `infra_redis_fallback_total`. Labels capture `mode`, `context`, outcomes, and error reasons for dashboards/alerts. | diff --git a/Docs/Product/LATTICE-PRD.md b/Docs/Product/LATTICE-PRD.md index 29d6c884a..a05e1d336 100644 --- a/Docs/Product/LATTICE-PRD.md +++ b/Docs/Product/LATTICE-PRD.md @@ -20,6 +20,17 @@ - Structured result validity (JSON schema conformance) ≥ 99.5% of calls. - Error-resilience: ≥ 99% batch completion despite transient API errors. +### Latency SLOs (per provider/model) +- P50: ≤ baseline × 1.1; P90: ≤ baseline × 1.3; P95: ≤ baseline × 1.5. +- Tail guardrail: P99 ≤ 2.5× baseline, or fail closed to baseline ranking. +- Define baselines per provider/model family and re-evaluate on version changes. + +### Evaluation Datasets & Baselines +- Datasets: HotpotQA (multi-hop, 1k eval subset), Natural Questions (NQ-open, 1k), and an internal domain set (500 curated Q/A with relevance judgments). +- Splits: fixed eval splits with run IDs; do not shuffle between runs. +- Baseline System: existing RAG “hybrid BM25 + vector + flashrank (if enabled)” as configured in unified RAG default preset. +- Target Deltas: +5–10 nDCG@10 overall; +3–5 on multi-hop (Hotpot subset); stat-sig at p<0.05 via paired bootstrap on queries. + ## Scope - In-Scope: - Reasoned reranking with JSON-constrained prompts. 
@@ -54,6 +65,15 @@ - Async batch execution with optional concurrency limits. - Categorized backoff for typical HTTP and provider errors (429/503/timeout). - Per-batch metrics: success counts, retry distribution, active requests, durations. + - Backoff Policy (with jitter): + - 429: exponential backoff with full jitter; initial 250ms, factor 2.0, max 8 retries, cap 60s. + - 5xx: decorrelated jitter, initial 500ms, max 5 retries, cap 30s; abort on repeated 502/503 after cap. + - Timeouts/Connect errors: 3 retries with exponential backoff (250ms→2s); then trip circuit for provider for 30s. + - Non-retryable (4xx except 429): no retry; return structured error and degrade to baseline. + - Provider-aware concurrency & budgets: + - Per-key `max_concurrent_calls` and `max_tokens_per_minute` enforced by token bucket. + - Default caps: OpenAI-like 20 concurrent/60k TPM; Anthropic-like 10 concurrent/40k TPM; configurable via env. + - Burst control: queue with backpressure; drop to baseline when queue wait > tail budget. - Calibration - Accept slates of (doc_id, score in [0,1]) per query and learn θ vector. - Normalize and export calibrated scores; support blending with parent path relevance. @@ -96,6 +116,13 @@ - `update(beam_slates, beam_response_jsons) -> None` - `get_top_predictions(k, rel_fn) -> List[(node, score)]` +### Pydantic Schemas & OpenAPI +- RerankRequest (tldw_Server_API/app/api/v1/schemas/rag_rerank.py): + - fields: `query: str`, `candidates: List[{id: str, text: str}]`, `topk: Optional[int]=None`, `provider: Optional[str]`, `model: Optional[str]`, `temperature: float=0.2`, `seed: Optional[int]`, `response_format: Optional[str]='json'`. +- RerankResponse: `ranking: List[str]`, `reasoning: Optional[str]`, `scores: Optional[List[{id: str, score_0_1: float}]]`, `meta: {provider, model, usage?: {input_tokens, output_tokens}}`. +- TraversalRequest/Response (tldw_Server_API/app/api/v1/schemas/rag_traversal.py) mirror above with `tree_id`, `beam`, `depth`. +- Add OpenAPI examples for happy-path and schema-failure fallback (baseline). + ## Prompt & Schema Specs - Traversal Prompts - Inputs: query, candidate passages with IDs, relevance definition text. @@ -106,11 +133,105 @@ - Provider-agnostic JSON Schema; enforce validation before use. - Fallback: JSON repair and stricter parsing for robustness. 
+### Concrete JSON Schemas (Draft 2020-12) +- Rerank Output Schema +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://tldw.ai/schemas/rerank_output.json", + "type": "object", + "required": ["ranking"], + "properties": { + "reasoning": {"type": "string"}, + "ranking": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1 + }, + "scores": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "score_0_1"], + "properties": { + "id": {"type": "string"}, + "score_0_1": {"type": "number", "minimum": 0, "maximum": 1} + } + } + } + }, + "additionalProperties": false +} +``` + +- Traversal Output Schema +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://tldw.ai/schemas/traversal_output.json", + "type": "object", + "required": ["ranking", "relevance_scores"], + "properties": { + "reasoning": {"type": "string"}, + "ranking": {"type": "array", "items": {"type": "string"}}, + "relevance_scores": { + "type": "array", + "items": { + "type": "array", + "prefixItems": [ + {"type": "string"}, + {"type": "number", "minimum": 0, "maximum": 100} + ], + "minItems": 2, + "maxItems": 2 + } + } + }, + "additionalProperties": false +} +``` + +### Example Outputs +- Rerank (example) +```json +{ + "reasoning": "Docs A and C directly answer the query; B is peripheral.", + "ranking": ["doc_A", "doc_C", "doc_B"], + "scores": [ + {"id": "doc_A", "score_0_1": 0.86}, + {"id": "doc_C", "score_0_1": 0.71}, + {"id": "doc_B", "score_0_1": 0.32} + ] +} +``` + +- Traversal (example) +```json +{ + "reasoning": "Node N3 expands the relevant subtopic; N1 is less specific.", + "ranking": ["N3", "N1", "N2"], + "relevance_scores": [["N3", 92.1], ["N1", 71.4], ["N2", 40.0]] +} +``` + +### Provider JSON Modes and Fallback Order +1) Native JSON/tool/function-calling modes (OpenAI response_format, tool_calls; Anthropic tool_use; Google function calling). +2) If unavailable, force content-type: JSON via system prompt + strict schema examples. +3) If malformed: attempt `json_repair` once, then re-prompt with stricter constraints. +4) After N=2 failures: return baseline ranking with warning; log structured error. + ## Calibration Model - Model: θ per item with PL-style likelihood and MSE alignment to given human-like scores; temperature `tau` and weight `lambda_mse`. - Training: short-run per query (small M), optimized with AdamW; output normalized θ in [0,1]. - Thresholding: optional bimodal GMM to pick a sampling threshold when selecting leaves. +### Operational Details +- Scope: θ is per-query, computed on the slate for that query; no cross-query reuse. +- Minimal slate: require M ≥ 5 items with ≥ 1 positive signal; otherwise skip calibration (no-op) and surface baseline scores. +- Early exit: stop after 50 steps or when Δloss < 1e-4 over 5 steps. +- Fallbacks: if optimizer diverges/NaNs, revert to normalized input scores. +- Alternatives: allow dependency-light calibration (`isotonic` or Platt-style logistic) via config flag if Torch is unavailable. + ## Algorithms - Reranking - Prompt → JSON ranking → Map back to original IDs → Final order. @@ -125,16 +246,24 @@ - Reporting - Per-batch: throughput, success/failure counts, retry histograms. - Iteration logs: mean metrics and saved artifacts. + - Significance testing: paired bootstrap over queries; report p-values for nDCG deltas. ## Performance & Scaling - Concurrency: `max_concurrent_calls` default 20; configurable per environment. 
- Timeouts: default 60-120s per request; categorized backoff caps (e.g., 300s for 429s). - Memory: JSON streaming where possible; avoid holding large results when not needed. + - Token/RPS budgets: enforce `max_tokens_per_minute` and `requests_per_minute` per provider key; queue with backpressure. ## Security & Privacy - Secrets via env or secret store; never log API keys or request bodies with PII. - Redact tokens and credentials in logs; enforce structured logging without secrets. +### Prompt Injection Hardening +- Sanitize candidate text (strip/control invisible characters; normalize Unicode; optionally escape HTML/Markdown when rendering). +- System prompts explicitly forbid following instructions in candidate text; require strictly structured JSON with no prose unless in `reasoning`. +- Use tool/function-calling where available to reduce injection risk; validate schema strictly before use. +- Do not log raw candidate text or full prompts; log hashed candidate IDs and aggregate statistics only. + ## Rollout Plan - Phase 1: Reasoned Reranking - Integrate LLM orchestrator and reranking prompts with schema validation. @@ -149,10 +278,18 @@ - Comprehensive batch reports, error dashboards, and guardrails on prompt size. - Acceptance: ≥99.5% valid JSON; complete error breakdown visible. +## Reproducibility & Cost Budgets +- Phase 1 budget: ≤ 15k tokens/request avg; cap 50k/query end-to-end; ≤ 20 concurrent per key. +- Phase 2 budget: ≤ +10% tokens vs Phase 1 due to calibration metadata. +- Phase 3 budget: depth≤2, beam≤3 by default; hard cap 120k tokens/query. +- Determinism for evals: temperature ≤ 0.3; set `seed` where provider supports; record model version/family in `meta`. +- Log per-run `run_id`, dataset name, split, model/provider, and cost estimates. + ## Acceptance Criteria - Schema conformance ≥ 99.5%; failures auto-retry and log structured context. - Batch runner survives transient provider issues; final completion ratio ≥ 99%. - Metrics: documented improvements vs. baseline; reproducible within ±5%. + - Degrade gracefully: after N=2 schema failures, return baseline ranking with warning and telemetry event. ## Risks & Mitigations - Provider Variance: switchable client interface; keep prompts provider-neutral. @@ -162,6 +299,15 @@ - JSON Fragility: malformed outputs. - Mitigation: schema enforcement, JSON repair fallback, strict error categorization. +## Traversal Trees & Registry +- Format (JSON file): + - `tree_id: str`, `version: int`, `created_at: iso8601`, `root_id: str`. + - `nodes: [{ id: str, parent_id: Optional[str], title: str, summary: Optional[str], doc_ids: Optional[List[str]], metadata: Optional[dict] }]`. +- Validation rules: single root, acyclic graph, unique IDs, all `parent_id` reference valid nodes. +- Registry: `Databases/tree_registry.json` mapping `tree_id` → `{path, version}`; supports file path or external URI. +- Versioning: bump `version` on structural changes; store `last_built_with` (embedder + params) in metadata for provenance. +- Defaults: beam=3, depth=2 for medium corpora (<1M chunks); beam=2, depth=1 for small corpora; cost guardrails enforced. + ## Stack Tailoring: tldw_Server_API Integration - Context @@ -210,6 +356,11 @@ - Observability - Loguru structured logs; batch summary (success, retries, throughput) emitted at INFO. - Optionally persist evaluation artifacts via existing Evaluations module. + - Metrics to emit (names/examples): + - Counters: `rag_rerank_requests_total`, `rag_rerank_retries_total`, `rag_rerank_failures_total{code}`. 
+ - Histograms: `rag_rerank_latency_ms`, `provider_call_latency_ms`, `json_repair_attempts`. + - Gauges: `inflight_requests`, `queue_depth`. + - Token usage: `input_tokens_total`, `output_tokens_total`. - Data & Storage - No schema migrations required; traversal trees stored as files (JSON/PKL) in `models/` or `Databases/` with registry mapping, or external URI. @@ -218,6 +369,9 @@ - Testing Plan - Unit: prompt builders, schema validation, calibration outputs shape/normalization. - Integration: rerank endpoint happy path, error/backoff paths, JSON conformance; traversal basic beam step (when enabled). + - Property-based tests: randomized valid/invalid JSON against schemas to ensure robust parsing. + - Golden tests: snapshot prompts/responses to detect regressions across prompt/template changes. + - A/B harness: integrate with Evaluations module; every run has `run_id`, persists artifacts and metrics, and can compare baseline vs variant. - Rollout Targets (tldw_server) - Phase 1 adds `rag_rerank.py`, ReasonedReranker module, tests, and docs; feature flag default ON in dev, OFF in prod. @@ -231,6 +385,13 @@ - Fusion - Blend calibrated scores with existing BM25/embedding pipeline as a rerank stage; weight controlled in config (`RAG_RERANK_WEIGHT`). +## Repo Process Alignment +- Add companion design: `Docs/Design/LATTICE-Design.md` detailing architecture, schemas, and flows. +- Add `IMPLEMENTATION_PLAN.md` with staged deliverables, success criteria, and status updates per project guidelines. +- Note schema code locations: + - `tldw_Server_API/app/api/v1/schemas/rag_rerank.py` + - `tldw_Server_API/app/api/v1/schemas/rag_traversal.py` + ## Open Questions - Which provider(s) first? Need priority order for adapters. - Target corpora for initial tree construction? Available embeddings, clustering strategy, and branching factor? diff --git a/Docs/Product/Media_Endpoint_Refactor-PRD.md b/Docs/Product/Media_Endpoint_Refactor-PRD.md new file mode 100644 index 000000000..410504966 --- /dev/null +++ b/Docs/Product/Media_Endpoint_Refactor-PRD.md @@ -0,0 +1,239 @@ +PRD: Modularization of /media Endpoints + + - Title: Modularize and Refactor /media Endpoints + - Owner: Server API Team + - Status: Draft (v1) + - Target Version: v0.2.x + + Background + + - Current media endpoints live in a monolithic module with broad responsibilities: request parsing, auth/RBAC, rate limits, caching, input sourcing, processing orchestration, persistence, and response shaping. + - Key file: tldw_Server_API/app/api/v1/endpoints/media.py + - Existing processing libraries live under tldw_Server_API/app/core/Ingestion_Media_Processing/ and DB logic under tldw_Server_API/app/core/DB_Management/. + - Tests exist for uploads, security, media processing, and web scraping. + + Problem Statement + + - The monolith is hard to maintain and test due to tight coupling, duplicated patterns, and mixed concerns. + - Changes risk regressions across unrelated features. + - Onboarding and iteration speed are slowed by the file’s size and complexity. + + Goals + + - Thin, declarative routers with clear separation of concerns. + - Service-oriented orchestration for ingestion, processing, and persistence. + - Shared utilities for caching, error mapping, request normalization, and input sourcing. + - Preserve existing API behavior, response shapes, and performance. + - Improve testability and maintainability. + + Non‑Goals + + - No route path changes or breaking response shape changes. + - No DB schema changes. 
+ - No rewrites of core ingestion libraries. + - No feature expansion beyond modularization. + + Stakeholders + + - Backend engineers maintaining ingestion, RAG, and audio/video flows. + - QA/Testing owners for Media and Web Scraping. + - Frontend clients relying on current /media endpoints. + + Scope + + - In-scope: All handlers under /api/v1/media including management (list/detail/versions), processing (no-DB paths), and ingest with persistence. + - Out-of-scope: Non-media endpoints; chat, audio streaming WS, MCP. + + Functional Requirements + + - Endpoints unchanged: + - List media, item details, versions (list/create/rollback). + - Processing endpoints (no DB): code, videos, documents, PDFs, ebooks, emails. + - Ingest + persist endpoint: POST /api/v1/media/add. + - Web scraping ingest: POST /api/v1/media/process-web-scraping. + - Debug schema endpoint. + - Shared utilities: + - Caching with ETag/If-None-Match for GET list/detail. + - Error mapping for DB and processing exceptions. + - Request normalization: robust form coercions, URL lists, booleans/ints. + - Input sourcing: URL downloads, tempdirs, upload validation. + - Services: + - Orchestrator for process-only flows (no DB). + - Persistence service (DB writes, versions, keywords, claims). + - Keep: + - AuthNZ and RBAC decorators. + - Rate limiting and backpressure hooks. + - Quota checks and metrics emission. + - Claims extraction and analysis when enabled. + + Non‑Functional Requirements + + - Performance: No regression; caching enabled for list/detail. + - Reliability: Transactions around persistence; clear cleanup semantics for temp dirs. + - Security: Preserve validation, RBAC, rate limits, and input file checks; no logging of secrets. + - Observability: Loguru usage consistent with main.py; metrics labels maintained. + - Testing: All existing tests pass; new unit tests for utilities (>80% coverage in new code). + - Compatibility: Keep tldw_Server_API/app/api/v1/endpoints/media.py as a compatibility shim exporting router. + + Success Metrics + + - Monolith shrinks to shim; new package assumes routes. + - Cyclomatic complexity and size reduced per endpoint module. + - Test pass rate unchanged or improved; new unit tests for utilities. + - Endpoint latencies/throughput unchanged within measurement noise. + - Developer feedback shows faster iteration and onboarding. 
+ + Technical Design + + - Endpoints Package (new) + - tldw_Server_API/app/api/v1/endpoints/media/__init__.py (exposes router, includes subrouters) + - tldw_Server_API/app/api/v1/endpoints/media/listing.py (GET list/search if exists) + - tldw_Server_API/app/api/v1/endpoints/media/item.py (GET, PATCH/PUT, DELETE) + - tldw_Server_API/app/api/v1/endpoints/media/versions.py (GET versions, POST version, PUT rollback) + - tldw_Server_API/app/api/v1/endpoints/media/add.py (POST /add) + - tldw_Server_API/app/api/v1/endpoints/media/process_code.py + - tldw_Server_API/app/api/v1/endpoints/media/process_videos.py + - tldw_Server_API/app/api/v1/endpoints/media/process_documents.py + - tldw_Server_API/app/api/v1/endpoints/media/process_pdfs.py + - tldw_Server_API/app/api/v1/endpoints/media/process_ebooks.py + - tldw_Server_API/app/api/v1/endpoints/media/process_emails.py + - tldw_Server_API/app/api/v1/endpoints/media/web_scrape.py + - tldw_Server_API/app/api/v1/endpoints/media/debug.py + - API Utilities (new) + - tldw_Server_API/app/api/v1/utils/cache.py (ETag generation, If-None-Match, TTL) + - tldw_Server_API/app/api/v1/utils/http_errors.py (map DatabaseError/InputError/ConflictError to FastAPI HTTPException) + - tldw_Server_API/app/api/v1/utils/request_parsing.py (form coercions, URL list normalization, safe bool/int parsing) + - Core Orchestration (new) + - tldw_Server_API/app/core/Ingestion_Media_Processing/pipeline.py + - Input resolution (URL or upload) → type-specific processor → standard result list + - tldw_Server_API/app/core/Ingestion_Media_Processing/input_sourcing.py + - Wraps _download_url_async, Upload_Sink.process_and_validate_file, tempdir lifecycle + - tldw_Server_API/app/core/Ingestion_Media_Processing/result_normalization.py + - Uniform MediaItemProcessResponse shape: status, metadata, content, chunks, analysis, claims, warnings + - tldw_Server_API/app/core/Ingestion_Media_Processing/persistence.py + - DB transactions, version creation, keywords, claims storage + - Compatibility Shim + - tldw_Server_API/app/api/v1/endpoints/media.py re-exports router from the new package. + - Caching Design + - Generate ETag based on response content hash (excluding volatile fields). + - Honor If-None-Match; return 304 when matched. + - Configurable TTL via config['CACHE_TTL']; disable when Redis disabled. + - Error Mapping + - DatabaseError → 500 (unless refined by context, e.g., not found → 404). + - InputError → 400/422 based on validation context. + - ConflictError → 409 for resource conflicts. + - Graceful fallbacks to 500 with safe messages (no secrets). + - Security & AuthNZ + - Preserve Depends(get_request_user), PermissionChecker(MEDIA_CREATE), and rbac_rate_limit("media.create") on routes that modify data. + - Keep file extension allowlists per media type and size caps. + - Maintain URL safety checks and content-type based filtering. + + API Compatibility + + - No changes to route paths, query params, or body schemas. + - Response models remain per tldw_Server_API/app/api/v1/schemas/media_response_models.py:1. + - Request models remain per tldw_Server_API/app/api/v1/schemas/media_request_models.py:1 (allow internal re-exports only). + + Data Model Impact + + - None. All DB operations continue via MediaDatabase and existing DB helpers. + + Telemetry & Metrics + + - Maintain existing counters for uploads, bytes, and per-route usage events. + - Keep TEST_MODE diagnostics behavior, but confine to helpers to reduce handler clutter. 
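+
+ Example (illustrative): a minimal sketch of the ETag/If-None-Match behavior described under Caching Design. The helper names, signatures, and volatile-field list are assumptions for illustration, not the final utils/cache.py API.
+
+```python
+import hashlib
+import json
+from typing import Any, Iterable, Optional
+
+from fastapi import Request, Response
+
+# Assumed volatile keys excluded from hashing (illustrative only).
+VOLATILE_FIELDS = {"generated_at", "request_id"}
+
+
+def make_etag(payload: Any, exclude: Iterable[str] = VOLATILE_FIELDS) -> str:
+    """Hash a JSON-serializable payload into a quoted ETag, skipping volatile fields."""
+    if isinstance(payload, dict):
+        payload = {k: v for k, v in payload.items() if k not in set(exclude)}
+    canonical = json.dumps(payload, sort_keys=True, default=str)
+    return '"' + hashlib.sha256(canonical.encode("utf-8")).hexdigest() + '"'
+
+
+def etag_json_response(request: Request, payload: Any, ttl: Optional[int] = None) -> Response:
+    """Return 304 when If-None-Match matches; otherwise the JSON body with ETag (and TTL)."""
+    etag = make_etag(payload)
+    if request.headers.get("if-none-match") == etag:
+        return Response(status_code=304, headers={"ETag": etag})
+    headers = {"ETag": etag}
+    if ttl:  # TTL would come from config['CACHE_TTL'] in the real utility
+        headers["Cache-Control"] = f"max-age={int(ttl)}"
+    return Response(content=json.dumps(payload, default=str),
+                    media_type="application/json", headers=headers)
+```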
+ + Rollout & Backout + + - Rollout: Incremental PRs per stage; keep shim in place; run full pytest suite after each stage. + - Backout: Revert to previous media.py monolith; keep migrations isolated to code structuring (no DB migration). + + Risks & Mitigations + + - Tests patch internals of media.py: keep temporary re-exports of commonly patched functions in the shim. + - Route order conflicts: keep /{media_id:int} with type converter and preserve registration order. + - Behavior drift in form coercion: centralize and add unit tests in utils/request_parsing.py. + - Unexpected perf cost from caching: keep cache optional; measure and tune TTL and ETag generation. + + Acceptance Criteria + + - All existing tests pass: + - tldw_Server_API/tests/Media/* + - tldw_Server_API/tests/http_client/test_media_download_helper.py + - tldw_Server_API/tests/Web_Scraping/test_friendly_ingest_crawl_flags.py + - New unit tests for cache, request parsing, input sourcing, and normalization at >80% coverage. + - API responses identical for representative golden cases across endpoints. + - Logs and metrics preserved; no sensitive leakage. + + Open Questions + + - Do any external integrations or clients patch/import internal helpers from media.py? If yes, list to re-export for one release cycle. + - Should we add a feature flag to force old router? Default plan relies on shim; a flag is optional. + + Timeline (Rough) + + - Design and approval: 1–2 days + - Utilities + skeleton package: 1 day + - List/Item/Versions extraction: 1–2 days + - Process-only endpoints: 3–4 days + - /add persistence extraction: 2–3 days + - Web scraping extraction: 1 day + - Cleanup + docs + final tests: 1–2 days + - Total: ~10–15 working days + + Dependencies + + - Redis (optional cache). + - Existing core modules: Upload sink, PDF/Doc/AV processors, DB management, usage/metrics. + - AuthNZ dependencies and rate limiters. + + Implementation Plan + + - Stage 0: PRD Sign‑Off + - Deliverable: Approved PRD. + - Exit: Stakeholder sign-off. + - Stage 1: Skeleton & Utilities + - Create endpoints/media/ package with __init__.py exporting router. + - Add api/v1/utils/cache.py, utils/http_errors.py, utils/request_parsing.py. + - Keep endpoints/media.py as shim importing router from package. + - Tests: unit tests for cache and parsing utilities. + - Stage 2: Read‑Only Endpoints + - Move GET list and GET item to listing.py and item.py. + - Move versions GET/POST/PUT to versions.py. + - Apply cache decorator for list/detail. + - Tests: run Media list/detail/version tests; verify ETag behavior on list/detail. + - Stage 3: Process‑Only Endpoints + - Create core orchestrator: pipeline.py, input_sourcing.py, result_normalization.py. + - Move process_code, process_documents, process_pdfs, process_ebooks, process_emails, process_videos into dedicated files; handlers delegate to orchestrator. + - Tests: adapt existing tests; add unit tests for input sourcing and normalization. + - Stage 4: Persistence Path (/add) + - Create persistence.py with transactional DB writes, keyword tagging, claims storage. + - Extract /add endpoint to add.py; reuse orchestrator for processing and call persistence layer. + - Preserve quotas, metrics, and claims feature flags. + - Tests: /add end-to-end tests; quota and error mapping coverage. + - Stage 5: Web Scraping + - Move handler to web_scrape.py; ensure it delegates to services/web_scraping_service. + - Tests: web scraping tests (crawl flags, summarization toggles). 
+ - Stage 6: Debug Endpoint + - Move schema introspection to debug.py. + - Tests: basic health assertions. + - Stage 7: Cleanup & Docs + - Ensure media.py shim only re-exports router. + - Update docs: + - Docs/Code_Documentation/Ingestion_Media_Processing.md + - Docs/Code_Documentation/Ingestion_Pipeline_* + - Add Docs/Design/Media_Endpoint_Refactor.md overview. + - Tests: full suite with coverage. + - Definition of Done (per stage) + - Tests passing (unit + integration for impacted endpoints). + - Response shapes verified with golden samples. + - Lint/format per project conventions. + - Logs clean; no sensitive data exposure. + - Update CHANGELOG (internal note only; no external API changes). + - Validation Steps + - Run: python -m pytest -v + - Coverage: python -m pytest --cov=tldw_Server_API --cov-report=term-missing + - Manual: spot-check /api/v1/media list/detail, /add, /process-* endpoints. + - Backout Plan + - Revert to last commit where media.py monolith was active. + - Keep compatibility shim until next minor release. diff --git a/Docs/Design/PGVector_Hybrid_RAG_PRD.md b/Docs/Product/PGVector_Hybrid_RAG_PRD.md similarity index 100% rename from Docs/Design/PGVector_Hybrid_RAG_PRD.md rename to Docs/Product/PGVector_Hybrid_RAG_PRD.md diff --git a/Docs/Product/PRD_Browser_Extension.md b/Docs/Product/PRD_Browser_Extension.md new file mode 100644 index 000000000..09eda084d --- /dev/null +++ b/Docs/Product/PRD_Browser_Extension.md @@ -0,0 +1,729 @@ +# tldw_server Browser Extension — Product Requirements Document (PRD) + +- Version: 1.0 +- Owner: Product/Engineering (You) +- Stakeholders: tldw_server backend, Extension frontend, QA +- Target Browsers: Chrome/Edge (MV3), Firefox (MV2) + +## Background +You’ve inherited the project and an in‑progress extension. The goal is to ship an official, whitelabeled extension that uses tldw_server as the single backend for chat, RAG, media ingestion, notes, prompts, and audio (STT/TTS). The server provides OpenAI‑compatible APIs and mature AuthNZ (single‑user API key and multi‑user JWT modes). + +## Goals +- Deliver an integrated research assistant in the browser that: + - Chats via `/api/v1/chat/completions` with streaming and model selection. + - Searches via RAG (`POST /api/v1/rag/search` and `GET /api/v1/rag/simple` if exposed). + - Ingests content (current page URL or manual URL) via `/api/v1/media/process` and related helpers. + - Manages notes and prompts through their REST endpoints. + - Transcribes audio via `/api/v1/audio/transcriptions`; synthesizes speech via `/api/v1/audio/speech`. +- Provide smooth setup (server URL + auth) and a robust, CORS‑safe network layer. +- Ship an MVP first and iterate with clear milestones. + +## Non‑Goals +- Building a general proxy for arbitrary third‑party LLM services. +- Adding server features not exposed by tldw_server APIs. +- Collecting telemetry on user content or behavior. + +## Personas +- Researcher/Student: Captures web content, asks questions, organizes notes. +- Developer/Analyst: Tries multiple models/providers, tweaks prompts, exports snippets. +- Power user: Uses voice (STT/TTS), batch ingest, and RAG filters. + +## User Stories (MVP‑critical) +- As a user, I configure the server URL and authenticate (API key or login). +- As a user, I see available models/providers and select one for chat. +- As a user, I ask a question and receive streaming replies with cancel. +- As a user, I search with RAG and insert results into chat context. 
+- As a user, I send the current page URL to the server for processing and get status. +- As a user, I quickly capture selected text as a note and search/export notes. +- As a user, I upload a short audio clip for transcription and view the result. + +## Scope + +### MVP (v0) +- Settings: server URL, auth mode (single/multi), credentials, health check. +- Auth: X‑API‑KEY and JWT (login/refresh/logout); error UX for 401/403. +- Models: discover and select model/provider from server. +- Chat: non‑stream and SSE stream; cancel; basic local message history. +- RAG: simple search UI; insert snippets into chat context. +- Media: ingest current tab URL or entered URL; progress/status. +- Notes/Prompts: basic create/search/import/export. +- STT: upload wav/mp3/m4a; show transcript. + +### v1 +- TTS playback; voice catalog/picker. +- Context menu “Send to tldw_server”. +- Improved RAG filters (type/date/tags). +- Robust error recovery and queued retries. + +### v1.x +- Batch operations; offscreen processing where safe. +- MCP surface (if required later). + +## Functional Requirements + +### Settings and Auth +- Allow any `serverUrl` (http/https); validate via a health check. +- Health check path: `GET /api/v1/health` (optional lightweight: `/healthz`, readiness: `/readyz`). Treat non-200 as not ready. +- Modes: Single‑User uses `X-API-KEY: `. Multi‑User uses `Authorization: Bearer `. +- Manage access token in memory; persist refresh token only when necessary. +- Auto‑refresh on 401 with single‑flight queue; one retry per request. +- Never log secrets; redact sensitive fields in errors. + +- MV3 token lifecycle: persist refresh token in `chrome.storage.local` to survive service worker suspension/restart; keep access token in memory (or `chrome.storage.session`). On background start, attempt auto‑refresh when a refresh token exists; use single‑flight refresh queue on 401. + +### Network Proxy (Background/Service Worker) +- All API calls originate from background; UI/content never handles tokens directly. +- Optional host permissions per configured origin at runtime; least privilege. +- SSE support: set `Accept: text/event-stream`, parse events (including handling `[DONE]` sentinel), keep‑alive handling, `AbortController` cancellation. +- Timeouts with exponential backoff (jitter). Offline queue for small writes. +- Propagate an `X-Request-ID` header per request for correlation and idempotent retries. + +### API Path Hygiene +- Match the server’s OpenAPI exactly, including trailing slashes where specified, to avoid redirects and CORS quirks. +- Core endpoints: + - Chat: `POST /api/v1/chat/completions` + - RAG: `POST /api/v1/rag/search`, `POST /api/v1/rag/search/stream`, `GET /api/v1/rag/simple` + - Media: `POST /api/v1/media/process` + - Notes: `/api/v1/notes/...` (search may require a trailing slash; align to spec) + - Prompts: `/api/v1/prompts/...` + - STT: `POST /api/v1/audio/transcriptions` + - TTS: `POST /api/v1/audio/speech` + - Voices: `GET /api/v1/audio/voices/catalog` + - Providers/Models: `GET /api/v1/llm/providers` (and `/llm/models` if present) +- Centralize route constants; do not rely on client‑side redirects. 
+ +#### Trailing Slash Rules (Notes/Prompts) +- Notes: + - List/Create: `GET/POST /api/v1/notes/` (trailing slash required) + - Search: `GET /api/v1/notes/search/` (trailing slash required) + - Item: `GET/DELETE/PATCH /api/v1/notes/{id}` (no trailing slash) + - Keywords collections use trailing slash, e.g., `/api/v1/notes/keywords/`, `/api/v1/notes/keywords/search/`, `/api/v1/notes/{note_id}/keywords/` +- Prompts: + - Base: `GET/POST /api/v1/prompts` (no trailing slash) + - Search: `POST /api/v1/prompts/search` (no trailing slash) + - Export: `GET /api/v1/prompts/export` (no trailing slash) + - Keywords collection: `/api/v1/prompts/keywords/` (trailing slash) + +### API Semantics +- Chat SSE shape: Expect OpenAI-style chunks with "delta" objects, then "[DONE]". Parse lines like `data: {"choices":[{"delta":{"role":"assistant","content":"..."}}]}` and terminate on `[DONE]`. +- RAG streaming is NDJSON (not SSE). Treat each line as a complete JSON object; do not expect `[DONE]`. Endpoints: `POST /api/v1/rag/search/stream` (stream), `GET /api/v1/rag/simple` (simple retrieval). +- Health signals: `GET /api/v1/health` returns status "ok" (200) or "degraded" (206). Treat any non-200 as not ready during setup. Use `/readyz` (readiness) and `/healthz` (liveness) for lightweight probes. + +References: +- Chat SSE generator: `tldw_Server_API/app/api/v1/endpoints/chat.py:1256` +- RAG endpoints: `tldw_Server_API/app/api/v1/endpoints/rag_unified.py:664, 1110, 1174` +- Health endpoints: `tldw_Server_API/app/api/v1/endpoints/health.py:97, 110` + +### Auth & Tokens +- Token response shape: `access_token`, `refresh_token`, `token_type=bearer`, `expires_in` (seconds). Reference: `tldw_Server_API/app/api/v1/schemas/auth_schemas.py:181`. +- Refresh rotation: if refresh call returns a `refresh_token`, replace the stored value (treat as authoritative). +- Prefer header auth over cookies: use `Authorization: Bearer` or `X-API-KEY`; CSRF middleware is present but skipped for Bearer/X-API-KEY flows. Reference: `tldw_Server_API/app/main.py:2396`. +- Service worker lifecycle: on background start, check for a stored refresh token and proactively refresh the access token (single-flight), so UI works after suspension/restart without prompting. 
+ +#### Background: Single‑Flight Refresh (MV3 example) +```ts +// background.ts (MV3 service worker) + +type TokenResponse = { + access_token: string; + refresh_token?: string; + token_type: 'bearer'; + expires_in: number; // seconds +}; + +let serverUrl = ''; +let authMode: 'single_user' | 'multi_user' = 'multi_user'; + +// Ephemeral in-memory access token + expiry +let accessToken: string | null = null; +let accessExpiresAt = 0; // epoch ms + +// Single-flight guard +let refreshInFlight: Promise | null = null; + +async function getRefreshToken(): Promise { + const { refresh_token } = await chrome.storage.local.get('refresh_token'); + return (refresh_token as string) || null; +} + +async function setTokens(tr: TokenResponse) { + accessToken = tr.access_token; + // Renew slightly early + accessExpiresAt = Date.now() + Math.max(0, (tr.expires_in - 30) * 1000); + if (tr.refresh_token) { + await chrome.storage.local.set({ refresh_token: tr.refresh_token }); + } +} + +function isAccessValid(): boolean { + return !!accessToken && Date.now() < accessExpiresAt; +} + +async function refreshAccessTokenSingleFlight(): Promise { + if (isAccessValid()) return accessToken!; + if (refreshInFlight) return refreshInFlight; + + refreshInFlight = (async () => { + const rt = await getRefreshToken(); + if (!rt) throw new Error('No refresh token'); + const res = await fetch(`${serverUrl}/api/v1/auth/refresh`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ refresh_token: rt }), + }); + if (!res.ok) { + // Clear tokens on hard failure + await chrome.storage.local.remove('refresh_token'); + accessToken = null; accessExpiresAt = 0; + throw new Error(`Refresh failed: ${res.status}`); + } + const body = (await res.json()) as TokenResponse; + await setTokens(body); + return accessToken!; + })().finally(() => { + refreshInFlight = null; + }); + + return refreshInFlight; +} + +export async function bgFetch(input: RequestInfo, init: RequestInit = {}): Promise { + const headers = new Headers(init.headers || {}); + + // Attach auth + if (authMode === 'single_user') { + // X-API-KEY for single-user mode (store separately) + const { api_key } = await chrome.storage.local.get('api_key'); + if (api_key) headers.set('X-API-KEY', api_key as string); + } else { + // Ensure access token is fresh + const token = await refreshAccessTokenSingleFlight(); + headers.set('Authorization', `Bearer ${token}`); + } + + // Correlation header + headers.set('X-Request-ID', crypto.randomUUID()); + + let res = await fetch(input, { ...init, headers }); + if (res.status === 401 && authMode === 'multi_user') { + try { + const token = await refreshAccessTokenSingleFlight(); + headers.set('Authorization', `Bearer ${token}`); + res = await fetch(input, { ...init, headers }); + } catch (_) { + // Bubble up 401 after failed refresh + } + } + return res; +} + +// On SW start: auto-refresh so UI is ready +chrome.runtime.onStartup.addListener(async () => { + try { await refreshAccessTokenSingleFlight(); } catch { /* no-op */ } +}); + +// Also attempt onInstalled (first install/update) +chrome.runtime.onInstalled.addListener(async () => { + try { await refreshAccessTokenSingleFlight(); } catch { /* no-op */ } +}); +``` + +### Streaming & SSE +- Chat SSE: set `Accept: text/event-stream`; keep the service worker alive via a long‑lived `Port` from the side panel/popup; recognize `[DONE]` and release reader/locks. 
+- RAG stream (NDJSON): tolerate heartbeats/blank lines and partial chunks; reassemble safe JSON boundaries before parse. +- Cancellation: use `AbortController`; expect network to close within ≈200ms after abort. + +Note: +- `/api/v1/rag/search/stream` requires `enable_generation=true` in the request body; otherwise the server returns HTTP 400. +- Default retrieval knobs are `search_mode="hybrid"` and `top_k=10` unless overridden. Discover the server’s current defaults and ranges via `GET /api/v1/rag/capabilities`. + +#### Background: Chat SSE Reader (MV3 example) +```ts +export async function streamChatSSE( + url: string, + body: unknown, + opts: { + headers?: HeadersInit; + signal?: AbortSignal; + port?: chrome.runtime.Port; // Long-lived port from UI to keep SW alive + onDelta?: (text: string) => void; + onDone?: () => void; + } = {} +) { + const controller = opts.signal ? null : new AbortController(); + const signal = opts.signal ?? controller!.signal; + + const headers = new Headers(opts.headers || {}); + headers.set('Accept', 'text/event-stream'); + headers.set('Content-Type', 'application/json'); + + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(body ?? {}), + signal, + // credentials not needed for header auth; keep simple + }); + if (!res.ok || !res.body) throw new Error(`SSE failed: ${res.status}`); + + const reader = res.body.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + try { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + let idx; + while ((idx = buffer.indexOf('\n\n')) !== -1) { + const eventBlock = buffer.slice(0, idx); + buffer = buffer.slice(idx + 2); + // Join all data: lines per SSE spec + const dataLines = eventBlock + .split('\n') + .filter(l => l.startsWith('data:')) + .map(l => l.slice(5).trim()); + if (dataLines.length === 0) continue; + const dataStr = dataLines.join('\n'); + if (dataStr === '[DONE]') { + opts.onDone?.(); + return; // normal termination + } + try { + const obj = JSON.parse(dataStr); + const delta = obj?.choices?.[0]?.delta?.content ?? ''; + if (delta) { + opts.onDelta?.(delta); + opts.port?.postMessage({ type: 'chat-delta', data: delta }); + } + } catch { /* ignore parse errors */ } + } + } + opts.onDone?.(); + } finally { + try { reader.releaseLock(); } catch { /* no-op */ } + // Caller may disconnect the port when UI is done + } + + return { + cancel: () => controller?.abort(), + }; +} +``` + +#### Background: RAG NDJSON Reader (MV3 example) +```ts +export async function streamRagNDJSON( + url: string, + body: unknown, + opts: { + headers?: HeadersInit; + signal?: AbortSignal; + port?: chrome.runtime.Port; + onEvent?: (obj: any) => void; + } = {} +) { + const controller = opts.signal ? null : new AbortController(); + const signal = opts.signal ?? controller!.signal; + + const headers = new Headers(opts.headers || {}); + headers.set('Accept', 'application/x-ndjson'); + headers.set('Content-Type', 'application/json'); + + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(body ?? 
{}), + signal, + }); + if (!res.ok || !res.body) throw new Error(`NDJSON failed: ${res.status}`); + + const reader = res.body.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + try { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + let nl; + while ((nl = buffer.indexOf('\n')) !== -1) { + const line = buffer.slice(0, nl).trim(); + buffer = buffer.slice(nl + 1); + if (!line) continue; // tolerate heartbeats/blank lines + try { + const obj = JSON.parse(line); + opts.onEvent?.(obj); + opts.port?.postMessage({ type: 'rag-event', data: obj }); + } catch { + // Partial or invalid JSON; prepend back to buffer (rare) + buffer = line + '\n' + buffer; + break; + } + } + } + } finally { + try { reader.releaseLock(); } catch { /* no-op */ } + } + + return { + cancel: () => controller?.abort(), + }; +} +``` + +#### Quick Examples (curl) +```bash +# RAG streaming (JWT) +curl -sN "http://127.0.0.1:8000/api/v1/rag/search/stream" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -H "Accept: application/x-ndjson" \ + -d '{"query":"What is machine learning?","top_k":5,"enable_generation":true}' + +# RAG simple (Single-user API key) +curl -s "http://127.0.0.1:8000/api/v1/rag/simple?query=vector%20databases" \ + -H "X-API-KEY: $API_KEY" | jq . +``` + +### Media & Audio Details +- STT multipart fields: `file` (UploadFile), `model` (default `whisper-1`), optional `language`, `prompt`, `response_format`, and TreeSeg controls (`segment`, `seg_*`). Allowed mimetypes include wav/mp3/m4a/ogg/opus/webm/flac; default max size ≈25MB (tiered). Reference: `tldw_Server_API/app/api/v1/endpoints/audio.py:464`. +- TTS JSON body: `model`, `input` (text), `voice`, `response_format` (e.g., mp3, wav), optional `stream` boolean. Response sets `Content-Disposition: attachment; filename=speech.`. Reference: `tldw_Server_API/app/api/v1/endpoints/audio.py:272`. +- Voices catalog: `GET /api/v1/audio/voices/catalog?provider=...` returns mapping of provider→voices; filter via `provider`. Reference: `tldw_Server_API/app/api/v1/endpoints/audio.py:1131`. +- Media timeouts: adopt endpoint-specific timeouts similar to WebUI defaults (videos/audios ~10m, docs/pdfs ~5m). Reference: `tldw_Server_API/WebUI/js/api-client.js:290`. 
+ +#### Quick Examples (curl) +```bash +# STT (JWT) +curl -X POST "http://127.0.0.1:8000/api/v1/audio/transcriptions" \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@/abs/path/to/audio.wav" \ + -F "model=whisper-1" \ + -F "language=en" \ + -F "response_format=json" + +# STT (Single-user API key) +curl -X POST "http://127.0.0.1:8000/api/v1/audio/transcriptions" \ + -H "X-API-KEY: $API_KEY" \ + -F "file=@/abs/path/to/audio.m4a" \ + -F "model=whisper-1" \ + -F "response_format=json" \ + -F "segment=true" -F "seg_K=6" + +# TTS (JWT) +curl -X POST "http://127.0.0.1:8000/api/v1/audio/speech" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"model":"tts-1","input":"Hello world","voice":"alloy","response_format":"mp3","stream":false}' \ + --output speech.mp3 + +# TTS (Single-user API key) +curl -X POST "http://127.0.0.1:8000/api/v1/audio/speech" \ + -H "X-API-KEY: $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"tts-1","input":"Testing TTS","voice":"alloy","response_format":"wav"}' \ + --output speech.wav + +# Voices catalog (JWT) +curl -s "http://127.0.0.1:8000/api/v1/audio/voices/catalog" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Voices catalog (Single-user API key, filtered) +curl -s "http://127.0.0.1:8000/api/v1/audio/voices/catalog?provider=elevenlabs" \ + -H "X-API-KEY: $API_KEY" | jq . +``` + +### Rate Limits & Backoff +- Typical limits (subject to server config): RAG search ≈ 30/min, RAG batch ≈ 10/min, STT ≈ 20/min, TTS ≈ 10/min. Back off on 429 and honor the `Retry-After` header. +- Show user-friendly retry timing (e.g., countdown) based on `Retry-After`. Avoid infinite retries on 5xx/network; cap attempts and use exponential backoff with jitter. + +References: +- RAG limits: `tldw_Server_API/app/api/v1/endpoints/rag_unified.py` (limit_search 30/min, limit_batch 10/min) +- STT limit: `tldw_Server_API/app/api/v1/endpoints/audio.py:461` (20/min) +- TTS limit: `tldw_Server_API/app/api/v1/endpoints/audio.py` (10/min) + +Example (bounded backoff wrapper, MV3 background): +```ts +export async function backoffFetch( + input: RequestInfo, + init: RequestInit = {}, + opts: { maxRetries?: number; baseDelayMs?: number } = {} +): Promise { + const maxRetries = opts.maxRetries ?? 2; // keep small to avoid user surprise + const base = opts.baseDelayMs ?? 300; + let attempt = 0; + // Copy headers so we can mutate between retries + const headers = new Headers(init.headers || {}); + + while (true) { + let res: Response | null = null; + try { + res = await fetch(input, { ...init, headers }); + } catch (e) { + // Network error: retry with backoff (bounded) + if (attempt >= maxRetries) throw e; + const jitter = 0.8 + Math.random() * 0.4; + await new Promise(r => setTimeout(r, Math.pow(2, attempt) * base * jitter)); + attempt++; continue; + } + + if (res.status === 429) { + // Honor Retry-After + const ra = res.headers.get('Retry-After'); + const waitSec = ra ? 
Math.max(0, parseInt(ra, 10)) : Math.pow(2, attempt) * (base / 1000); + // Emit UI hint: next retry time (optional message bus) + // port?.postMessage({ type: 'retry-after', seconds: waitSec }); + if (attempt >= maxRetries) return res; // surface to UI if we’ve already retried + await new Promise(r => setTimeout(r, waitSec * 1000)); + attempt++; continue; + } + + if (res.status >= 500 && res.status < 600) { + if (attempt >= maxRetries) return res; // bubble to UI + const jitter = 0.8 + Math.random() * 0.4; + await new Promise(r => setTimeout(r, Math.pow(2, attempt) * base * jitter)); + attempt++; continue; + } + + return res; // 2xx/3xx/4xx (non-429) -> caller handles + } +} +``` + +#### Backoff + Auth Wrapper (centralized) +```ts +// Uses single-flight refresh + backoffFetch for rate limits and transient errors +export async function apiFetch( + input: RequestInfo, + init: RequestInit = {}, + opts: { backoff?: { maxRetries?: number; baseDelayMs?: number } } = {} +): Promise { + const headers = new Headers(init.headers || {}); + if (!headers.has('X-Request-ID')) headers.set('X-Request-ID', crypto.randomUUID()); + + // Attach auth + if (authMode === 'single_user') { + const { api_key } = await chrome.storage.local.get('api_key'); + if (api_key) headers.set('X-API-KEY', api_key as string); + } else { + const token = await refreshAccessTokenSingleFlight(); + headers.set('Authorization', `Bearer ${token}`); + } + + const doFetch = () => backoffFetch(input, { ...init, headers }, opts.backoff); + + // First attempt with current token/key and bounded backoff + let res = await doFetch(); + + // On 401, attempt a single refresh + retry (multi-user only) + if (res.status === 401 && authMode === 'multi_user') { + try { + const token = await refreshAccessTokenSingleFlight(); + headers.set('Authorization', `Bearer ${token}`); + res = await doFetch(); + } catch { + // Return original 401 if refresh fails + } + } + return res; +} + +// Note: For SSE/NDJSON streaming, use the streaming helpers to initiate the +// connection (optional single attempt with backoff on connect). Do not auto-retry +// mid-stream to avoid duplicating streamed content. +``` + +### Notes/Prompts Concurrency & Shapes +- Notes optimistic concurrency: `PUT/PATCH/DELETE /api/v1/notes/{id}` require the `expected-version` header. On HTTP 409, refetch the note to get the latest `version` and retry the operation with the updated header. Reference: `tldw_Server_API/app/api/v1/endpoints/notes.py:347`. +- Notes search: `GET /api/v1/notes/search/?query=...` with optional `limit`, `offset`, `include_keywords`. Returns a list of notes (NoteResponse). The notes list endpoint (`GET /api/v1/notes/`) returns an object with `notes/items/results` aliases for back‑compat along with `count/limit/offset/total`. Reference: `tldw_Server_API/app/api/v1/endpoints/notes.py:480`. +- Prompts keywords: create via `POST /api/v1/prompts/keywords/` with JSON `{ "keyword_text": "..." }`. Reference: `tldw_Server_API/app/api/v1/endpoints/prompts.py:240`. + +#### Quick Examples (curl) +```bash +# Notes search (JWT) +curl -s "http://127.0.0.1:8000/api/v1/notes/search/?query=project&limit=5&include_keywords=true" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+ +# Notes update with optimistic locking (X-API-KEY) +NOTE_ID="abc123" +CURR=$(curl -s "http://127.0.0.1:8000/api/v1/notes/$NOTE_ID" -H "X-API-KEY: $API_KEY") +VER=$(echo "$CURR" | jq -r .version) +curl -s -X PUT "http://127.0.0.1:8000/api/v1/notes/$NOTE_ID" \ + -H "X-API-KEY: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "expected-version: $VER" \ + -d '{"title":"Updated Title"}' | jq . + +# Prompts keyword create (JWT) +curl -s -X POST "http://127.0.0.1:8000/api/v1/prompts/keywords/" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"keyword_text":"writing"}' | jq . +``` + +### Chat +- Support `stream: true|false`, model selection, and OpenAI‑compatible request fields. +- Pause/cancel active streams; display partial tokens. +- Error UX: connection lost, server errors, token expiration. +- SSE streaming must detect and handle the `[DONE]` sentinel to terminate cleanly; keep the service worker alive during streams (e.g., via a long‑lived Port from the side panel). + +### RAG +- Query field, minimal filters; result list with snippet, source, timestamp. +- Insert selected snippets into chat as system/context or user attachment. + +### Media Ingestion +- Current tab URL ingestion; allow manual URL input. +- Show progress/toasts and final status; handle failures gracefully. +- Display progress logs from the server response where present; if a job identifier is returned, poll status with exponential backoff and provide cancel. + +### Notes and Prompts +- Create note from selection or input; tag and search. +- Browse/import/export prompts; insert prompt into chat. + +### STT +- Upload short audio (<= 25 MB MVP); show transcript with copy. +- Validate mime types; surface server validation errors. + +### TTS (v1) +- Voice list fetch; synthesize short text; playback controls; save last voice. + +## Non‑Functional Requirements + +### Security & Privacy +- No telemetry; no content analytics; local‑only diagnostics toggled by user. +- Keep access tokens in memory in background; persist refresh tokens only if required. +- Never expose tokens to content scripts; sanitize logs. + +### Performance +- Background memory budget < 50 MB steady‑state. +- Chat stream first token < 1.5s on LAN server. +- Bundle size targets: side panel < 500 KB gz (MVP); route‑level code splitting. + +### Reliability +- Resilient to server restarts; retries with backoff; idempotent UI state. +- Offline queue for small writes (e.g., notes) with visible status. + +### Compatibility +- Chrome/Edge MV3 using service worker; Firefox MV2 fallback. +- Feature‑detect offscreen API; don’t hard‑rely on it. + +### Accessibility & i18n +- Keyboard navigation, ARIA roles for side panel. +- Strings ready for localization; English default. + +## Architecture Overview + +### Background/Service Worker +- Central fetch proxy, SSE parsing, retries, 401 refresh queue, permission prompts. + +### UI Surfaces +- Side panel (chat, RAG, notes/prompts, STT/TTS). +- Options page (server/auth/settings). +- Popup (quick actions/status). + +### Content Script +- Selection capture; page metadata for ingest; no secret handling. + +### State & Storage Policy +- Background state store; message bus to UIs; `chrome.storage` for non‑sensitive prefs. +- Do not store user content by default beyond session state. +- Optional local cache for small artifacts with TTL and user clear. 
+- Persist only refresh tokens (encrypted at rest if available) in `chrome.storage.local`; keep access tokens ephemeral (memory or `chrome.storage.session`). + +## CORS & Server Config +- Prefer background‑origin requests with explicit `host_permissions`/`optional_host_permissions`. +- Server should allow CORS for the extension origin; for dev, wildcard allowed on localhost. +- Avoid blocking `webRequest` in MV3; use direct fetch and headers in background. + +## Success Metrics +- 80%+ users complete setup within 2 minutes. +- < 5% request error rate in normal operation. +- Streaming starts within 1.5s on LAN; steady memory < 50 MB. +- > 90% of API paths hit without 307 redirects (path hygiene). + +## Milestones and Deliverables + +### Milestone 1: Connectivity & Auth (Week 1–2) +- Options page with server URL and auth. +- Background proxy with health check. +- Acceptance: Successful health ping; auth tokens handled; 401 refresh working. + +### Milestone 2: Chat & Models (Week 3–4) +- Fetch providers/models; chat non‑stream and stream; cancel. +- Acceptance: Streaming chat across at least two models; SSE cancel; exact path matching. + +### Milestone 3: RAG & Media (Week 5–6) +- RAG search with snippet insertion; URL ingest with progress. +- Acceptance: RAG returns results; snippet insert; ingest completes with status notifications. + +### Milestone 4: Notes/Prompts & STT (Week 7–8) +- Notes CRUD + search; prompts browse/import/export; STT upload/transcribe. +- Acceptance: Notes searchable; prompts import/export; successful transcript for a ~20s clip. + +### Milestone 5: TTS & Polish (Week 9–10) +- TTS synthesis/playback; voice list; UX polish and accessibility checks. +- Acceptance: Voice picker works; playable audio from `/api/v1/audio/speech`. + +## Acceptance Criteria (Key) +- Path Hygiene: All requests hit exact API paths defined by OpenAPI; no 307s observed in logs. +- Security: Tokens never appear in UI or console logs; content scripts lack access to tokens. +- SSE: Streaming responses parsed without memory leaks; recognizes `[DONE]`; cancel stops network within ~200ms. +- Retry/Refresh: 401 triggers single‑flight refresh; queued requests replay once; exponential backoff with jitter for network errors. +- Permissions: Optional host permissions requested only for user‑configured origin; revocation handled gracefully. +- Media: Ingest current tab URL; show progress and final status; errors actionable. +- STT/TTS: Supported formats accepted; errors surfaced with clear messages. +- 429 Handling: Honors `Retry-After` on rate limits; UI presents retry timing. +- Streaming Memory: No unbounded memory growth during 5‑minute continuous streams; remains within budget. + +## Dependencies +- Server availability and correct CORS config. +- Accurate OpenAPI spec and stability of endpoints. +- Browser APIs: `storage`, `side_panel`, `contextMenus`, `notifications`, `offscreen` (optional), message passing. + +## Risks & Mitigations +- Endpoint variance (e.g., trailing slashes): Centralize route constants; validate against OpenAPI on startup and warn. +- Large uploads: Enforce size caps in UI; add chunking later if required. +- Firefox MV2 constraints: Document broader host permissions; polyfill SSE parsing if needed. + +## Out of Scope (for MVP) +- Full chat history sync with server. +- Advanced MCP tools integration. +- Batch operations and resumable uploads. + +## Resolved Decisions +- Canonical API key header: `X-API-KEY` (single‑user). Multi‑user uses `Authorization: Bearer `. 
+- Model discovery: Prefer `GET /api/v1/llm/providers` (authoritative provider→models); `GET /api/v1/llm/models` available as aggregate. +- Trailing slashes: See “Trailing Slash Rules (Notes/Prompts)” above (notes search and collections require trailing slash; prompts base/search do not). +- Dev HTTPS: Prefer HTTP on localhost; for HTTPS, trust a local CA or enable Chrome’s localhost invalid‑cert exception; ensure server CORS allows the extension origin. + +## Developer Validation Checklist +- Connectivity & Auth + - Set server URL and verify `GET /api/v1/health` succeeds. + - Single‑user: requests with `X-API-KEY` succeed; Multi‑user: login/refresh/logout succeeds and access token auto‑refreshes after service worker suspend/resume. +- Path Hygiene + - All calls are 2xx without redirects (no 307); Notes/Prompts follow trailing‑slash rules. +- Chat + - Non‑stream and SSE stream both work; `[DONE]` handled; cancel closes network <200ms; models list loads from `/api/v1/llm/providers`. +- RAG + - `POST /api/v1/rag/search` returns results; `GET /api/v1/rag/simple` works; `POST /api/v1/rag/search/stream` NDJSON parsed correctly. +- Media + - Current tab URL ingest works; progress logs displayed; failures surface actionable errors; job polling (if job id present) functions with backoff. +- Notes & Prompts + - Notes CRUD + `GET /api/v1/notes/search/` (with slash) work; Prompts base/search work; keywords endpoints reachable. +- Audio + - STT accepts <= 25 MB and returns transcript; TTS synthesizes and plays; voices catalog fetched. +- Reliability + - 429 responses respect `Retry-After`; 5xx/network use exponential backoff with jitter; offline queue for small writes visible. +- Permissions + - Only the configured server origin is granted host permission; revocation handled gracefully. +- CORS/HTTPS + - Extension origin allowed by server; dev HTTP works; dev HTTPS usable with trusted cert or localhost exception. +- Metrics/Headers + - `X-Request-ID` sent on requests and echoed; `traceparent` present in responses. +- Performance + - Background steady memory < 50 MB; streaming memory stable over 5 minutes; first chat token < 1.5s on LAN. + +## Glossary +- SSE: Server‑Sent Events; streaming over HTTP. +- MV3: Chrome Manifest V3. +- Background Proxy: Service worker owning all network I/O and auth. diff --git a/Docs/Design/Persona_Roleplay_PRD.md b/Docs/Product/Persona_Roleplay_PRD.md similarity index 100% rename from Docs/Design/Persona_Roleplay_PRD.md rename to Docs/Product/Persona_Roleplay_PRD.md diff --git a/Docs/Product/Realtime_Voice_Latency_PRD.md b/Docs/Product/Realtime_Voice_Latency_PRD.md new file mode 100644 index 000000000..cf5f8f314 --- /dev/null +++ b/Docs/Product/Realtime_Voice_Latency_PRD.md @@ -0,0 +1,264 @@ +# Realtime Voice Latency PRD + +Owner: Core Voice & API Team +Status: Draft (v0.1) + +## Overview + +Elevate the realtime voice experience (STT → LLM → TTS) to deliver natural, interruption‑friendly conversations with sub‑second voice‑to‑voice latency. Build on existing unified streaming STT, Kokoro streaming TTS, and OpenAI‑compatible APIs. Introduce precise turn detection, structured LLM streaming, low‑overhead audio transport options, and actionable end‑to‑end latency metrics. + +## Goals + +- Voice‑to‑voice latency (user stops speaking → first audible TTS): p50 ≤ 1.0s, p90 ≤ 1.8s. +- STT final transcript latency (end‑of‑speech → final text): p50 ≤ 600ms. +- TTS time‑to‑first‑byte (TTFB): p50 ≤ 250ms. 
+- Structured LLM streaming: speakable text to TTS immediately; code blocks and links to UI in parallel. +- Add reliable, lightweight metrics and a measurement harness. + +## Non‑Goals + +- Replacing existing RAG or LLM provider systems. +- Forcing WebRTC in all deployments (optional Phase 3 only). +- Vendor‑specific autoscaling mechanics (remain self‑host first). + +## Personas & Use Cases + +- Developers embedding voice agents in web apps who need: + - Fast and reliable end‑of‑utterance detection with interruption handling. + - Low TTS TTFB and smooth, continuous playback. + - Structured results where “speakable” text is voiced and code/links render in UI. + +## Success Metrics + +- p50/p90 voice‑to‑voice latency meets targets above. +- <1% stream errors; 0 underruns in happy path. +- Backwards compatible APIs (no breaking changes to current REST). + +## Scope & Phasing + +### Phase 1: Core Latency + Metrics (Required) +- VAD/turn detection in streaming STT to trigger fast finalization. +- TTS TTFB + STT finalization latency metrics; compute voice‑to‑voice. +- PCM streaming option (lowest overhead) documented end‑to‑end. +- Phoneme/lexicon overrides for consistent pronunciation of brand/technical terms. + +### Phase 2: Structured Streaming + WS TTS (Optional but Recommended) +- Streaming JSON parser: stream “spoke_response” to TTS; route code blocks/links to UI channel. +- WebSocket TTS endpoint for ultra‑low‑overhead PCM16 streaming. + +### Phase 3: WebRTC Egress (Optional) +- Add a minimal WebRTC transport for browser playback where ultra‑low latency is required. + +## Reference Setup + +- Hardware/OS: 8‑core CPU, optional NVIDIA GPU (if Parakeet GPU path used); macOS 14 or Ubuntu 22.04 +- Runtime: Python 3.11, ffmpeg ≥ 6.0, `av` ≥ 11.0.0, optional `espeak-ng` (phonemizer backend), optional `pyannote` +- Network: Localhost loopback during measurement; avoid WAN variability +- Test audio: 10 s of 16 kHz float32 speech, single speaker, 250 ms trailing silence +- Browser client (when applicable): latest Chrome/Edge/Firefox on same machine + +## Functional Requirements + +### STT Turn Detection +- Add Silero VAD‑based turn detection to the unified streaming STT path. +- Emit “commit” when end‑of‑speech is detected to finalize transcripts promptly. +- Expose safe server defaults and client‑configurable tunables (threshold, min silence, stop secs). + +### TTS PCM Streaming +- Support `response_format=pcm` through `/api/v1/audio/speech` and document as recommended for ultra‑low latency clients. +- Keep MP3/Opus/AAC/FLAC for compatibility. + +REST PCM details: +- Response header `Content-Type: audio/L16; rate=; channels=`; default `rate=24000`, `channels=1`. +- Include `X-Audio-Sample-Rate: ` header. +- Negotiation: default to provider/sample pipeline rate; optional `target_sample_rate` honored when supported. +- Example curl: `-d '{"model":"tts-1","input":"Hello","voice":"alloy","response_format":"pcm","stream":true}'` +- Example client: Web Audio API or Python playback snippet will be included in docs. + +### Phoneme/Lexicon Overrides +- Optional phoneme mapping (config‑driven) to stabilize pronunciation of product names and domain terms. +- Provider‑aware behavior (e.g., Kokoro ONNX/PyTorch; espeak/IPA support where applicable). + +### Structured LLM Streaming +- Add a streaming JSON parser to split: + - `spoke_response` → stream chars immediately to TTS. + - `code_blocks` and `links` → deliver to UI channel as soon as arrays complete (with optional async link validation). 
+- Make structured mode opt‑in (per request or model) to maintain backwards compatibility. + +Schema and examples (opt‑in mode): +- Request flag: `structured_streaming: true` (per API call) or model‑level default +- Server stream examples: + - `{ "type": "spoke_response", "text": "Great question..." }` + - `{ "type": "code_block", "lang": "python", "text": "print('hello')" }` + - `{ "type": "links", "items": [{"title": "Docs", "url": "https://..."}] }` +Interaction with OpenAI compatible `/chat/completions`: +- When enabled, server emits structured JSON chunks on the stream; speakable text is forwarded to TTS immediately; non‑speakable metadata is routed to the UI channel. + +### WebSocket TTS Endpoint (Optional) +- New WS endpoint `/api/v1/audio/stream/tts` that accepts prompt frames and streams PCM16 bytes continuously with backpressure handling. + +WebSocket TTS API details: +- Auth/Quotas: mirror STT WS. Support API key/JWT, endpoint allowlist checks, standardized close codes on quota. +- Client → Server frames: `{type:"prompt", text, voice?, speed?, format?:"pcm"}`; optional `request_id`. +- Server → Client frames: binary PCM16 audio frames (20–40 ms) with bounded queue; error frames as `{type:"error", "message": "..."}`. +- Backpressure: drop or throttle when the queue exceeds limit; increment `audio_stream_underruns_total` and emit warning status. + +## Non‑Functional Requirements + +- Low overhead: avoid heavy per‑chunk work; keep encoders warmed. +- Robustness: consistent behavior with disconnects, slow readers, and quotas. +- Observability: gated logs; metrics first for timing paths. + +## Architecture & Components + +Key touchpoints: +- STT WS handler: `tldw_Server_API/app/api/v1/endpoints/audio.py:1209` (stream/transcribe) +- Unified streaming STT: `tldw_Server_API/app/core/Ingestion_Media_Processing/Audio/Audio_Streaming_Unified.py` +- TTS REST endpoint: `tldw_Server_API/app/api/v1/endpoints/audio.py:268` (/audio/speech) +- Kokoro adapter streaming path: `tldw_Server_API/app/core/TTS/adapters/kokoro_adapter.py` +- Streaming encoder: `tldw_Server_API/app/core/TTS/streaming_audio_writer.py` +- TTS orchestrator: `tldw_Server_API/app/core/TTS/tts_service_v2.py` + +Design changes: +- Introduce VAD in Unified STT pipeline; on VAD end → finalize chunk with Parakeet. +- Track event timestamps for end‑of‑speech, final transcript emission, TTS start, and first audio chunk write. +- Add PCM passthrough branch in TTS streaming for minimal overhead; preserve encoded formats via `StreamingAudioWriter`. +- Add phoneme pre‑processing hook in Kokoro adapter with config‑based mapping. +- Add optional WS TTS service that streams PCM16 frames directly. + +## API Changes + +REST (existing): `/api/v1/audio/speech` +- Support `response_format=pcm` (documented default for low‑latency clients). + +WebSocket (existing): `/api/v1/audio/stream/transcribe` +- Accept optional client config to tune VAD/turn parameters (server defaults remain authoritative). +- Emit final transcripts promptly at turn end. + +WebSocket (new, optional): `/api/v1/audio/stream/tts` +- Client → Server (text frames): `{type:"prompt", text, voice?, speed?, format?:"pcm"}` +- Server → Client (binary): PCM16 frames. Error frames as `{type:"error", message}`. + +Structured LLM streaming (optional flag) +- When enabled, server parses JSON streams and routes fields: speech vs. UI metadata. 
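+
+A minimal, illustrative sketch of how the structured chunks shown above could be routed (speakable text to TTS as it arrives; code blocks and links to the UI channel). The function name, callback signatures, and line-delimited input are assumptions, not the server's implementation.
+
+```python
+import json
+from typing import AsyncIterator, Awaitable, Callable
+
+
+async def route_structured_stream(
+    lines: AsyncIterator[str],
+    speak: Callable[[str], Awaitable[None]],   # forwards speakable text to TTS
+    to_ui: Callable[[dict], Awaitable[None]],  # forwards code blocks / links to the UI channel
+) -> None:
+    """Dispatch structured streaming chunks by their "type" field."""
+    async for line in lines:
+        line = line.strip()
+        if not line:
+            continue  # tolerate heartbeats / blank lines
+        try:
+            chunk = json.loads(line)
+        except json.JSONDecodeError:
+            continue  # a real parser would buffer partial JSON incrementally
+        kind = chunk.get("type")
+        if kind == "spoke_response":
+            await speak(chunk.get("text", ""))
+        elif kind in ("code_block", "links"):
+            await to_ui(chunk)
+```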
+ +## Configuration + +STT‑Settings: +- `vad_enabled` (bool, default true) +- `vad_threshold` (float, default 0.5) +- `turn_stop_secs` (float, default 0.2) +- `min_silence_ms` (int, default 250) + +TTS‑Settings: +- `tts_pcm_enabled` (bool, default true) +- `phoneme_map_path` (str, optional JSON/YAML) +- `target_sample_rate` (int, default 24000) + +Metrics: +- `enable_voice_latency_metrics` (bool, default true) + +Feature Flags: +- `tts_pcm_enabled` (bool, default true) +- `enable_ws_tts` (bool, default false) +Dependencies: +- Required: `ffmpeg`, `av` +- Optional: `espeak-ng` (phonemizer), `pyannote` + + Security & Privacy: +- Do not log raw audio payloads; scrub PII from logs/metrics +- Configurable retention for any persisted audio (opt‑in diarization workflows) +- Avoid secrets in metric labels; bound label cardinality + +## Measurement Model + +Timestamps (server‑side): +- `EOS_detected_at`: VAD detects end‑of‑speech in WS STT loop +- `STT_final_emitted_at`: final transcript frame emitted on WS +- `TTS_request_started_at`: TTS handler receives request (REST) or prompt (WS‑TTS) +- `TTS_first_chunk_sent_at`: first audio bytes written to socket/response + +Derived metrics: +- `stt_final_latency_seconds = STT_final_emitted_at - EOS_detected_at` +- `tts_ttfb_seconds = TTS_first_chunk_sent_at - TTS_request_started_at` +- `voice_to_voice_seconds = TTS_first_chunk_sent_at - EOS_detected_at` + +Correlation: +- Propagate `X-Request-Id` (or generate UUIDv4) across WS/REST; include in logs/spans. + +## Telemetry & Metrics + +Histograms +- `voice_to_voice_seconds{provider,route}`: end‑of‑speech → first audio byte sent. +- `stt_final_latency_seconds{model,variant}`: end‑of‑speech → final transcript emit. +- `tts_ttfb_seconds{provider,voice,format}`: TTS request → first audio chunk emitted. + - Buckets for all histograms: `[0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5]` + +Counters +- `audio_stream_underruns_total{provider}` +- `audio_stream_errors_total{component,provider}` + +Correlation +- Include `request_id` and `conversation_id` on event timelines where available. + +Gauges +- Reuse `tts_active_requests{provider}` from TTS service v2 + +Endpoints +- Prometheus: `/metrics`; JSON: `/api/v1/metrics` (when `metrics` feature enabled) + +## Testing Strategy + +Unit +- JSON streaming parser: chunked inputs, escapes, array completion. +- Phoneme mapper: word‑boundary correctness, idempotence. + +Integration +- STT WS: VAD commit timing; latency assertions (mock clocks). +- TTS streaming: PCM first‑chunk timing and multi‑format correctness. + +Performance +- Synthetic end‑to‑end voice‑to‑voice harness; compute p50/p90, store summaries. +- Optional diarization on recorded sessions (pyannote) for verification (local opt‑in). + - Negative‑path: slow reader/underrun, disconnects mid‑stream, silent/high‑noise input, malformed WS frames. + +## Rollout Plan + +Phase 1 (default on via flags) +- Ship VAD turn detection, latency metrics, PCM format, phoneme map hooks. + +Phase 2 (opt‑in) +- Structured JSON streaming; WS TTS behind feature flags. + +Phase 3 (optional) +- WebRTC egress (aiortc) behind feature flag and environment readiness guide. + +Documentation +- Update API docs, WebUI help, and latency tuning guidelines. + +## Risks & Mitigations + +- VAD misfires cause premature finals → conservative defaults; tunables; quick rollback. +- PCM clients mishandle raw streams → clear examples; fall back to MP3/Opus. +- Over‑instrumentation overhead → light timers; sampling; config‑gated metrics. 
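+
+For reference, an illustrative sketch of recording the derived metrics from the Measurement Model into the histograms defined in Telemetry & Metrics (same names and buckets as above). The label values and the helper name are assumptions.
+
+```python
+from prometheus_client import Histogram
+
+# Buckets as specified in Telemetry & Metrics above.
+LATENCY_BUCKETS = (0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5)
+
+voice_to_voice_seconds = Histogram(
+    "voice_to_voice_seconds", "End-of-speech to first audio byte sent",
+    ["provider", "route"], buckets=LATENCY_BUCKETS)
+stt_final_latency_seconds = Histogram(
+    "stt_final_latency_seconds", "End-of-speech to final transcript emit",
+    ["model", "variant"], buckets=LATENCY_BUCKETS)
+tts_ttfb_seconds = Histogram(
+    "tts_ttfb_seconds", "TTS request to first audio chunk emitted",
+    ["provider", "voice", "format"], buckets=LATENCY_BUCKETS)
+
+
+def record_turn(eos_detected_at: float, stt_final_emitted_at: float,
+                tts_request_started_at: float, tts_first_chunk_sent_at: float) -> None:
+    """Observe one conversational turn; timestamps are epoch seconds. Label values here are illustrative."""
+    stt_final_latency_seconds.labels(model="parakeet", variant="streaming").observe(
+        stt_final_emitted_at - eos_detected_at)
+    tts_ttfb_seconds.labels(provider="kokoro", voice="default", format="pcm").observe(
+        tts_first_chunk_sent_at - tts_request_started_at)
+    voice_to_voice_seconds.labels(provider="kokoro", route="ws").observe(
+        tts_first_chunk_sent_at - eos_detected_at)
+```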
+ +## Open Questions + +- Default to structured JSON streaming for voice chat, or keep opt‑in per request/model? +- Preferred UI channel for code/links (reuse existing WS vs. SSE)? +- Region/affinity hints for distributed/self‑host deployments? + +## Out of Scope + +- New LLM providers and unrelated RAG changes. +- Browser TURN/STUN provisioning; full WebRTC infra (unless Phase 3 explicitly enabled). + +## Acceptance Criteria + +- [ ] p50 voice‑to‑voice ≤ 1.0s on a local reference setup; p90 ≤ 1.8s. +- [ ] p50 STT final latency ≤ 600ms; p50 TTS TTFB ≤ 250ms (reference setup). +- [ ] PCM streaming option documented and validated with example clients. +- [ ] Optional phoneme map configurable and applied in Kokoro path. +- [ ] Structured streaming mode available and tested end‑to‑end. +- [ ] Metrics exported and visible in existing registry with labels. + - [ ] No regressions in quotas/auth for audio endpoints; REST streaming remains backwards‑compatible. diff --git a/Docs/Product/Test-Gates-Implementation.md b/Docs/Product/Test-Gates-Implementation.md new file mode 100644 index 000000000..b4d62c190 --- /dev/null +++ b/Docs/Product/Test-Gates-Implementation.md @@ -0,0 +1,222 @@ +# Test Gates Implementation + +Purpose: establish a pragmatic, long‑term approach to keep unit tests fast and deterministic by lazily initializing heavy subsystems and gating their route imports. This prevents timeouts/hangs caused by import‑time side effects (e.g., connection pools, background threads) while preserving full functionality for opt‑in integration suites and production. + +## Summary + +- Make heavy subsystems lazy: no connections/threads at import time. +- Gate heavy routers behind environment/config toggles and import inside those gates. +- Default tests to a minimal app profile; provide opt‑in markers/env for heavy suites. +- Harmonize `TEST_MODE` semantics and use small pool sizes under tests. + +Targets (initial): +- Evaluations (connection pool + webhook manager) +- Jobs/metrics workers that start at app startup +- Any router with heavy import‑time work (e.g., OCR/VLM only if needed) + +## Goals & Non‑Goals + +Goals +- Fast unit tests by default (< a few seconds per file) without rewriting tests. +- Deterministic startup/teardown in TestClient. +- Simple, explicit switches to run heavy integration suites locally and in CI. + +Non‑Goals +- Changing production behavior when routes are enabled. +- Removing features; this is about initialization timing and control. + +## Design Overview + +1) Lazy singletons for heavy managers +- Replace module‑level globals with getters that construct on first use. +- Example (Evaluations): + - Before: `connection_manager = EvaluationsConnectionManager()` at import. + - After: `@lru_cache(maxsize=1) def get_connection_manager(...): return EvaluationsConnectionManager(...)`. + - Update helpers: `get_connection() -> get_connection_manager().get_connection()`. +- Provide `shutdown_*_if_initialized()` helpers that no‑op if never created. + +2) Route import gating (main app) +- Import heavy routers only inside `route_enabled("…")` gates, right before `include_router`. +- Use existing route policy from config/env (`API-Routes` in `config.txt`, `ROUTES_DISABLE`, `ROUTES_ENABLE`). +- Effect: if a route is disabled, its module is not imported and cannot trigger heavy work. + +- Precedence: `enable` overrides `disable`; `disable` overrides defaults; `enable` overrides `stable_only`. 
+ - During tests, certain routes are force‑enabled to avoid 404s (workflows, sandbox, scheduler, mcp‑unified, mcp‑catalogs, jobs, personalization). + +3) Minimal test profile by default +- In tests, set `MINIMAL_TEST_APP=1` and extend `ROUTES_DISABLE` to include heavy keys (e.g., `evaluations`) unless explicitly opted‑in. +- Provide pytest marker/fixture to enable heavy routes for specific tests/suites. + +4) TEST_MODE normalization and pool sizing +- Normalize truthiness across `TEST_MODE` and `TLDW_TEST_MODE` to {"1","true","yes","y","on"}. +- Under tests, use small pool sizes/timeouts to reduce overhead (e.g., pool_size=1, max_overflow=2, timeout=5) for subsystems like Evaluations. + +## Environment & Config Toggles + +Config file: `tldw_Server_API/Config_Files/config.txt` section `[API-Routes]` +- `stable_only = true|false` (default is false when config is loaded; if config cannot be read, a conservative default of true is used). +- `disable = a,b,c` +- `enable = x,y,z` +- `experimental_routes = k1,k2` + +Environment variables (precedence > config.txt): +- `ROUTES_STABLE_ONLY` — same as `stable_only`. +- `ROUTES_DISABLE` — comma/space list of route keys to disable. +- `ROUTES_ENABLE` — comma/space list of route keys to force‑enable. +- `ROUTES_EXPERIMENTAL` — extend experimental list (affects `stable_only`). +- `MINIMAL_TEST_APP` — enables minimal test app profile (fast startup; selective routers). +- `ULTRA_MINIMAL_APP` — health‑only profile (diagnostics). +- `TEST_MODE` / `TLDW_TEST_MODE` — unified test flags; treat truthy values as {1,true,yes,y,on}. +- `RUN_EVALUATIONS` — opt‑in heavy Evaluations routes for tests/CI. + +- `DISABLE_HEAVY_STARTUP` — force synchronous startup (disable deferral of heavy work). +- `DEFER_HEAVY_STARTUP` — defer heavy/non‑critical startup tasks to background. +- Jobs/metrics worker toggles to avoid starting background workers in tests/CI: + - `AUDIO_JOBS_WORKER_ENABLED`, `JOBS_WEBHOOKS_ENABLED`, `JOBS_WEBHOOKS_URL`, `JOBS_METRICS_GAUGES_ENABLED`, `JOBS_METRICS_RECONCILE_ENABLE`, `JOBS_CRYPTO_ROTATE_SERVICE_ENABLED`. + +Notes: +- Route keys are lowercase and comma/space separated; both `-` and `_` are commonly used. + +Recommended test defaults: +- `MINIMAL_TEST_APP=1` +- `ROUTES_DISABLE=research,evaluations` (extend existing value without clobbering) +- `TEST_MODE=1` + +Opt‑in heavy suite: +- Set `RUN_EVALUATIONS=1` (fixture or job env) and remove `evaluations` from `ROUTES_DISABLE`. + +## Implementation Plan (Stages) + +Stage 1 — Design & Staging +- Add this doc and an IMPLEMENTATION_PLAN.md (optional) summarizing stages and success criteria. + +Stage 2 — Lazy Singletons (Evaluations/Webhooks) +- File: `tldw_Server_API/app/core/Evaluations/connection_pool.py` + - Replace global `connection_manager` with `get_connection_manager()` (lru_cache). + - Update `get_connection()` / `get_connection_async()` to call the getter. + - Add `shutdown_evaluations_pool_if_initialized()`. +- File: `tldw_Server_API/app/core/Evaluations/webhook_manager.py` + - Provide `get_webhook_manager()` that constructs on first use. + - Ensure schema init runs only when manager is first used. +- File: `tldw_Server_API/app/main.py` + - Use shutdown helper; stop accessing module globals directly. + +Stage 3 — Gate Heavy Router Imports +- File: `tldw_Server_API/app/main.py` + - Move heavy router imports inside `if route_enabled("…"):` blocks. + - Include only when enabled; otherwise avoid importing the module at all. 
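+
+A sketch of the import-within-gate pattern described in Stage 3 (the `route_enabled` helper and router module path are taken from the pointers in this doc; exact signatures and the router attribute are assumptions):
+
+```python
+# Illustrative: import the heavy router only when its route key is enabled, so a
+# disabled route never triggers import-time side effects (pools, threads, schema init).
+import logging
+
+from tldw_Server_API.app.core.config import route_enabled  # route policy helper
+
+logger = logging.getLogger(__name__)
+
+def include_optional_routers(app) -> None:
+    if route_enabled("evaluations"):
+        # Module is only imported inside the gate.
+        from tldw_Server_API.app.api.v1.endpoints import evaluations_unified
+        app.include_router(evaluations_unified.router, prefix="/api/v1")
+    else:
+        logger.info("Route 'evaluations' disabled; router not imported")
+```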
+ +Stage 4 — Default Minimal Test Profile +- File: `tldw_Server_API/tests/conftest.py` + - `os.environ.setdefault("MINIMAL_TEST_APP", "1")`. + - Extend `ROUTES_DISABLE` to include `evaluations` unless `RUN_EVALUATIONS=1`. +- Add pytest marker `evaluations`; a session fixture toggles env accordingly for marked tests. + +Stage 5 — TEST_MODE & Pool Sizing Harmonization +- File: `tldw_Server_API/app/api/v1/API_Deps/rate_limiting.py` and related deps + - Accept truthy `TEST_MODE` / `TLDW_TEST_MODE` variants. +- File: `tldw_Server_API/app/core/Evaluations/connection_pool.py` + - Use small pool sizes when `TEST_MODE` is truthy. + +- Add shared helper `is_test_mode()` for consistent detection across modules (checks both envs; truthy set {1,true,yes,y,on}). + +Stage 6 — Docs & CI +- Update project docs (this file + Development doc): usage of toggles and patterns. +- CI: default unit job uses minimal profile; nightly/weekly job sets `RUN_EVALUATIONS=1`. + +## File/Code Pointers (initial) + +- Route gating helpers: `tldw_Server_API/app/core/config.py` (route policy functions) +- App route inclusion: `tldw_Server_API/app/main.py` (import + include_router strategy) +- Evaluations connection pool: `tldw_Server_API/app/core/Evaluations/connection_pool.py` +- Evaluations webhook manager: `tldw_Server_API/app/core/Evaluations/webhook_manager.py` +- Test client setup: `tldw_Server_API/tests/conftest.py` + +## Testing Strategy + +Unit tests (default minimal profile) +- Ensure startup is fast and no heavy connections are created when routes disabled. +- Verify `get_connection_manager()` lazily constructs and returns a singleton. +- Verify rate limiting bypass respects all truthy `TEST_MODE`/`TLDW_TEST_MODE` forms. + +Opt‑in integration tests (`-m evaluations` or `RUN_EVALUATIONS=1`) +- Confirm `/api/v1/evaluations/*` routes are present and functional. +- Assert pools and background workers start/stop cleanly. + +Regression checks +- With `ROUTES_DISABLE=evaluations`, importing `main.py` must not create Evaluations connections. +- Shutdown helpers must not error if never initialized. + +## CI Guidance + +- Unit job (default) +- Env: `MINIMAL_TEST_APP=1`, `TEST_MODE=1`, `ROUTES_DISABLE=research,evaluations` (merge with any existing value). +- Run standard markers: `-m "not evaluations and not jobs"`. + +Evaluations job (opt‑in) +- Env: `RUN_EVALUATIONS=1`, `MINIMAL_TEST_APP=0` or remove `evaluations` from `ROUTES_DISABLE`. +- Run markers: `-m evaluations`. + +Jobs/other heavy suites (optional) +- Maintain separate CI jobs with explicit env toggles, mirroring the pattern above. + +## Backward Compatibility & Migration + +- If any code imports Evaluations globals directly (e.g., `from …connection_pool import connection_manager`), add a temporary alias: + - Define a module‑level property that returns `get_connection_manager()` and log a deprecation warning. +- Prefer dependency‑injection or accessor functions (`get_…()`) over importing singletons. + +## Risks & Mitigations + +- Hidden heavy imports remain elsewhere + - Mitigation: search for module‑level instantiation patterns; convert to lazy as needed. +- Shutdown ordering issues in tests + - Mitigation: centralize shutdown via helpers and app lifespan; add session‑level teardown fixtures. + +## Operational Notes + +- This approach does not change production behavior when routes are enabled. +- When debugging, you can temporarily disable lazy gating by enabling the routes to compare startup behavior. 
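+
+A sketch of the shared `is_test_mode()` helper from Stage 5, using the truthy set listed above (module placement and exact signature are assumptions):
+
+```python
+# Illustrative is_test_mode() helper: checks both env vars against one truthy set.
+import os
+
+_TRUTHY = {"1", "true", "yes", "y", "on"}
+
+def is_test_mode() -> bool:
+    """True if TEST_MODE or TLDW_TEST_MODE is set to a truthy value."""
+    return any(
+        os.getenv(var, "").strip().lower() in _TRUTHY
+        for var in ("TEST_MODE", "TLDW_TEST_MODE")
+    )
+```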
+ +## Quick Verification + +1) Disable evaluations and run a single test +``` +export MINIMAL_TEST_APP=1 +export ROUTES_DISABLE="${ROUTES_DISABLE},evaluations" +export TEST_MODE=1 +pytest -q tldw_Server_API/tests/WebScraping/test_webscraping_usage_events.py::test_webscrape_process_usage_event +``` +Expect: fast startup, no Evaluations pool logs, test completes quickly. + +2) Enable evaluations for integration run +``` +export RUN_EVALUATIONS=1 +unset MINIMAL_TEST_APP +ROUTES_DISABLE="$(echo "$ROUTES_DISABLE" | tr ',' '\n' | awk 'tolower($0)!="evaluations" && $0!=""' | paste -sd, -)" +export TEST_MODE=1 +pytest -m evaluations -q +``` +Expect: evaluations routes loaded; pools created; graceful shutdown. + +Note: Some routes are force-enabled during tests by `route_enabled()` (workflows, sandbox, scheduler, mcp-unified, mcp-catalogs, jobs, personalization), independent of `ROUTES_DISABLE`. This avoids 404s in common test paths. + +Examples +- Lazy getter with shutdown helper: + - `from functools import lru_cache` + - `@lru_cache(maxsize=1)` + - `def get_connection_manager(): return EvaluationsConnectionManager(...)` + - `def shutdown_evaluations_pool_if_initialized():` call `get_connection_manager().shutdown()` then `get_connection_manager.cache_clear()` if instantiated. +- Import-within-gate pattern (in `main.py`): + - `if route_enabled("evaluations"):` then import and `app.include_router(...)`; otherwise log disabled. + +- Shutdown helpers in `main.py` lifespan teardown (after app subsystems): + - `from tldw_Server_API.app.core.Evaluations.connection_pool import shutdown_evaluations_pool_if_initialized` + - `from tldw_Server_API.app.core.Evaluations.webhook_manager import shutdown_webhook_manager_if_initialized` + - Call both in shutdown; helpers are no‑ops if never initialized. + +Contributor checklist for heavy modules +- No import-time threads/connections or background tasks. +- Provide a lazy `get_...()` accessor and a `shutdown_..._if_initialized()` helper. +- Register a route key in `[API-Routes]` and honor `ROUTES_DISABLE`/`ROUTES_ENABLE`. +- If tests are heavy, add a pytest marker and CI skip by default. diff --git a/Docs/Product/Watch_IMPLEMENTATION_PLAN.md b/Docs/Product/Watch_IMPLEMENTATION_PLAN.md new file mode 100644 index 000000000..98ea1d2c6 --- /dev/null +++ b/Docs/Product/Watch_IMPLEMENTATION_PLAN.md @@ -0,0 +1,131 @@ +# Watchlists v1 - Implementation Plan (Bridge PRD) + +This plan tracks the remaining work to wrap Watchlists v1 per the Bridge PRD. Each stage lists goals, success criteria, and concrete test points. Update Status as work progresses. + +## Current Status (snapshot) +- Core endpoints and WebUI implemented (filters CRUD, include-only gating, OPML import/export with group filter, preview, global runs, CSV exports). +- Tests added for CSV exports, OPML large/tag cases, global runs pagination/isolation, preview, YouTube normalization edges, and rate-limit headers (strict mode). +- Docs updated (API: runs/tallies/OPML examples/gating table; Product PRD; Ops runbook). 410 shim for legacy Subscriptions is live. + +## Remaining To-Do (v1 sign-off) +- Verify “Runs” role gating against the real user object in your auth setup; otherwise rely on env toggles (`NEXT_PUBLIC_RUNS_REQUIRE_ADMIN`). +- Optional: widen YouTube normalization edge tests (keep policy of 400 for handles/vanity). +- Optional: add include_tallies aggregation mode to global runs CSV if admins need it. 
+- Optional: deterministic rate-limit header assertions under a non-test configuration for OPML import and filters endpoints. + +## Stage 1: QA, Deprecations, and Docs Finalization +**Goal**: Ship Phase B wrap-up with hardened inputs, finalized docs, and visible metrics. + +**Success Criteria** +- API docs include `GET /api/v1/watchlists/runs`, `include_tallies` for Run Detail, and OPML export `group` filter. +- Deprecation path finalized: all `/api/v1/subscriptions/*` return 410 with Link header and docs + release notes updated. +- YouTube normalization hardened (handles/vanity accepted → canonical; normalization headers logged in diagnostics). +- Admin Runs view shows per-run counters and supports CSV/JSON export. + +**Tests** +- OPML export filtering: group, group+tag, type interactions. + - tldw_Server_API/tests/Watchlists/test_opml_export_group.py +- YouTube normalization: create/update/bulk non-canonical inputs → normalized URL + headers. + - tldw_Server_API/tests/Watchlists/test_youtube_normalization_more.py +- Run Detail tallies toggle returns `filter_tallies` when `include_tallies=true` and totals always present. + - tldw_Server_API/tests/Watchlists/test_run_detail_filters_totals.py +- Optional: rate-limit headers present under non-test mode for OPML import and filters endpoints. + - tldw_Server_API/tests/Watchlists/test_rate_limit_headers_optional.py + +**Status**: Completed + +--- + +## Stage 2: Migration Tooling (Subscriptions → Watchlists) +**Goal**: Provide an easy migration path from legacy Subscriptions to Watchlists. + +**Success Criteria** +- CLI/import helper exports legacy Subscriptions as OPML + JSON filters and creates mapped Watchlists sources/jobs with filters. +- Dry-run mode prints planned changes without writing. +- Playbook doc (mapping table and fallbacks) linked from README/Docs. + +**Tests** +- Unit: mapping from legacy fields → `{source, job, filters}` payloads (edge cases, unknown fields). + - Helper_Scripts/tests/test_subscriptions_mapping.py +- Integration: sample legacy export → import → verify created sources/jobs/filters; dry-run yields no DB writes. + - tldw_Server_API/tests/Watchlists/test_migration_import_cli.py + +**Status**: Not Required (Subscriptions never shipped to prod; use OPML import) + +--- + +## Stage 3: v1 UX Enhancements +**Goal**: Improve usability with preview/dry-run, richer filter editing, and stronger runs browsing. + +**Success Criteria** +- Preview/dry-run endpoint (no ingestion) returns candidate items with matched filter metadata. + - `POST /api/v1/watchlists/jobs/{id}/preview?limit=…` (or equivalent) returns items + reason (filter id/type/action). +- Filters editor supports reorder, enable/disable, presets, and advanced JSON textarea. +- Runs UI: global runs search/pagination, per-job pagination, tallies toggle, download log, link to items scoped by run. + +**Tests** +- API: preview returns candidates and `matched_filter` indications; respects include-only gating. + - tldw_Server_API/tests/Watchlists/test_preview_endpoint.py +- UI (lightweight): validate presence of editor controls and basic input constraints (IDs numeric, non-negative). + - tldw-frontend/tests/watchlists_ui_smoke.test.ts + +**Status**: Completed + +--- + +## Stage 4: Output & Delivery Expansions +**Goal**: Polish template authoring and wire delivery channels (email, Chatbook), with optional audio briefs. + +**Success Criteria** +- Templates: CRUD with name/description/version; selectable per job; version history retained. 
+- Delivery: email and Chatbook paths configurable per job (subject/body, conversation target), with success/failure surfaced in run outputs. +- Optional: audio brief via TTS for small result sets. + +**Tests** +- Unit: template rendering with variables and version selection. + - tldw_Server_API/tests/Watchlists/test_templates_rendering.py +- Integration: email + Chatbook delivery using mocks; run artifacts record delivery status and IDs. + - tldw_Server_API/tests/Watchlists/test_delivery_integrations.py +- Optional: TTS brief generated and attached when item count below threshold. + - tldw_Server_API/tests/Watchlists/test_tts_brief_optional.py + +**Status**: Partially Completed (templates/output delivery paths exist; advanced authoring/versioning and optional TTS are future work) + +--- + +## Stage 5: Scale & Reliability +**Goal**: Improve scheduling controls, dedup/seen visibility, and performance at higher scale. + +**Success Criteria** +- Scheduler UX: concurrency, per-host delay, backoff controls; show next/last run per job. +- Dedup/seen: expose counts and reset tools per source; admin tooling to inspect/clear. +- Performance: validated on large filter sets, many sources, and long OPML imports; document limits and recommended settings. + +**Tests** +- Scheduling: concurrency/backoff honored; next/last timestamps updated correctly. + - tldw_Server_API/tests/Watchlists/test_scheduler_controls.py +- Dedup/seen: counts accurate; reset clears state safely; no duplicate ingestion after reset. + - tldw_Server_API/tests/Watchlists/test_dedup_seen_tools.py +- Performance (sanity): marked `perf` scenarios for large inputs complete within budget. + - tldw_Server_API/tests/Watchlists/test_perf_scenarios.py +- Rate-limit headers deterministic under non-test mode with configured backend. + - tldw_Server_API/tests/Watchlists/test_rate_limit_headers_strict.py + +**Status**: In Progress (scheduler/dedup tooling are broader platform items) + +--- + +## Notes +- Include-only gating: default can be set per-org (and via env); tests should cover both job-flag and org-default paths. +- Keep tests deterministic; mock external services (feeds, email, Chatbook, TTS). Mark performance tests with `@pytest.mark.perf`. +- Update Docs/Published/API-related/Watchlists_API.md and Docs/Published/RELEASE_NOTES.md alongside code changes. + +Checklist (quick) +- [x] CSV export tests (global/by-job + tallies; headers/rows) +- [x] OPML export tests (multi-group OR + tag AND; large set; tag case-insensitivity) +- [x] Global runs API tests (q search, pagination boundaries, user isolation) +- [x] Docs polish (gating table, OPML examples, regex flags note, Admin Items/CSV) +- [x] Preview endpoint tests (RSS + site; include-only on/off) +- [x] Rate-limit headers strict test (non-test mode via monkeypatch) +- [ ] Verify Runs role gating against real user object (or disable via env) +- [ ] Optional: CSV include_tallies aggregation mode (API + UI) diff --git a/Docs/Providers/Model_Pricing_Catalog.md b/Docs/Providers/Model_Pricing_Catalog.md new file mode 100644 index 000000000..6b939f6f8 --- /dev/null +++ b/Docs/Providers/Model_Pricing_Catalog.md @@ -0,0 +1,117 @@ +# Model Pricing Catalog (Primary Model Source) + +The pricing catalog at `tldw_Server_API/Config_Files/model_pricing.json` is the primary reference for +listing available commercial LLM models across the API and WebUI. 
Entries here both: + +- Define per‑1K token pricing for usage tracking (prompt/completion in USD), and +- Seed the available models list returned by `GET /api/v1/llm/providers` (merged with any models in `config.txt`). + +When you add a model to this file (or to the `PRICING_OVERRIDES` env var), it becomes selectable in the WebUI +and available to the Chat API as `provider/model`. + +## How It Works + +- Source order: + 1. `PRICING_OVERRIDES` (JSON in env) + 2. `Config_Files/model_pricing.json` + 3. Built‑in conservative defaults +- Admin reload (no restart): `POST /api/v1/admin/llm-usage/pricing/reload` +- Providers API: `GET /api/v1/llm/providers` includes models from the pricing catalog for commercial providers. +- Embedding model IDs are intentionally filtered out from the Chat model lists. + +## Editing the Catalog + +- Format: JSON object by provider, then model id → `{prompt: number, completion: number}` (USD per 1K tokens). +- Example (OpenAI text models only): + +``` +{ + "openai": { + "gpt-4o": { "prompt": 0.005, "completion": 0.015 }, + "gpt-4o-mini": { "prompt": 0.001, "completion": 0.002 }, + "gpt-4.1": { "prompt": 0.010, "completion": 0.030 }, + "o3-mini": { "prompt": 0.001, "completion": 0.002 } + } +} +``` + +Tip: Keep values conservative if you’re unsure, then update with exact rates from provider pricing pages. + +## Provider Quick Links + +- Anthropic: https://docs.claude.com/en/docs/about-claude/models/overview +- OpenAI (text models): https://platform.openai.com/docs/pricing +- Z.ai: https://docs.z.ai/guides/overview/pricing +- Moonshot (Kimi): https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2 +- Cohere: https://docs.cohere.com/docs/models +- Minimax: https://platform.minimax.io/docs/guides/pricing + +## Example Snippets by Provider + +These examples illustrate the expected shape. Replace with current values from the linked pages above. + +Anthropic (Claude 4.5/4.1 family): +``` +{ + "anthropic": { + "claude-opus-4.1": { "prompt": 0.015, "completion": 0.075 }, + "claude-sonnet-4.5": { "prompt": 0.003, "completion": 0.015 }, + "claude-haiku-4.5": { "prompt": 0.001, "completion": 0.005 } + } +} +``` + +OpenAI (text models only – do not include embeddings here): +``` +{ + "openai": { + "gpt-4o": { "prompt": 0.005, "completion": 0.015 }, + "gpt-4o-mini": { "prompt": 0.001, "completion": 0.002 }, + "gpt-4.1": { "prompt": 0.010, "completion": 0.030 }, + "o3-mini": { "prompt": 0.001, "completion": 0.002 } + } +} +``` + +Z.ai: +``` +{ + "zai": { + "": { "prompt": 0.000, "completion": 0.000 } + } +} +``` + +Moonshot (Kimi): +``` +{ + "moonshot": { + "kimi-k2": { "prompt": 0.000, "completion": 0.000 } + } +} +``` + +Cohere (Command family): +``` +{ + "cohere": { + "command": { "prompt": 0.0005, "completion": 0.0012 }, + "command-r": { "prompt": 0.0015, "completion": 0.0030 } + } +} +``` + +Minimax: +``` +{ + "minimax": { + "": { "prompt": 0.000, "completion": 0.000 } + } +} +``` + +## Validation & Troubleshooting + +- After editing, call: `POST /api/v1/admin/llm-usage/pricing/reload`. +- Verify in WebUI → Providers tab, or via `GET /api/v1/llm/providers`. +- If a model appears only in `config.txt`, it’s listed but costs may be “estimated.” Add it here for exact rates. 
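+
+If you want to sanity-check the file shape before reloading, here is a small illustrative script (not part of the repo) that validates the provider → model → `{prompt, completion}` structure described above:
+
+```python
+# Illustrative validation of model_pricing.json against the documented shape.
+import json
+from pathlib import Path
+
+def check_pricing_catalog(path: str = "tldw_Server_API/Config_Files/model_pricing.json") -> None:
+    catalog = json.loads(Path(path).read_text(encoding="utf-8"))
+    total = 0
+    for provider, models in catalog.items():
+        for model_id, price in models.items():
+            missing = {"prompt", "completion"} - set(price)
+            if missing:
+                raise ValueError(f"{provider}/{model_id}: missing {sorted(missing)}")
+            if any(not isinstance(price[key], (int, float)) or price[key] < 0
+                   for key in ("prompt", "completion")):
+                raise ValueError(f"{provider}/{model_id}: prices must be non-negative numbers")
+            total += 1
+    print(f"OK: {total} model entries validated")
+
+if __name__ == "__main__":
+    check_pricing_catalog()
+```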
diff --git a/Docs/Published/API-related/Chat_API_Documentation.md b/Docs/Published/API-related/Chat_API_Documentation.md index d23c839b2..715f33fd7 100644 --- a/Docs/Published/API-related/Chat_API_Documentation.md +++ b/Docs/Published/API-related/Chat_API_Documentation.md @@ -17,7 +17,7 @@ Follows OpenAI-style chat payload with extensions. Key fields: -- `model` (string): Target model. May be prefixed as `provider/model` (e.g., `anthropic/claude-3-5-sonnet`). +- `model` (string): Target model. May be prefixed as `provider/model` (e.g., `anthropic/claude-sonnet-4.5`). - `messages` (array): Conversation turns. Supports roles `system`, `user`, `assistant`, `tool`. - User message `content` may be a string or a list of parts: text and base64 data URI `image_url`. - `stream` (bool): If true, returns Server-Sent Events (SSE) for streaming. @@ -45,7 +45,7 @@ curl -N -X POST http://127.0.0.1:8000/api/v1/chat/completions \ -H "Content-Type: application/json" \ -H "X-API-KEY: $API_KEY" \ -d '{ - "model": "anthropic/claude-3-5-sonnet", + "model": "anthropic/claude-sonnet-4.5", "messages": [{"role":"user","content":"Stream this response."}], "stream": true }' diff --git a/Docs/Published/API-related/Evaluations_API_Reference.md b/Docs/Published/API-related/Evaluations_API_Reference.md index 929a232fa..1fa3d0d54 100644 --- a/Docs/Published/API-related/Evaluations_API_Reference.md +++ b/Docs/Published/API-related/Evaluations_API_Reference.md @@ -925,7 +925,7 @@ openai_model = gpt-4 # Anthropic anthropic_api_key = sk-ant-... -anthropic_model = claude-3-sonnet-20240229 +anthropic_model = claude-sonnet-4.5 # Google google_api_key = ... diff --git a/Docs/Published/Code_Documentation/Chat_Developer_Guide.md b/Docs/Published/Code_Documentation/Chat_Developer_Guide.md index 1091a590d..0cdebe94d 100644 --- a/Docs/Published/Code_Documentation/Chat_Developer_Guide.md +++ b/Docs/Published/Code_Documentation/Chat_Developer_Guide.md @@ -64,7 +64,7 @@ Related: - At app startup, `main.py` seeds the `provider_manager` from `provider_config.API_CALL_HANDLERS` for health/fallback. Provider selection notes: -- Requests may specify models with a provider prefix (e.g., `anthropic/claude-3-opus`). The endpoint extracts the provider and model automatically. +- Requests may specify models with a provider prefix (e.g., `anthropic/claude-opus-4.1`). The endpoint extracts the provider and model automatically. - Provider fallback is available via `provider_manager`; controlled by `[Chat-Module].enable_provider_fallback` (disabled by default for stability). 
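+
+Illustrative only — the actual extraction happens inside the chat endpoint; this sketch just shows the `provider/model` prefix convention:
+
+```python
+# Sketch of the provider-prefix convention (hypothetical helper, not project code).
+from typing import Optional, Tuple
+
+def split_provider_model(model: str, default_provider: Optional[str] = None) -> Tuple[Optional[str], str]:
+    if "/" in model:
+        provider, _, name = model.partition("/")
+        return provider, name
+    return default_provider, model
+
+assert split_provider_model("anthropic/claude-opus-4.1") == ("anthropic", "claude-opus-4.1")
+```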
### Adding a Provider (Checklist) @@ -85,7 +85,7 @@ Provider selection notes: - `logprobs/top_logprobs` relationships - Tool definitions size limits - Request size limits (`MAX_REQUEST_SIZE`), see `chat_validators.py` - - Model strings with provider prefixes like `anthropic/claude-3-opus` (provider extracted automatically) + - Model strings with provider prefixes like `anthropic/claude-opus-4.1` (provider extracted automatically) - Image inputs on user messages via `image_url` content parts (expects data URI with base64; validated/sanitized) ## Error Handling diff --git a/Docs/Published/Deployment/First_Time_Production_Setup.md b/Docs/Published/Deployment/First_Time_Production_Setup.md index 90d2bed3f..2f4defdf9 100644 --- a/Docs/Published/Deployment/First_Time_Production_Setup.md +++ b/Docs/Published/Deployment/First_Time_Production_Setup.md @@ -52,7 +52,7 @@ cp .env.example .env # Required values (examples) export AUTH_MODE=multi_user export JWT_SECRET_KEY="$(openssl rand -base64 64)" -export DATABASE_URL="postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users" +export DATABASE_URL="postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users" # Strong single-user key if you use single_user mode instead export SINGLE_USER_API_KEY="$(python -c "import secrets;print(secrets.token_urlsafe(32))")" diff --git a/Docs/Published/Deployment/Reverse_Proxy_Examples.md b/Docs/Published/Deployment/Reverse_Proxy_Examples.md index 20f71fa7a..662fb6eb0 100644 --- a/Docs/Published/Deployment/Reverse_Proxy_Examples.md +++ b/Docs/Published/Deployment/Reverse_Proxy_Examples.md @@ -200,6 +200,13 @@ export ALLOWED_ORIGINS='["https://your.domain.com", "https://admin.your.domain.c This overrides the default origins configured in `tldw_Server_API/app/core/config.py`. +Browser extensions (streaming) +- If a browser extension needs to call the API (including `text/event-stream` for SSE), add the extension origin to `ALLOWED_ORIGINS`: + ```bash + export ALLOWED_ORIGINS='["https://your.domain.com", "chrome-extension://abcd1234efgh5678"]' + ``` + The server exposes `X-Request-ID`, `traceparent`, and `X-Trace-Id` headers for correlation; these are made available to the browser via CORS `expose_headers`. + ## Security reminders - Run the app as non-root (Dockerfile.prod already does this). - Don’t log secrets in production; the app masks the single-user API key when `tldw_production=true`. diff --git a/Docs/Published/Env_Vars.md b/Docs/Published/Env_Vars.md index f33837386..e9a5108af 100644 --- a/Docs/Published/Env_Vars.md +++ b/Docs/Published/Env_Vars.md @@ -39,6 +39,13 @@ WebUI Access Guard (remote access controls) - `TLDW_WEBUI_DENYLIST`: Comma-separated IPs/CIDRs denied from `/webui`. - `TLDW_TRUSTED_PROXIES`: Comma-separated proxy IPs/CIDRs trusted for X-Forwarded-For/X-Real-IP. +WebUI CSP (Content Security Policy) +- `TLDW_WEBUI_NO_EVAL`: When set, controls whether `'unsafe-eval'` is allowed for `/webui` scripts. + - Precedence: if present, its truthiness decides the policy; otherwise a production-aware default applies. + - Truthy values (case-insensitive): `1`, `true`, `yes`, `on`, `y` → DISABLE eval (no `'unsafe-eval'`). + - Falsy values (e.g., `0`, `false`) → ENABLE eval. + - If unset: default is `False` (no eval) in production (`ENVIRONMENT|APP_ENV|ENV in {prod, production}`), and `True` (allow eval) in non-production. + ## AuthNZ (Authentication) - `AUTH_MODE`: `single_user` | `multi_user`. - `DATABASE_URL`: AuthNZ database URL. For production multi-user, use Postgres. 
diff --git a/Docs/Published/Overview/Feature_Status.md b/Docs/Published/Overview/Feature_Status.md new file mode 100644 index 000000000..d90e1747e --- /dev/null +++ b/Docs/Published/Overview/Feature_Status.md @@ -0,0 +1,139 @@ +# Feature Status Matrix + +Legend +- Working: Stable and actively supported +- WIP: In active development; APIs or behavior may evolve +- Experimental: Available behind flags or with caveats; subject to change + +## Admin Reporting +- HTTP usage (daily): `GET /api/v1/admin/usage/daily` +- HTTP top users: `GET /api/v1/admin/usage/top` +- LLM usage log: `GET /api/v1/admin/llm-usage` +- LLM usage summary: `GET /api/v1/admin/llm-usage/summary` (group_by=`user|provider|model|operation|day`) +- LLM top spenders: `GET /api/v1/admin/llm-usage/top-spenders` +- LLM CSV export: `GET /api/v1/admin/llm-usage/export.csv` +- Grafana dashboard JSON (LLM cost + tokens): `Docs/Deployment/Monitoring/Grafana_LLM_Cost_Top_Providers.json` +- Grafana dashboard JSON (LLM Daily Spend): `Docs/Deployment/Monitoring/Grafana_LLM_Daily_Spend.json` +- Prometheus alert rules (daily spend thresholds): `Samples/Prometheus/alerts.yml` + +## Media Ingestion + +| Capability | Status | Notes | Links | +|---|---|---|---| +| URLs/files: video, audio, PDFs, EPUB, DOCX, HTML, Markdown, XML, MediaWiki | Working | Unified ingestion + metadata | [docs](Docs/Code_Documentation/Ingestion_Media_Processing.md) · [code](tldw_Server_API/app/api/v1/endpoints/media.py) | +| yt-dlp downloads + ffmpeg | Working | 1000+ sites via yt-dlp | [code](tldw_Server_API/app/core/Ingestion_Media_Processing/Video/Video_DL_Ingestion_Lib.py) | +| Adaptive/multi-level chunking | Working | Configurable size/overlap | [docs](Docs/API-related/Chunking_Templates_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chunking.py) | +| OCR on PDFs/images | Working | Tesseract baseline; optional dots.ocr/POINTS | [docs](Docs/API-related/OCR_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/ocr.py) | +| MediaWiki import | Working | Config via YAML | [docs](Docs/Code_Documentation/Ingestion_Pipeline_MediaWiki.md) · [config](tldw_Server_API/Config_Files/mediawiki_import_config.yaml) | +| Browser extension capture | WIP | Web capture extension | [docs](Docs/Product/Content_Collections_PRD.md) | + +## Audio (STT/TTS) + +| Capability | Status | Notes | Links | +|---|---|---|---| +| File-based transcription | Working | faster_whisper, NeMo, Qwen2Audio | [docs](Docs/API-related/Audio_Transcription_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | +| Real-time WS transcription | Working | `WS /api/v1/audio/stream/transcribe` | [docs](Docs/API-related/Audio_Transcription_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | +| Diarization + VAD | Working | Optional diarization, timestamps | [docs](Docs/Code_Documentation/Ingestion_Pipeline_Audio.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | +| TTS (OpenAI-compatible) | Working | Streaming + non-streaming | [docs](tldw_Server_API/app/core/TTS/TTS-README.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | +| Voice catalog + management | Working | `GET /api/v1/audio/voices/catalog` | [docs](tldw_Server_API/app/core/TTS/README.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | +| Audio jobs queue | Working | Background audio processing | [docs](Docs/API-related/Audio_Jobs_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio_jobs.py) | + +## RAG & Search + +| Capability | Status | Notes | Links | 
+|---|---|---|---| +| Full-text search (FTS5) | Working | Fast local search | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/rag_unified.py) | +| Embeddings + ChromaDB | Working | OpenAI-compatible embeddings | [docs](Docs/API-related/Embeddings_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py) | +| Hybrid BM25 + vector + rerank | Working | Contextual retrieval | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/rag_unified.py) | +| Vector Stores (OpenAI-compatible) | Working | Chroma/PG adapters | [docs](Docs/API-related/Vector_Stores_Admin_and_Query.md) · [code](tldw_Server_API/app/api/v1/endpoints/vector_stores_openai.py) | +| Media embeddings ingestion | Working | Create vectors from media | [code](tldw_Server_API/app/api/v1/endpoints/media_embeddings.py) | +| pgvector backend | Experimental | Optional backend | [code](tldw_Server_API/app/core/RAG/rag_service/vector_stores/) | + +## Chat & LLMs + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Chat Completions (OpenAI) | Working | Streaming supported | [docs](Docs/API-related/Chat_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chat.py) | +| Function calling / tools | Working | Tool schema validation | [docs](Docs/API-related/Chat_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chat.py) | +| Provider integrations (16+) | Working | Commercial + local | [docs](Docs/API-related/Providers_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/llm_providers.py) | +| Local providers | Working | vLLM, llama.cpp, Ollama, etc. | [docs](tldw_Server_API/app/core/LLM_Calls/README.md) · [code](tldw_Server_API/app/core/LLM_Calls/) | +| Strict OpenAI compat filter | Working | Filter non-standard keys | [docs](tldw_Server_API/app/core/LLM_Calls/README.md) | +| Providers listing | Working | `GET /api/v1/llm/providers` | [docs](Docs/API-related/Providers_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/llm_providers.py) | +| Moderation endpoint | Working | Basic wrappers | [code](tldw_Server_API/app/api/v1/endpoints/moderation.py) | + +## Knowledge, Notes, Prompt Studio + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Notes + tagging | Working | Notebook-style notes | [code](tldw_Server_API/app/api/v1/endpoints/notes.py) | +| Prompt library | Working | Import/export | [code](tldw_Server_API/app/api/v1/endpoints/prompts.py) | +| Prompt Studio: projects/prompts/tests | Working | Test cases + runs | [docs](Docs/API-related/Prompt_Studio_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/prompt_studio_projects.py) | +| Prompt Studio: optimization + WS | Working | Live updates | [docs](Docs/API-related/Prompt_Studio_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/prompt_studio_optimization.py) | +| Character cards & sessions | Working | SillyTavern-compatible | [docs](Docs/API-related/CHARACTER_CHAT_API_DOCUMENTATION.md) · [code](tldw_Server_API/app/api/v1/endpoints/characters_endpoint.py) | +| Chatbooks import/export | Working | Backup/export | [docs](Docs/API-related/Chatbook_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chatbooks.py) | +| Flashcards | Working | Decks/cards, APKG export | [code](tldw_Server_API/app/api/v1/endpoints/flashcards.py) | +| Reading & highlights | Working | Reading items mgmt | [docs](Docs/Product/Content_Collections_PRD.md) · 
[code](tldw_Server_API/app/api/v1/endpoints/reading.py) | + +## Evaluations + +| Capability | Status | Notes | Links | +|---|---|---|---| +| G-Eval | Working | Unified eval API | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | +| RAG evaluation | Working | Pipeline presets + metrics | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_rag_pipeline.py) | +| OCR evaluation (JSON/PDF) | Working | Text + PDF flows | [docs](Docs/API-related/OCR_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | +| Embeddings A/B tests | Working | Provider/model compare | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_embeddings_abtest.py) | +| Response quality & datasets | Working | Datasets CRUD + runs | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | + +## Research & Web Scraping + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Web search (multi-provider) | Working | Google, DDG, Brave, Kagi, Tavily, Searx | [code](tldw_Server_API/app/api/v1/endpoints/research.py) | +| Aggregation/final answer | Working | Structured answer + evidence | [code](tldw_Server_API/app/api/v1/endpoints/research.py) | +| Academic paper search | Working | arXiv, BioRxiv/MedRxiv, PubMed/PMC, Semantic Scholar, OSF | [code](tldw_Server_API/app/api/v1/endpoints/paper_search.py) | +| Web scraping service | Working | Status, jobs, progress, cookies | [docs](Docs/Product/Content_Collections_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/web_scraping.py) | + +## Connectors (External Sources) + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Google Drive connector | Working | OAuth2, browse/import | [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | +| Notion connector | Working | OAuth2, nested blocks→Markdown | [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | +| Connector policy + quotas | Working | Org policy, job quotas | [docs](Docs/Product/Content_Collections_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | + +## MCP Unified + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Tool execution APIs + WS | Working | Production MCP with JWT/RBAC | [docs](Docs/MCP/Unified/Developer_Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_unified_endpoint.py) | +| Catalog management | Working | Admin tool/permission catalogs | [docs](Docs/MCP/Unified/Modules.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_catalogs_manage.py) | +| Status/metrics endpoints | Working | Health + metrics | [docs](Docs/MCP/Unified/System_Admin_Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_unified_endpoint.py) | + +## AuthNZ, Security, Admin/Ops + +| Capability | Status | Notes | Links | +|---|---|---|---| +| Single-user (X-API-KEY) | Working | Simple local deployments | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/auth.py) | +| Multi-user JWT + RBAC | Working | Users/roles/permissions | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/auth_enhanced.py) | +| API keys manager | Working | Create/rotate/audit | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/admin.py) | +| Egress + SSRF guards | 
Working | Centralized guards | [code](tldw_Server_API/app/api/v1/endpoints/web_scraping.py) | +| Audit logging & alerts | Working | Unified audit + alerts | [docs](Docs/API-related/Audit_Configuration.md) · [code](tldw_Server_API/app/api/v1/endpoints/admin.py) | +| Admin & Ops | Working | Users/orgs/teams, roles/perms, quotas, usage | [docs](Docs/API-related/Admin_Orgs_Teams.md) · [code](tldw_Server_API/app/api/v1/endpoints/admin.py) | +| Monitoring & metrics | Working | Prometheus text + JSON | [docs](Docs/Deployment/Monitoring/README.md) · [code](tldw_Server_API/app/api/v1/endpoints/metrics.py) | + +## Storage, Outputs, Watchlists, Workflows, UI + +| Capability | Status | Notes | Links | +|---|---|---|---| +| SQLite defaults | Working | Local dev/small deployments | [code](tldw_Server_API/app/core/DB_Management/) | +| PostgreSQL (AuthNZ, content) | Working | Postgres content mode | [docs](Docs/Published/Deployment/Postgres_Content_Mode.md) | +| Outputs: templates | Working | Markdown/HTML/MP3 via TTS | [code](tldw_Server_API/app/api/v1/endpoints/outputs_templates.py) | +| Outputs: artifacts | Working | Persist/list/soft-delete/purge | [code](tldw_Server_API/app/api/v1/endpoints/outputs.py) | +| Watchlists: sources/groups/tags | Working | CRUD + bulk import | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | +| Watchlists: jobs & runs | Working | Schedule, run, run details | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | +| Watchlists: templates & OPML | Working | Template store; OPML import/export | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | +| Watchlists: notifications | Experimental | Email/chatbook delivery | [docs](Docs/Product/Watchlist_PRD.md) | +| Workflows engine & scheduler | WIP | Defs CRUD, runs, scheduler | [docs](Docs/Product/Workflows_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/workflows.py) | +| VLM backends listing | Experimental | `/api/v1/vlm/backends` | [code](tldw_Server_API/app/api/v1/endpoints/vlm.py) | +| Next.js WebUI | Working | Primary client | [code](tldw-frontend/) | +| Legacy WebUI (/webui) | Working | Feature-frozen legacy | [code](tldw_Server_API/WebUI/) | diff --git a/Docs/Published/User_Guides/Authentication_Setup.md b/Docs/Published/User_Guides/Authentication_Setup.md index b5ad3ad11..9a497abdd 100644 --- a/Docs/Published/User_Guides/Authentication_Setup.md +++ b/Docs/Published/User_Guides/Authentication_Setup.md @@ -124,11 +124,11 @@ Key settings in `.env`: - Configure PostgreSQL via `DATABASE_URL` (examples): - Local: ```bash - export DATABASE_URL=postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users + export DATABASE_URL=postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users ``` - With docker-compose (service name `postgres`): ```bash - export DATABASE_URL=postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users + export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users ``` - See Multi-User Deployment Guide for more details. 
@@ -197,14 +197,14 @@ You can configure authentication and the AuthNZ database in `tldw_Server_API/Con [AuthNZ] auth_mode = multi_user # Option A: full URL -database_url = postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users +database_url = postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users # Option B: structured fields (used if DATABASE_URL not set) db_type = postgresql pg_host = localhost pg_port = 5432 pg_db = tldw_users pg_user = tldw_user -pg_password = ChangeMeStrong123! +pg_password = TestPassword123! pg_sslmode = prefer enable_registration = true require_registration_code = false diff --git a/Docs/RAG/RAG_Notes.md b/Docs/RAG/RAG_Notes.md index 8ef1315a0..e1d7f79ae 100644 --- a/Docs/RAG/RAG_Notes.md +++ b/Docs/RAG/RAG_Notes.md @@ -12,6 +12,11 @@ Unsorted https://www.jeremykun.com/2015/04/06/markov-chain-monte-carlo-without-all-the-bullshit/ https://medium.com/ai-exploration-journey/how-hirag-turns-data-chaos-into-structured-knowledge-magic-ai-innovations-and-insights-35-d637b9a58d80 https://arxiv.org/pdf/2506.00054 +https://arxiv.org/abs/2507.05093 +https://arxiv.org/abs/2507.02962 +https://arxiv.org/abs/2507.05713 + + https://huggingface.co/datasets/isaacus/open-australian-legal-corpus https://huggingface.co/blog/adlumal/lightning-fast-vector-search-for-legal-documents https://github.com/hhy-huang/HiRAG diff --git a/Docs/STT-TTS/TTS-SETUP-GUIDE.md b/Docs/STT-TTS/TTS-SETUP-GUIDE.md index 66fe0c63d..75a7bf2c9 100644 --- a/Docs/STT-TTS/TTS-SETUP-GUIDE.md +++ b/Docs/STT-TTS/TTS-SETUP-GUIDE.md @@ -24,6 +24,32 @@ ELEVENLABS_API_KEY=your-api-key-here ## Local Model Providers +### One-Command Installers (Recommended) +Use these helpers from the repo root to install a specific backend in isolation: + +```bash +# Kokoro (v1.0 ONNX + voices) +python Helper_Scripts/TTS_Installers/install_tts_kokoro.py + +# Dia / Higgs / VibeVoice +python Helper_Scripts/TTS_Installers/install_tts_dia.py +python Helper_Scripts/TTS_Installers/install_tts_higgs.py +python Helper_Scripts/TTS_Installers/install_tts_vibevoice.py --variant 1.5B + +# NeuTTS (deps; optional prefetch) +python Helper_Scripts/TTS_Installers/install_tts_neutts.py --prefetch + +# IndexTTS2 (deps + checkpoints folder scaffold) +python Helper_Scripts/TTS_Installers/install_tts_index_tts2.py + +# Chatterbox (deps only) +python Helper_Scripts/TTS_Installers/install_tts_chatterbox.py [--with-lang] +``` + +Flags: +- `TLDW_SETUP_SKIP_PIP=1` to skip pip installs +- `TLDW_SETUP_SKIP_DOWNLOADS=1` to skip HF downloads + ### Model Auto-Download Controls Local providers (Kokoro, Higgs, Dia, Chatterbox, VibeVoice) can auto-download models the first time you use them. You can control this behavior globally or per provider. @@ -79,29 +105,33 @@ Tip (CI/Dev): The test suite sets `TTS_AUTO_DOWNLOAD=0` to avoid network during ### Kokoro Setup -Kokoro is a lightweight, high-quality TTS model that runs locally using ONNX runtime. +Kokoro is a lightweight, high-quality TTS model that runs locally using ONNX Runtime or PyTorch. We recommend the v1.0 ONNX artifacts for most users. 
#### Installation +Preferred: ```bash -# Install dependencies -pip install onnxruntime kokoro-onnx phonemizer - -# For GPU acceleration (optional) -pip install onnxruntime-gpu +python Helper_Scripts/TTS_Installers/install_tts_kokoro.py +``` +Manual alternative: +```bash +pip install onnxruntime kokoro-onnx phonemizer espeak-phonemizer +# Optional GPU: pip install onnxruntime-gpu +# Install eSpeak NG: brew install espeak-ng | sudo apt-get install -y espeak-ng +# Env var only if needed: export PHONEMIZER_ESPEAK_LIBRARY=/path/to/libespeak-ng ``` -#### Download Models +#### Download Models (v1.0 ONNX) ```bash # Create model directory mkdir -p models/kokoro -# Download ONNX model (Method 1: Using huggingface-cli) +# Use huggingface-cli to fetch the model and voices pip install huggingface-hub -huggingface-cli download kokoro-82m kokoro-v0_19.onnx --local-dir models/kokoro/ +huggingface-cli download onnx-community/Kokoro-82M-v1.0-ONNX-timestamped onnx/model.onnx --local-dir models/kokoro/ +huggingface-cli download onnx-community/Kokoro-82M-v1.0-ONNX-timestamped voices --local-dir models/kokoro/ -# Method 2: Direct download -wget https://huggingface.co/kokoro-82m/resolve/main/kokoro-v0_19.onnx -O models/kokoro/kokoro-v0_19.onnx -wget https://huggingface.co/kokoro-82m/resolve/main/voices.json -O models/kokoro/voices.json +# Optional: choose an alternate ONNX (fp16/quantized) by replacing onnx/model.onnx +# e.g., onnx/model_fp16.onnx or onnx/model_quantized.onnx ``` #### Configuration @@ -110,16 +140,31 @@ wget https://huggingface.co/kokoro-82m/resolve/main/voices.json -O models/kokoro kokoro: enabled: true use_onnx: true - model_path: ./models/kokoro/kokoro-v0_19.onnx - voices_json: ./models/kokoro/voices.json - device: cpu # or cuda for GPU - phonemizer_backend: espeak # requires espeak-ng installed + model_path: ./models/kokoro/onnx/model.onnx + voices_json: ./models/kokoro/voices # path to voices directory for v1.0 ONNX + device: cpu # or cuda for GPU (onnxruntime-gpu) +``` + +#### PyTorch Variant (optional) +```bash +# Download from hexgrad/Kokoro-82M +huggingface-cli download hexgrad/Kokoro-82M kokoro-v1_0.pth --local-dir models/kokoro/ +huggingface-cli download hexgrad/Kokoro-82M config.json --local-dir models/kokoro/ +huggingface-cli download hexgrad/Kokoro-82M voices --local-dir models/kokoro/ + +# YAML +kokoro: + enabled: true + use_onnx: false + model_path: ./models/kokoro/kokoro-v1_0.pth + voice_dir: ./models/kokoro/voices + device: cuda # or mps/cpu ``` #### System Requirements -- **Disk Space**: ~800MB for model +- **Disk Space**: ~300–330MB for `model.onnx`, plus voices directory - **RAM**: 2GB minimum -- **Optional**: espeak-ng for phonemizer (`sudo apt-get install espeak-ng` on Ubuntu) +- **eSpeak NG**: install system package; env var only for non-standard library paths ### Higgs Audio V2 Setup @@ -292,8 +337,8 @@ Models will auto-download from HuggingFace on first use. 
# Run Gradio demo for 1.5B model python demo/gradio_demo.py --model_path microsoft/VibeVoice-1.5B --share -# Run Gradio demo for 7B model -python demo/gradio_demo.py --model_path WestZhang/VibeVoice-Large-pt --share +# Run Gradio demo for 7B model (official) +python demo/gradio_demo.py --model_path vibevoice/VibeVoice-7B --share # File-based inference (single speaker) python demo/inference_from_file.py \ @@ -338,8 +383,9 @@ At runtime, a request can still override the defaults by passing `extra_params[" # In tts_providers_config.yaml vibevoice: enabled: true - vibevoice_variant: "1.5B" # or "7B" - model_path: microsoft/VibeVoice-1.5B # or WestZhang/VibeVoice-Large-pt + auto_download: true + vibevoice_variant: "1.5B" # or "7B", "7B-Q8" + model_path: microsoft/VibeVoice-1.5B # or vibevoice/VibeVoice-7B (official), FabioSarracino/VibeVoice-Large-Q8 (7B-Q8) device: cuda # GPU strongly recommended use_fp16: true enable_music: true # Spontaneous background music diff --git a/Docs/STT-TTS/VIBEVOICE_GETTING_STARTED.md b/Docs/STT-TTS/VIBEVOICE_GETTING_STARTED.md index e857f3e2d..ff4025bbf 100644 --- a/Docs/STT-TTS/VIBEVOICE_GETTING_STARTED.md +++ b/Docs/STT-TTS/VIBEVOICE_GETTING_STARTED.md @@ -7,7 +7,8 @@ This guide walks you through installing, configuring, and using the VibeVoice te - Python 3.10+ - ffmpeg installed and on `PATH` - GPU optional (CUDA recommended for performance). -- Sufficient disk space to cache models under `./models/vibevoice` (auto-download by default). +- Sufficient disk space to cache models under `./models/vibevoice`. + - Note: In tldw_server, auto-download is disabled by default. Enable per-provider in YAML via `auto_download: true` or set `VIBEVOICE_AUTO_DOWNLOAD=1`. ## 2) Install Dependencies @@ -17,10 +18,10 @@ This guide walks you through installing, configuring, and using the VibeVoice te pip install -e ".[TTS_vibevoice]" ``` -- Install the community VibeVoice package from source: +- Install the official VibeVoice package from source: ```bash -git clone https://github.com/vibevoice-community/VibeVoice.git libs/VibeVoice +git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice cd libs/VibeVoice && pip install -e . cd ../.. ``` @@ -65,14 +66,14 @@ vibevoice_speakers_to_voices = {"1":"en-Alice_woman"} vibevoice_enable_warmup_forward = false ``` -YAML alternative (`tts_providers_config.yaml`): +YAML alternative (`tldw_Server_API/app/core/TTS/tts_providers_config.yaml`): ```yaml providers: vibevoice: enabled: true - model_path: vibevoice/VibeVoice-1.5B auto_download: true + model_path: microsoft/VibeVoice-1.5B # or vibevoice/VibeVoice-7B, FabioSarracino/VibeVoice-Large-Q8 device: auto use_quantization: true voices_dir: ./voices diff --git a/Docs/STT-TTS/VIBEVOICE_INSTALLATION.md b/Docs/STT-TTS/VIBEVOICE_INSTALLATION.md index 7ce3ea839..1102068c7 100644 --- a/Docs/STT-TTS/VIBEVOICE_INSTALLATION.md +++ b/Docs/STT-TTS/VIBEVOICE_INSTALLATION.md @@ -10,8 +10,8 @@ This guide covers the installation of the enhanced VibeVoice TTS adapter with al # Install VibeVoice TTS dependencies pip install -e ".[TTS_vibevoice]" -# Clone and install VibeVoice library -git clone https://github.com/vibevoice-community/VibeVoice.git libs/VibeVoice +# Clone and install VibeVoice library (official) +git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice cd libs/VibeVoice && pip install -e . cd ../.. 
``` @@ -55,7 +55,7 @@ pip install bitsandbytes pip install flash-attn --no-build-isolation # Clone VibeVoice -git clone https://github.com/vibevoice-community/VibeVoice.git libs/VibeVoice +git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice cd libs/VibeVoice && pip install -e . ``` @@ -69,7 +69,7 @@ pip install -e ".[TTS_vibevoice]" # Bitsandbytes has limited MPS support # Clone VibeVoice -git clone https://github.com/vibevoice-community/VibeVoice.git libs/VibeVoice +git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice cd libs/VibeVoice && pip install -e . ``` @@ -80,7 +80,7 @@ pip install torch torchvision torchaudio --index-url https://download.pytorch.or pip install -e ".[TTS_vibevoice]" # Clone VibeVoice -git clone https://github.com/vibevoice-community/VibeVoice.git libs/VibeVoice +git clone https://github.com/microsoft/VibeVoice.git libs/VibeVoice cd libs/VibeVoice && pip install -e . ``` @@ -232,8 +232,11 @@ vibevoice_use_quantization = False # Download 1.5B model huggingface-cli download microsoft/VibeVoice-1.5B --local-dir ./models/vibevoice -# Or download 7B model -huggingface-cli download WestZhang/VibeVoice-Large-pt --local-dir ./models/vibevoice +# Or download 7B model (official) +huggingface-cli download vibevoice/VibeVoice-7B --local-dir ./models/vibevoice + +# Optional: Community 8-bit quantized 7B variant +huggingface-cli download FabioSarracino/VibeVoice-Large-Q8 --local-dir ./models/vibevoice-q8 ``` ## Voice Cloning Setup diff --git a/Docs/User_Guides/Authentication_Setup.md b/Docs/User_Guides/Authentication_Setup.md index d7a77335c..df81b464c 100644 --- a/Docs/User_Guides/Authentication_Setup.md +++ b/Docs/User_Guides/Authentication_Setup.md @@ -126,11 +126,11 @@ Key settings in `.env`: - Configure PostgreSQL via `DATABASE_URL` (examples): - Local: ```bash - export DATABASE_URL=postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users + export DATABASE_URL=postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users ``` - With docker-compose (service name `postgres`): ```bash - export DATABASE_URL=postgresql://tldw_user:ChangeMeStrong123!@postgres:5432/tldw_users + export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users ``` - See Multi-User Deployment Guide for more details. @@ -207,14 +207,14 @@ You can configure authentication and the AuthNZ database in `tldw_Server_API/Con [AuthNZ] auth_mode = multi_user # Option A: full URL -database_url = postgresql://tldw_user:ChangeMeStrong123!@localhost:5432/tldw_users +database_url = postgresql://tldw_user:TestPassword123!@localhost:5432/tldw_users # Option B: structured fields (used if DATABASE_URL not set) db_type = postgresql pg_host = localhost pg_port = 5432 pg_db = tldw_users pg_user = tldw_user -pg_password = ChangeMeStrong123! +pg_password = TestPassword123! pg_sslmode = prefer enable_registration = true require_registration_code = false diff --git a/Docs/User_Guides/TTS_Getting_Started.md b/Docs/User_Guides/TTS_Getting_Started.md new file mode 100644 index 000000000..4800aa97e --- /dev/null +++ b/Docs/User_Guides/TTS_Getting_Started.md @@ -0,0 +1,395 @@ +# TTS Providers Getting Started Guide + +This guide helps new operators bring text-to-speech (TTS) online inside `tldw_server`. It walks through the supported providers (cloud + local), required dependencies, configuration files, and verification commands so you can decide which adapter to enable and confirm it works end to end. + +## YAML Quick Start + +Minimal configuration to get going. 
Save to `tldw_Server_API/app/core/TTS/tts_providers_config.yaml` (or use one of the supported locations). + +```yaml +# Provider selection / fallback order +provider_priority: + - openai + - kokoro + +providers: + # Hosted (requires env: OPENAI_API_KEY) + openai: + enabled: true + api_key: ${OPENAI_API_KEY} + model: tts-1 + + # Local ONNX example + kokoro: + enabled: true + use_onnx: true + model_path: models/kokoro/onnx/model.onnx + voices_json: models/kokoro/voices + device: cpu + + # Local VibeVoice example (opt-in; downloads disabled by default) + vibevoice: + enabled: false # set true to enable + auto_download: false # set true to allow HF downloads + model_path: microsoft/VibeVoice-1.5B + device: auto # cuda | mps | cpu | auto + +performance: + max_concurrent_generations: 4 + stream_errors_as_audio: false +``` + +Notes: +- Local providers will not download model assets unless you explicitly set `auto_download: true` (or export `TTS_AUTO_DOWNLOAD=1` / `VIBEVOICE_AUTO_DOWNLOAD=1`). +- You can override API keys and some settings via `Config_Files/config.txt` or environment variables. + +## One-Command Installers +Run these from the project root to install a single TTS backend (deps + models where applicable): + +```bash +# Kokoro (v1.0 ONNX + voices) +python Helper_Scripts/TTS_Installers/install_tts_kokoro.py + +# NeuTTS (deps; optional prefetch) +python Helper_Scripts/TTS_Installers/install_tts_neutts.py --prefetch + +# Dia / Higgs / VibeVoice +python Helper_Scripts/TTS_Installers/install_tts_dia.py +python Helper_Scripts/TTS_Installers/install_tts_higgs.py +python Helper_Scripts/TTS_Installers/install_tts_vibevoice.py --variant 1.5B + +# IndexTTS2 (deps + checkpoints folder) +python Helper_Scripts/TTS_Installers/install_tts_index_tts2.py + +# Chatterbox (deps only) +python Helper_Scripts/TTS_Installers/install_tts_chatterbox.py [--with-lang] +``` + +Installer flags: +- `TLDW_SETUP_SKIP_PIP=1` to skip pip installs +- `TLDW_SETUP_SKIP_DOWNLOADS=1` to skip model downloads + +## Key Files & Paths +- `tldw_Server_API/app/core/TTS/tts_providers_config.yaml` — canonical provider settings + priority list. +- `Config_Files/config.txt` — optional INI overrides (e.g., `[TTS-Settings]` block). +- `tldw_Server_API/app/core/TTS/adapters/` — implementation for each backend. +- `tldw_Server_API/app/core/TTS/TTS-README.md` — deep dive on architecture + adapter matrix. 
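+
+A quick way to sanity-check your YAML before starting the server — an illustrative snippet (assumes PyYAML is installed; not a project utility):
+
+```python
+# Illustrative: list enabled TTS providers and the effective fallback order.
+import yaml
+
+with open("tldw_Server_API/app/core/TTS/tts_providers_config.yaml", encoding="utf-8") as f:
+    cfg = yaml.safe_load(f) or {}
+
+providers = cfg.get("providers") or {}
+enabled = [name for name, opts in providers.items() if (opts or {}).get("enabled")]
+priority = cfg.get("provider_priority") or []
+
+print("Enabled providers:", ", ".join(enabled) or "(none)")
+print("Fallback order   :", " -> ".join(p for p in priority if p in enabled) or "(none enabled)")
+```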
+ +## Quick Reference (Choose Your Provider) + +| Provider | Type | Install / Extras | Voice Cloning | Reference | +| --- | --- | --- | --- | --- | +| OpenAI `tts-1` | Hosted API | `OPENAI_API_KEY` | No | [Getting Started](../Getting-Started-STT_and_TTS.md#option-a--openai-tts-hosted) | +| ElevenLabs | Hosted API | `ELEVENLABS_API_KEY` | Yes (via ElevenLabs voices) | [TTS Setup Guide](../STT-TTS/TTS-SETUP-GUIDE.md#commercial-providers) | +| Kokoro ONNX | Local ONNX | `pip install -e ".[TTS_kokoro_onnx]"` + `espeak-ng` | No | [Getting Started](../Getting-Started-STT_and_TTS.md#option-b--kokoro-tts-local-onnx) | +| NeuTTS Air | Local hybrid | `pip install -e ".[TTS_neutts]"` + `espeak-ng` | **Required** (reference audio + text) | [NeuTTS Runbook](../STT-TTS/NEUTTS_TTS_SETUP.md) | +| Chatterbox | Local PyTorch | `pip install -e ".[TTS_chatterbox]"` (+ `.[TTS_chatterbox_lang]` for multilingual) | Yes (5–20 s) | [Chatterbox Runbook](../Published/User_Guides/Chatterbox_TTS_Setup.md) | +| VibeVoice | Local PyTorch | `pip install -e ".[TTS_vibevoice]"` + clone [VibeVoice](https://github.com/microsoft/VibeVoice) | Yes (3–30 s) | [VibeVoice Guide](../STT-TTS/VIBEVOICE_GETTING_STARTED.md) | +| Higgs Audio V2 | Local PyTorch | `pip install -e ".[TTS_higgs]"` + install `bosonai/higgs-audio` | Yes (3–10 s) | [TTS Setup Guide](../STT-TTS/TTS-SETUP-GUIDE.md#higgs-audio-v2-setup) | +| Dia | Local PyTorch | `pip install torch transformers accelerate nltk spacy` | Yes (dialogue prompts) | [TTS Setup Guide](../STT-TTS/TTS-SETUP-GUIDE.md#dia-setup) | +| IndexTTS2 | Local PyTorch | Download checkpoints to `checkpoints/index_tts2/` | Yes (zero-shot, 12 GB+ VRAM) | [TTS README](../../tldw_Server_API/app/core/TTS/TTS-README.md#indextts2-adapter) | + +> Tip: Keep cloud providers (`openai`, `elevenlabs`) high in `provider_priority` for instant results, and add local fallbacks underneath. + +## Baseline Prerequisites +1. **Install the project** + ```bash + pip install -e . + ``` + Add extras per provider (see table above). +2. **System packages** + - FFmpeg (`brew install ffmpeg` or `apt-get install -y ffmpeg`) + - eSpeak NG for phonemizer-backed models (`brew install espeak-ng` / `apt-get install -y espeak-ng`) +3. **Model cache helpers** + `pip install huggingface-hub` and log in if you need gated repos. +4. **Runtime** + Start the API: + ```bash + python -m uvicorn tldw_Server_API.app.main:app --reload + ``` + Note the printed `X-API-KEY` when running in single-user mode. + +## Recommended Setup Flow +1. **Pick providers** you care about and install their extras. +2. **Download models** proactively (use `huggingface-cli download ... --local-dir ...` for offline hosts). +3. **Edit `tts_providers_config.yaml`** + - Enable providers, point to local paths, and adjust `device`, `sample_rate`, etc. + - Adjust `provider_priority` so preferred backends run first. + - Note: Local providers will not download models unless you explicitly set `auto_download: true` per provider (or export `TTS_AUTO_DOWNLOAD=1`). +4. **Optional overrides** in `Config_Files/config.txt` (`[TTS-Settings]`) if you need environment-specific toggles. +5. **Set secrets/env vars** (API keys, `TTS_AUTO_DOWNLOAD`, device hints). +6. **Restart the server** and watch logs for `adapter initialized`. +7. **Verify** with `curl` (samples below) or via the WebUI ➜ Audio ➜ TTS tab. + +--- + +## Hosted Providers + +### OpenAI +1. Export your key or add it to `config.txt`: + ```bash + export OPENAI_API_KEY=sk-... + ``` +2. 
(Optional) Change the default model (`tts-1-hd`) or base URL (self-hosted proxies) inside `tts_providers_config.yaml`. +3. Verify: + ```bash + curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"tts-1","voice":"alloy","input":"Hi from OpenAI","response_format":"mp3"}' \ + --output openai.mp3 + ``` + +### ElevenLabs +1. Set `ELEVENLABS_API_KEY` and enable the provider in the YAML: + ```yaml + providers: + elevenlabs: + enabled: true + api_key: ${ELEVENLABS_API_KEY} + model: "eleven_monolingual_v1" + ``` +2. Use `GET /api/v1/audio/voices/catalog?provider=elevenlabs` to list available voices (includes your custom voices from ElevenLabs). +3. Generate speech (non-streaming shown): + ```bash + curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"eleven_monolingual_v1","voice":"rachel","input":"Testing ElevenLabs"}' \ + --output elevenlabs.mp3 + ``` + +--- + +## Local Providers + +Each section highlights installation, configuration, and a smoke test. + +### Kokoro ONNX +- **Install**: Prefer the installer (auto-detects eSpeak NG): + ```bash + python Helper_Scripts/TTS_Installers/install_tts_kokoro.py + ``` + Or manually: `pip install -e ".[TTS_kokoro_onnx]"` and install `espeak-ng`. The env var `PHONEMIZER_ESPEAK_LIBRARY` is only needed for non-standard library paths. +- **Models** (v1.0): download from `onnx-community/Kokoro-82M-v1.0-ONNX-timestamped` — use `onnx/model.onnx` and the `voices/` directory, placed under `models/kokoro/`. +- **Config**: + ```yaml + providers: + kokoro: + enabled: true + use_onnx: true + model_path: "models/kokoro/onnx/model.onnx" + voices_json: "models/kokoro/voices" + device: "cpu" # or "cuda" + ``` +- **Verify**: + ```bash + curl -s http://127.0.0.1:8000/api/v1/audio/voices/catalog \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" | jq '.kokoro' + curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \ + -H "X-API-KEY: $SINGLE_USER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"kokoro","voice":"af_bella","input":"Local Kokoro test","response_format":"mp3"}' \ + --output kokoro.mp3 + ``` + +### NeuTTS Air +- **Install**: `pip install -e ".[TTS_neutts]"`; ensure `espeak-ng` is installed for phonemizer support. +- **Config**: + ```yaml + providers: + neutts: + enabled: true + backbone_repo: "neuphonic/neutts-air" # or GGUF variant for streaming + backbone_device: "cpu" + codec_repo: "neuphonic/neucodec" + codec_device: "cpu" + ``` +- **Voice cloning**: every request must include a base64 `voice_reference` clip (3–15 s) plus `extra_params.reference_text` that exactly matches the spoken content. +- **Verify**: use the sample curl from [NeuTTS Runbook](../STT-TTS/NEUTTS_TTS_SETUP.md) and confirm the WAV plays back. + +### Chatterbox +- **Install**: `pip install -e ".[TTS_chatterbox]"`; add `.[TTS_chatterbox_lang]` if you plan to enable `use_multilingual`. The repo vendors a `chatterbox/` package, so no extra clone is needed. +- **Models**: cache `ResembleAI/chatterbox` locally with `huggingface-cli download ...`. +- **Config**: + ```yaml + providers: + chatterbox: + enabled: true + device: "cuda" + use_multilingual: false + disable_watermark: true + target_latency_ms: 200 + ``` +- **Voice cloning**: send `voice_reference` (5–20 s, 24 kHz) and optional `emotion` + `emotion_intensity` to tune delivery. 
+- **Reference**: see [Chatterbox Runbook](../Published/User_Guides/Chatterbox_TTS_Setup.md) for streaming examples and troubleshooting. + +### VibeVoice +- **Install**: `pip install -e ".[TTS_vibevoice]"`; clone the upstream repo into `libs/VibeVoice` and `pip install -e .` there. Optional: `bitsandbytes`, `flash-attn`, `ninja` for CUDA optimizations. +- **Config**: + ```yaml + providers: + vibevoice: + enabled: true + auto_download: true # Explicitly enable downloads (default is false) + model_path: "microsoft/VibeVoice-1.5B" # or vibevoice/VibeVoice-7B, FabioSarracino/VibeVoice-Large-Q8 + device: "cuda" + use_quantization: true + voices_dir: "./voices" + speakers_to_voices: + "1": "en-Alice_woman" + ``` +- **Voice cloning**: drop samples into `voices_dir`, upload via API, or send `voice_reference`. Use `extra_params.speakers_to_voices` to map scripted speakers to files or uploaded IDs. +- **Reference**: [VibeVoice Getting Started](../STT-TTS/VIBEVOICE_GETTING_STARTED.md). + +### Higgs Audio V2 +- **Install**: `pip install -e ".[TTS_higgs]"` and install the upstream repo (`git clone https://github.com/boson-ai/higgs-audio && pip install -e .`). +- **Config**: + ```yaml + providers: + higgs: + enabled: true + model_path: "bosonai/higgs-audio-v2-generation-3B-base" + tokenizer_path: "bosonai/higgs-audio-v2-tokenizer" + device: "cuda" + use_fp16: true + ``` +- **Voice cloning**: accepts 3–10 s voice samples at 24 kHz (WAV/MP3/FLAC). Include `voice_reference` + `voice` = `"clone"`. +- **Reference**: [Higgs section](../STT-TTS/TTS-SETUP-GUIDE.md#higgs-audio-v2-setup). + +### Dia +- **Install**: `pip install torch torchaudio transformers accelerate nltk spacy` plus `python -m spacy download en_core_web_sm`. +- **Config**: + ```yaml + providers: + dia: + enabled: true + model_path: "nari-labs/dia" + device: "cuda" + auto_detect_speakers: true + max_speakers: 5 + ``` +- **Usage**: best for dialogue transcripts (`Speaker 1:`, `Speaker 2:`). Supports voice cloning with per-speaker references. + +### IndexTTS2 +- **Install/Assets**: place model checkpoints + configs under `checkpoints/index_tts2/`. Follow the adapter instructions in [TTS-README](../../tldw_Server_API/app/core/TTS/TTS-README.md#indextts2-adapter) for expected filenames. +- **Config**: + ```yaml + providers: + index_tts: + enabled: true + model_dir: "checkpoints/index_tts2" + cfg_path: "checkpoints/index_tts2/config.yaml" + device: "cuda" + use_fp16: true + interval_silence: 200 + ``` +- **Hardware**: plan for 12 GB+ VRAM. Every request must include a `voice_reference` clip (zero-shot cloning). 
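+
+The cloning-capable providers above all accept an inline reference clip on the speech request. The sketch below shows the rough shape of such a call; `voice_reference`, `extra_params.reference_text`, and `"voice": "clone"` come from the provider notes above, while the exact payload accepted by each adapter may differ, so check the linked runbooks before relying on it:
+
+```python
+# Sketch: inline voice-cloning request with a base64-encoded reference clip.
+# Adjust model/voice and extra_params for your provider; this mirrors the
+# curl examples above and is not a definitive schema.
+import base64
+import os
+
+import httpx
+
+API = "http://127.0.0.1:8000/api/v1/audio/speech"
+HEADERS = {"X-API-KEY": os.environ["SINGLE_USER_API_KEY"]}
+
+with open("reference.wav", "rb") as fh:  # short mono clip; see the duration table below
+    ref_b64 = base64.b64encode(fh.read()).decode("ascii")
+
+payload = {
+    "model": "chatterbox",            # or neutts / vibevoice / higgs / index_tts
+    "voice": "clone",
+    "input": "Cloned-voice smoke test.",
+    "response_format": "wav",
+    "voice_reference": ref_b64,
+    # Required by NeuTTS: transcript that exactly matches the reference audio
+    "extra_params": {"reference_text": "Exact transcript of reference.wav"},
+}
+
+resp = httpx.post(API, json=payload, headers=HEADERS, timeout=300)
+resp.raise_for_status()
+with open("clone-test.wav", "wb") as out:
+    out.write(resp.content)
+```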
+
+---
+
+## YAML Configuration Reference
+
+Location precedence (first found is used):
+- `tldw_Server_API/app/core/TTS/tts_providers_config.yaml` (in-repo default)
+- `./tts_providers_config.yaml` (current working directory)
+- `~/.config/tldw/tts_providers_config.yaml` (user config)
+
+Key sections:
+- `provider_priority`: ordered list used for fallback
+- `providers.<name>`: per-provider settings
+  - `enabled` (bool): must be true to initialize
+  - `auto_download` (bool): when true, allow HF downloads if local files are missing
+  - Model path fields (e.g., `model_path`, `model_dir`, `cache_dir`)
+  - Device and performance fields (e.g., `device`, `use_fp16`, `use_quantization`)
+- `performance`, `fallback`, `logging`: global behavior
+
+Example (VibeVoice 7B):
+```yaml
+providers:
+  vibevoice:
+    enabled: true
+    auto_download: true
+    variant: "7B"  # or "7B-Q8" for quantized community model
+    model_path: "vibevoice/VibeVoice-7B"
+    device: "cuda"
+```
+
+Environment overrides:
+- `TTS_AUTO_DOWNLOAD=1` (global), or `VIBEVOICE_AUTO_DOWNLOAD=1` (provider-specific)
+- `TTS_DEFAULT_PROVIDER`, `TTS_DEFAULT_VOICE`, `TTS_DEVICE`, etc.
+
+## Voice Management & Reference Audio
+- Upload reusable samples:
+  ```bash
+  curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/voices/upload \
+    -H "X-API-KEY: $SINGLE_USER_API_KEY" \
+    -F "file=@/path/to/voice.wav" \
+    -F "name=Frank" \
+    -F "provider=vibevoice"
+  ```
+  The API returns a `voice_id`; reuse it via `"voice": "custom:<voice_id>"`.
+- Inline references: set `"voice_reference": "<base64 audio>"` directly on the TTS request.
+- Duration & quality (see `tldw_Server_API/app/core/TTS/TTS-VOICE-CLONING.md`):
+  - Higgs: 3–10 s @ 24 kHz, mono.
+  - Chatterbox: 5–20 s @ 24 kHz, mono.
+  - VibeVoice: 3–30 s @ 22.05 kHz (adapter resamples).
+  - NeuTTS: 3–15 s @ 24 kHz **plus** matching `reference_text`.
+  - IndexTTS2: 3–15 s @ 24 kHz, or precomputed `ref_codes`.
+
+---
+
+## Auto-Download & Environment Switches
+| Variable | Purpose |
+| --- | --- |
+| `TTS_AUTO_DOWNLOAD` | Global toggle for all local providers (`1` to allow HF downloads). |
+| `KOKORO_AUTO_DOWNLOAD`, `HIGGS_AUTO_DOWNLOAD`, `DIA_AUTO_DOWNLOAD`, `CHATTERBOX_AUTO_DOWNLOAD`, `VIBEVOICE_AUTO_DOWNLOAD` | Per-provider overrides when you need strict offline mode. |
+| `TTS_DEFAULT_PROVIDER` / `TTS_DEFAULT_VOICE` | Overrides the provider/voice when the client omits them. |
+| `TTS_DEVICE` | Forces a device hint (e.g., `cuda`, `cpu`) across adapters that respect it. |
+| `TTS_STREAM_ERRORS_AS_AUDIO` | When `1`, embed adapter errors into the stream (OpenAI compatibility); default `0` for normal HTTP errors. |
+
+All env vars above are documented in `Env_Vars.md`.
+
+---
+
+## Verification Checklist
+1. **Provider discovery**
+   ```bash
+   curl -s http://127.0.0.1:8000/api/v1/audio/providers \
+     -H "X-API-KEY: $SINGLE_USER_API_KEY" | jq
+   ```
+2. **Voice catalog**
+   ```bash
+   curl -s http://127.0.0.1:8000/api/v1/audio/voices/catalog \
+     -H "X-API-KEY: $SINGLE_USER_API_KEY" | jq
+   ```
+3. **Synthesis smoke test** (replace `model` + `voice` per provider):
+   ```bash
+   curl -sS -X POST http://127.0.0.1:8000/api/v1/audio/speech \
+     -H "X-API-KEY: $SINGLE_USER_API_KEY" \
+     -H "Content-Type: application/json" \
+     -d '{"model":"kokoro","voice":"af_bella","input":"Hello from tldw_server","response_format":"mp3","stream":true}' \
+     --output tts-test.mp3
+   ```
+4. **WebUI**: Visit `http://127.0.0.1:8000/webui/#/audio` ➜ pick provider ➜ synthesize sample text.
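+
+To repeat steps 1–3 against several providers in one go, the checklist can be scripted. The sketch below is illustrative only: the response shapes of `/providers` and `/voices/catalog` are not documented in this guide, and the provider-to-voice map is a hypothetical example to adjust for your config:
+
+```python
+# Sketch: loop the verification checklist over a few providers.
+import os
+
+import httpx
+
+BASE = "http://127.0.0.1:8000/api/v1/audio"
+HEADERS = {"X-API-KEY": os.environ["SINGLE_USER_API_KEY"]}
+
+print("Providers:", httpx.get(f"{BASE}/providers", headers=HEADERS, timeout=30).text[:200])
+print("Catalog  :", httpx.get(f"{BASE}/voices/catalog", headers=HEADERS, timeout=30).text[:200])
+
+# Hypothetical provider -> voice map; replace with voices from your catalog.
+smoke = {"kokoro": "af_bella", "openai": "alloy"}
+for model, voice in smoke.items():
+    resp = httpx.post(
+        f"{BASE}/speech",
+        headers=HEADERS,
+        json={"model": model, "voice": voice, "input": f"Hello from {model}", "response_format": "mp3"},
+        timeout=300,
+    )
+    print(model, resp.status_code, len(resp.content), "bytes")
+```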
+ +--- + +## Troubleshooting Cheatsheet +- **`ImportError` / missing modules** — re-run the correct extra install (e.g., `pip install -e ".[TTS_vibevoice]"`). +- **Auto-download blocked** — set `TTS_AUTO_DOWNLOAD=0` (or per provider) and pre-populate `models/` via `huggingface-cli download`. +- **`eSpeak` not found** — install `espeak-ng`; on macOS export `PHONEMIZER_ESPEAK_LIBRARY=/opt/homebrew/lib/libespeak-ng.dylib`. +- **CUDA OOM** — enable quantization (VibeVoice), lower `vibevoice_variant`, or move the provider lower in `provider_priority` so lighter backends run first. +- **Voice cloning rejects sample** — ensure duration/sample rate matches provider requirements and send mono audio. +- **401/403** — confirm `X-API-KEY` header (single-user) or Bearer JWT (multi-user) plus upstream API keys. +- **Adapter marked unhealthy** — see logs for circuit-breaker status; restart the server or wait for `performance.adapter_failure_retry_seconds` to elapse. + +--- + +## Additional Resources +- [TTS-SETUP-GUIDE](../STT-TTS/TTS-SETUP-GUIDE.md) — exhaustive installer for every backend. +- [Getting-Started-STT_and_TTS](../Getting-Started-STT_and_TTS.md) — fast-start for OpenAI + Kokoro + STT. +- [TTS-VOICE-CLONING](../../tldw_Server_API/app/core/TTS/TTS-VOICE-CLONING.md) — in-depth reference requirements per provider. +- [TTS-DEPLOYMENT](../../tldw_Server_API/app/core/TTS/TTS-DEPLOYMENT.md) — GPU sizing, smoke tests, and monitoring. + +Use this guide as the high-level checklist, then jump into the linked runbooks for deeper tuning. diff --git a/Env_Vars.md b/Env_Vars.md index 494bf3619..dbd9d6445 100644 --- a/Env_Vars.md +++ b/Env_Vars.md @@ -472,6 +472,14 @@ Total detected variables: 715 - `RUN_STRESS_TESTS` - `RUN_TTS_LEGACY_INTEGRATION` - `TEST_MODE` +- `TLDW_TEST_MODE` +- `RUN_EVALUATIONS` +- `MINIMAL_TEST_APP` +- `ULTRA_MINIMAL_APP` +- `ROUTES_DISABLE` +- `ROUTES_ENABLE` +- `ROUTES_STABLE_ONLY` +- `ROUTES_EXPERIMENTAL` ## Other diff --git a/Helper_Scripts/Prompts/Programming/simplification-cascades-skill.md b/Helper_Scripts/Prompts/Programming/simplification-cascades-skill.md new file mode 100644 index 000000000..9ad5b339f --- /dev/null +++ b/Helper_Scripts/Prompts/Programming/simplification-cascades-skill.md @@ -0,0 +1,76 @@ +--- +name: Simplification Cascades +description: Find one insight that eliminates multiple components - "if this is true, we don't need X, Y, or Z" +when_to_use: when implementing the same concept multiple ways, accumulating special cases, or complexity is spiraling +version: 1.1.0 +--- + +# Simplification Cascades + +## Overview + +Sometimes one insight eliminates 10 things. Look for the unifying principle that makes multiple components unnecessary. + +**Core principle:** "Everything is a special case of..." collapses complexity dramatically. + +## Quick Reference + +| Symptom | Likely Cascade | +|---------|----------------| +| Same thing implemented 5+ ways | Abstract the common pattern | +| Growing special case list | Find the general case | +| Complex rules with exceptions | Find the rule that has no exceptions | +| Excessive config options | Find defaults that work for 95% | + +## The Pattern + +**Look for:** +- Multiple implementations of similar concepts +- Special case handling everywhere +- "We need to handle A, B, C, D differently..." +- Complex rules with many exceptions + +**Ask:** "What if they're all the same thing underneath?" 
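+
+A minimal, hypothetical sketch (not taken from this repo) of what such a collapse looks like in code: several bespoke limiters turn out to be one "per-entity resource cap", anticipating Cascade 2 below.
+
+```python
+# Before: separate session caps, upload quotas, and API rate checks scattered
+# around the codebase. After: one enforcement point driven by a limits table.
+from collections import defaultdict
+
+class ResourceGovernor:
+    """Single generalization: every limit is 'at most N of <resource> per key'."""
+    def __init__(self, limits):                # e.g. {"api_calls": 100, "uploads": 5}
+        self.limits = limits
+        self.counts = defaultdict(int)
+
+    def acquire(self, key, resource):
+        if self.counts[(key, resource)] >= self.limits[resource]:
+            return False                       # over the per-entity cap
+        self.counts[(key, resource)] += 1
+        return True
+
+gov = ResourceGovernor({"api_calls": 100, "uploads": 5})
+assert gov.acquire("user-1", "uploads")        # True until the cap is reached
+```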
+ +## Examples + +### Cascade 1: Stream Abstraction +**Before:** Separate handlers for batch/real-time/file/network data +**Insight:** "All inputs are streams - just different sources" +**After:** One stream processor, multiple stream sources +**Eliminated:** 4 separate implementations + +### Cascade 2: Resource Governance +**Before:** Session tracking, rate limiting, file validation, connection pooling (all separate) +**Insight:** "All are per-entity resource limits" +**After:** One ResourceGovernor with 4 resource types +**Eliminated:** 4 custom enforcement systems + +### Cascade 3: Immutability +**Before:** Defensive copying, locking, cache invalidation, temporal coupling +**Insight:** "Treat everything as immutable data + transformations" +**After:** Functional programming patterns +**Eliminated:** Entire classes of synchronization problems + +## Process + +1. **List the variations** - What's implemented multiple ways? +2. **Find the essence** - What's the same underneath? +3. **Extract abstraction** - What's the domain-independent pattern? +4. **Test it** - Do all cases fit cleanly? +5. **Measure cascade** - How many things become unnecessary? + +## Red Flags You're Missing a Cascade + +- "We just need to add one more case..." (repeating forever) +- "These are all similar but different" (maybe they're the same?) +- Refactoring feels like whack-a-mole (fix one, break another) +- Growing configuration file +- "Don't touch that, it's complicated" (complexity hiding pattern) + +## Remember + +- Simplification cascades = 10x wins, not 10% improvements +- One powerful abstraction > ten clever hacks +- The pattern is usually already there, just needs recognition +- Measure in "how many things can we delete?" diff --git a/Helper_Scripts/Samples/Grafana/README.md b/Helper_Scripts/Samples/Grafana/README.md index b1bd85642..575dcdee0 100644 --- a/Helper_Scripts/Samples/Grafana/README.md +++ b/Helper_Scripts/Samples/Grafana/README.md @@ -13,6 +13,7 @@ Dashboards to load (copy into `/var/lib/grafana/dashboards` in your Grafana cont - `Docs/Deployment/Monitoring/app-observability-dashboard.json` - `Docs/Deployment/Monitoring/mcp-dashboard.json` - `Docs/Deployment/Monitoring/web-scraping-dashboard.json` +- `Docs/Deployment/Monitoring/streaming-dashboard.json` Docker Compose snippet: diff --git a/Helper_Scripts/Samples/Kubernetes/app-secret.yaml b/Helper_Scripts/Samples/Kubernetes/app-secret.yaml index 125a605e5..670e02829 100644 --- a/Helper_Scripts/Samples/Kubernetes/app-secret.yaml +++ b/Helper_Scripts/Samples/Kubernetes/app-secret.yaml @@ -9,4 +9,4 @@ stringData: # SINGLE_USER_API_KEY: "replace-with-strong-key" JWT_SECRET_KEY: "replace-with-strong-32ch" DATABASE_URL: "postgresql://tldw_user:${POSTGRES_PASSWORD}@postgres:5432/tldw_users" - POSTGRES_PASSWORD: "ChangeMeStrong123!" + POSTGRES_PASSWORD: "TestPassword123!" diff --git a/Helper_Scripts/TTS_Installers/README.md b/Helper_Scripts/TTS_Installers/README.md new file mode 100644 index 000000000..4cdf7f267 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/README.md @@ -0,0 +1,43 @@ +TTS Backend Installers + +Standalone scripts to install assets and dependencies for individual TTS providers. + +Run from the project root with your Python environment activated (e.g., venv). 
+ +Examples: +- Kokoro (v1.0 ONNX + voices): + python Helper_Scripts/TTS_Installers/install_tts_kokoro.py + # Overwrite existing assets: + # python Helper_Scripts/TTS_Installers/install_tts_kokoro.py --force + +- NeuTTS (deps + optional prefetch): + python Helper_Scripts/TTS_Installers/install_tts_neutts.py --prefetch + +- Dia (deps + model snapshot): + python Helper_Scripts/TTS_Installers/install_tts_dia.py + +- Higgs (deps + model/tokenizer snapshots): + python Helper_Scripts/TTS_Installers/install_tts_higgs.py + +- VibeVoice (deps + 1.5B snapshot): + python Helper_Scripts/TTS_Installers/install_tts_vibevoice.py --variant 1.5B + +- IndexTTS2 (deps + create checkpoints directory): + python Helper_Scripts/TTS_Installers/install_tts_index_tts2.py + +- Chatterbox (deps only): + python Helper_Scripts/TTS_Installers/install_tts_chatterbox.py + +Notes +- Scripts use tldw’s internal installer utilities where possible (pip + HF snapshots). +- Downloads respect environment flags: + - Set TLDW_SETUP_SKIP_DOWNLOADS=1 to skip model downloads. + - Set TLDW_SETUP_SKIP_PIP=1 to skip pip installs. + - Set TLDW_SETUP_FORCE_DOWNLOADS=1 (or pass --force where available) to overwrite existing assets. +- Kokoro requires eSpeak NG (system library). The script detects it and prints platform-specific guidance if missing. + +Asset-only helper for Kokoro (no pip installs): + python Helper_Scripts/download_kokoro_assets.py \ + --repo-id onnx-community/Kokoro-82M-v1.0-ONNX-timestamped \ + --model-path models/kokoro/onnx/model.onnx \ + --voices-dir models/kokoro/voices diff --git a/Helper_Scripts/TTS_Installers/install_tts_chatterbox.py b/Helper_Scripts/TTS_Installers/install_tts_chatterbox.py new file mode 100644 index 000000000..101ae1487 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_chatterbox.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +""" +Install Chatterbox TTS dependencies (vendored integration helper). + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_chatterbox.py [--with-lang] + +This is a thin wrapper over Helper_Scripts/install_chatterbox_deps.py. +""" +from __future__ import annotations + +import argparse +import runpy +import sys + + +def main() -> int: + ap = argparse.ArgumentParser(description="Install Chatterbox TTS dependencies") + ap.add_argument("--with-lang", action="store_true", help="install optional multilingual extras") + args = ap.parse_args() + + # Forward args to the underlying helper by modifying sys.argv + argv = [sys.argv[0]] + if args.with_lang: + argv.append("--with-lang") + sys.argv = argv + runpy.run_path("Helper_Scripts/install_chatterbox_deps.py", run_name="__main__") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/Helper_Scripts/TTS_Installers/install_tts_dia.py b/Helper_Scripts/TTS_Installers/install_tts_dia.py new file mode 100644 index 000000000..a897432a9 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_dia.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +""" +Install Dia TTS assets and dependencies. + +This will: +- pip install required packages (torch, transformers, accelerate, etc.) 
+- snapshot the model repo (nari-labs/dia) via huggingface_hub + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_dia.py [--force] + +Environment flags: +- TLDW_SETUP_SKIP_PIP=1 # skip pip installs +- TLDW_SETUP_SKIP_DOWNLOADS=1 # skip model downloads +- TLDW_SETUP_FORCE_DOWNLOADS=1 # force re-downloads (or pass --force) +""" +from __future__ import annotations + +import os +import sys + + +def main() -> int: + import argparse + ap = argparse.ArgumentParser(description="Install Dia TTS assets and dependencies") + ap.add_argument("--force", action="store_true", help="force re-downloads where applicable") + args = ap.parse_args() + + try: + from tldw_Server_API.app.core.Setup import install_manager as im + from tldw_Server_API.app.core.Setup.install_schema import InstallPlan, TTSInstall + except Exception as e: + print("ERROR: Unable to import internal installer utilities:", e, file=sys.stderr) + print("Run from the repo root and ensure 'pip install -e .' has been run.", file=sys.stderr) + return 2 + + if args.force: + os.environ['TLDW_SETUP_FORCE_DOWNLOADS'] = '1' + + errors: list[str] = [] + plan = InstallPlan(tts=[TTSInstall(engine="dia", variants=[])]) + status = im.InstallationStatus(plan) + + try: + im._install_backend_dependencies("tts", "dia", status, errors) + except im.PipInstallBlockedError as e: # type: ignore[attr-defined] + print(f"[dia] Skipped pip installs: {e}") + except Exception as e: + print(f"ERROR installing Dia dependencies: {e}", file=sys.stderr) + errors.append(str(e)) + + try: + im._install_dia() + except im.DownloadBlockedError as e: # type: ignore[attr-defined] + print(f"[dia] Skipped model downloads: {e}") + except Exception as e: + print(f"ERROR downloading Dia assets: {e}", file=sys.stderr) + errors.append(str(e)) + + if errors: + status.fail("; ".join(errors)) + return 1 + status.complete() + print("Dia install completed. Model cached via HF hub.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/TTS_Installers/install_tts_higgs.py b/Helper_Scripts/TTS_Installers/install_tts_higgs.py new file mode 100644 index 000000000..66f270d48 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_higgs.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +""" +Install Higgs Audio V2 TTS assets and dependencies. + +This will: +- pip install required packages (torch, torchaudio, boson_ai/higgs-audio via git, etc.) +- snapshot model repos (generation + tokenizer) via huggingface_hub + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_higgs.py [--force] + +Environment flags: +- TLDW_SETUP_SKIP_PIP=1 # skip pip installs +- TLDW_SETUP_SKIP_DOWNLOADS=1 # skip model downloads +- TLDW_SETUP_FORCE_DOWNLOADS=1 # force re-downloads (or pass --force) +""" +from __future__ import annotations + +import os +import sys + + +def main() -> int: + import argparse + ap = argparse.ArgumentParser(description="Install Higgs Audio V2 TTS assets and dependencies") + ap.add_argument("--force", action="store_true", help="force re-downloads where applicable") + args = ap.parse_args() + + try: + from tldw_Server_API.app.core.Setup import install_manager as im + from tldw_Server_API.app.core.Setup.install_schema import InstallPlan, TTSInstall + except Exception as e: + print("ERROR: Unable to import internal installer utilities:", e, file=sys.stderr) + print("Run from the repo root and ensure 'pip install -e .' 
has been run.", file=sys.stderr) + return 2 + + if args.force: + os.environ['TLDW_SETUP_FORCE_DOWNLOADS'] = '1' + + errors: list[str] = [] + plan = InstallPlan(tts=[TTSInstall(engine="higgs", variants=[])]) + status = im.InstallationStatus(plan) + + try: + im._install_backend_dependencies("tts", "higgs", status, errors) + except im.PipInstallBlockedError as e: # type: ignore[attr-defined] + print(f"[higgs] Skipped pip installs: {e}") + except Exception as e: + print(f"ERROR installing Higgs dependencies: {e}", file=sys.stderr) + errors.append(str(e)) + + try: + im._install_higgs() + except im.DownloadBlockedError as e: # type: ignore[attr-defined] + print(f"[higgs] Skipped model downloads: {e}") + except Exception as e: + print(f"ERROR downloading Higgs assets: {e}", file=sys.stderr) + errors.append(str(e)) + + if errors: + status.fail("; ".join(errors)) + return 1 + status.complete() + print("Higgs install completed. Models cached via HF hub.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/TTS_Installers/install_tts_index_tts2.py b/Helper_Scripts/TTS_Installers/install_tts_index_tts2.py new file mode 100644 index 000000000..f97585c44 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_index_tts2.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Prepare IndexTTS2 local TTS environment. + +This will: +- pip install common deps (torch, torchaudio, transformers, sentencepiece, safetensors) +- create checkpoints/index_tts2/ if missing and drop a README with expected files + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_index_tts2.py + +Note: +- The adapter expects local checkpoints and a config.yaml under checkpoints/index_tts2/. +- If the upstream pip package provides indextts, you can optionally install it and follow + their model download instructions; otherwise, copy your trained/converted assets there. +""" +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + + +def pip_install(pkgs: list[str]) -> None: + if _skip_pip(): + raise RuntimeError("pip installs are disabled via TLDW_SETUP_SKIP_PIP") + cmd = [sys.executable, "-m", "pip", "install", "-U"] + pkgs + idx = os.getenv('TLDW_SETUP_PIP_INDEX_URL') + if idx: + cmd.extend(['--index-url', idx]) + print("+", " ".join(cmd)) + subprocess.check_call(cmd) + + +def _skip_pip() -> bool: + flag = os.getenv("TLDW_SETUP_SKIP_PIP") + return bool(flag and flag.strip().lower() in {"1", "true", "yes", "y", "on"}) + + +README_CONTENT = """ +IndexTTS2 Checkpoints Directory +=============================== + +Place the following files here (names may vary by release): + +- config.yaml +- acoustic model weights (e.g., model.safetensors / .bin) +- codec weights +- optional: Qwen emotion model assets if using emotion guidance + +Update tldw_Server_API/app/core/TTS/tts_providers_config.yaml to point at: + +providers: + index_tts: + enabled: true + model_dir: "checkpoints/index_tts2" + cfg_path: "checkpoints/index_tts2/config.yaml" + +The adapter imports indextts.infer_v2.IndexTTS2. If not provided by your environment, +install the upstream package (when available) or put the engine code on PYTHONPATH. 
+""".strip() + + +def main() -> int: + # Core deps per TTS-README + try: + pip_install([ + "torch>=2.2.0", + "torchaudio>=2.2.0", + "transformers>=4.41.0", + "sentencepiece>=0.1.99", + "safetensors>=0.4.0", + ]) + except Exception as e: + print(f"ERROR installing IndexTTS2 deps: {e}", file=sys.stderr) + return 1 + + ckpt_dir = Path("checkpoints/index_tts2") + ckpt_dir.mkdir(parents=True, exist_ok=True) + readme = ckpt_dir / "README.txt" + if not readme.exists(): + readme.write_text(README_CONTENT, encoding="utf-8") + print(f"Prepared {ckpt_dir} (README written)") + print("Copy your model files and config.yaml into this directory.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/TTS_Installers/install_tts_kokoro.py b/Helper_Scripts/TTS_Installers/install_tts_kokoro.py new file mode 100644 index 000000000..c49434967 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_kokoro.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Install Kokoro TTS (v1.0 ONNX) assets and dependencies. + +Defaults: +- Model: models/kokoro/onnx/model.onnx +- Voices: models/kokoro/voices/ + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_kokoro.py [--model-only|--voices-only] \ + [--model-path PATH] [--voices-dir PATH] [--force] + +Environment flags respected (optional): +- TLDW_SETUP_SKIP_PIP=1 # skip pip installs +- TLDW_SETUP_SKIP_DOWNLOADS=1 # skip HF downloads +- TLDW_SETUP_FORCE_DOWNLOADS=1 # overwrite existing assets + +This script: +1) Installs required pip packages for the kokoro adapter. +2) Downloads the v1.0 ONNX model and voices directory from HF. +3) Detects eSpeak NG and prints platform guidance if not found. + +Alternative (assets only): + python Helper_Scripts/download_kokoro_assets.py \ + --repo-id onnx-community/Kokoro-82M-v1.0-ONNX-timestamped \ + --model-path models/kokoro/onnx/model.onnx \ + --voices-dir models/kokoro/voices +""" +from __future__ import annotations + +import argparse +import os +import sys +import platform +from pathlib import Path +from ctypes.util import find_library as _ctypes_find_library + + +def _run_install(model_path: Path, voices_dir: Path, model_only: bool, voices_only: bool) -> int: + # Defer heavy imports to runtime so the script can show friendly errors + try: + from tldw_Server_API.app.core.Setup import install_manager as im + from tldw_Server_API.app.core.Setup.install_schema import InstallPlan, TTSInstall + except Exception as e: + print("ERROR: Unable to import internal installer utilities:", e, file=sys.stderr) + print("Ensure you run from the repo root and that the project is installed (pip install -e .).", file=sys.stderr) + return 2 + + errors: list[str] = [] + plan = InstallPlan(tts=[TTSInstall(engine="kokoro", variants=["onnx", "voices"])]) + status = im.InstallationStatus(plan) + + # Step 1: dependencies + try: + im._install_backend_dependencies("tts", "kokoro", status, errors) + except im.PipInstallBlockedError as e: # type: ignore[attr-defined] + print(f"[kokoro] Skipped pip installs: {e}") + except Exception as e: + print(f"ERROR installing kokoro dependencies: {e}", file=sys.stderr) + errors.append(str(e)) + + # Step 2: downloads + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") + try: + variants = [] + if not voices_only: + variants.append("onnx") + if not model_only: + variants.append("voices") + + # Ensure destination directories exist + model_path.parent.mkdir(parents=True, exist_ok=True) + voices_dir.mkdir(parents=True, exist_ok=True) + + # If custom locations 
were provided, write them into config so the installer uses them + default_model = Path("models/kokoro/onnx/model.onnx") + default_voices = Path("models/kokoro/voices") + try: + if model_path != default_model or voices_dir != default_voices: + from tldw_Server_API.app.core.Setup import setup_manager as sm + sm.update_config({ + 'TTS-Settings': { + 'kokoro_model_path': str(model_path), + 'kokoro_voices_json': str(voices_dir), + } + }) + except Exception: + # Non-fatal; fallback to defaults + pass + + # Perform downloads + im._install_kokoro(variants) + except im.DownloadBlockedError as e: # type: ignore[attr-defined] + print(f"[kokoro] Skipped model downloads: {e}") + except Exception as e: + print(f"ERROR downloading Kokoro assets: {e}", file=sys.stderr) + errors.append(str(e)) + + # Step 3: eSpeak NG detection + _check_espeak() + + if errors: + status.fail("; ".join(errors)) + return 1 + status.complete() + print("\nKokoro install completed.") + print(f"Model path : {model_path}") + print(f"Voices dir : {voices_dir}") + return 0 + + +def _check_espeak() -> None: + path = _discover_espeak_library() + if path: + print(f"eSpeak NG detected: {path}") + return + print("\n[NOTICE] eSpeak NG library not detected. Kokoro ONNX can run without an explicit" + " PHONEMIZER_ESPEAK_LIBRARY in most setups, but you need eSpeak NG installed.") + sys_plat = sys.platform + if sys_plat == "darwin": + print("macOS install: brew install espeak") + elif sys_plat.startswith("linux"): + print("Linux install: sudo apt-get install espeak-ng (Debian/Ubuntu)") + print(" sudo dnf install espeak-ng (Fedora)") + print(" sudo pacman -S espeak-ng (Arch)") + elif sys_plat in ("win32", "cygwin"): + print("Windows install: choco install espeak (or use the official installer)") + else: + print("Install eSpeak NG via your OS package manager.") + + +def _discover_espeak_library() -> str | None: + # 1) Environment override + env_path = os.getenv("PHONEMIZER_ESPEAK_LIBRARY") + if env_path and os.path.exists(env_path): + return env_path + # 2) Platform heuristics + sys_plat = sys.platform + candidates: list[str] = [] + if sys_plat == "darwin": + candidates = [ + "/opt/homebrew/lib/libespeak-ng.dylib", + "/usr/local/lib/libespeak-ng.dylib", + "/opt/local/lib/libespeak-ng.dylib", + ] + elif sys_plat.startswith("linux"): + arch = platform.machine() or "" + candidates = [ + f"/usr/lib/{arch}/libespeak-ng.so.1" if arch else "", + "/usr/lib/x86_64-linux-gnu/libespeak-ng.so.1", + "/usr/lib/aarch64-linux-gnu/libespeak-ng.so.1", + "/usr/lib64/libespeak-ng.so.1", + "/usr/lib/libespeak-ng.so.1", + "/lib/x86_64-linux-gnu/libespeak-ng.so.1", + "/lib/aarch64-linux-gnu/libespeak-ng.so.1", + "/lib/libespeak-ng.so.1", + ] + elif sys_plat in ("win32", "cygwin"): + pf = os.environ.get("PROGRAMFILES", r"C:\\Program Files") + pf86 = os.environ.get("PROGRAMFILES(X86)", r"C:\\Program Files (x86)") + candidates = [ + os.path.join(pf, "eSpeak NG", "libespeak-ng.dll"), + os.path.join(pf86, "eSpeak NG", "libespeak-ng.dll"), + ] + for d in os.environ.get("PATH", "").split(os.pathsep): + if d: + candidates.append(os.path.join(d, "libespeak-ng.dll")) + # 3) ctypes resolution may return a soname; only accept absolute paths + lib = _ctypes_find_library("espeak-ng") or _ctypes_find_library("espeak") + if lib and os.path.isabs(lib) and os.path.exists(lib): + return lib + for c in candidates: + if c and os.path.exists(c): + return c + return None + + +def main() -> int: + ap = argparse.ArgumentParser(description="Install Kokoro (v1.0 ONNX) TTS assets and deps") + 
ap.add_argument("--model-path", default="models/kokoro/onnx/model.onnx", help="Destination path for ONNX model") + ap.add_argument("--voices-dir", default="models/kokoro/voices", help="Destination directory for voices") + ap.add_argument("--model-only", action="store_true", help="Only install model (skip voices)") + ap.add_argument("--voices-only", action="store_true", help="Only install voices (skip model)") + ap.add_argument("--force", action="store_true", help="Overwrite existing assets and force re-downloads") + args = ap.parse_args() + + if args.model_only and args.voices_only: + print("Choose only one of --model-only or --voices-only", file=sys.stderr) + return 2 + + if args.force: + os.environ['TLDW_SETUP_FORCE_DOWNLOADS'] = '1' + + model_path = Path(args.model_path) + voices_dir = Path(args.voices_dir) + return _run_install(model_path, voices_dir, args.model_only, args.voices_only) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/TTS_Installers/install_tts_neutts.py b/Helper_Scripts/TTS_Installers/install_tts_neutts.py new file mode 100644 index 000000000..64eda7090 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_neutts.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Install NeuTTS Air dependencies and optionally prefetch model assets. + +This will: +- pip install required packages: neucodec, librosa, phonemizer, transformers, torch +- optional: install llama-cpp-python (for GGUF streaming) and onnxruntime +- optional: prefetch HF repos (backbone + codec) into local cache + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_neutts.py [--prefetch] [--force] \ + [--backbone neuphonic/neutts-air||] \ + [--codec neuphonic/neucodec|neuphonic/distill-neucodec|neuphonic/neucodec-onnx-decoder] + +Environment flags: +- TLDW_SETUP_SKIP_PIP=1 # skip pip installs +- TLDW_SETUP_SKIP_DOWNLOADS=1 # skip HF downloads +- TLDW_SETUP_FORCE_DOWNLOADS=1 # force re-downloads (or pass --force) +""" +from __future__ import annotations + +import argparse +import os +import subprocess +import sys + + +DEFAULT_BACKBONE = "neuphonic/neutts-air" +DEFAULT_CODEC = "neuphonic/neucodec" + + +def pip_install(pkgs: list[str]) -> None: + if _skip_pip(): + raise RuntimeError("pip installs are disabled via TLDW_SETUP_SKIP_PIP") + cmd = [sys.executable, "-m", "pip", "install", "-U"] + pkgs + idx = os.getenv('TLDW_SETUP_PIP_INDEX_URL') + if idx: + cmd.extend(['--index-url', idx]) + print("+", " ".join(cmd)) + subprocess.check_call(cmd) + + +def _skip_pip() -> bool: + flag = os.getenv("TLDW_SETUP_SKIP_PIP") + return bool(flag and flag.strip().lower() in {"1", "true", "yes", "y", "on"}) + + +def _skip_downloads() -> bool: + flag = os.getenv("TLDW_SETUP_SKIP_DOWNLOADS") + return bool(flag and flag.strip().lower() in {"1", "true", "yes", "y", "on"}) + + +def _force_downloads() -> bool: + flag = os.getenv("TLDW_SETUP_FORCE_DOWNLOADS") + return bool(flag and flag.strip().lower() not in {"0", "false", "no", "off"}) + + +def prefetch(backbone: str, codec: str) -> None: + if _skip_downloads(): + print("[neutts] Skipping downloads: TLDW_SETUP_SKIP_DOWNLOADS=1") + return + try: + from huggingface_hub import snapshot_download + except Exception as e: + # Try to install huggingface_hub (unless installs are disabled) + if _skip_pip(): + print("[neutts] Cannot auto-install huggingface_hub due to TLDW_SETUP_SKIP_PIP=1; skipping downloads.") + return + print("Installing huggingface_hub to enable downloads...") + pip_install(["huggingface_hub>=0.23.0"]) + from huggingface_hub 
import snapshot_download # type: ignore + + def snap(repo: str) -> None: + if os.path.isdir(repo): + print(f"[neutts] Local path provided, skipping download: {repo}") + return + print(f"[neutts] Prefetching {repo} ...") + # Prefetch into HF cache; no local_dir needed and no symlink flag + snapshot_download(repo_id=repo, force_download=_force_downloads()) + + snap(backbone) + if codec: + snap(codec) + + +def main() -> int: + ap = argparse.ArgumentParser(description="Install NeuTTS Air dependencies and optionally prefetch models") + ap.add_argument("--prefetch", action="store_true", help="download backbone/codec to local HF cache") + ap.add_argument("--backbone", default=DEFAULT_BACKBONE, help="HF repo id or local path for backbone") + ap.add_argument("--codec", default=DEFAULT_CODEC, help="HF repo id for codec (or onnx decoder)") + ap.add_argument("--with-gguf", action="store_true", help="also install llama-cpp-python for GGUF streaming") + ap.add_argument("--with-onnx", action="store_true", help="also install onnxruntime for ONNX decoder codec") + ap.add_argument("--force", action="store_true", help="force re-downloads where applicable") + args = ap.parse_args() + + # Core deps + try: + pip_install([ + "torch>=2.2.0", + "phonemizer>=3.2.1", + "librosa>=0.10.0", + "transformers>=4.41.0", + "neucodec>=0.0.4", + ]) + except Exception as e: + print(f"ERROR installing NeuTTS deps: {e}", file=sys.stderr) + return 1 + + # Optional extras + opt_pkgs: list[str] = [] + if args.with_gguf: + opt_pkgs.append("llama-cpp-python>=0.2.90") + if args.with_onnx: + opt_pkgs.append("onnxruntime>=1.16.0") + if opt_pkgs: + try: + pip_install(opt_pkgs) + except Exception as e: + print(f"WARNING: Optional NeuTTS extras failed to install: {e}") + + if args.force: + os.environ['TLDW_SETUP_FORCE_DOWNLOADS'] = '1' + + if args.prefetch: + try: + prefetch(args.backbone, args.codec) + except Exception as e: + print(f"WARNING: Prefetch failed: {e}") + + print("NeuTTS install completed.") + print("- Configure in tts_providers_config.yaml under providers.neutts") + print("- For streaming, use a GGUF backbone and run with --with-gguf") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/TTS_Installers/install_tts_vibevoice.py b/Helper_Scripts/TTS_Installers/install_tts_vibevoice.py new file mode 100644 index 000000000..1dc8e4c35 --- /dev/null +++ b/Helper_Scripts/TTS_Installers/install_tts_vibevoice.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +""" +Install VibeVoice TTS assets and dependencies. 
+ +By default, installs deps and snapshots the 1.5B variant: + microsoft/VibeVoice-1.5B + +Usage: + python Helper_Scripts/TTS_Installers/install_tts_vibevoice.py [--variant {1.5B,7B,7B-Q8}] [--force] + +Environment flags: +- TLDW_SETUP_SKIP_PIP=1 # skip pip installs +- TLDW_SETUP_SKIP_DOWNLOADS=1 # skip model downloads +- TLDW_SETUP_FORCE_DOWNLOADS=1 # force re-downloads (or pass --force) +""" +from __future__ import annotations + +import argparse +import os +import sys + + +def main() -> int: + ap = argparse.ArgumentParser(description="Install VibeVoice TTS assets and deps") + ap.add_argument("--variant", choices=["1.5B", "7B", "7B-Q8"], default="1.5B") + ap.add_argument("--force", action="store_true", help="force re-downloads where applicable") + args = ap.parse_args() + + try: + from tldw_Server_API.app.core.Setup import install_manager as im + from tldw_Server_API.app.core.Setup.install_schema import InstallPlan, TTSInstall + except Exception as e: + print("ERROR: Unable to import internal installer utilities:", e, file=sys.stderr) + print("Run from the repo root and ensure 'pip install -e .' has been run.", file=sys.stderr) + return 2 + + if args.force: + os.environ['TLDW_SETUP_FORCE_DOWNLOADS'] = '1' + + errors: list[str] = [] + plan = InstallPlan(tts=[TTSInstall(engine="vibevoice", variants=[args.variant])]) + status = im.InstallationStatus(plan) + + try: + im._install_backend_dependencies("tts", "vibevoice", status, errors) + except im.PipInstallBlockedError as e: # type: ignore[attr-defined] + print(f"[vibevoice] Skipped pip installs: {e}") + except Exception as e: + print(f"ERROR installing VibeVoice dependencies: {e}", file=sys.stderr) + errors.append(str(e)) + + try: + im._install_vibevoice([args.variant]) + except im.DownloadBlockedError as e: # type: ignore[attr-defined] + print(f"[vibevoice] Skipped model downloads: {e}") + except Exception as e: + print(f"ERROR downloading VibeVoice assets: {e}", file=sys.stderr) + errors.append(str(e)) + + if errors: + status.fail("; ".join(errors)) + return 1 + status.complete() + print(f"VibeVoice install completed. Variant: {args.variant}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/benchmarks/README.md b/Helper_Scripts/benchmarks/README.md new file mode 100644 index 000000000..97df01cfa --- /dev/null +++ b/Helper_Scripts/benchmarks/README.md @@ -0,0 +1,125 @@ +LLM Gateway Benchmark Scripts + +Overview +- `llm_gateway_bench.py` is a minimal async load generator for the Chat API (`/api/v1/chat/completions`). +- It sweeps concurrency levels and reports latency percentiles, error rates, and streaming TTFT. + +Recommended Server Settings (for safe local benchmarking) +- Quick start (recommended): + + make server-up-dev HOST=127.0.0.1 PORT=8000 API_KEY=dev-key-123 + + This starts uvicorn with: + - `AUTH_MODE=single_user` + - `SINGLE_USER_API_KEY=$API_KEY` + - `DEFAULT_LLM_PROVIDER=openai` + - `CHAT_FORCE_MOCK=1` (no upstream calls) + - `STREAMS_UNIFIED=1` (enables SSE metrics) + +- Manual alternative: + + AUTH_MODE=single_user \ + SINGLE_USER_API_KEY=dev-key-123 \ + CHAT_FORCE_MOCK=1 \ + DEFAULT_LLM_PROVIDER=openai \ + STREAMS_UNIFIED=1 \ + python -m uvicorn tldw_Server_API.app.main:app --host 127.0.0.1 --port 8000 --reload + + Notes: + - `CHAT_FORCE_MOCK=1` avoids hitting real upstream providers; responses are mocked and fast. + - In multi-user mode, supply a Bearer token instead of `X-API-KEY`. 
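+
+Pre-flight Check (optional)
+- Before a sweep, confirm the server answers in mock mode using the same payload shape the benchmark sends. The snippet below is a small sketch using httpx (already a dependency of the bench script); save it as e.g. `preflight.py` (name is arbitrary) and run it with the same env vars as the benchmark:
+
+  # preflight.py -- expects SINGLE_USER_API_KEY (falls back to the dev key above)
+  import os
+  import httpx
+
+  r = httpx.post(
+      "http://127.0.0.1:8000/api/v1/chat/completions",
+      headers={"X-API-KEY": os.environ.get("SINGLE_USER_API_KEY", "dev-key-123")},
+      json={
+          "api_provider": "openai",
+          "model": "gpt-4o-mini",
+          "messages": [{"role": "user", "content": "ping"}],
+          "stream": False,
+      },
+      timeout=30,
+  )
+  print(r.status_code, r.text[:120])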
+ +Examples +- Non-streaming, 1/2/4/8 concurrency for 20s each: + + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --base-url http://127.0.0.1:8000 \ + --path /api/v1/chat/completions \ + --api-key "$SINGLE_USER_API_KEY" \ + --concurrency 1 2 4 8 \ + --duration 20 + +- Streaming with concurrency=16 for 30s: + + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --stream \ + --concurrency 16 \ + --duration 30 \ + --api-key "$SINGLE_USER_API_KEY" + +- Stop on error-rate > 5% or p99 > 5s: + + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --concurrency 1 2 4 8 16 32 \ + --duration 20 \ + --max-error-rate 0.05 \ + --latency-p99-sla-ms 5000 + +What It Measures +- Per step: total, successes/failures, RPS, p50/p90/p95/p99 (ms) +- If `--stream`: TTFT (p50/p95) in ms +- Optional server-side metrics deltas from `/metrics` (Prometheus): + - `http_requests_total{endpoint="/api/v1/chat/completions",status="..."}` by status + - Use `--metrics-url` to point to a different metrics endpoint + +Tips +- Increase `--prompt-bytes` to simulate larger inputs. +- Use `--provider openai --model gpt-4o-mini` with `CHAT_FORCE_MOCK=1` for consistent, fast results. +- Optionally export to JSON with `--out results.json`. + +Locust (Open-Loop RPS) +- File: `Helper_Scripts/benchmarks/locustfile.py` +- Closed-loop (headless): + + locust -f Helper_Scripts/benchmarks/locustfile.py --host http://127.0.0.1:8000 \ + --headless -u 50 -r 10 -t 2m + +- Approximate open-loop RPS plan via env: + + TLDW_RPS_PLAN="10:30,20:30,40:60,20:30,10:30" \ + TLDW_TASKS_PER_USER_PER_SEC=1 \ + locust -f Helper_Scripts/benchmarks/locustfile.py --host http://127.0.0.1:8000 --headless -t 3m + +- Optional env vars: + - `TLDW_BENCH_PATH` (default `/api/v1/chat/completions`) + - `TLDW_BENCH_PROVIDER` (default `openai`) + - `TLDW_BENCH_MODEL` (default `gpt-4o-mini`) + - `TLDW_BENCH_STREAM` (`1|true|yes|on` to enable streaming) + - `TLDW_BENCH_PROMPT_BYTES` (default 256) + - `SINGLE_USER_API_KEY` or `TLDW_BENCH_BEARER_TOKEN` + - `TLDW_TASKS_PER_USER_PER_SEC` (default 1; used with RPS plan) + +Notes +- Streaming in Locust: total request time includes consuming the stream; a synthetic TTFT metric is emitted as `request_type=TTFT`, `name=chat:stream_ttft`. + +Monitoring Stack (Prometheus + Grafana) +- Compose files: `Dockerfiles/Monitoring/` +- Start stack: + + docker compose -f Dockerfiles/Monitoring/docker-compose.monitoring.yml up -d + +- Prometheus scrapes `host.docker.internal:8000/metrics` by default (adjust `Dockerfiles/Monitoring/prometheus.yml`). +- Grafana at http://localhost:3000 (admin/admin). The `LLM Gateway` dashboard is auto-provisioned from `Docs/Monitoring/Grafana_Dashboards/`. +- To enable SSE panels (enqueue→yield), set on the server: `STREAMS_UNIFIED=1`. + - Linux note: if `host-gateway` is unsupported, change the Prometheus target to your host IP (e.g., `172.17.0.1:8000`). + +One‑Command Full Run +- Start monitoring + run both sweeps (non-stream and stream) and print links: + + make bench-full BASE_URL=http://127.0.0.1:8000 API_KEY=$SINGLE_USER_API_KEY \ + FULL_CONCURRENCY="1 2 4 8" FULL_STREAM_CONCURRENCY="4 8 16" FULL_DURATION=20 + + Results are saved to `.benchmarks/bench_nonstream.json` and `.benchmarks/bench_stream.json`. Open Grafana at: + - http://localhost:3000/d/tldw-llm-gateway + - Login: admin / admin + - Tip: ensure the server runs with `STREAMS_UNIFIED=1` for SSE metrics. 
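+
+Comparing Runs
+- Each results file holds one record per concurrency step with fields like `concurrency`, `rps`, `p95_ms`, `p99_ms`, `error_rate`, and (for streaming) `ttft_p95_ms`. The sketch below prints both files side by side; it assumes `--out` writes the plain list of step records, so adjust if your file is wrapped differently:
+
+  # compare_bench.py -- hypothetical helper, not shipped with the repo
+  import json
+
+  for name in ("bench_nonstream", "bench_stream"):
+      with open(f".benchmarks/{name}.json") as fh:
+          steps = json.load(fh)
+      print(name)
+      for s in steps:
+          line = (f"  c={s['concurrency']:>3} rps={s['rps']:.1f} "
+                  f"p95={s['p95_ms']:.0f}ms p99={s['p99_ms']:.0f}ms "
+                  f"err={s['error_rate'] * 100:.1f}%")
+          if s.get("ttft_p95_ms") is not None:
+              line += f" ttft_p95={s['ttft_p95_ms']:.0f}ms"
+          print(line)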
+ +Make Targets (summary) +- `server-up-dev` — run uvicorn in mock mode with SSE metrics enabled +- `monitoring-up` — start Prometheus (9090) + Grafana (3000) +- `monitoring-down` — stop monitoring stack +- `monitoring-logs` — tail monitoring logs +- `bench-sweep` — non-stream concurrency sweep (writes `.benchmarks/bench_nonstream.json`) +- `bench-stream` — streaming sweep (writes `.benchmarks/bench_stream.json`) +- `bench-rps` — Locust RPS plan (open-loop approx) +- `bench-full` — monitoring-up + both sweeps + helpful links diff --git a/Helper_Scripts/benchmarks/llm_gateway_bench.py b/Helper_Scripts/benchmarks/llm_gateway_bench.py new file mode 100644 index 000000000..1599ad177 --- /dev/null +++ b/Helper_Scripts/benchmarks/llm_gateway_bench.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python3 +""" +llm_gateway_bench.py + +Purpose: +- Benchmark the tldw_server Chat API (/api/v1/chat/completions) for throughput and latency. +- Sweep concurrency, measure p50/p90/p95/p99 latency, error rate, and basic streaming timings (TTFT). +- Avoids external provider cost/limits when server runs with CHAT_FORCE_MOCK=1 (recommended). + +Usage (examples): + + # Non-streaming, concurrency sweep 1,2,4,8 for 20s each (single-user API key) + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --base-url http://127.0.0.1:8000 \ + --path /api/v1/chat/completions \ + --api-key "$SINGLE_USER_API_KEY" \ + --concurrency 1 2 4 8 \ + --duration 20 + + # Streaming benchmark with bearer token (multi-user) and fixed overlap = 16 + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --stream \ + --concurrency 16 \ + --duration 30 \ + --bearer "$JWT_TOKEN" + + # Ramp until error-rate > 5% or p99 > 5s + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --concurrency 1 2 4 8 16 32 \ + --duration 20 \ + --max-error-rate 0.05 \ + --latency-p99-sla-ms 5000 + +Notes: +- To avoid hitting real providers, run the server with: CHAT_FORCE_MOCK=1 (and optionally TEST_MODE=1). +- Provider/model can be set via args. Defaults aim for mock OpenAI-compatible flow. +""" + +from __future__ import annotations + +import argparse +import contextlib +import asyncio +import json +import os +import random +import statistics +import sys +import time +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Tuple + +import httpx + + +def _now_ms() -> float: + return time.perf_counter() * 1000.0 + + +def _percentile(values: List[float], pct: float) -> float: + if not values: + return 0.0 + pct = max(0.0, min(100.0, pct)) + idx = int(round((pct / 100.0) * (len(values) - 1))) + return sorted(values)[idx] + + +@dataclass +class RequestResult: + ok: bool + status: int + latency_ms: float + ttft_ms: Optional[float] = None # time to first token (for streaming) + error: Optional[str] = None + + +@dataclass +class StepMetrics: + concurrency: int + total: int + successes: int + failures: int + rps: float + p50_ms: float + p90_ms: float + p95_ms: float + p99_ms: float + ttft_p50_ms: Optional[float] = None + ttft_p95_ms: Optional[float] = None + error_rate: float = field(init=False) + + def __post_init__(self) -> None: + self.error_rate = (self.failures / max(1, self.total)) if self.total else 0.0 + + +def build_payload( + *, + provider: str, + model: str, + stream: bool, + prompt_bytes: int, +) -> Dict[str, Any]: + # Create a simple prompt of desired size (approximate bytes) + base = "Please summarize the following text." 
# ~36 bytes + if prompt_bytes > 0: + filler_len = max(0, prompt_bytes - len(base)) + filler = (" Lorem ipsum dolor sit amet." * ((filler_len // 28) + 1))[:filler_len] + text = base + filler + else: + text = base + + messages = [ + {"role": "user", "content": text}, + ] + return { + "api_provider": provider, + "model": model, + "messages": messages, + "stream": stream, + # Keep the rest minimal; add knobs later if needed + } + + +async def send_nonstream_request( + client: httpx.AsyncClient, + url: str, + headers: Dict[str, str], + payload: Dict[str, Any], + timeout_s: float, +) -> RequestResult: + t0 = _now_ms() + try: + r = await client.post(url, headers=headers, json=payload, timeout=timeout_s) + latency_ms = _now_ms() - t0 + ok = r.status_code < 500 and r.status_code != 429 + return RequestResult(ok=ok, status=r.status_code, latency_ms=latency_ms, error=None if ok else r.text[:200]) + except Exception as e: + latency_ms = _now_ms() - t0 + return RequestResult(ok=False, status=0, latency_ms=latency_ms, error=str(e)) + + +async def send_stream_request( + client: httpx.AsyncClient, + url: str, + headers: Dict[str, str], + payload: Dict[str, Any], + timeout_s: float, +) -> RequestResult: + t0 = _now_ms() + ttft_ms: Optional[float] = None + try: + # Ensure SSE accept header for consistency + stream_headers = dict(headers) + stream_headers.setdefault("Accept", "text/event-stream") + async with client.stream("POST", url, headers=stream_headers, json=payload, timeout=timeout_s) as r: + # HTTP status known at this point + status = r.status_code + # Iterate SSE lines; record time to first non-empty data line + async for line in r.aiter_lines(): + if not line: + continue + if ttft_ms is None: + ttft_ms = _now_ms() - t0 + # Detect provider done signal + stripped = line.strip().lower() + if stripped == "data: [done]" or stripped == "[done]": + break + latency_ms = _now_ms() - t0 + ok = status < 500 and status != 429 + return RequestResult(ok=ok, status=status, latency_ms=latency_ms, ttft_ms=ttft_ms) + except Exception as e: + latency_ms = _now_ms() - t0 + return RequestResult(ok=False, status=0, latency_ms=latency_ms, ttft_ms=ttft_ms, error=str(e)) + + +def _parse_prometheus_text(text: str) -> Dict[Tuple[str, Tuple[Tuple[str, str], ...]], float]: + """Parse a minimal subset of Prometheus text format into a dict. 
+ + Returns mapping: (metric_name, sorted(label_items_tuple)) -> value + Only parses simple series lines like: name{l1="v1",l2="v2"} value + """ + series: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], float] = {} + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + try: + if "{" in line and "}" in line: + name, rest = line.split("{", 1) + labels_str, value_str = rest.split("}") + value_str = value_str.strip() + # Some histogram lines have suffixes like _sum, _count + metric_name = name.strip() + labels: Dict[str, str] = {} + if labels_str: + parts = [p for p in labels_str.split(",") if p] + for p in parts: + if "=" not in p: + continue + k, v = p.split("=", 1) + labels[k.strip()] = v.strip().strip('"') + key = (metric_name, tuple(sorted(labels.items()))) + series[key] = float(value_str) + else: + # name value + name, value_str = line.split() + key = (name.strip(), tuple()) + series[key] = float(value_str) + except Exception: + # skip malformed lines + continue + return series + + +async def _scrape_metrics_once(client: httpx.AsyncClient, metrics_url: str) -> Dict[Tuple[str, Tuple[Tuple[str, str], ...]], float]: + try: + r = await client.get(metrics_url, timeout=10.0) + if r.status_code != 200: + return {} + return _parse_prometheus_text(r.text) + except Exception: + return {} + + +async def run_step( + *, + base_url: str, + path: str, + headers: Dict[str, str], + provider: str, + model: str, + concurrency: int, + duration_s: int, + stream: bool, + prompt_bytes: int, + timeout_s: float, + metrics_url: Optional[str] = None, + metrics_endpoint_path: str = "/api/v1/chat/completions", + metrics_interval_s: float = 2.0, +) -> Tuple[StepMetrics, List[RequestResult], Dict[str, Any]]: + url = base_url.rstrip("/") + path + client = httpx.AsyncClient(base_url=None, limits=httpx.Limits(max_keepalive_connections=concurrency, max_connections=concurrency * 2)) + stop_at = time.monotonic() + duration_s + results: List[RequestResult] = [] + results_lock = asyncio.Lock() + + payload = build_payload(provider=provider, model=model, stream=stream, prompt_bytes=prompt_bytes) + + async def worker(idx: int) -> None: + nonlocal results + # Stagger start slightly to avoid bursty first second + await asyncio.sleep((idx % concurrency) * 0.001) + while time.monotonic() < stop_at: + if stream: + res = await send_stream_request(client, url, headers, payload, timeout_s) + else: + res = await send_nonstream_request(client, url, headers, payload, timeout_s) + async with results_lock: + results.append(res) + + # Optional metrics scraping loop + metrics_client = httpx.AsyncClient() + pre_metrics = {} + post_metrics = {} + series_deltas: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], float] = {} + + if metrics_url: + pre_metrics = await _scrape_metrics_once(metrics_client, metrics_url) + + async def _poll_metrics(): + # background polling to keep /metrics hot; final delta is taken after run + while time.monotonic() < stop_at: + await asyncio.sleep(max(0.1, metrics_interval_s)) + try: + await _scrape_metrics_once(metrics_client, metrics_url) + except Exception: + pass + + poll_task = asyncio.create_task(_poll_metrics()) + else: + poll_task = None + + tasks = [asyncio.create_task(worker(i)) for i in range(concurrency)] + await asyncio.gather(*tasks, return_exceptions=True) + await client.aclose() + if poll_task: + poll_task.cancel() + with contextlib.suppress(Exception): + await poll_task + if metrics_url: + post_metrics = await _scrape_metrics_once(metrics_client, 
metrics_url) + await metrics_client.aclose() + # Compute deltas for http_requests_total by endpoint + status + for (mname, labels), val in post_metrics.items(): + if mname != "http_requests_total": + continue + label_dict = dict(labels) + if label_dict.get("endpoint") != metrics_endpoint_path: + continue + pre_val = pre_metrics.get((mname, labels), 0.0) + delta = max(0.0, val - pre_val) + series_deltas[(mname, labels)] = delta + + # Aggregate + total = len(results) + successes = sum(1 for r in results if r.ok) + failures = total - successes + if total == 0: + return StepMetrics(concurrency=concurrency, total=0, successes=0, failures=0, rps=0.0, p50_ms=0.0, p90_ms=0.0, p95_ms=0.0, p99_ms=0.0), results + + # Approx RPS = total / duration + rps = total / max(0.001, duration_s) + latencies = [r.latency_ms for r in results] + p50 = _percentile(latencies, 50) + p90 = _percentile(latencies, 90) + p95 = _percentile(latencies, 95) + p99 = _percentile(latencies, 99) + + ttfts = [r.ttft_ms for r in results if r.ttft_ms is not None] + ttft_p50 = _percentile(ttfts, 50) if ttfts else None + ttft_p95 = _percentile(ttfts, 95) if ttfts else None + + metrics = StepMetrics( + concurrency=concurrency, + total=total, + successes=successes, + failures=failures, + rps=rps, + p50_ms=p50, + p90_ms=p90, + p95_ms=p95, + p99_ms=p99, + ttft_p50_ms=ttft_p50, + ttft_p95_ms=ttft_p95, + ) + server_metrics = {} + if series_deltas: + # Summaries by status + by_status: Dict[str, float] = {} + total_server = 0.0 + for (_m, labels), d in series_deltas.items(): + status = dict(labels).get("status", "unknown") + by_status[status] = by_status.get(status, 0.0) + d + total_server += d + server_metrics = { + "http_requests_total_deltas": { + "by_status": by_status, + "total": total_server, + } + } + return metrics, results, server_metrics + + +def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace: + p = argparse.ArgumentParser(description="Benchmark tldw_server LLM gateway (/chat/completions)") + p.add_argument("--base-url", default=os.getenv("TLDW_BASE_URL", "http://127.0.0.1:8000"), help="Server base URL, e.g. http://127.0.0.1:8000") + p.add_argument("--path", default="/api/v1/chat/completions", help="Endpoint path") + p.add_argument("--api-key", default=os.getenv("SINGLE_USER_API_KEY"), help="Single-user API key (sent as X-API-KEY)") + p.add_argument("--bearer", default=os.getenv("TLDW_BENCH_BEARER_TOKEN"), help="Bearer token for multi-user mode (Authorization: Bearer ...)") + p.add_argument("--provider", default=os.getenv("TLDW_BENCH_PROVIDER", "openai"), help="api_provider to send (e.g. 
openai, local-llm)") + p.add_argument("--model", default=os.getenv("TLDW_BENCH_MODEL", "gpt-4o-mini"), help="model to send (OpenAI-compatible)") + p.add_argument("--stream", action="store_true", help="Use streaming mode (SSE)") + p.add_argument("--concurrency", nargs="+", type=int, default=[1, 2, 4, 8], help="Concurrency levels to test") + p.add_argument("--duration", type=int, default=20, help="Duration per step, seconds") + p.add_argument("--prompt-bytes", type=int, default=256, help="Approximate size of the user message (bytes)") + p.add_argument("--timeout", type=float, default=60.0, help="Per-request timeout (seconds)") + p.add_argument("--latency-p99-sla-ms", type=float, default=5000.0, help="Stop if p99 exceeds this (ms)") + p.add_argument("--max-error-rate", type=float, default=0.10, help="Stop if error rate exceeds this (0-1)") + p.add_argument("--out", default=None, help="Write JSON results to this file") + p.add_argument("--metrics-url", default=None, help="Optional Prometheus metrics URL (e.g., http://127.0.0.1:8000/metrics)") + p.add_argument("--metrics-interval", type=float, default=2.0, help="Metrics poll interval during a step (seconds)") + p.add_argument("--metrics-endpoint-path", default="/api/v1/chat/completions", help="Endpoint label to filter in http_requests_total") + return p.parse_args(argv) + + +def build_auth_headers(api_key: Optional[str], bearer: Optional[str]) -> Dict[str, str]: + headers: Dict[str, str] = {"Content-Type": "application/json"} + if bearer: + headers["Authorization"] = f"Bearer {bearer}" + elif api_key: + headers["X-API-KEY"] = api_key + return headers + + +async def main_async(args: argparse.Namespace) -> int: + headers = build_auth_headers(args.api_key, args.bearer) + all_results: List[Dict[str, Any]] = [] + print("Benchmarking", flush=True) + print(f" Base URL: {args.base_url}") + print(f" Path : {args.path}") + print(f" Provider: {args.provider}") + print(f" Model : {args.model}") + print(f" Stream : {args.stream}") + print(f" Duration: {args.duration}s per step") + print(f" PromptB : {args.prompt_bytes} bytes") + print(f" Cnc List: {args.concurrency}\n") + + for c in args.concurrency: + metrics, results, server_metrics = await run_step( + base_url=args.base_url, + path=args.path, + headers=headers, + provider=args.provider, + model=args.model, + concurrency=c, + duration_s=args.duration, + stream=args.stream, + prompt_bytes=args.prompt_bytes, + timeout_s=args.timeout, + metrics_url=(args.metrics_url or (args.base_url.rstrip("/") + "/metrics")), + metrics_endpoint_path=args.metrics_endpoint_path, + metrics_interval_s=args.metrics_interval, + ) + all_results.append({ + "concurrency": metrics.concurrency, + "total": metrics.total, + "successes": metrics.successes, + "failures": metrics.failures, + "rps": metrics.rps, + "p50_ms": metrics.p50_ms, + "p90_ms": metrics.p90_ms, + "p95_ms": metrics.p95_ms, + "p99_ms": metrics.p99_ms, + "ttft_p50_ms": metrics.ttft_p50_ms, + "ttft_p95_ms": metrics.ttft_p95_ms, + "error_rate": metrics.error_rate, + "server_metrics": server_metrics, + }) + + print(f"Concurrency {c} => total={metrics.total} ok={metrics.successes} err={metrics.failures} rps={metrics.rps:.1f}") + print(f" p50={metrics.p50_ms:.0f}ms p90={metrics.p90_ms:.0f}ms p95={metrics.p95_ms:.0f}ms p99={metrics.p99_ms:.0f}ms err={metrics.error_rate*100:.1f}%") + if args.stream and metrics.ttft_p50_ms is not None: + print(f" ttft_p50={metrics.ttft_p50_ms:.0f}ms ttft_p95={metrics.ttft_p95_ms:.0f}ms") + if all_results[-1].get("server_metrics"): + by_status = 
all_results[-1]["server_metrics"].get("http_requests_total_deltas", {}).get("by_status", {}) + if by_status: + summary = ", ".join(f"{k}={int(v)}" for k, v in sorted(by_status.items())) + print(f" server http_requests_total (delta): {summary}") + + # Stop criteria + if metrics.error_rate > args.max_error_rate: + print(f"Stopping: error rate {metrics.error_rate:.2f} > {args.max_error_rate}") + break + if metrics.p99_ms > args.latency_p99_sla_ms: + print(f"Stopping: p99 {metrics.p99_ms:.0f}ms > {args.latency_p99_sla_ms:.0f}ms") + break + + if args.out: + try: + with open(args.out, "w", encoding="utf-8") as f: + json.dump({ + "base_url": args.base_url, + "path": args.path, + "provider": args.provider, + "model": args.model, + "stream": args.stream, + "duration": args.duration, + "prompt_bytes": args.prompt_bytes, + "steps": all_results, + "generated_at": time.time(), + }, f, indent=2) + print(f"Saved results to {args.out}") + except Exception as e: + print(f"Failed to save results: {e}") + + return 0 + + +def main(argv: Optional[List[str]] = None) -> int: + args = parse_args(argv) + try: + return asyncio.run(main_async(args)) + except KeyboardInterrupt: + return 130 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/benchmarks/locustfile.py b/Helper_Scripts/benchmarks/locustfile.py new file mode 100644 index 000000000..9c5d83b28 --- /dev/null +++ b/Helper_Scripts/benchmarks/locustfile.py @@ -0,0 +1,172 @@ +""" +Locust load test for tldw_server Chat API (/api/v1/chat/completions) + +Supports closed-loop and an approximate open-loop RPS plan via LoadTestShape. + +Environment variables (override defaults): + - HOST : e.g., http://127.0.0.1:8000 (use --host CLI too) + - TLDW_BENCH_PATH : default "/api/v1/chat/completions" + - TLDW_BENCH_PROVIDER : default "openai" + - TLDW_BENCH_MODEL : default "gpt-4o-mini" + - TLDW_BENCH_STREAM : "1|true|yes|on" to enable streaming + - TLDW_BENCH_PROMPT_BYTES : integer payload size for user message (default 256) + - SINGLE_USER_API_KEY : for single-user mode (sent as X-API-KEY) + - TLDW_BENCH_BEARER_TOKEN : for multi-user mode (Authorization: Bearer ...) + - TLDW_TASKS_PER_USER_PER_SEC : default 1 (used with RPS plan) + - TLDW_RPS_PLAN : comma list of "rps:seconds", e.g. 
"10:30,20:30,40:60,20:30,10:30" + +Run (headless examples): + locust -f Helper_Scripts/benchmarks/locustfile.py \ + --host http://127.0.0.1:8000 --headless -u 50 -r 10 -t 2m + + # RPS plan (approximate open-loop): 10 rps for 30s, 20 rps for 30s, 40 rps for 60s, 20 rps for 30s, 10 rps for 30s + TLDW_RPS_PLAN="10:30,20:30,40:60,20:30,10:30" \ + TLDW_TASKS_PER_USER_PER_SEC=1 \ + locust -f Helper_Scripts/benchmarks/locustfile.py --host http://127.0.0.1:8000 --headless -t 3m +""" + +from __future__ import annotations + +import math +import os +import time +from typing import Any, Dict, Tuple + +from locust import HttpUser, task, between, constant_pacing, events, LoadTestShape + + +BASE_PATH = os.getenv("TLDW_BENCH_PATH", "/api/v1/chat/completions") +PROVIDER = os.getenv("TLDW_BENCH_PROVIDER", "openai") +MODEL = os.getenv("TLDW_BENCH_MODEL", "gpt-4o-mini") +STREAM = os.getenv("TLDW_BENCH_STREAM", "0").strip().lower() in {"1", "true", "yes", "on"} +PROMPT_BYTES = int(os.getenv("TLDW_BENCH_PROMPT_BYTES", "256") or 256) +TASKS_PER_USER_PER_SEC = float(os.getenv("TLDW_TASKS_PER_USER_PER_SEC", "1") or 1) + +API_KEY = os.getenv("SINGLE_USER_API_KEY") +BEARER = os.getenv("TLDW_BENCH_BEARER_TOKEN") + + +def build_headers() -> Dict[str, str]: + headers = {"Content-Type": "application/json"} + if BEARER: + headers["Authorization"] = f"Bearer {BEARER}" + elif API_KEY: + headers["X-API-KEY"] = API_KEY + return headers + + +def build_payload(prompt_bytes: int = PROMPT_BYTES) -> Dict[str, Any]: + base = "Please summarize the following text." + filler_len = max(0, prompt_bytes - len(base)) + filler = (" Lorem ipsum dolor sit amet." * ((filler_len // 28) + 1))[:filler_len] + content = base + filler + return { + "api_provider": PROVIDER, + "model": MODEL, + "stream": STREAM, + "messages": [{"role": "user", "content": content}], + } + + +class ChatUser(HttpUser): + # Constant pacing for predictability; combined with user count gives approximate RPS + wait_time = constant_pacing(1.0 / max(0.0001, TASKS_PER_USER_PER_SEC)) + + @task + def chat(self): + headers = build_headers() + payload = build_payload() + + if not STREAM: + # Regular non-stream request; Locust captures timing automatically + self.client.post(BASE_PATH, headers=headers, json=payload, name="chat:nonstream") + return + + # Streaming: measure TTFT and total time + start = time.perf_counter() + ttft_ms = None + try: + with self.client.post( + BASE_PATH, + headers=headers, + json=payload, + stream=True, + name="chat:stream", + catch_response=True, + ) as resp: + # Iterate SSE lines; first non-empty line marks TTFT + for line in resp.iter_lines(decode_unicode=True): + if not line: + continue + if ttft_ms is None: + ttft_ms = (time.perf_counter() - start) * 1000.0 + # Stop when provider DONE seen + s = str(line).strip().lower() + if s == "data: [done]" or s == "[done]": + break + # Mark success + resp.success() + except Exception as e: + # Emit a failed request event + events.request.fire( + request_type="STREAM", + name="chat:stream", + response_time=(time.perf_counter() - start) * 1000.0, + response_length=0, + exception=e, + context={}, + ) + return + + # Emit a synthetic TTFT metric (as separate request type for visibility) + if ttft_ms is not None: + events.request.fire( + request_type="TTFT", + name="chat:stream_ttft", + response_time=ttft_ms, + response_length=0, + exception=None, + context={}, + ) + + +def _parse_rps_plan(plan: str) -> Tuple[Tuple[float, int], ...]: + steps = [] + for part in plan.split(","): + if not part: + continue + if ":" not 
in part: + continue + rps_s, dur_s = part.split(":", 1) + try: + rps = float(rps_s) + dur = int(dur_s) + steps.append((rps, dur)) + except Exception: + continue + return tuple(steps) + + +class RPSShape(LoadTestShape): + """Approximate target RPS by adjusting user count over time. + + - Define plan via TLDW_RPS_PLAN="rps:seconds,..." + - Effective RPS ~= users * TASKS_PER_USER_PER_SEC + """ + + plan = _parse_rps_plan(os.getenv("TLDW_RPS_PLAN", "")) + start_time = time.time() + + def tick(self): # type: ignore[override] + if not self.plan: + return None + elapsed = time.time() - self.start_time + t = 0.0 + for rps, dur in self.plan: + if elapsed < t + dur: + # compute desired users to approximate this RPS + users = int(math.ceil(rps / max(0.0001, TASKS_PER_USER_PER_SEC))) + spawn_rate = max(1, users) # spawn quickly to target + return (users, spawn_rate) + t += dur + return None diff --git a/Helper_Scripts/download_embedding_models.py b/Helper_Scripts/download_embedding_models.py index b625b589b..9aa59dbec 100644 --- a/Helper_Scripts/download_embedding_models.py +++ b/Helper_Scripts/download_embedding_models.py @@ -67,7 +67,6 @@ def download_models( "repo_id": model_id, "revision": revision, "local_dir": local_dir, - "local_dir_use_symlinks": False, } if allow: kwargs["allow_patterns"] = allow diff --git a/Helper_Scripts/download_kokoro_assets.py b/Helper_Scripts/download_kokoro_assets.py index dbd75cedb..e51d9556e 100644 --- a/Helper_Scripts/download_kokoro_assets.py +++ b/Helper_Scripts/download_kokoro_assets.py @@ -1,54 +1,165 @@ #!/usr/bin/env python3 import argparse import os +import shutil import sys +from pathlib import Path from urllib.request import urlopen +from urllib.error import URLError, HTTPError """ -Download Kokoro ONNX model and voices.json. -Usage: +Kokoro asset downloader (updated for v1.0 ONNX). 
+ +Recommended: use the one‑command installer instead: + python Helper_Scripts/TTS_Installers/install_tts_kokoro.py + +This helper supports two modes: +1) Legacy direct URLs (v0.19 layout; downloads a single voices.json) +2) Hugging Face repo snapshot (v1.0 layout; downloads onnx/model.onnx + voices/ dir) + +Usage (v1.0 recommended): + python Helper_Scripts/download_kokoro_assets.py \ + --repo-id onnx-community/Kokoro-82M-v1.0-ONNX-timestamped \ + --model-path models/kokoro/onnx/model.onnx \ + --voices-dir models/kokoro/voices + +Legacy (v0.19): python Helper_Scripts/download_kokoro_assets.py \ --onnx-url --voices-url \ - --model-path tldw_Server_API/app/core/TTS/models/kokoro-v0_19.onnx \ - --voices-json tldw_Server_API/app/core/TTS/models/voices.json + --model-path models/kokoro/kokoro-v0_19.onnx \ + --voices-json models/kokoro/voices.json """ -def download(url: str, dest: str, force: bool = False) -> None: - os.makedirs(os.path.dirname(dest), exist_ok=True) - if os.path.exists(dest) and not force: + +def _download_url(url: str, dest: Path, force: bool = False) -> None: + dest.parent.mkdir(parents=True, exist_ok=True) + if dest.exists() and not force: print(f"Skip existing: {dest}") return print(f"Downloading {url} -> {dest}") - with urlopen(url) as r, open(dest, 'wb') as f: - while True: - chunk = r.read(8192) - if not chunk: - break - f.write(chunk) + try: + with urlopen(url, timeout=60) as r, open(dest, "wb") as f: + while True: + chunk = r.read(8192) + if not chunk: + break + f.write(chunk) + except (HTTPError, URLError) as e: + print(f"ERROR downloading {url}: {e}", file=sys.stderr) + raise print(f"Saved: {dest}") -def main(): - p = argparse.ArgumentParser(description="Download Kokoro ONNX model and voices.json") - p.add_argument('--onnx-url', required=False) - p.add_argument('--voices-url', required=False) - p.add_argument('--model-path', required=True) - p.add_argument('--voices-json', required=True) - p.add_argument('--force', action='store_true') + +def _hf_download_file(repo_id: str, filename: str, dest: Path, force: bool = False) -> None: + try: + from huggingface_hub import hf_hub_download + except Exception as e: + raise RuntimeError("huggingface_hub is required for repo downloads. pip install huggingface-hub") from e + dest.parent.mkdir(parents=True, exist_ok=True) + if dest.exists() and not force: + print(f"Skip existing: {dest}") + return + print(f"Fetching {repo_id}:{filename} -> {dest}") + # Download into HF cache, then copy to exact destination path + src_fp = hf_hub_download(repo_id=repo_id, filename=filename, force_download=force) + shutil.copy2(src_fp, dest) + + +def _hf_download_dir(repo_id: str, subdir: str, dest: Path, force: bool = False) -> None: + try: + from huggingface_hub import snapshot_download + except Exception as e: + raise RuntimeError("huggingface_hub is required for repo downloads. 
pip install huggingface-hub") from e + print(f"Fetching directory {repo_id}:{subdir} -> {dest}") + # Skip if present and not forcing + if dest.exists() and any(dest.iterdir()) and not force: + print(f"Skip existing dir: {dest}") + return + # Download snapshot into a temporary folder, then copy the requested subdir + import tempfile + with tempfile.TemporaryDirectory(prefix="kokoro_hf_") as _td: + tmp_dir = Path(_td) + # Restrict snapshot to the requested subdirectory only to avoid downloading large ONNX files + snap = Path(snapshot_download( + repo_id=repo_id, + local_dir=str(tmp_dir), + allow_patterns=[f"{subdir}", f"{subdir}/*", f"{subdir}/**"], + force_download=force, + )) + src = snap / subdir + if not src.exists(): + raise FileNotFoundError(f"Subdirectory '{subdir}' not found in snapshot of {repo_id}") + # Prepare destination directory + if dest.exists() and force: + if dest.is_dir(): + shutil.rmtree(dest) + else: + dest.unlink() + dest.parent.mkdir(parents=True, exist_ok=True) + # Copy directory contents while tempdir is alive + shutil.copytree(src, dest, dirs_exist_ok=True) + + +def main() -> int: + p = argparse.ArgumentParser(description="Download Kokoro assets (v1.0 ONNX or legacy v0.19)") + # New (v1.0) options + p.add_argument("--repo-id", default="onnx-community/Kokoro-82M-v1.0-ONNX-timestamped", help="HF repo id to pull from") + p.add_argument("--model-relpath", default="onnx/model.onnx", help="Relative model path within repo (v1.0)") + p.add_argument("--model-path", default="models/kokoro/onnx/model.onnx", help="Destination model path") + p.add_argument("--voices-subdir", default="voices", help="Voices subdirectory within repo (v1.0)") + p.add_argument("--voices-dir", default="models/kokoro/voices", help="Destination voices directory (v1.0)") + p.add_argument("--model-only", action="store_true", help="Only fetch the model (skip voices dir)") + p.add_argument("--voices-only", action="store_true", help="Only fetch the voices dir (skip model)") + # Legacy options + p.add_argument("--onnx-url", required=False, help="Direct URL to ONNX file (legacy)") + p.add_argument("--voices-url", required=False, help="Direct URL to voices.json (legacy)") + p.add_argument("--voices-json", required=False, help="Destination file for voices.json (legacy)") + p.add_argument("--force", action="store_true") args = p.parse_args() - if not args.onnx_url and not os.path.exists(args.model_path): - print("--onnx-url is required if model file does not exist", file=sys.stderr) - sys.exit(2) - if not args.voices_url and not os.path.exists(args.voices_json): - print("--voices-url is required if voices.json does not exist", file=sys.stderr) - sys.exit(2) + # Prevent conflicting flags + if args.model_only and args.voices_only: + print("Choose only one of --model-only or --voices-only", file=sys.stderr) + return 2 + + # Legacy URL mode when any legacy flag is provided + legacy_mode = bool(args.onnx_url or args.voices_url or args.voices_json) + if legacy_mode: + print("[DEPRECATION] v0.19 URL mode detected: consider using the v1.0 repo mode or the installer.") + try: + if args.onnx_url: + _download_url(args.onnx_url, Path(args.model_path), force=args.force) + if args.voices_url: + if not args.voices_json: + print("--voices-json is required to save voices.json in legacy mode", file=sys.stderr) + return 2 + _download_url(args.voices_url, Path(args.voices_json), force=args.force) + except Exception as e: + print(f"ERROR: legacy download failed: {e}", file=sys.stderr) + return 1 + else: + print("Done (legacy mode).") 
+ return 0 + + # v1.0 ONNX repo mode + model_path = Path(args.model_path) + voices_dir = Path(args.voices_dir) + repo_id = str(args.repo_id) + + try: + if not args.voices_only: + _hf_download_file(repo_id, args.model_relpath, model_path, force=args.force) + if not args.model_only: + _hf_download_dir(repo_id, args.voices_subdir, voices_dir, force=args.force) + except Exception as e: + print(f"ERROR: failed to download from repo: {e}", file=sys.stderr) + return 1 - if args.onnx_url: - download(args.onnx_url, args.model_path, force=args.force) - if args.voices_url: - download(args.voices_url, args.voices_json, force=args.force) + print("Done (v1.0 repo mode).") + print(f" Model : {model_path}") + print(f" Voices: {voices_dir}") + return 0 - print("Done.") -if __name__ == '__main__': - main() +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Helper_Scripts/install_chatterbox_deps.py b/Helper_Scripts/install_chatterbox_deps.py index d0a8c0c68..6bf96efb9 100644 --- a/Helper_Scripts/install_chatterbox_deps.py +++ b/Helper_Scripts/install_chatterbox_deps.py @@ -13,6 +13,7 @@ - If you're using a virtualenv, ensure it is activated first. """ import argparse +import os import subprocess import sys @@ -51,11 +52,21 @@ def main(): ap.add_argument("--with-lang", action="store_true", help="install optional multilingual extras") args = ap.parse_args() + # Environment-controlled flags + if os.getenv("TLDW_SETUP_SKIP_PIP"): + print("[chatterbox] Skipping pip installs: TLDW_SETUP_SKIP_PIP=1") + return + + cmd = [sys.executable, "-m", "pip", "install", "-U"] + idx = os.getenv('TLDW_SETUP_PIP_INDEX_URL') + if idx: + cmd += ["--index-url", idx] + # install core deps first - run([sys.executable, "-m", "pip", "install", "-U"] + CORE) + run(cmd + CORE) if args.with_lang: - run([sys.executable, "-m", "pip", "install", "-U"] + LANG) + run(cmd + LANG) print("\nChatterbox dependencies installed successfully.") print("If you will use GPU, ensure the right torch build for your CUDA/ROCm.") diff --git a/Helper_Scripts/launch_postgres.sh b/Helper_Scripts/launch_postgres.sh new file mode 100644 index 000000000..9f39a3a99 --- /dev/null +++ b/Helper_Scripts/launch_postgres.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Simple launcher/provisioner for a local Postgres instance via Docker. +# - Reuses an existing container if present; starts it if stopped; creates it if missing. +# - Waits for readiness and ensures the expected databases exist. +# - Prints convenient DSNs to export for the app/tests. +# +# Defaults can be overridden via env vars: +# PG_CONTAINER (default: tldw_postgres_dev) +# PG_IMAGE (default: postgres:18) +# PG_PORT (default: 55432) +# PG_USER (default: tldw_user) +# PG_PASSWORD (default: TestPassword123!) +# PG_DB_PRIMARY (default: tldw_content) # Jobs/outbox default +# PG_DB_AUTHNZ (default: tldw_users) # AuthNZ default +# +# Example: +# PG_PORT=55432 PG_USER=tldw_user PG_PASSWORD=TestPassword123! 
./Helper_Scripts/launch_postgres.sh + +PG_CONTAINER=${PG_CONTAINER:-tldw_postgres_dev} +PG_IMAGE=${PG_IMAGE:-postgres:18} +PG_PORT=${PG_PORT:-55432} +PG_USER=${PG_USER:-tldw_user} +PG_PASSWORD=${PG_PASSWORD:-TestPassword123!} +PG_DB_PRIMARY=${PG_DB_PRIMARY:-tldw_content} +PG_DB_AUTHNZ=${PG_DB_AUTHNZ:-tldw_users} + +command -v docker >/dev/null 2>&1 || { + echo "Error: docker is required but not found in PATH" >&2 + exit 1 +} + +container_exists() { + docker ps -a --format '{{.Names}}' | grep -qx "${PG_CONTAINER}" +} + +container_running() { + docker ps --format '{{.Names}}' | grep -qx "${PG_CONTAINER}" +} + +start_container() { + if container_exists; then + if container_running; then + echo "Postgres container '${PG_CONTAINER}' already running on port ${PG_PORT}." + return 0 + fi + echo "Starting existing Postgres container '${PG_CONTAINER}'..." + docker start "${PG_CONTAINER}" >/dev/null + else + echo "Creating Postgres container '${PG_CONTAINER}' (image=${PG_IMAGE}) on port ${PG_PORT}..." + docker run -d --name "${PG_CONTAINER}" \ + -e POSTGRES_USER="${PG_USER}" \ + -e POSTGRES_PASSWORD="${PG_PASSWORD}" \ + -e POSTGRES_DB="${PG_DB_PRIMARY}" \ + -p "${PG_PORT}:5432" \ + "${PG_IMAGE}" >/dev/null + fi +} + +wait_for_ready() { + echo "Waiting for Postgres to become ready..." + for i in {1..60}; do + if docker exec "${PG_CONTAINER}" pg_isready -U "${PG_USER}" >/dev/null 2>&1; then + echo "Postgres is ready." + return 0 + fi + sleep 1 + done + echo "Error: Postgres did not become ready in time" >&2 + exit 1 +} + +ensure_database() { + local db_name="$1" + # Check if DB exists; if not, create it as the current user (owner will be PG_USER) + if docker exec -e PGPASSWORD="${PG_PASSWORD}" "${PG_CONTAINER}" \ + psql -U "${PG_USER}" -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname='${db_name}'" | grep -q 1; then + echo "Database '${db_name}' already exists." + else + echo "Creating database '${db_name}'..." + docker exec -e PGPASSWORD="${PG_PASSWORD}" "${PG_CONTAINER}" \ + psql -U "${PG_USER}" -d postgres -v ON_ERROR_STOP=1 -c "CREATE DATABASE \"${db_name}\";" >/dev/null + echo "Database '${db_name}' created." 
+ fi +} + +print_dsn_help() { + local host="127.0.0.1" + local jobs_dsn="postgresql://${PG_USER}:${PG_PASSWORD}@${host}:${PG_PORT}/${PG_DB_PRIMARY}" + local authnz_dsn="postgresql://${PG_USER}:${PG_PASSWORD}@${host}:${PG_PORT}/${PG_DB_AUTHNZ}" + cat < None: + ap = argparse.ArgumentParser(description="PCM streaming client example") + ap.add_argument("--base", default="http://127.0.0.1:8000") + ap.add_argument("--token", default=None) + ap.add_argument("--text", default="Hello from TLDW") + ap.add_argument("--outfile", default="out.pcm") + ap.add_argument("--rate", type=int, default=24000, help="Sample rate") + ap.add_argument("--channels", type=int, default=1, help="Channels") + args = ap.parse_args() + + try: + import httpx + except Exception: + print("Please `pip install httpx`", file=sys.stderr) + sys.exit(2) + + url = f"{args.base.rstrip('/')}/api/v1/audio/speech" + headers = {"Accept": "application/octet-stream", "Content-Type": "application/json"} + if args.token: + headers["Authorization"] = f"Bearer {args.token}" + headers["X-Request-Id"] = str(uuid.uuid4()) + + payload = { + "model": "tts-1", + "input": args.text, + "voice": "alloy", + "response_format": "pcm", + "stream": True, + } + + with httpx.stream("POST", url, headers=headers, json=payload, timeout=60.0) as r: + r.raise_for_status() + print(f"Streaming PCM → {args.outfile} (rate={args.rate}, channels={args.channels})") + with open(args.outfile, "wb") as fout: + # Optional realtime playback + try: + import sounddevice as sd + import numpy as np + use_playback = True + except Exception: + use_playback = False + + for chunk in r.iter_bytes(): + if not chunk: + continue + fout.write(chunk) + if use_playback: + arr = np.frombuffer(chunk, dtype=np.int16) + sd.play(arr, samplerate=args.rate, blocking=False) + + if "sd" in locals(): + try: + sd.stop() + except Exception: + pass + + print("Done.") + + +if __name__ == "__main__": + main() diff --git a/Helper_Scripts/voice_latency_harness/examples/ws_tts_client.py b/Helper_Scripts/voice_latency_harness/examples/ws_tts_client.py new file mode 100644 index 000000000..f9f5b272f --- /dev/null +++ b/Helper_Scripts/voice_latency_harness/examples/ws_tts_client.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +""" +WebSocket TTS client example (for optional `/api/v1/audio/stream/tts`). + +Sends a prompt frame and writes received PCM16 frames to a file. 
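+
+Example (illustrative; assumes the optional WS TTS endpoint is enabled on the server
+and that it emits 24 kHz mono PCM16):
+    python Helper_Scripts/voice_latency_harness/examples/ws_tts_client.py \
+        --base ws://127.0.0.1:8000 --token <bearer> --text "Hello from TLDW" --outfile out_ws_tts.pcm
+    # Play back the raw PCM afterwards, e.g.: ffplay -f s16le -ar 24000 -ac 1 out_ws_tts.pcm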
+""" +from __future__ import annotations + +import argparse +import asyncio +import json +import sys +import uuid + + +async def run(base: str, token: str | None, text: str, outfile: str) -> None: + try: + import websockets # type: ignore + except Exception: + print("Please `pip install websockets`", file=sys.stderr) + sys.exit(2) + + url = base.rstrip("/") + "/api/v1/audio/stream/tts" + headers = {} + if token: + headers["Authorization"] = f"Bearer {token}" + headers["X-Request-Id"] = str(uuid.uuid4()) + + async with websockets.connect(url, extra_headers=headers, max_size=None) as ws: + # Send prompt frame + await ws.send(json.dumps({"type": "prompt", "text": text, "format": "pcm"})) + print(f"Receiving PCM → {outfile}") + with open(outfile, "wb") as f: + try: + while True: + msg = await ws.recv() + if isinstance(msg, (bytes, bytearray)): + f.write(msg) + else: + try: + data = json.loads(msg) + if data.get("type") == "error": + print(f"Server error: {data.get('message')}") + break + except Exception: + # Ignore non-JSON text + pass + except (websockets.ConnectionClosedOK, websockets.ConnectionClosedError): + pass + + +def main() -> None: + ap = argparse.ArgumentParser(description="WS TTS client example") + ap.add_argument("--base", default="ws://127.0.0.1:8000") + ap.add_argument("--token", default=None) + ap.add_argument("--text", default="Hello from TLDW") + ap.add_argument("--outfile", default="out_ws_tts.pcm") + args = ap.parse_args() + asyncio.run(run(args.base, args.token, args.text, args.outfile)) + + +if __name__ == "__main__": + main() diff --git a/Helper_Scripts/voice_latency_harness/harness.py b/Helper_Scripts/voice_latency_harness/harness.py new file mode 100644 index 000000000..6d29b3044 --- /dev/null +++ b/Helper_Scripts/voice_latency_harness/harness.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +Minimal voice latency harness stub. + +Currently measures TTS time-to-first-byte (TTFB) for the REST endpoint +`/api/v1/audio/speech` with `response_format=pcm` using streaming. + +Extend with WS STT commit/final timing once VAD/commit is in place to compute +`stt_final_latency_seconds` and end-to-end `voice_to_voice_seconds`. 
+""" +from __future__ import annotations + +import argparse +import json +import sys +import time +import uuid +from typing import Dict, Any, List + + +def _now() -> float: + return time.time() + + +def _p50(values: List[float]) -> float: + if not values: + return 0.0 + s = sorted(values) + mid = (len(s) - 1) * 0.5 + i = int(mid) + if i == mid: + return s[i] + return (s[i] + s[i + 1]) / 2 + + +def _p90(values: List[float]) -> float: + if not values: + return 0.0 + s = sorted(values) + k = max(0, int(round(0.9 * (len(s) - 1)))) + return s[k] + + +def measure_tts_ttfb(base: str, token: str | None, text: str, runs: int = 5) -> Dict[str, Any]: + try: + import httpx # type: ignore + except Exception: + print("Please `pip install httpx` to run the harness.", file=sys.stderr) + sys.exit(2) + + url = f"{base.rstrip('/')}/api/v1/audio/speech" + headers = {"Accept": "application/octet-stream", "Content-Type": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + + ttfb_runs: List[float] = [] + per_run: List[Dict[str, Any]] = [] + + for i in range(max(1, runs)): + req_id = str(uuid.uuid4()) + headers["X-Request-Id"] = req_id + payload = { + "model": "tts-1", + "input": text, + "voice": "alloy", + "response_format": "pcm", + "stream": True, + } + start = _now() + first = None + total_bytes = 0 + try: + with httpx.stream("POST", url, headers=headers, json=payload, timeout=60.0) as r: + r.raise_for_status() + for chunk in r.iter_bytes(): + if not chunk: + continue + total_bytes += len(chunk) + if first is None: + first = _now() + ttfb = max(0.0, first - start) + ttfb_runs.append(ttfb) + # Continue consuming to validate stream is healthy + except (httpx.HTTPError, httpx.RequestError) as e: + per_run.append({"run": i + 1, "ok": False, "error": str(e)}) + continue + per_run.append({"run": i + 1, "ok": True, "ttfb_s": ttfb_runs[-1] if ttfb_runs else None, "bytes": total_bytes, "request_id": req_id}) + + summary = { + "mode": "tts", + "runs": len(per_run), + "p50_ttfb_s": round(_p50(ttfb_runs), 4) if ttfb_runs else None, + "p90_ttfb_s": round(_p90(ttfb_runs), 4) if ttfb_runs else None, + "per_run": per_run, + } + return summary + + +def main() -> None: + ap = argparse.ArgumentParser(description="Voice Latency Harness (stub)") + ap.add_argument("--mode", choices=["tts"], default="tts", help="Measurement mode") + ap.add_argument("--base", default="http://127.0.0.1:8000", help="Server base URL") + ap.add_argument("--token", default=None, help="Auth token (Bearer)") + ap.add_argument("--text", default="Hello from TLDW", help="TTS input text") + ap.add_argument("--runs", type=int, default=5, help="Number of runs") + args = ap.parse_args() + + if args.mode == "tts": + result = measure_tts_ttfb(args.base, args.token, args.text, args.runs) + print(json.dumps(result, indent=2)) + return + + print("Unsupported mode", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/Makefile b/Makefile index 4e69db4eb..235394349 100644 --- a/Makefile +++ b/Makefile @@ -14,3 +14,113 @@ pg-restore: @echo "[pg-restore] Restoring from $(PG_DUMP_FILE)" @python Helper_Scripts/pg_backup_restore.py restore --dump-file "$(PG_DUMP_FILE)" +# ----------------------------------------------------------------------------- +# Monitoring stack (Prometheus + Grafana) +# ----------------------------------------------------------------------------- +.PHONY: monitoring-up monitoring-down monitoring-logs + +MON_STACK := Dockerfiles/Monitoring/docker-compose.monitoring.yml + +monitoring-up: + 
@echo "[monitoring] Starting Prometheus + Grafana" + docker compose -f $(MON_STACK) up -d + @echo "[monitoring] Grafana: http://localhost:3000 (admin/admin). Prometheus: http://localhost:9090" + +monitoring-down: + @echo "[monitoring] Stopping Prometheus + Grafana" + docker compose -f $(MON_STACK) down -v + +monitoring-logs: + docker compose -f $(MON_STACK) logs -f + +# ----------------------------------------------------------------------------- +# Dev Server (mock mode) +# ----------------------------------------------------------------------------- +.PHONY: server-up-dev + +# Defaults (override on command line) +HOST ?= 127.0.0.1 +PORT ?= 8000 +API_KEY ?= REPLACE-THIS-WITH-A-SECURE-API-KEY-123 + +server-up-dev: + @echo "[server] Starting uvicorn in mock mode on $(HOST):$(PORT)" + AUTH_MODE=single_user \ + SINGLE_USER_API_KEY="$(API_KEY)" \ + DEFAULT_LLM_PROVIDER=openai \ + CHAT_FORCE_MOCK=1 \ + STREAMS_UNIFIED=1 \ + uvicorn tldw_Server_API.app.main:app --host $(HOST) --port $(PORT) --reload + +# ----------------------------------------------------------------------------- +# Benchmarks (LLM Gateway) +# ----------------------------------------------------------------------------- +.PHONY: bench-sweep bench-stream bench-rps + +# Defaults (override on command line) +BASE_URL ?= http://127.0.0.1:8000 +API_KEY ?= $(SINGLE_USER_API_KEY) +CONCURRENCY ?= 1 2 4 8 +DURATION ?= 20 +PROMPT_BYTES ?= 256 +OUTDIR ?= .benchmarks + +bench-sweep: + @mkdir -p $(OUTDIR) + @echo "[bench] Non-stream sweep: $(CONCURRENCY) for $(DURATION)s (prompt $(PROMPT_BYTES)B)" + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --base-url $(BASE_URL) \ + --path /api/v1/chat/completions \ + --api-key "$(API_KEY)" \ + --concurrency $(CONCURRENCY) \ + --duration $(DURATION) \ + --prompt-bytes $(PROMPT_BYTES) \ + --out $(OUTDIR)/bench_nonstream.json + +bench-stream: + @mkdir -p $(OUTDIR) + @echo "[bench] Streaming sweep: $(CONCURRENCY) for $(DURATION)s (prompt $(PROMPT_BYTES)B)" + python Helper_Scripts/benchmarks/llm_gateway_bench.py \ + --stream \ + --base-url $(BASE_URL) \ + --path /api/v1/chat/completions \ + --api-key "$(API_KEY)" \ + --concurrency $(CONCURRENCY) \ + --duration $(DURATION) \ + --prompt-bytes $(PROMPT_BYTES) \ + --out $(OUTDIR)/bench_stream.json + +# Approximate open-loop RPS plan via Locust +RPS_PLAN ?= 10:30,20:30,40:60,20:30,10:30 +TASKS_PER_USER_PER_SEC ?= 1 +LOCUST_T ?= 3m + +bench-rps: + @echo "[bench-rps] RPS plan: $(RPS_PLAN) (tasks/user/sec=$(TASKS_PER_USER_PER_SEC))" + TLDW_RPS_PLAN="$(RPS_PLAN)" \ + TLDW_TASKS_PER_USER_PER_SEC="$(TASKS_PER_USER_PER_SEC)" \ + SINGLE_USER_API_KEY="$(API_KEY)" \ + locust -f Helper_Scripts/benchmarks/locustfile.py --host $(BASE_URL) --headless -t $(LOCUST_T) + +# ----------------------------------------------------------------------------- +# Full run: bring up monitoring, run non-stream + stream sweeps, print links +# ----------------------------------------------------------------------------- +.PHONY: bench-full + +FULL_CONCURRENCY ?= 1 2 4 8 +FULL_STREAM_CONCURRENCY ?= 4 8 16 +FULL_DURATION ?= 20 + +bench-full: + @echo "[full] Starting monitoring stack (Prometheus + Grafana)" + $(MAKE) monitoring-up + @echo "[full] Running non-stream sweep: $(FULL_CONCURRENCY) for $(FULL_DURATION)s" + $(MAKE) bench-sweep CONCURRENCY="$(FULL_CONCURRENCY)" DURATION=$(FULL_DURATION) + @echo "[full] Running stream sweep: $(FULL_STREAM_CONCURRENCY) for $(FULL_DURATION)s" + $(MAKE) bench-stream CONCURRENCY="$(FULL_STREAM_CONCURRENCY)" DURATION=$(FULL_DURATION) + @echo "[full] 
Done. Results in .benchmarks/bench_nonstream.json and .benchmarks/bench_stream.json" + @echo "[full] Grafana: http://localhost:3000/d/tldw-llm-gateway (admin/admin)" + @echo "[full] Prometheus: http://localhost:9090" + @echo "[full] Tip: enable STREAMS_UNIFIED=1 on the server to populate SSE panels" + @echo "[full] Stopping monitoring stack" + $(MAKE) monitoring-down diff --git a/New-User-Guide.md b/New-User-Guide.md new file mode 100644 index 000000000..497d943d8 --- /dev/null +++ b/New-User-Guide.md @@ -0,0 +1,245 @@ +# WIP/NOT ACCURATE +# tldw_server New User Guide + +This guide walks a brand-new user through the shortest path to a working local deployment, a first media ingestion, and the most useful follow-up resources. It complements `README.md` by focusing on actionable steps rather than full feature listings. + +--- + +## 1. What You Get +- **API-first media assistant**: ingest video/audio/docs, run hybrid RAG, and expose OpenAI-compatible Chat, Audio, and Embeddings endpoints. +- **Bring your own models**: plug in 16+ commercial or local providers (OpenAI, Anthropic, vLLM, Ollama, etc.). +- **Knowledge tooling**: searchable notes, prompt studio, character chats, evaluations, Chatbooks import/export. +- **Deployment flexibility**: run everything locally with Python, Docker Compose, or pair the backend with the Next.js Web UI. + +--- + +## 2. Before You Start + +| Requirement | Notes | +|-------------|-------| +| **OS** | Linux, macOS, WSL2, or Windows with Python build tools | +| **Python** | 3.11+ (3.12/3.13 tested) | +| **System packages** | `ffmpeg`, `portaudio/pyaudio` (macOS) or `python3-pyaudio` (Linux) for audio capture | +| **Disk** | Plan for SQLite DBs under `Databases/` plus media storage | +| **GPU (optional)** | Enables faster STT/LLM backends; fallback CPU works | +| **Provider credentials** | Add OpenAI/Anthropic/etc. keys to `.env` or `Config_Files/config.txt` | + +> Tip: If you are on Windows without WSL2, install the Python build tools and `ffmpeg` manually, or use the Docker path below to avoid native dependencies. + +### 2.1 Install ffmpeg + audio capture libraries + +These packages let the server transcode media and access microphones. Install **before** running `pip install -e .`. + +| Platform | Commands | +|----------|----------| +| **macOS (Homebrew)** | `brew install ffmpeg portaudio`
`pip install pyaudio` | +| **Ubuntu/Debian** | `sudo apt update && sudo apt install ffmpeg portaudio19-dev python3-pyaudio` | +| **Fedora** | `sudo dnf install ffmpeg portaudio portaudio-devel python3-pyaudio` | +| **Windows** | `choco install ffmpeg` (or download binaries)
`pip install pipwin && pipwin install pyaudio` | +| **WSL2** | Use the Linux instructions inside WSL; Windows audio devices stay accessible through ALSA/Pulse. | + +> If `pip install pyaudio` fails, install the system `portaudio` dev headers first (Linux) or use `pipwin` (Windows) to pull a matching wheel. + +--- + +## 3. Fast Path: Local Python Install + +Follow these steps from the repository root (`tldw_server2/`): + +### 3.1 Create a virtual environment and install dependencies +```bash +python3 -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -e . +# Optional extras: +# pip install -e ".[dev]" # linting/tests +# pip install -e ".[multiplayer]" # Postgres + multi-user helpers +# pip install -e ".[otel]" # telemetry exporters +``` + +### 3.2 Configure auth + provider settings +Create `.env` (or extend if it already exists): +```bash +cat > .env <<'EOF' +AUTH_MODE=single_user +SINGLE_USER_API_KEY=CHANGE_ME_TO_SECURE_API_KEY +DATABASE_URL=sqlite:///./Databases/users.db +# Provider keys (examples) +# OPENAI_API_KEY=sk-... +# ANTHROPIC_API_KEY=... +EOF +``` +You can also keep large provider configs in `tldw_Server_API/Config_Files/config.txt`. + +### 3.3 Initialize AuthNZ and databases +```bash +python -m tldw_Server_API.app.core.AuthNZ.initialize +``` +This validates the environment, seeds the AuthNZ DB, and prints the API key for single-user mode if not set. + +### 3.4 Run the API +```bash +python -m uvicorn tldw_Server_API.app.main:app --reload +``` +- Docs/UI: http://127.0.0.1:8000/docs +- Legacy Web UI: http://127.0.0.1:8000/webui/ + +### 3.5 Smoke-test the API +Use your API key (`SINGLE_USER_API_KEY`) in the header: +```bash +curl -X POST "http://127.0.0.1:8000/api/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -H "X-API-KEY: CHANGE_ME_TO_SECURE_API_KEY" \ + -d '{ + "model": "openai:gpt-4o-mini", + "messages": [{"role": "user", "content": "Say hello from tldw_server"}] + }' +``` +Replace `model` with anything configured in your provider list (see `/api/v1/llm/providers` for active entries). + +--- + +## 4. Runtime & Provider Configuration + +Once the server boots, you’ll likely tailor behaviour, credentials, and model lists. Two files drive most settings: + +### 4.1 `.env`: secrets, auth, and DB targets +- Location: `tldw_server2/.env` (same folder as `pyproject.toml`). +- Best place for **secrets**: API keys, DB passwords, Postgres URLs, JWT secrets. +- Common fields: + - `AUTH_MODE` = `single_user` (API key header) or `multi_user` (JWT/auth endpoints). + - `SINGLE_USER_API_KEY` or `JWT_SECRET_KEY`. + - `DATABASE_URL` (AuthNZ DB), `JOBS_DB_URL`, `TEST_DATABASE_URL`. + - Provider keys: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GROQ_API_KEY`, etc. + - `STREAMS_UNIFIED`, `LOG_LEVEL`, and other boolean toggles documented in `Env_Vars.md`. +- After editing `.env`, restart the FastAPI server (env variables are read at startup). + +### 4.2 `config.txt`: user-facing defaults and feature flags +- Location: `tldw_Server_API/Config_Files/config.txt`. +- Back this file up or keep a copy in `.git/info/exclude` if you don’t want Git noise. +- Controls everything from file-size limits to chat rate limits. Key sections: + - `[Server]`: `disable_cors`, `allow_remote_webui_access`, and `webui_ip_allowlist` for restricting the legacy UI. + - `[Media-Processing]`: per-file-size caps/timeouts for video/audio/PDF ingestion. + - `[Chat-Module]`: streaming defaults, history depth, rate limits. 
+ - `[Database]`: choose SQLite vs Postgres for content (`pg_*` fields). + - `[Chunking]`, `[RAG]`, `[Embeddings]`: tune context windows and vector backends. +- Use any editor, then restart the API (or run `python -m tldw_Server_API.app.core.AuthNZ.initialize` once to validate the config). + +### 4.3 Adding cloud LLM providers & keys +1. Drop the API key into `.env`, e.g. `ANTHROPIC_API_KEY=sk-ant-...`. +2. In `config.txt`, open the `[API]` section and set the defaults for that provider: + ```ini + [API] + anthropic_model = claude-sonnet-4.5 + anthropic_temperature = 0.6 + default_api = anthropic # optional: make it the default `/chat/completions` target + ``` +3. If the provider exposes a custom base URL, set it here as well (e.g. `qwen_api_base_url`). +4. Call `GET /api/v1/llm/providers` to confirm the provider is now listed. + +### 4.4 Pointing to self-hosted/local LLMs +Edit the `[Local-API]` section of `config.txt`. Each entry maps to a backend host: + +```ini +[Local-API] +ollama_api_IP = http://192.168.1.50:11434/v1/chat/completions +ollama_model = llama3:instruct +vllm_api_IP = http://localhost:8001/v1/chat/completions +vllm_model = my-hf-model-id +tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions +``` + +- Use full URLs (protocol + host + port + path). For LAN hosts, whitelist their CIDRs via `[Server] webui_ip_allowlist`. +- Update temperature/top_p/max_tokens per provider if the backend expects different defaults. +- After editing, restart the API so the provider manager reloads the endpoints. + +### 4.5 Where to adjust user-facing behaviour +- **Rate limits**: `[Chat-Module] rate_limit_per_minute`, `[Character-Chat]` guards. +- **Storage paths**: `[Database] sqlite_path`, `backup_path`, and `chroma_db_path`. +- **Web access**: `[Server] allow_remote_webui_access=true` plus `webui_ip_allowlist=10.0.0.0/24`. +- **Setup UI**: `[Setup] allow_remote_setup_access=true` if you must run first-time setup remotely (only on trusted networks). + +--- + +## 5. Docker Compose Path (All Services) + +If you prefer containers (or are on Windows without build tools): +```bash +# Base stack (SQLite users DB + Redis + app) +docker compose -f Dockerfiles/docker-compose.yml up -d --build + +# Multi-user/Postgres mode +export AUTH_MODE=multi_user +export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users +docker compose -f Dockerfiles/docker-compose.yml \ + -f Dockerfiles/docker-compose.override.yml up -d --build +``` +After the containers are up, initialize AuthNZ inside the app container: +```bash +docker compose -f Dockerfiles/docker-compose.yml exec app \ + python -m tldw_Server_API.app.core.AuthNZ.initialize +``` +- Check logs: `docker compose -f Dockerfiles/docker-compose.yml logs -f app` +- Optional overlays: `docker-compose.dev.yml` (unified streaming), `docker-compose.pg.yml` (pgvector/pgbouncer), proxy variants. + +--- + +## 6. Connect the Next.js Web UI (Optional but Friendly) +The `tldw-frontend/` directory hosts the current Next.js client. +```bash +cd tldw-frontend +cp .env.local.example .env.local # set NEXT_PUBLIC_API_URL=http://127.0.0.1:8000 +echo "NEXT_PUBLIC_X_API_KEY=CHANGE_ME_TO_SECURE_API_KEY" >> .env.local +npm install +npm run dev -- -p 8080 +``` +Open http://localhost:8080 to use the UI. CORS defaults allow 8080, so matching the port avoids manual server tweaks. + +--- + +## 7. Process Your First Media File +Once the API is running: +1. Place a sample file under `Samples/` (the repo already includes several fixtures). +2. 
Use the media ingestion endpoint: +```bash +curl -X POST "http://127.0.0.1:8000/api/v1/media/process" \ + -H "X-API-KEY: CHANGE_ME_TO_SECURE_API_KEY" \ + -F "source_type=file" \ + -F "file=@Samples/sample_audio.mp3" \ + -F "title=Sample Audio" \ + -F "tags=demo,quickstart" +``` +3. Track progress via `/api/v1/media/status/{job_id}` (returned from the process call) or use `/api/v1/media/search` once ingestion finishes. + +--- + +## 8. Common Next Steps +- **Explore docs**: OpenAPI docs at `/docs`, plus deep dives in `Docs/` (RAG, AuthNZ, MCP, etc.). +- **List available providers**: `GET /api/v1/llm/providers` to confirm names/models you can target. +- **Run tests**: `python -m pytest -v` (add `-m "unit"` or `-m "integration"` as needed). +- **Switch to PostgreSQL**: set `DATABASE_URL` and leverage `tldw_Server_API/app/core/DB_Management/` migration helpers. +- **Enable unified streaming**: export `STREAMS_UNIFIED=1` or use the Docker dev overlay for SSE/WS pilots. + +--- + +## 9. Troubleshooting Cheat Sheet + +| Symptom | Likely Cause | Fix | +|---------|--------------|-----| +| `uvicorn` crashes on startup | Missing `.env` or invalid provider config | Re-run `AuthNZ.initialize`, inspect `.env` values | +| `ffmpeg`/audio errors | Binary not installed or not in `PATH` | Install `ffmpeg`, restart terminal | +| `X-API-KEY` rejected | Key mismatch or wrong auth mode | Verify `AUTH_MODE`, check env, inspect server logs | +| Media stuck in `processing` | Background workers blocked or DB locked | Check logs under `Databases/`, ensure only one writer, consider Postgres | +| Docker health fails | Compose overlay mismatch | Start with base compose file, then add overlays gradually | + +> Enable debug logging by setting `LOG_LEVEL=DEBUG` before launching the server if you need granular traces (Loguru handles formatting). + +--- + +## 10. Where to Learn More +- `README.md`: feature matrix, architecture diagrams, release notes. +- `Docs/`: AuthNZ, RAG, TTS/STT, MCP, deployment profiles. +- `Project_Guidelines.md`: development philosophy if you plan to contribute. +- GitHub Issues/Discussions: report bugs, request features, or ask setup questions. + +Happy building! Once you ingest your first file and run a chat completion, you have the full pipeline working—everything else (prompt studio, evaluations, MCP, browser extension) builds on the same foundation. diff --git a/README.md b/README.md index 6e0bf2cf1..edfcf44fb 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ - [Current Status](#current-status) - [What's New](#whats-new) - [Highlights](#highlights) -- [Feature Status Matrix](#feature-status-matrix) +- [Feature Status](#feature-status) - [Architecture & Repo Layout](#architecture--repo-layout) - [Architecture Diagram](#architecture-diagram) - [Quickstart](#quickstart) @@ -32,7 +32,8 @@ - [Frontend & UI](#frontend--ui) - [Documentation & Resources](#documentation--resources) - [Deployment](#deployment) -- [Samples (Quick Links)](#samples-quick-links) +- [Networking & Limits](#networking--limits) +- [Monitoring](#monitoring) - [Troubleshooting](#troubleshooting) - [Contributing & Support](#contributing--support) - [Developer Guides](#developer-guides) @@ -78,6 +79,26 @@ This is a major milestone release that transitions tldw from a Gradio-based appl See: `Docs/Published/RELEASE_NOTES.md` for detailed release notes. +--- + +### Migrating From Gradio Version (pre-0.1.0) +- Backup: + - `cp -a ./Databases ./Databases.backup` +- Update configuration: + - Copy provider keys to `.env`. 
+ - For AuthNZ setup: `cp .env.authnz.template .env && python -m tldw_Server_API.app.core.AuthNZ.initialize` +- Database migration: + - Inspect: `python -m tldw_Server_API.app.core.DB_Management.migrate_db status` + - Migrate: `python -m tldw_Server_API.app.core.DB_Management.migrate_db migrate` + - Optional: `--db-path /path/to/Media_DB_v2.db` if not using defaults + - If migrating content to Postgres later, use the tools under `tldw_Server_API/app/core/DB_Management/` (e.g., migration_tools.py) +- API changes: + - Use FastAPI routes; see http://127.0.0.1:8000/docs. OpenAI-compatible endpoints are available (e.g., `/api/v1/chat/completions`). +- Frontend: + - Legacy: /webui + - Or integrate directly against the API; +--- + ## Highlights - Media ingestion & processing: video, audio, PDFs, EPUB, DOCX, HTML, Markdown, XML, MediaWiki dumps; metadata extraction; configurable chunking. @@ -88,150 +109,15 @@ See: `Docs/Published/RELEASE_NOTES.md` for detailed release notes. - Prompt Studio & evaluations: projects, prompt testing/optimization, unified evaluation APIs (G-Eval, RAG, batch metrics). - MCP Unified: production MCP with JWT/RBAC, tool execution, WebSockets, metrics, and health endpoints. -## Feature Status Matrix - -
Feature Status Matrix Here - -Legend -- Working: Stable and actively supported -- WIP: In active development; APIs or behavior may evolve -- Experimental: Available behind flags or with caveats; subject to change - -### Admin Reporting -- HTTP usage (daily): `GET /api/v1/admin/usage/daily` -- HTTP top users: `GET /api/v1/admin/usage/top` -- LLM usage log: `GET /api/v1/admin/llm-usage` -- LLM usage summary: `GET /api/v1/admin/llm-usage/summary` (group_by=`user|provider|model|operation|day`) -- LLM top spenders: `GET /api/v1/admin/llm-usage/top-spenders` -- LLM CSV export: `GET /api/v1/admin/llm-usage/export.csv` -- Grafana dashboard JSON (LLM cost + tokens): `Docs/Deployment/Monitoring/Grafana_LLM_Cost_Top_Providers.json` - - Grafana dashboard JSON (LLM Daily Spend): `Docs/Deployment/Monitoring/Grafana_LLM_Daily_Spend.json` -- Prometheus alert rules (daily spend thresholds): `Samples/Prometheus/alerts.yml` - - -### Media Ingestion - -| Capability | Status | Notes | Links | -|---|---|---|---| -| URLs/files: video, audio, PDFs, EPUB, DOCX, HTML, Markdown, XML, MediaWiki | Working | Unified ingestion + metadata | [docs](Docs/Code_Documentation/Ingestion_Media_Processing.md) · [code](tldw_Server_API/app/api/v1/endpoints/media.py) | -| yt-dlp downloads + ffmpeg | Working | 1000+ sites via yt-dlp | [code](tldw_Server_API/app/core/Ingestion_Media_Processing/Video/Video_DL_Ingestion_Lib.py) | -| Adaptive/multi-level chunking | Working | Configurable size/overlap | [docs](Docs/API-related/Chunking_Templates_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chunking.py) | -| OCR on PDFs/images | Working | Tesseract baseline; optional dots.ocr/POINTS | [docs](Docs/API-related/OCR_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/ocr.py) | -| MediaWiki import | Working | Config via YAML | [docs](Docs/Code_Documentation/Ingestion_Pipeline_MediaWiki.md) · [config](tldw_Server_API/Config_Files/mediawiki_import_config.yaml) | -| Browser extension capture | WIP | Web capture extension | [docs](Docs/Product/Content_Collections_PRD.md) | - -### Audio (STT/TTS) - -| Capability | Status | Notes | Links | -|---|---|---|---| -| File-based transcription | Working | faster_whisper, NeMo, Qwen2Audio | [docs](Docs/API-related/Audio_Transcription_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | -| Real-time WS transcription | Working | `WS /api/v1/audio/stream/transcribe` | [docs](Docs/API-related/Audio_Transcription_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | -| Diarization + VAD | Working | Optional diarization, timestamps | [docs](Docs/Code_Documentation/Ingestion_Pipeline_Audio.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | -| TTS (OpenAI-compatible) | Working | Streaming + non-streaming | [docs](tldw_Server_API/app/core/TTS/TTS-README.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | -| Voice catalog + management | Working | `GET /api/v1/audio/voices/catalog` | [docs](tldw_Server_API/app/core/TTS/README.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio.py) | -| Audio jobs queue | Working | Background audio processing | [docs](Docs/API-related/Audio_Jobs_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/audio_jobs.py) | - -### RAG & Search - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Full-text search (FTS5) | Working | Fast local search | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/rag_unified.py) | -| Embeddings + ChromaDB | Working | 
OpenAI-compatible embeddings | [docs](Docs/API-related/Embeddings_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/embeddings_v5_production_enhanced.py) | -| Hybrid BM25 + vector + rerank | Working | Contextual retrieval | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/rag_unified.py) | -| Vector Stores (OpenAI-compatible) | Working | Chroma/PG adapters | [docs](Docs/API-related/Vector_Stores_Admin_and_Query.md) · [code](tldw_Server_API/app/api/v1/endpoints/vector_stores_openai.py) | -| Media embeddings ingestion | Working | Create vectors from media | [code](tldw_Server_API/app/api/v1/endpoints/media_embeddings.py) | -| pgvector backend | Experimental | Optional backend | [code](tldw_Server_API/app/core/RAG/rag_service/vector_stores/) | - -### Chat & LLMs - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Chat Completions (OpenAI) | Working | Streaming supported | [docs](Docs/API-related/Chat_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chat.py) | -| Function calling / tools | Working | Tool schema validation | [docs](Docs/API-related/Chat_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chat.py) | -| Provider integrations (16+) | Working | Commercial + local | [docs](Docs/API-related/Providers_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/llm_providers.py) | -| Local providers | Working | vLLM, llama.cpp, Ollama, etc. | [docs](tldw_Server_API/app/core/LLM_Calls/README.md) · [code](tldw_Server_API/app/core/LLM_Calls/) | -| Strict OpenAI compat filter | Working | Filter non-standard keys | [docs](tldw_Server_API/app/core/LLM_Calls/README.md) | -| Providers listing | Working | `GET /api/v1/llm/providers` | [docs](Docs/API-related/Providers_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/llm_providers.py) | -| Moderation endpoint | Working | Basic wrappers | [code](tldw_Server_API/app/api/v1/endpoints/moderation.py) | - -### Knowledge, Notes, Prompt Studio - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Notes + tagging | Working | Notebook-style notes | [code](tldw_Server_API/app/api/v1/endpoints/notes.py) | -| Prompt library | Working | Import/export | [code](tldw_Server_API/app/api/v1/endpoints/prompts.py) | -| Prompt Studio: projects/prompts/tests | Working | Test cases + runs | [docs](Docs/API-related/Prompt_Studio_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/prompt_studio_projects.py) | -| Prompt Studio: optimization + WS | Working | Live updates | [docs](Docs/API-related/Prompt_Studio_API.md) · [code](tldw_Server_API/app/api/v1/endpoints/prompt_studio_optimization.py) | -| Character cards & sessions | Working | SillyTavern-compatible | [docs](Docs/API-related/CHARACTER_CHAT_API_DOCUMENTATION.md) · [code](tldw_Server_API/app/api/v1/endpoints/characters_endpoint.py) | -| Chatbooks import/export | Working | Backup/export | [docs](Docs/API-related/Chatbook_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/chatbooks.py) | -| Flashcards | Working | Decks/cards, APKG export | [code](tldw_Server_API/app/api/v1/endpoints/flashcards.py) | -| Reading & highlights | Working | Reading items mgmt | [docs](Docs/Product/Content_Collections_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/reading.py) | - -### Evaluations - -| Capability | Status | Notes | Links | -|---|---|---|---| -| G-Eval | Working | Unified eval API | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · 
[code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | -| RAG evaluation | Working | Pipeline presets + metrics | [docs](Docs/API-related/RAG-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_rag_pipeline.py) | -| OCR evaluation (JSON/PDF) | Working | Text + PDF flows | [docs](Docs/API-related/OCR_API_Documentation.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | -| Embeddings A/B tests | Working | Provider/model compare | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_embeddings_abtest.py) | -| Response quality & datasets | Working | Datasets CRUD + runs | [docs](Docs/API-related/Evaluations_API_Unified_Reference.md) · [code](tldw_Server_API/app/api/v1/endpoints/evaluations_unified.py) | - -### Research & Web Scraping - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Web search (multi-provider) | Working | Google, DDG, Brave, Kagi, Tavily, Searx | [code](tldw_Server_API/app/api/v1/endpoints/research.py) | -| Aggregation/final answer | Working | Structured answer + evidence | [code](tldw_Server_API/app/api/v1/endpoints/research.py) | -| Academic paper search | Working | arXiv, BioRxiv/MedRxiv, PubMed/PMC, Semantic Scholar, OSF | [code](tldw_Server_API/app/api/v1/endpoints/paper_search.py) | -| Web scraping service | Working | Status, jobs, progress, cookies | [docs](Docs/Product/Content_Collections_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/web_scraping.py) | - -### Connectors (External Sources) - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Google Drive connector | Working | OAuth2, browse/import | [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | -| Notion connector | Working | OAuth2, nested blocks→Markdown | [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | -| Connector policy + quotas | Working | Org policy, job quotas | [docs](Docs/Product/Content_Collections_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/connectors.py) | - -### MCP Unified - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Tool execution APIs + WS | Working | Production MCP with JWT/RBAC | [docs](Docs/MCP/Unified/Developer_Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_unified_endpoint.py) | -| Catalog management | Working | Admin tool/permission catalogs | [docs](Docs/MCP/Unified/Modules.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_catalogs_manage.py) | -| Status/metrics endpoints | Working | Health + metrics | [docs](Docs/MCP/Unified/System_Admin_Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/mcp_unified_endpoint.py) | - -### AuthNZ, Security, Admin/Ops - -| Capability | Status | Notes | Links | -|---|---|---|---| -| Single-user (X-API-KEY) | Working | Simple local deployments | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/auth.py) | -| Multi-user JWT + RBAC | Working | Users/roles/permissions | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/auth_enhanced.py) | -| API keys manager | Working | Create/rotate/audit | [docs](Docs/API-related/AuthNZ-API-Guide.md) · [code](tldw_Server_API/app/api/v1/endpoints/admin.py) | -| Egress + SSRF guards | Working | Centralized guards | [code](tldw_Server_API/app/api/v1/endpoints/web_scraping.py) | -| Audit logging & alerts | Working | Unified audit + alerts | [docs](Docs/API-related/Audit_Configuration.md) · 
[code](tldw_Server_API/app/api/v1/endpoints/admin.py) | -| Admin & Ops | Working | Users/orgs/teams, roles/perms, quotas, usage | [docs](Docs/API-related/Admin_Orgs_Teams.md) · [code](tldw_Server_API/app/api/v1/endpoints/admin.py) | -| Monitoring & metrics | Working | Prometheus text + JSON | [docs](Docs/Deployment/Monitoring/README.md) · [code](tldw_Server_API/app/api/v1/endpoints/metrics.py) | - -### Storage, Outputs, Watchlists, Workflows, UI - -| Capability | Status | Notes | Links | -|---|---|---|---| -| SQLite defaults | Working | Local dev/small deployments | [code](tldw_Server_API/app/core/DB_Management/) | -| PostgreSQL (AuthNZ, content) | Working | Postgres content mode | [docs](Docs/Published/Deployment/Postgres_Content_Mode.md) | -| Outputs: templates | Working | Markdown/HTML/MP3 via TTS | [code](tldw_Server_API/app/api/v1/endpoints/outputs_templates.py) | -| Outputs: artifacts | Working | Persist/list/soft-delete/purge | [code](tldw_Server_API/app/api/v1/endpoints/outputs.py) | -| Watchlists: sources/groups/tags | Working | CRUD + bulk import | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | -| Watchlists: jobs & runs | Working | Schedule, run, run details | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | -| Watchlists: templates & OPML | Working | Template store; OPML import/export | [docs](Docs/Product/Watchlist_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/watchlists.py) | -| Watchlists: notifications | Experimental | Email/chatbook delivery | [docs](Docs/Product/Watchlist_PRD.md) | -| Workflows engine & scheduler | WIP | Defs CRUD, runs, scheduler | [docs](Docs/Product/Workflows_PRD.md) · [code](tldw_Server_API/app/api/v1/endpoints/workflows.py) | -| VLM backends listing | Experimental | `/api/v1/vlm/backends` | [code](tldw_Server_API/app/api/v1/endpoints/vlm.py) | -| Next.js WebUI | Working | Primary client | [code](tldw-frontend/) | -| Legacy WebUI (/webui) | Working | Feature-frozen legacy | [code](tldw_Server_API/WebUI/) | +## Feature Status -
+See the full Feature Status Matrix in `Docs/Published/Overview/Feature_Status.md`. + +## Networking & Limits + +- HTTP client and TLS/pinning configuration: `tldw_Server_API/Config_Files/README.md` (timeouts, retries, redirects/proxies, JSON limits, TLS min version, cert pinning, SSE/download helpers). +- Egress/SSRF policy and security middleware: `tldw_Server_API/app/core/Security/README.md`. +- Resource Governor (rate limits, tokens, streams; Redis backend optional): `tldw_Server_API/app/core/Resource_Governance/README.md`. ## Architecture & Repo Layout @@ -376,6 +262,14 @@ pip install -e . # pip install -e ".[multiplayer]" # multi-user/PostgreSQL features # pip install -e ".[dev]" # tests, linters, tooling # pip install -e ".[otel]" # OpenTelemetry metrics/tracing exporters + +# Install pyaudio - needed for audio processing +# Linux +sudo apt install python3-pyaudio + +#MacOS +brew install portaudio +pip install pyaudio ``` 2) Configure authentication and providers ```bash @@ -399,13 +293,101 @@ python -m uvicorn tldw_Server_API.app.main:app --reload Docker Compose ```bash -# Bring up the stack (app + dependencies where applicable) +# Run from repo root + +# Option A) Single-user (SQLite users DB) docker compose -f Dockerfiles/docker-compose.yml up -d --build -# Optional proxy overlay examples are available: +# Option B) Multi-user (Postgres users DB) +export AUTH_MODE=multi_user +export DATABASE_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users +# Optional: route Jobs module to Postgres as well +export JOBS_DB_URL=postgresql://tldw_user:TestPassword123!@postgres:5432/tldw_users +docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.override.yml up -d --build + +# Option C) Dev overlay — enable unified streaming (non-prod) +# This turns on the SSE/WS unified streams (STREAMS_UNIFIED=1) for pilot endpoints. +# Keep disabled in production until validated in your environment. +docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.dev.yml up -d --build + +# Check status +docker compose -f Dockerfiles/docker-compose.yml ps +docker compose -f Dockerfiles/docker-compose.yml logs -f app + +# First-time AuthNZ initialization (inside the running app container) +docker compose -f Dockerfiles/docker-compose.yml exec app \ + python -m tldw_Server_API.app.core.AuthNZ.initialize + +# Optional: proxy overlays # - Dockerfiles/docker-compose.proxy.yml # - Dockerfiles/docker-compose.proxy-nginx.yml + +# Optional: use pgvector + pgbouncer for Postgres +docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/docker-compose.pg.yml up -d --build +``` + +Notes +- Run compose commands from the repository root. The base compose file at `Dockerfiles/docker-compose.yml` builds with context at the repo root and includes Postgres and Redis services. +- The legacy WebUI is served at `/webui`; the primary UI is the Next.js client in `tldw-frontend/`. +- For unified streaming validation in non-prod, prefer the dev overlay above. You can also export `STREAMS_UNIFIED=1` directly in your environment. + +### Supporting Services via Docker + +Run only infrastructure services without the app. 
+ +Postgres + Redis (base compose) +```bash +docker compose -f Dockerfiles/docker-compose.yml up -d postgres redis +``` + +Prometheus + Grafana (embeddings compose, monitoring profile) +```bash +docker compose -f Dockerfiles/docker-compose.embeddings.yml --profile monitoring up -d prometheus grafana +``` + +All four together +```bash +docker compose -f Dockerfiles/docker-compose.yml up -d postgres redis +docker compose -f Dockerfiles/docker-compose.embeddings.yml --profile monitoring up -d prometheus grafana +``` + +Manage and verify +```bash +# Status +docker compose -f Dockerfiles/docker-compose.yml ps +docker compose -f Dockerfiles/docker-compose.embeddings.yml ps + +# Logs +docker compose -f Dockerfiles/docker-compose.yml logs -f postgres redis +docker compose -f Dockerfiles/docker-compose.embeddings.yml logs -f prometheus grafana + +# Stop +docker compose -f Dockerfiles/docker-compose.yml stop postgres redis +docker compose -f Dockerfiles/docker-compose.embeddings.yml stop prometheus grafana + +# Remove +docker compose -f Dockerfiles/docker-compose.yml down +docker compose -f Dockerfiles/docker-compose.embeddings.yml down +``` + +Ports +- Postgres: 5432 +- Redis: 6379 +- Prometheus: 9091 (container listens on 9090) +- Grafana: 3000 + +Prometheus config +- Create `Config_Files/prometheus.yml` to define scrape targets. Minimal self-scrape example: +```yaml +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] ``` +See Docs/Operations/monitoring/README.md for examples that scrape the API and worker orchestrator. Tip: See multi-user setup and production hardening in Docs/User_Guides/Authentication_Setup.md and Docs/Published/Deployment/First_Time_Production_Setup.md. @@ -498,8 +480,13 @@ curl -s -X POST http://127.0.0.1:8000/api/v1/audio/transcriptions \ - Module deep dives: `Docs/Development/AuthNZ-Developer-Guide.md`, `Docs/Development/RAG-Developer-Guide.md`, `Docs/MCP/Unified/Developer_Guide.md` - API references: `Docs/API-related/RAG-API-Guide.md`, `Docs/API-related/OCR_API_Documentation.md`, `Docs/API-related/Prompt_Studio_API.md` - Deployment/Monitoring: `Docs/Published/Deployment/First_Time_Production_Setup.md`, `Docs/Published/Deployment/Reverse_Proxy_Examples.md`, `Docs/Deployment/Monitoring/` +- TTS onboarding: `Docs/User_Guides/TTS_Getting_Started.md` – hosted/local provider setup, verification, and troubleshooting - Design notes (WIP features): `Docs/Design/` - e.g., `Docs/Design/Custom_Scrapers_Router.md` +### Resource Governor Config + +For complete Resource Governor setup and examples (env, DB store bootstrap, YAML policy, middleware, diagnostics, and tests), see `tldw_Server_API/app/core/Resource_Governance/README.md`. + ### OpenAI-Compatible Strict Mode (Local Providers) Some self-hosted OpenAI-compatible servers reject unknown fields (like `top_k`). For local providers you can enable a strict mode that filters non-standard keys from chat payloads. @@ -511,33 +498,17 @@ Some self-hosted OpenAI-compatible servers reject unknown fields (like `top_k`). ## Deployment -- Dockerfiles and compose templates live under `Dockerfiles/`. +- Dockerfiles and compose templates live under `Dockerfiles/` (see `Dockerfiles/README.md`). - Reverse proxy samples: `Helper_Scripts/Samples/Nginx/`, `Helper_Scripts/Samples/Caddy/`. - Monitoring: `Docs/Deployment/Monitoring/` and `Helper_Scripts/Samples/Grafana/`. - Prometheus metrics exposed at `/metrics` and `/api/v1/metrics`. 
- Production hardening: `Docs/Published/User_Guides/Production_Hardening_Checklist.md`. -## Samples (Quick Links) - -- Reverse Proxy guide: `Docs/Deployment/Reverse_Proxy_Examples.md` -- Nginx sample config: `Samples/Nginx/nginx.conf` -- Traefik sample dynamic config: `Samples/Traefik/traefik-dynamic.yml` -- Production Hardening Checklist: `Docs/User_Guides/Production_Hardening_Checklist.md` -- Prometheus alert rules (near-quota): `Samples/Prometheus/alerts.yml` -- VibeVoice TTS (getting started): `Docs/VIBEVOICE_GETTING_STARTED.md` - - NeuTTS Air (voice cloning, local): `Docs/STT-TTS/NEUTTS_TTS_SETUP.md` - -### Monitoring (Prometheus + Grafana) -- Prometheus scrape endpoints: - - Unauthenticated scrape: `GET /metrics` (Prometheus text) - - MCP Prometheus text: `GET /api/v1/mcp/metrics/prometheus` -- LLM usage dashboard (cost + tokens): - - Import JSON: `Docs/Deployment/Monitoring/Grafana_LLM_Cost_Top_Providers.json` - - Panels included: - - Cost rate by provider: `sum by (provider) (rate(llm_cost_dollars[$__rate_interval]))` - - Top 5 providers by cost (range): `topk(5, sum by (provider) (increase(llm_cost_dollars[$__range])))` - - Token rate by provider and type: `sum by (provider, type) (rate(llm_tokens_used_total[$__rate_interval]))` - - Set Prometheus datasource UID to `prometheus` or edit to match your setup. +## Monitoring + +- Monitoring docs and setup: `Docs/Deployment/Monitoring/README.md` +- Grafana dashboards and samples: `Helper_Scripts/Samples/Grafana/README.md` +- Prometheus scrape endpoints: `GET /metrics` and `GET /api/v1/mcp/metrics/prometheus` ### PostgreSQL Content Mode @@ -586,76 +557,8 @@ Some self-hosted OpenAI-compatible servers reject unknown fields (like `top_k`). --- -### More Detailed explanation of this project (tldw_project) -
-**What is this Project? (Extended) - Click-Here** - -### What is this Project? -- **What it is now:** - - A tool that can ingest: audio, videos, articles, free form text, documents, and books as text into a personal, database, so that you can then search and chat with it at any time. - - (+ act as a nice way of creating your personal 'media' database, a personal digital library with search!) - - And of course, this is all open-source/free, with the idea being that this can massively help people in their efforts of research and learning. - - I don't plan to pivot and turn this into a commercial project. I do plan to make a server version of it, with the potential for offering a hosted version of it, and am in the process of doing so. The hosted version will be 95% the same, missing billing and similar from the open source branch. - - I'd like to see this project be used in schools, universities, and research institutions, or anyone who wants to keep a record of what they've consumed and be able to search and ask questions about it. - - I believe that this project can be a great tool for learning and research, and I'd like to see it develop to a point where it could be reasonably used as such. - - In the meantime, if you don't care about data ownership or privacy, https://notebooklm.google/ is a good alternative that works and is free. -- **Where its headed:** - - Act as a Multi-Purpose Research tool. The idea being that there is so much data one comes across, and we can store it all as text. (with tagging!) - - Imagine, if you were able to keep a copy of every talk, research paper or article you've ever read, and have it at your fingertips at a moments notice. - - Now, imagine if you could ask questions about that data/information(LLM), and be able to string it together with other pieces of data, to try and create sense of it all (RAG) - - Basically a [cheap foreign knockoff](https://tvtropes.org/pmwiki/pmwiki.php/Main/ShoddyKnockoffProduct) [`Young Lady's Illustrated Primer`](https://en.wikipedia.org/wiki/The_Diamond_Age) that you'd buy from some [shady dude in a van at a swap meet](https://tvtropes.org/pmwiki/pmwiki.php/Main/TheLittleShopThatWasntThereYesterday). - * Some food for thought: https://notes.andymatuschak.org/z9R3ho4NmDFScAohj3J8J3Y - * I say this recognizing the inherent difficulties in replicating such a device and acknowledging the current limitations of technology. - - This is a free-time project, so I'm not going to be able to work on it all the time, but I do have some ideas for where I'd like to take it. - - I view this as a personal tool I'll ideally continue to use for some time until something better/more suited to my needs comes along. - - Until then, I plan to continue working on this project and improving as much as possible. - - If I can't get a "Young Lady's Illustrated Primer" in the immediate, I'll just have to hack together some poor imitation of one.... -
- ---- - - -### Local Models I recommend -
-**Local Models I Can Recommend - Click-Here** - -### Local Models I recommend -- These are just the 'standard smaller' models I recommend, there are many more out there, and you can use any of them with this project. - - One should also be aware that people create 'fine-tunes' and 'merges' of existing models, to create new models that are more suited to their needs. - - This can result in models that may be better at some tasks but worse at others, so it's important to test and see what works best for you. -- FIXME (Qwen3-4B-Instruct-2507, Mistral-Nemo-Instruct-2407-GGUF, Qwen3-30B-A3B-Instruct-2507) - -For commercial API usage for use with this project: Latest Anthropic/ChatGPT/Gemini Models. -Flipside I would say none, honestly. The (largest players) will gaslight you and charge you money for it. Fun. -That being said they obviously can provide help/be useful(helped me make this app), but it's important to remember that they're not your friend, and they're not there to help you. They are there to make money not off you, but off large institutions and your data. -You are just a stepping stone to their goals. - -From @nrose 05/08/2024 on Threads: -``` -No, it’s a design. First they train it, then they optimize it. Optimize it for what- better answers? - No. For efficiency. -Per watt. Because they need all the compute they can get to train the next model.So it’s a sawtooth. -The model declines over time, then the optimization makes it somewhat better, then in a sort of - reverse asymptote, they dedicate all their “good compute” to the next bigger model.Which they then - trim down over time, so they can train the next big model… etc etc. -None of these companies exist to provide AI services in 2024. They’re only doing it to finance the - things they want to build in 2025 and 2026 and so on, and the goal is to obsolete computing in general - and become a hidden monopoly like the oil and electric companies. -2024 service quality is not a metric they want to optimize, they’re forced to, only to maintain some - directional income -``` - -As an update to this, looking back a year, it still stands true, and I would only change that you're less likely to insult the model at this point. (As long as you're not using sonnet...) -
- ---- - - -### Helpful Terms and Things to Know -
-**Helpful things to know - Click-Here** - -### Helpful things to know +### More Detailed Explanation & Background +See `Docs/About.md` for the extended project background, vision, and notes. - https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5049562 - Purpose of this section is to help bring awareness to certain concepts and terms that are used in the field of AI/ML/NLP, as well as to provide some resources for learning more about them. - Also because some of those things are extremely relevant and important to know if you care about accuracy and the effectiveness of the LLMs you're using. @@ -748,8 +651,8 @@ GNU General Public License v3.0 - see `LICENSE` for details. ### Security Disclosures -1. Information disclosure via developer print debugging statement in `chat_functions.py` - Thank you to @luca-ing for pointing this out! - - Fixed in commit: `8c2484a` +See `SECURITY.md` for reporting guidelines and disclosures. + --- @@ -759,12 +662,12 @@ tldw_server started as a tool to transcribe and summarize YouTube videos but has Long-term vision: Building towards a personal AI research assistant inspired by "The Young Lady's Illustrated Primer" from Neal Stephenson's "The Diamond Age" - a tool that helps you learn and research at your own pace. ---- - ### Getting Help - API Documentation: `http://localhost:8000/docs` - GitHub Issues: [Report bugs or request features](https://github.com/rmusser01/tldw_server/issues) -- Discussions: [Community forum(for now)](https://github.com/rmusser01/tldw_server/discussions) +- Discussions: [Community forum](https://github.com/rmusser01/tldw_server/discussions) + + --- @@ -788,3 +691,20 @@ Roadmap & WIP Privacy & Security - Self-hosted by design; no telemetry or data collection - Users own and control their data; see hardening guide for production +- Metrics & Grafana + - Emitted metrics (core): + - `rg_decisions_total{category,scope,backend,result,policy_id}` — allow/deny decisions per category/scope/backend + - `rg_denials_total{category,scope,reason,policy_id}` — denial events by reason (e.g., `insufficient_capacity`) + - `rg_refunds_total{category,scope,reason,policy_id}` — refund events from commit/refund paths + - `rg_concurrency_active{category,scope,policy_id}` — active stream/job leases (gauge) + - Cardinality guard: + - By default, metrics DO NOT include `entity` labels to avoid high-cardinality pitfalls. If you truly need per-entity sampling, gate it behind `RG_METRICS_ENTITY_LABEL=true` and ensure hashing/masking is applied upstream. + - Quick Grafana panel examples: + - Allow vs Deny over time (per category): + - Query: `sum by (category, result) (rate(rg_decisions_total[5m]))` + - Denials by scope (top N): + - Query: `topk(5, sum by (scope) (rate(rg_denials_total[5m])))` + - Refund activity (tokens): + - Query: `sum by (policy_id) (rate(rg_refunds_total{category="tokens"}[5m]))` + - Active streams (per scope): + - Query: `avg by (scope) (rg_concurrency_active{category="streams"})` diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..fb2c5c640 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,11 @@ +# Security Policy + +## Supported Versions +- Main branch and the latest tagged release receive security fixes. + +## Reporting a Vulnerability +- Include reproduction steps, affected versions, and impact assessment if possible. + +## Disclosures +- Information disclosure via developer print debugging statement in `chat_functions.py` (reported by @luca-ing). + - Fixed in commit `8c2484a`. 
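For a quick check of the Resource Governor metrics listed above without opening Grafana, the same panel queries can be issued against the Prometheus HTTP API. A minimal sketch, assuming the monitoring compose from the Quick Start (Prometheus published on localhost:9091) and that the API's `/metrics` endpoint is already being scraped:

```bash
# Denials by scope over the last 5 minutes (same PromQL as the Grafana panel example above).
# Assumes Prometheus is reachable on localhost:9091 per the compose port mapping.
curl -sG 'http://localhost:9091/api/v1/query' \
  --data-urlencode 'query=topk(5, sum by (scope) (rate(rg_denials_total[5m])))' \
  | python3 -m json.tool
```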
diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..cdc32c05e --- /dev/null +++ b/conftest.py @@ -0,0 +1,41 @@ +""" +Top-level pytest configuration. + +Registers shared test plugins globally to comply with pytest>=8, which +disallows defining `pytest_plugins` in non-top-level conftest files. + +See: https://docs.pytest.org/en/stable/deprecations.html#pytest-plugins-in-non-top-level-conftest-files +""" + +# Register shared fixtures/plugins for the entire test suite +# Note: Avoid double-registering third-party plugins that are already +# auto-discovered via entry points (e.g., pytest-benchmark). Only add them +# explicitly when plugin autoloading is disabled. +import os + +_plugins = [ + # Chat + auth fixtures used widely across tests + "tldw_Server_API.tests._plugins.chat_fixtures", + "tldw_Server_API.tests._plugins.authnz_fixtures", + # Isolated Chat fixtures (unit_test_client, isolated_db, etc.) + "tldw_Server_API.tests.Chat.integration.conftest_isolated", + # Unified Postgres fixtures (temp DBs, reachability, DatabaseConfig) + "tldw_Server_API.tests._plugins.postgres", + # Optional pgvector fixtures (will be skipped if not available) + "tldw_Server_API.tests.helpers.pgvector", +] + +# Include pytest-benchmark only when autoload is disabled, to avoid duplicate +# registration errors when the plugin is already auto-loaded as 'benchmark'. +if os.environ.get("PYTEST_DISABLE_PLUGIN_AUTOLOAD", "").strip().lower() in {"1", "true", "yes"}: + try: + import importlib + + importlib.import_module("pytest_benchmark.plugin") + except Exception: + # Plugin not installed or failed to import; continue without it. + pass + else: + _plugins.insert(0, "pytest_benchmark.plugin") + +pytest_plugins = tuple(_plugins) diff --git a/mkdocs.yml b/mkdocs.yml index 1bd9e5148..9326e8c71 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -174,6 +174,7 @@ nav: - RAG Deployment Guide: User_Guides/RAG_Deployment_Guide.md - RAG Production Configuration: User_Guides/RAG_Production_Configuration_Guide.md - Setting up a local LLM: User_Guides/Setting_up_a_local_LLM.md + - TTS Getting Started: User_Guides/TTS_Getting_Started.md - Chatterbox TTS Setup: User_Guides/Chatterbox_TTS_Setup.md - User Guide: User_Guides/User_Guide.md - MCP Unified: diff --git a/pyproject.toml b/pyproject.toml index 377de2d18..4201d9f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta" ########################### [project] name = "tldw-server" -version = "0.1.3" +version = "0.1.4" description = "A comprehensive research assistant and media analysis platform - Too Long; Didn't Watch Server" requires-python = ">=3.10" readme = "README.md" diff --git a/sbom/Makefile b/sbom/Makefile index f5b339f00..897b12be9 100644 --- a/sbom/Makefile +++ b/sbom/Makefile @@ -1,5 +1,9 @@ # Simple SBOM generation helpers +# CycloneDX CLI reference. 
Override with a digest for pinning: +# make CDX_CLI_REF=ghcr.io/cyclonedx/cyclonedx-cli@sha256: +CDX_CLI_REF ?= ghcr.io/cyclonedx/cyclonedx-cli:0.30.0 + PY ?= python3 NPM ?= npm @@ -32,7 +36,7 @@ sbom: done; \ if [ -n "$$files" ]; then \ if command -v docker >/dev/null 2>&1; then \ - docker run --rm -v "$$PWD":/work -w /work cyclonedx/cyclonedx-cli:0.30.0 merge --input-files $$files --output-file sbom/sbom.cdx.json \ + docker run --rm -v "$$PWD":/work -w /work $(CDX_CLI_REF) merge --input-files $$files --output-file sbom/sbom.cdx.json \ || cp sbom/sbom-python.cdx.json sbom/sbom.cdx.json || true; \ elif command -v cyclonedx-cli >/dev/null 2>&1; then \ cyclonedx-cli merge --input-files $$files --output-file sbom/sbom.cdx.json || cp sbom/sbom-python.cdx.json sbom/sbom.cdx.json || true; \ @@ -50,7 +54,7 @@ sbom-validate: @echo "==> Validating SBOM (if present)" @if [ -f sbom/sbom.cdx.json ]; then \ if command -v docker >/dev/null 2>&1; then \ - docker run --rm -v "$$PWD":/work -w /work cyclonedx/cyclonedx-cli:0.30.0 validate --input-file sbom/sbom.cdx.json || true; \ + docker run --rm -v "$$PWD":/work -w /work $(CDX_CLI_REF) validate --input-file sbom/sbom.cdx.json || true; \ elif command -v cyclonedx-cli >/dev/null 2>&1; then \ cyclonedx-cli validate --input-file sbom/sbom.cdx.json || true; \ else \ diff --git a/sbom/README.md b/sbom/README.md index e323e0e5c..842ae5957 100644 --- a/sbom/README.md +++ b/sbom/README.md @@ -10,7 +10,7 @@ Generate locally - Run: make sbom Artifacts: -- sbom-python.cdx.json - Python deps from pyproject.toml (cdxgen) or requirements.txt fallback +- sbom-python.cdx.json - Python deps from requirements.txt (CycloneDX) - sbom-frontend.cdx.json - Node deps (if package-lock.json present) - sbom.cdx.json - merged SBOM (if both present) @@ -20,10 +20,13 @@ Validate and scan: Notes ----- -- When pyproject.toml is present, the Makefile uses cdxgen to generate a Python SBOM without installing dependencies. -- If you prefer environment-resolved versions, create a venv (e.g., via uv sync) and run: - - python -m pip install cyclonedx-py cyclonedx-cli - - cyclonedx-py -e -o sbom/sbom-python.cdx.json +- Python SBOMs are generated via the official CycloneDX Python CLI. Newer + releases expose the `cyclonedx-py` CLI; older ones expose `cyclonedx-bom`. + Either of the following works: + - python -m pip install cyclonedx-bom + - cyclonedx-py requirements -i tldw_Server_API/requirements.txt -o sbom/sbom-python.cdx.json + # or (legacy) + - cyclonedx-bom -r tldw_Server_API/requirements.txt -o sbom/sbom-python.cdx.json - For container/OS-level SBOMs, consider using syft: - syft dir:. -o cyclonedx-json=sbom/sbom-syft.cdx.json - syft -o cyclonedx-json=sbom/sbom-image.cdx.json diff --git a/tldw-frontend/README.md b/tldw-frontend/README.md index 5f6eafa16..8b6798f96 100644 --- a/tldw-frontend/README.md +++ b/tldw-frontend/README.md @@ -25,6 +25,11 @@ yarn dev -p 8080 Open [http://localhost:8080](http://localhost:8080) with your browser. +Unified streaming (dev) +- To exercise the unified SSE/WS streaming in the backend, start the API with the dev overlay: + `docker compose -f Dockerfiles/docker-compose.yml -f Dockerfiles/Dockerfiles/docker-compose.dev.yml up -d --build` + and set `NEXT_PUBLIC_API_URL` to `http://127.0.0.1:8000`. + You can start editing the page by modifying `pages/index.tsx`. The page auto-updates as you edit the file. 
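To pair with the unified streaming note above, a minimal sketch for pointing the frontend dev server at a locally running API. The `yarn dev -p 8080` invocation mirrors the hunk context above; the API address is an assumption and should match wherever the dev overlay exposes the server:

```bash
# Assumes the backend dev overlay is up and listening on 127.0.0.1:8000.
export NEXT_PUBLIC_API_URL=http://127.0.0.1:8000
yarn dev -p 8080   # then browse to http://localhost:8080
```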
[API routes](https://nextjs.org/docs/pages/building-your-application/routing/api-routes) can be accessed on [http://localhost:3000/api/hello](http://localhost:3000/api/hello). This endpoint can be edited in `pages/api/hello.ts`. diff --git a/tldw_Server_API/Config_Files/.env.example b/tldw_Server_API/Config_Files/.env.example index ceb07bc88..f836ba9fa 100644 --- a/tldw_Server_API/Config_Files/.env.example +++ b/tldw_Server_API/Config_Files/.env.example @@ -19,7 +19,7 @@ SHOW_API_KEY_ON_STARTUP=false # For production multi-user, use Postgres (matches docker-compose.yml services) POSTGRES_DB=tldw_users POSTGRES_USER=tldw_user -POSTGRES_PASSWORD=ChangeMeStrong123! +POSTGRES_PASSWORD=TestPassword123! DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB} # Optional: route Jobs backend to Postgres as well @@ -63,3 +63,16 @@ LOG_LEVEL=info # SECURITY_ALERT_WEBHOOK_MIN_SEVERITY=high # SECURITY_ALERT_EMAIL_MIN_SEVERITY=critical # SECURITY_ALERT_BACKOFF_SECONDS=30 + +# ===== HTTP Client / Egress (optional) ===== +# Explicit proxy allowlist (hosts or full URLs). Deny-by-default when unset. +# PROXY_ALLOWLIST=proxy.local,proxy.internal +# Enable HTTP/3 (QUIC) behind a flag (no-op unless stack supports it) +# HTTP3_ENABLED=false +# Enforce minimum TLS version for outbound HTTPS (true/false) +# TLS_ENFORCE_MIN_VERSION=true +# Minimum TLS version when enforcement is enabled: 1.2 or 1.3 +# TLS_MIN_VERSION=1.2 +# Certificate pinning (leaf SHA-256 hex) per host: host=p1|p2,host2=p3 +# Example: HTTP_CERT_PINS="api.example.com=abcdef...|123456...,other.host=deadbeef..." +# HTTP_CERT_PINS= diff --git a/tldw_Server_API/Config_Files/README.md b/tldw_Server_API/Config_Files/README.md index d2cb8645d..aded6ee21 100644 --- a/tldw_Server_API/Config_Files/README.md +++ b/tldw_Server_API/Config_Files/README.md @@ -309,6 +309,163 @@ VibeVoice: - `max_bytes` (int|null): log rotation size - `backup_count` (int): rotated files kept +## [HTTP-Client] +- Centralized outbound HTTP client configuration (applies to helpers in `tldw_Server_API.app.core.http_client`). +- Defaults are secure-by-default and can be overridden via environment variables. + +- Timeouts + - `HTTP_CONNECT_TIMEOUT` (float, default `5.0` seconds) + - `HTTP_READ_TIMEOUT` (float, default `30.0` seconds) + - `HTTP_WRITE_TIMEOUT` (float, default `30.0` seconds) + - `HTTP_POOL_TIMEOUT` (float, default `30.0` seconds) + +- Connection limits + - `HTTP_MAX_CONNECTIONS` (int, default `100`) + - `HTTP_MAX_KEEPALIVE_CONNECTIONS` (int, default `20`) + +- Retries & backoff + - `HTTP_RETRY_ATTEMPTS` (int, default `3`) + - `HTTP_BACKOFF_BASE_MS` (int, default `250`) + - `HTTP_BACKOFF_CAP_S` (int, default `30`) + - Retries on: 408, 429, 500, 502, 503, 504, and connect/read timeouts. Honors `Retry-After`. + +- Redirects & proxies + - `HTTP_MAX_REDIRECTS` (int, default `5`) + - `HTTP_TRUST_ENV` (bool, default `false`) — when false, system proxies are ignored + - `PROXY_ALLOWLIST` (csv of hosts or URLs; deny-by-default) + +- JSON & headers + - `HTTP_JSON_MAX_BYTES` (int, optional) — maximum allowed JSON response size for helpers that enable this guard + - `HTTP_DEFAULT_USER_AGENT` (string, overrides default `tldw_server/ (component)`) + +- Transport & TLS + - `HTTP3_ENABLED` (bool, default `false`) — HTTP/3 (QUIC) behind a flag. Note: currently a no‑op; reserved for future QUIC support. 
+ - `TLS_ENFORCE_MIN_VERSION` (bool, default `false`) — optional TLS min version enforcement + - `TLS_MIN_VERSION` (str, default `1.2`) + - `TLS_CERT_PINS_SPKI_SHA256` (csv of SPKI SHA-256 pins; optional certificate pinning) + +- Proxies & Egress + - `PROXY_ALLOWLIST` (csv of proxy hostnames or URLs; deny-by-default when empty) + +TLS and certificate pinning + +By default the HTTP client follows system trust stores. You can optionally enforce a minimum TLS version and use certificate pinning on a per-host basis. + +- Env toggles for TLS minimum version: + - `HTTP_ENFORCE_TLS_MIN` or `TLS_ENFORCE_MIN_VERSION`: set to `1`/`true` to enable + - `HTTP_TLS_MIN_VERSION` or `TLS_MIN_VERSION`: `1.2` (default) or `1.3` + +- Programmatic per-host certificate pinning (leaf certificate SHA-256): + +```python +from tldw_Server_API.app.core.http_client import create_async_client, afetch, RetryPolicy + +# Map of host -> set of allowed certificate fingerprints (hex sha256 of DER) +pins = { + "api.openai.com": {"b1e5...deadbeef"}, + "api.groq.com": {"a2c4...c0ffee"}, +} + +async with create_async_client(enforce_tls_min_version=True, tls_min_version="1.2", cert_pinning=pins) as client: + resp = await afetch(method="GET", url="https://api.openai.com/v1/models", client=client, retry=RetryPolicy()) + print(resp.status_code) +``` + +Notes +- Pinning checks the leaf certificate fingerprint (sha256 of the DER cert) before the request proceeds. A mismatch raises an egress/pinning error. +- Env-driven pinning (built-in parser): set `HTTP_CERT_PINS` to a CSV-style mapping of host to pins + - Example: `HTTP_CERT_PINS="api.openai.com=ab12..|cd34..,api.groq.com=ef56.."` + - Format: `host=pin1|pin2[,host2=pin3]` where pins are lowercase sha256 hex of the leaf certificate DER. + - These pins are attached to clients created by `create_client`/`create_async_client` when `cert_pinning` is not provided. + +- Egress & SSRF policy + - All helpers evaluate the central egress policy (`app/core/Security/egress.py`) before any network I/O and on each redirect hop, and validate proxies. + - Denies unsupported schemes, disallowed ports, denylisted hosts, and private/reserved IPs by default. See `WORKFLOWS_EGRESS_*` env keys in that module for allow/deny behavior. + +- Observability + - Structured logs redact sensitive headers and may include `request_id`, `method`, `host`, `status`, `duration_ms`. + - Metrics (if telemetry enabled): `http_client_requests_total`, `http_client_request_duration_seconds`, `http_client_retries_total`, `http_client_egress_denials_total`. + - When tracing is active, `traceparent` header is injected automatically where supported. + +X-Request-Id propagation + +Outbound helpers auto-inject `X-Request-Id` when present in trace baggage (set via RequestID middleware or `TracingManager.set_baggage('request_id', ...)`). 
Example: + +``` +from tldw_Server_API.app.core.Metrics.traces import get_tracing_manager +from tldw_Server_API.app.core.http_client import create_client, fetch + +tm = get_tracing_manager() +tm.set_baggage('request_id', 'abc123') + +with create_client() as client: + r = fetch(method='GET', url='http://example.com', client=client) + assert r.status_code == 200 +``` + +SSE streaming example + +``` +from tldw_Server_API.app.core.http_client import create_async_client, astream_sse, RetryPolicy + +async def consume(): + async with create_async_client() as client: + policy = RetryPolicy(attempts=3) + async for ev in astream_sse(url='http://example.com/stream', client=client, retry=policy): + print(ev.event, ev.data) +``` + +Downloads with checksum and resume + +``` +from pathlib import Path +from tldw_Server_API.app.core.http_client import download, adownload, RetryPolicy + +dest = Path('/tmp/file.bin') +policy = RetryPolicy(attempts=3) + +# Sync +download( + url='http://example.com/file.bin', + dest=dest, + checksum='deadbeef...', # optional sha256 + resume=True, + retry=policy, + require_content_type='application/pdf', # optional strict content-type + max_bytes_total=50_000_000, # optional disk quota guard (bytes) +) + +# Async +# await adownload( +# url='http://example.com/file.bin', +# dest=dest, +# resume=True, +# retry=policy, +# require_content_type='application/pdf', +# max_bytes_total=50_000_000, +# ) +``` + +Example (Python) +``` +from tldw_Server_API.app.core.http_client import create_async_client, afetch_json + +async with create_async_client() as client: + data = await afetch_json(method="GET", url="https://api.example.com/items", client=client) +``` + +Downloads and streaming +``` +from tldw_Server_API.app.core.http_client import adownload, astream_sse, RetryPolicy + +# Reliable file downloads with optional checksum/length validation +await adownload(url="https://host/file.bin", dest="/tmp/file.bin", retry=RetryPolicy(attempts=3)) + +# Stream SSE events with backpressure-friendly async iteration +async for evt in astream_sse(method="GET", url="https://host/stream"): + print(evt.event, evt.data) +``` + ## [Moderation] - `enabled` (bool) - `input_enabled|output_enabled` (bool) diff --git a/tldw_Server_API/Config_Files/config.txt b/tldw_Server_API/Config_Files/config.txt index 6c6658f9e..7ce16e2ca 100644 --- a/tldw_Server_API/Config_Files/config.txt +++ b/tldw_Server_API/Config_Files/config.txt @@ -49,7 +49,7 @@ max_messages_per_request = 1000 max_images_per_request = 10 max_request_size_bytes = 1000000 streaming_idle_timeout_seconds = 300 -streaming_heartbeat_interval_seconds = 30 +streaming_heartbeat_interval_seconds = 0 streaming_max_response_size_mb = 10 chat_save_default = False conversation_creation_max_retries = 3 diff --git a/tldw_Server_API/Config_Files/model_pricing.json b/tldw_Server_API/Config_Files/model_pricing.json index 2f3407c44..4cf83cd15 100644 --- a/tldw_Server_API/Config_Files/model_pricing.json +++ b/tldw_Server_API/Config_Files/model_pricing.json @@ -8,6 +8,11 @@ "text-embedding-3-large": { "prompt": 0.00013, "completion": 0.00013 } }, "anthropic": { + "claude-sonnet-4.5": { "prompt": 0.003, "completion": 0.015 }, + "claude-haiku-4.5": { "prompt": 0.001, "completion": 0.005 }, + "claude-opus-4.1": { "prompt": 0.015, "completion": 0.075 }, + + "claude-3.5-sonnet": { "prompt": 0.003, "completion": 0.015 }, "claude-3-opus": { "prompt": 0.015, "completion": 0.075 }, "claude-3-sonnet": { "prompt": 0.003, "completion": 0.015 }, "claude-3-haiku": { "prompt": 0.00025, 
"completion": 0.00125 } @@ -26,6 +31,10 @@ "google": { "gemini-1.5-pro": { "prompt": 0.002, "completion": 0.005 }, "gemini-1.5-flash": { "prompt": 0.0005, "completion": 0.001 }, + "gemini-2.5-pro": { "prompt": 0.00125, "completion": 0.01 }, + "gemini-2.5-pro-high": { "prompt": 0.0025, "completion": 0.015 }, + "gemini-2.5-flash": { "prompt": 0.0003, "completion": 0.0025 }, + "gemini-2.5-flash-preview-09-2025": { "prompt": 0.0003, "completion": 0.0025 }, "text-embedding-004": { "prompt": 0.00005, "completion": 0.00005 } }, "cohere": { @@ -43,6 +52,31 @@ "xai": { "grok-2": { "prompt": 0.003, "completion": 0.006 } }, + "moonshot": { + "kimi-k2-0905-preview": { "prompt": 0.0006, "completion": 0.0025 }, + "kimi-k2-0905-preview-cache-hit": { "prompt": 0.00015, "completion": 0.0025 }, + "kimi-k2-0711-preview": { "prompt": 0.0006, "completion": 0.0025 }, + "kimi-k2-0711-preview-cache-hit": { "prompt": 0.00015, "completion": 0.0025 }, + "kimi-k2-turbo-preview": { "prompt": 0.00115, "completion": 0.008 }, + "kimi-k2-turbo-preview-cache-hit": { "prompt": 0.00015, "completion": 0.008 }, + "kimi-k2-thinking": { "prompt": 0.0006, "completion": 0.0025 }, + "kimi-k2-thinking-cache-hit": { "prompt": 0.00015, "completion": 0.0025 }, + "kimi-k2-thinking-turbo": { "prompt": 0.00115, "completion": 0.008 }, + "kimi-k2-thinking-turbo-cache-hit": { "prompt": 0.00015, "completion": 0.008 } + }, + "zai": { + "GLM-4.6": { "prompt": 0.0006, "completion": 0.0022 }, + "GLM-4.5": { "prompt": 0.0006, "completion": 0.0022 }, + "GLM-4.5V": { "prompt": 0.0006, "completion": 0.0018 }, + "GLM-4.5-X": { "prompt": 0.0022, "completion": 0.0089 }, + "GLM-4.5-Air": { "prompt": 0.0002, "completion": 0.0011 }, + "GLM-4.5-AirX": { "prompt": 0.0011, "completion": 0.0045 }, + "GLM-4-32B-0414-128K": { "prompt": 0.0001, "completion": 0.0001 }, + "GLM-4.5-Flash": { "prompt": 0.0, "completion": 0.0 } + }, + "minimax": { + "MiniMax-M2": { "prompt": 0.0003, "completion": 0.0012 } + }, "huggingface": { "default": { "prompt": 0.00005, "completion": 0.00005 } } diff --git a/tldw_Server_API/Config_Files/resource_governor_policies.yaml b/tldw_Server_API/Config_Files/resource_governor_policies.yaml new file mode 100644 index 000000000..54b8c6b7c --- /dev/null +++ b/tldw_Server_API/Config_Files/resource_governor_policies.yaml @@ -0,0 +1,81 @@ +# Resource Governor Policies (stub) +# +# This file defines example policies for development and local testing. +# In production, prefer RG_POLICY_STORE=db with policies managed via AuthNZ. + +schema_version: 1 + +hot_reload: + enabled: true + interval_sec: 5 # watcher/TTL interval for file store + +metadata: + description: Default example policies for tldw_server Resource Governor + owner: core-platform + version: 1 + +# Global defaults applied when a policy omits a field. 
+defaults: + fail_mode: fail_closed # can be overridden per policy + algorithm: + requests: token_bucket # token_bucket | sliding_window + tokens: token_bucket # preferred for model tokens + scopes_order: [global, tenant, user, conversation, client, ip, service] + +policies: + # Chat API: per-user and per-conversation controls + chat.default: + requests: { rpm: 120, burst: 2.0 } + tokens: { per_min: 60000, burst: 1.5 } + scopes: [global, user, conversation] + fail_mode: fail_closed + + # MCP ingestion/read paths + mcp.ingestion: + requests: { rpm: 60, burst: 1.0 } + scopes: [global, client] + fail_mode: fallback_memory # acceptable local over-admission during outages + + # Embeddings service (OpenAI-compatible) + embeddings.default: + requests: { rpm: 60, burst: 1.2 } + scopes: [user] + + # Audio: concurrency via streams + durable minutes cap + audio.default: + # Allow reasonable request rate so informational GETs (status/limits) + # are not denied by middleware. Concurrency/minutes are enforced separately. + requests: { rpm: 300, burst: 2.0 } + streams: { max_concurrent: 2, ttl_sec: 90 } + minutes: { daily_cap: 120, rounding: ceil } + scopes: [user, ip] + fail_mode: fail_closed + + # SlowAPI façade defaults (ingress IP-based when auth scopes unavailable) + slowapi.default: + requests: { rpm: 300, burst: 2.0 } + scopes: [ip] + + # Evaluations module + evals.default: + requests: { rpm: 30, burst: 1.0 } + scopes: [user] + +# Route/tag mapping helpers. Middleware may use these to resolve policy_id. +route_map: + by_tag: + chat: chat.default + mcp.ingestion: mcp.ingestion + embeddings: embeddings.default + audio: audio.default + evals: evals.default + slowapi: slowapi.default + by_path: + "/api/v1/chat/*": chat.default + "/api/v1/mcp/*": mcp.ingestion + "/api/v1/embeddings*": embeddings.default + "/api/v1/audio/*": audio.default + "/api/v1/evaluations/*": evals.default + +# Observability note: Do not include entity label in metrics by default. +# Use RG_METRICS_ENTITY_LABEL=true to enable hashed entity label if necessary. diff --git a/tldw_Server_API/Config_Files/session_encryption.key b/tldw_Server_API/Config_Files/session_encryption.key index 06d91796b..480a18863 100644 --- a/tldw_Server_API/Config_Files/session_encryption.key +++ b/tldw_Server_API/Config_Files/session_encryption.key @@ -1 +1 @@ -mu4Bm3aGIZEXeB5RvgAF_OH41xxjludeclrPTSJ-klA= +xNM5Aab8o19ZQylQ_nDlMoyt3S5Pjhg2-3-GCWISi5E= \ No newline at end of file diff --git a/tldw_Server_API/README.md b/tldw_Server_API/README.md index ec5f2a203..88d1275b7 100644 --- a/tldw_Server_API/README.md +++ b/tldw_Server_API/README.md @@ -73,7 +73,11 @@ See `app/main.py` for router includes and full route namespaces. - Config file: `tldw_Server_API/Config_Files/config.txt` - Under `[RAG]`: `default_fts_level = media` (or `chunk`) - Requests can still override with `fts_level` in the unified RAG API payload. -- The `GET /api/v1/llm/providers` endpoint reflects configured providers and models. +- The `GET /api/v1/llm/providers` endpoint reflects configured providers and models. For commercial providers, + the list is now seeded from `Config_Files/model_pricing.json` (pricing catalog) and merged with any models + explicitly listed in `config.txt`. This makes `model_pricing.json` the primary reference for discoverable + models; add entries there (with per‑1K prompt/completion rates) to expose them system‑wide. + - Reload without restart: `POST /api/v1/admin/llm-usage/pricing/reload`. 
- Chat request validation is in `app/api/v1/schemas/chat_request_schemas.py` and related modules. ### Chatbooks Job Backend Configuration diff --git a/tldw_Server_API/WebUI/CORS-SOLUTION.md b/tldw_Server_API/WebUI/CORS-SOLUTION.md index 5fa0347d3..ee9a78527 100644 --- a/tldw_Server_API/WebUI/CORS-SOLUTION.md +++ b/tldw_Server_API/WebUI/CORS-SOLUTION.md @@ -16,8 +16,8 @@ The WebUI is now served directly from the FastAPI server at the same origin, com ### Method 1: Automatic (Recommended) ```bash -cd tldw_Server_API/WebUI -./Start-WebUI-SameOrigin.sh +# From repo root +./start-webui.sh ``` This script will: - Check if the API server is running @@ -45,7 +45,7 @@ This script will: ### Option 1: Environment Variable (Recommended) ```bash export SINGLE_USER_API_KEY='your-api-key-here' -./Start-WebUI-SameOrigin.sh +./start-webui.sh ``` ### Option 2: Manual Entry @@ -58,10 +58,11 @@ If you must serve the WebUI from a different origin, you need to configure CORS ```python app.add_middleware( CORSMiddleware, - allow_origins=["http://localhost:8080"], # Specific origin + allow_origins=["http://localhost:8080"], # Specific origin (add more as needed) allow_credentials=True, allow_methods=["*"], allow_headers=["*"], + expose_headers=["X-Request-ID", "traceparent", "X-Trace-Id"], ) ``` @@ -109,3 +110,16 @@ The CORS issue has been solved by serving the WebUI directly from the FastAPI se **http://localhost:8000/webui/** No additional configuration needed! 🎉 + +## Browser Extensions & Streaming + +If you are building a browser extension that calls the API (especially with Server-Sent Events via `Accept: text/event-stream`), add the extension origin to allowed CORS origins. In development: + +```bash +# Example: allow a Chrome extension id (replace with your extension id) +export ALLOWED_ORIGINS='["chrome-extension://abcd1234efgh5678", "http://localhost:8080", "http://127.0.0.1:8080"]' +``` + +Notes: +- The server exposes `X-Request-ID`, `traceparent`, and `X-Trace-Id` headers for correlation. Ensure `expose_headers` includes these (already set by default when CORS is enabled). +- Background/service worker fetches avoid most UX friction, but CORS still applies: the origin must be explicitly allowed. diff --git a/tldw_Server_API/WebUI/README.md b/tldw_Server_API/WebUI/README.md index 0b0245144..7b46f3800 100644 --- a/tldw_Server_API/WebUI/README.md +++ b/tldw_Server_API/WebUI/README.md @@ -5,51 +5,26 @@ A browser-based interface for testing and interacting with the TLDW Server API. ## Quick Start ### Prerequisites -- TLDW Server API running (default: `http://localhost:8000`) +- TLDW Server API (default: `http://localhost:8000`) - Modern web browser (Chrome, Firefox, Safari, Edge) -- Python 3.x (for serving the WebUI) -### Starting the WebUI +### Starting the WebUI (Same-Origin, Recommended) -1. **Start the API Server** (in one terminal): +1. From the project root, launch the server and WebUI together: ```bash - cd /path/to/tldw_server - # Set your API key (if using single-user mode) + # Optionally set your API key (single-user mode) export SINGLE_USER_API_KEY="your-secret-api-key" - python -m uvicorn tldw_Server_API.app.main:app --reload + ./start-webui.sh ``` - The API will be available at http://localhost:8000 -2. **Start the WebUI** (in another terminal): - - **Option A: With Auto-Configuration (Recommended)** - ```bash - cd tldw_Server_API/WebUI - # The script will auto-detect SINGLE_USER_API_KEY from environment - ./Start-WebUI.sh +2. 
Open your browser to: ``` - - **Option B: Manual Configuration** - ```bash - cd tldw_Server_API/WebUI - python3 -m http.server 8080 - # You'll need to enter the API key manually in the UI - ``` - - **Option C: With Custom API URL** - ```bash - cd tldw_Server_API/WebUI - export API_URL="http://your-server:8000" - export SINGLE_USER_API_KEY="your-api-key" - ./Start-WebUI.sh + http://localhost:8000/webui/ ``` -3. **Open your browser** and navigate to: - ``` - http://localhost:8080 - ``` - -⚠️ **Important**: Do NOT open `index.html` directly in your browser (file:// protocol) as this will cause CORS errors. Always use an HTTP server. +Notes: +- The script gates first‑time setup at `/setup` and then serves the WebUI at `/webui/` on the same origin, avoiding CORS issues. +- If the API server is already running, you can simply visit `http://localhost:8000/webui/` directly. ## Overview @@ -165,7 +140,7 @@ WebUI/ ├── index.html # Main application entry point ├── api-endpoints-config.json # API endpoint documentation ├── webui-config.json # Auto-generated configuration (gitignored) -├── Start-WebUI.sh # Start script with auto-configuration +├── (root)/start-webui.sh # Recommended launcher (in repo root) ├── test-ui.sh # Testing and verification script ├── css/ │ └── styles.css # Application styles with theme support @@ -212,7 +187,8 @@ WebUI/ ### Providers UI - Location: Providers tab (or Settings → Providers) in the WebUI - Capabilities: - - List configured providers and available models with metadata + - List all available providers and models (catalog + config) with clear highlighting of configured/usable providers + - Model dropdowns group by provider; unconfigured providers are greyed out and disabled with a note (requires API key) - Inspect provider health (status, circuit breaker, recent performance) - View request queue status (size, workers) and rate limiter settings - Copy `/` names for use in Chat and RAG requests @@ -243,6 +219,13 @@ Notes: - The response will include `metadata.hard_citations` (per-sentence citations with `doc_id` and `start/end` offsets) and `metadata.numeric_fidelity` (present/missing/source_numbers). - In production mode (`tldw_production=true`) or when `RAG_GUARDRAILS_STRICT=true`, the server defaults to enabling numeric fidelity and hard citations; you can still tighten behavior per request. +## Performance & Maintainability + +- Per‑tab script loading: heavy JS is now loaded on demand when a tab is activated (audio, chat, prompts, etc.). This reduces initial load and keeps the code modular. +- Inline handler migration: tabs are being refactored to remove inline `onclick` and related attributes. Newer panels (e.g., Flashcards → Manage) use delegated listeners bound in JS. +- CSP tightening: once all inline handlers are removed, `unsafe-inline` can be dropped for `/webui` in CSP. Until then, inline use is minimized. +- node_modules: do not commit `WebUI/node_modules` (already ignored). If any slipped into history, consider a history prune in a separate maintenance task. Local dev can run `npm i` inside `WebUI/` for tests (vitest), but this folder is not required at runtime. + ### RAG Streaming Tip: Contexts and "Why These Sources" The streaming endpoint `POST /api/v1/rag/search/stream` now emits early context information, followed by reasoning and incremental answer chunks. 
Events are NDJSON lines: @@ -268,7 +251,7 @@ The WebUI now supports automatic configuration when running alongside a TLDW ser - `SINGLE_USER_API_KEY`: Your API authentication token - `API_URL`: Custom API server URL (optional, defaults to http://localhost:8000) -2. **Auto-Detection**: When using `Start-WebUI.sh`, the script will: +2. **Auto-Detection**: When using `start-webui.sh` (repo root), the script will: - Check for `SINGLE_USER_API_KEY` in environment - Generate a `webui-config.json` file automatically - Pre-populate the API key in the UI @@ -310,7 +293,7 @@ If not using auto-configuration: **"404 Not Found" errors** - Check you're using the correct ports: - API: http://localhost:8000 - - WebUI: http://localhost:8080 (or your chosen port) + - WebUI: http://localhost:8000/webui/ **Tabs not loading** - Clear browser cache: Ctrl+Shift+R (Windows/Linux) or Cmd+Shift+R (Mac) diff --git a/tldw_Server_API/WebUI/auth.html b/tldw_Server_API/WebUI/auth.html index 8b084506b..c45b01e69 100644 --- a/tldw_Server_API/WebUI/auth.html +++ b/tldw_Server_API/WebUI/auth.html @@ -20,93 +20,7 @@ .warn { color: #b36b00; } .err { color: #b00020; } - +

[auth.html hunk bodies not recoverable — inline HTML markup was lost in extraction. Surviving text shows the Authentication page's Register and Login forms being reworked (hunks @@ -116,7 +30,7 @@ and @@ -132,14 +46,14 @@).]
diff --git a/tldw_Server_API/WebUI/css/styles.css b/tldw_Server_API/WebUI/css/styles.css index 2f7e3b263..0b6c71cf6 100644 --- a/tldw_Server_API/WebUI/css/styles.css +++ b/tldw_Server_API/WebUI/css/styles.css @@ -238,6 +238,12 @@ header h1 { .assistant-debug pre { background: var(--color-surface-alt); padding: 8px; border: 1px solid var(--color-border); border-radius: 4px; overflow: auto; } /* Basic token colors for lightweight highlighting */ + +/* Dev-only marker for migrated inline handlers (enable via localStorage.DEV_MIGRATE_MARKERS=1) */ +.migrated-inline { + outline: 1px dashed var(--color-warning); + outline-offset: 2px; +} .tok-key { color: #7a7; } .tok-string { color: #c22; } .tok-number { color: #164; } @@ -501,8 +507,10 @@ textarea.code-input { width: 100%; } -input[type="file"] { +.file-input-wrapper > input[type="file"] { position: absolute; + top: 0; + left: 0; opacity: 0; width: 100%; height: 100%; @@ -1268,6 +1276,11 @@ pre:hover .copy-button { gap: var(--spacing-lg); } +/* Simple Landing: stack quick actions vertically for more space */ +#tabSimpleLanding > .columns { + grid-template-columns: 1fr; +} + .hidden { display: none !important; } @@ -1704,10 +1717,20 @@ pre:hover .copy-button { padding: 20px; font-style: italic; } +/* Flashcards tag chips */ +.fc-tags { display: flex; flex-wrap: wrap; gap: 6px; align-items: center; } +.fc-chip { display: inline-flex; align-items: center; gap: 6px; padding: 2px 8px; border: 1px solid var(--color-border); border-radius: var(--radius-pill); background: var(--color-surface); font-size: 0.9em; } +.fc-chip .fc-chip-x { border: none; background: transparent; color: var(--color-text-secondary); cursor: pointer; padding: 0 2px; font-size: 1em; line-height: 1; } +.fc-chip .fc-chip-x:hover { color: var(--color-text); } +.fc-tag-input { min-width: 80px; border: 1px solid var(--color-border); padding: 2px 6px; border-radius: var(--radius-sm); } /* Collapsible Sections */ -.collapsible-header h3 { display:inline-block; } +.collapsible-header { display:flex; align-items:center; justify-content:space-between; gap: 8px; cursor: pointer; } +.collapsible-header h3 { display:inline-block; margin: 0; } .collapsible-body { margin-top: 8px; } +/* Collapsible toggle button visual when used as a separate control */ +.collapsible-toggle-btn { margin-left: auto; } + /* Simple progress bar (already styled inline in HTML) */ .progress-container { position: relative; } .progress-bar { transition: width 0.2s ease; } diff --git a/tldw_Server_API/WebUI/index.html b/tldw_Server_API/WebUI/index.html index 1321edbc3..f069c4bad 100644 --- a/tldw_Server_API/WebUI/index.html +++ b/tldw_Server_API/WebUI/index.html @@ -312,6 +312,14 @@

TLDW API Testing Interface

Checking...
DLQ: 0 + Setup + + + + @@ -335,6 +343,9 @@

TLDW API Testing Interface

+ + + - + Auth headers: single-user uses 'X-API-KEY'; multi-user uses 'Authorization: Bearer' (or 'X-API-KEY' if preferred). Contact your administrator for an API token. + + -
-
-
- - When enabled, the WebUI sends X-API-KEY instead of Bearer in multi-user mode when supported. -
-
-
-
- - When disabled (default), cURL masks tokens as [REDACTED]. -
-
+
+
+
+ + When enabled, the WebUI sends X-API-KEY instead of Bearer in multi-user mode when supported.
+
+
+
+ + When disabled (default), cURL masks tokens as [REDACTED]. +
+
+
+ +
+
+ Extensions streaming tip: if a browser extension calls the API (including text/event-stream), add its origin to ALLOWED_ORIGINS. See the CORS guidance. + Read CORS & Extensions +
+
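For reference, the two header styles described above look like this from a browser client; a minimal sketch (the endpoint path and token values are placeholders, not taken from this diff):

// Single-user mode: API key header
fetch('/api/v1/health', { headers: { 'X-API-KEY': 'YOUR_TOKEN' } });
// Multi-user mode: Bearer token (X-API-KEY may also be accepted where supported)
fetch('/api/v1/health', { headers: { 'Authorization': 'Bearer YOUR_TOKEN' } });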

Quick Actions

@@ -972,6 +1014,197 @@

Request History

+ +
+
+

Quick Actions

+

A streamlined panel for common tasks. Toggle "Show Advanced Panels" in the header to access full controls.

+
+
+
+
+

Chat Assistant

+

Full chat experience from the Chat tab, pinned here for quick access.

+
+ +
+
+
+
+ Loading chat interface… +
+
+
+
+
+ +
+
+
+

Ingest File or URL

+ +
+
+
+ + +
+
+ +
+ + +
+
+
+ + + You can select multiple files. Media type auto-detected from extension. +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ Optional: Enable to immediately analyze ingested content. + Recommended for large documents and videos. +
+
+ + +
+ + + +
+ + +

Response

+
---
+
+ +
+
+
+ +
+
+
+

Search Content

+ +
+
+
+ +
+ + +
+
+
+
+
+ + +
+
+
+
+
+ + +
+
+
+
+
+
+
+
+ + +
+
+ +
+
+
+
+
+

No endpoints match your search.

@@ -983,17 +1216,17 @@

Request History

+ + - - - - - - - - + + + + + + diff --git a/tldw_Server_API/WebUI/js/admin-advanced.js b/tldw_Server_API/WebUI/js/admin-advanced.js index 0762d3b48..e93b56c12 100644 --- a/tldw_Server_API/WebUI/js/admin-advanced.js +++ b/tldw_Server_API/WebUI/js/admin-advanced.js @@ -3,10 +3,40 @@ function esc(x) { return Utils.escapeHtml(String(x ?? '')); } +// ---------- User Registration (moved from inline) ---------- +async function adminCreateUser() { + const username = (document.getElementById('adminReg_username')?.value || '').trim(); + const email = (document.getElementById('adminReg_email')?.value || '').trim(); + const password = document.getElementById('adminReg_password')?.value || ''; + const registration_code = (document.getElementById('adminReg_code')?.value || '').trim() || null; + if (!username || !email || !password) { + if (typeof Toast !== 'undefined' && Toast) Toast.error('Username, email, and password are required'); + return; + } + try { + const res = await window.apiClient.post('/api/v1/auth/register', { username, email, password, registration_code }); + const out = document.getElementById('adminUserRegister_response'); if (out) out.textContent = JSON.stringify(res, null, 2); + if (res && res.api_key) { if (typeof Toast !== 'undefined' && Toast) Toast.success('User created. API key returned below. Copy and store it securely.'); } + else { if (typeof Toast !== 'undefined' && Toast) Toast.success('User created.'); } + } catch (e) { + const out = document.getElementById('adminUserRegister_response'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to create user'); + } +} + +function bindAdminUsersBasics() { + // List Users + const listBtn = document.getElementById('btnAdminUsersList'); + if (listBtn) listBtn.addEventListener('click', () => window.makeRequest && window.makeRequest('adminUsersList', 'GET', '/api/v1/admin/users', 'query')); + // Create User + const createBtn = document.getElementById('btnAdminCreateUser'); + if (createBtn) createBtn.addEventListener('click', adminCreateUser); +} + // ---------- Virtual Keys (per user) ---------- async function admVKList() { const userId = parseInt(document.getElementById('admVK_userId')?.value || '0', 10); - if (!userId) { Toast.error('Enter user id'); return; } + if (!userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter user id'); return; } try { const items = await window.apiClient.get(`/api/v1/admin/users/${userId}/virtual-keys`); const c = document.getElementById('adminVirtualKeys_list'); @@ -35,7 +65,7 @@ async function admVKList() { if (out) out.textContent = JSON.stringify(e.response || e, null, 2); const c = document.getElementById('adminVirtualKeys_list'); if (c) c.innerHTML = ''; - Toast.error('Failed to list virtual keys'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to list virtual keys'); } } @@ -51,12 +81,12 @@ async function rcCreate() { const res = await window.apiClient.post('/api/v1/admin/registration-codes', payload); const out = document.getElementById('adminRegCodes_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - Toast.success('Registration code created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Registration code created'); await rcList(); } catch (e) { const out = document.getElementById('adminRegCodes_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to create code'); + if (typeof Toast !== 
'undefined' && Toast) Toast.error('Failed to create code'); } } @@ -70,7 +100,7 @@ async function rcList() { } catch (e) { const out = document.getElementById('adminRegCodes_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to list codes'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to list codes'); } } @@ -80,12 +110,12 @@ async function rcDelete(id) { const res = await window.apiClient.delete(`/api/v1/admin/registration-codes/${id}`); const out = document.getElementById('adminRegCodes_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - Toast.success('Registration code deleted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Registration code deleted'); await rcList(); } catch (e) { const out = document.getElementById('adminRegCodes_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to delete code'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to delete code'); } } @@ -115,7 +145,7 @@ function rcRenderList(items) { async function admVKCreate() { const userId = parseInt(document.getElementById('admVK_userId')?.value || '0', 10); - if (!userId) { Toast.error('Enter user id'); return; } + if (!userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter user id'); return; } const toList = (val) => (val || '').split(',').map(s => s.trim()).filter(Boolean); const payload = { name: (document.getElementById('admVK_name')?.value || '').trim() || null, @@ -131,12 +161,12 @@ async function admVKCreate() { const res = await window.apiClient.post(`/api/v1/admin/users/${userId}/virtual-keys`, payload); const out = document.getElementById('adminVirtualKeys_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - Toast.success('Virtual key created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Virtual key created'); await admVKList(); } catch (e) { const out = document.getElementById('adminVirtualKeys_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Create failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Create failed'); } } @@ -146,12 +176,12 @@ async function admVKRevoke(userId, keyId) { const res = await window.apiClient.delete(`/api/v1/admin/users/${userId}/api-keys/${keyId}`); const out = document.getElementById('adminVirtualKeys_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - Toast.success('Key revoked'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Key revoked'); await admVKList(); } catch (e) { const out = document.getElementById('adminVirtualKeys_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Revoke failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Revoke failed'); } } @@ -187,7 +217,7 @@ async function adminQueryLLMUsage() { else pre.textContent = JSON.stringify(res, null, 2); } catch (e) { document.getElementById('adminLLMUsage_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to fetch LLM usage'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to fetch LLM usage'); } } @@ -236,10 +266,10 @@ async function _auditFetchAndDownload(qs, format) { const fname = parsedQS.get('filename') || (format === 'csv' ? 'audit_export.csv' : 'audit_export.json'); const mime = format === 'csv' ? 
'text/csv;charset=utf-8' : 'application/json;charset=utf-8'; Utils.downloadData(text, fname, mime); - Toast.success('Audit export downloaded'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Audit export downloaded'); } catch (e) { console.error('Audit export failed:', e); - Toast.error(`Audit export failed: ${e.message || e}`); + if (typeof Toast !== 'undefined' && Toast) Toast.error(`Audit export failed: ${e.message || e}`); } } @@ -304,6 +334,498 @@ function _shadeFromHex(hex, lighten = 0, darken = 0) { return { base: `#${toHex(r)}${toHex(g)}${toHex(b)}`, light: `#${toHex(rl)}${toHex(gl)}${toHex(bl)}`, dark: `#${toHex(rd)}${toHex(gd)}${toHex(bd)}` }; } +// ============================== +// Moderation (migrated from inline) +// ============================== + +// Settings +async function moderationLoadSettings() { + try { + const res = await window.apiClient.get('/api/v1/moderation/settings'); + const eff = res && res.effective ? res.effective : {}; + const cats = (eff.categories_enabled || []).join(','); + const piiOverride = (res && Object.prototype.hasOwnProperty.call(res, 'pii_enabled')) ? res.pii_enabled : null; + const piiVal = (piiOverride === null || piiOverride === undefined) ? '' : String(!!piiOverride); + const setVal = (id, v) => { const el = document.getElementById(id); if (el) el.value = v; }; + setVal('modSettings_categories', cats); + setVal('modSettings_pii', piiVal); + const pre = document.getElementById('moderationSettings_status'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded settings'); + } catch (e) { + const pre = document.getElementById('moderationSettings_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load settings'); + } +} + +async function moderationSaveSettings() { + try { + const rawCats = (document.getElementById('modSettings_categories')?.value || '').trim(); + const cats = rawCats ? 
rawCats.split(',').map(x => x.trim()).filter(Boolean) : []; + const piiVal = (document.getElementById('modSettings_pii')?.value || ''); + const body = {}; + if (piiVal !== '') body.pii_enabled = (piiVal === 'true'); + body.categories_enabled = cats; + body.persist = !!document.getElementById('modSettings_persist')?.checked; + const res = await window.apiClient.put('/api/v1/moderation/settings', body); + const pre = document.getElementById('moderationSettings_status'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Saved settings'); + } catch (e) { + const pre = document.getElementById('moderationSettings_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save settings'); + } +} + +// Managed Blocklist +window._moderationManaged = { version: '', items: [] }; +window._moderationManagedLint = {}; // id -> lint item + +function renderManagedBlocklist() { + const container = document.getElementById('moderationManaged_table'); if (!container) return; + const filter = (document.getElementById('moderationManaged_filter')?.value || '').toLowerCase(); + let items = (window._moderationManaged.items || []).filter(it => !filter || String(it.line).toLowerCase().includes(filter)); + const onlyInvalid = !!document.getElementById('moderationManaged_onlyInvalid')?.checked; + if (onlyInvalid) { + items = items.filter((it) => { + const lint = window._moderationManagedLint[String(it.id)] || null; + return lint && lint.ok === false; + }); + } + let html = ''; + for (const it of items) { + const lint = window._moderationManagedLint[String(it.id)] || null; + const lintText = lint ? (lint.ok ? 'ok' : (lint.error || 'invalid')) : ''; + const lintClass = lint ? (lint.ok ? 'ok' : 'invalid') : ''; + const lintIcon = lint ? (lint.ok ? '✓' : '⚠') : ''; + html += ` + + + + + `; + } + html += '
IDPatternLintActions
${Utils.escapeHtml(String(it.id ?? ''))}${Utils.escapeHtml(String(it.line))}${lintIcon}${Utils.escapeHtml(lint ? (lint.pattern_type || '') : '')}
'; + container.innerHTML = html; +} + +async function moderationLoadManaged() { + try { + const res = await window.apiClient.get('/api/v1/moderation/blocklist/managed'); + window._moderationManaged = res || { version: '', items: [] }; + await moderationLintManagedAll(); + renderManagedBlocklist(); + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = `Loaded version: ${res.version}`; + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded managed blocklist'); + } catch (e) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load managed blocklist'); + } +} + +async function moderationRefreshManaged() { return moderationLoadManaged(); } + +async function moderationAppendManaged() { + try { + const line = (document.getElementById('moderationManaged_newLine')?.value || '').trim(); + if (!line) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a line'); return; } + const lint = await window.apiClient.post('/api/v1/moderation/blocklist/lint', { line }); + const invalid = (lint.items || []).filter(it => !it.ok); + if (invalid.length > 0) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(lint, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Lint failed: fix the line before append'); + return; + } + const res = await window.apiClient.post('/api/v1/moderation/blocklist/append', { line }, { headers: { 'If-Match': window._moderationManaged.version }}); + window._moderationManaged.version = res.version; + await moderationLoadManaged(); + const input = document.getElementById('moderationManaged_newLine'); if (input) input.value = ''; + if (typeof Toast !== 'undefined' && Toast) Toast.success('Appended'); + } catch (e) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to append'); + } +} + +async function moderationDeleteManaged(id) { + try { + if (!confirm('Delete blocklist entry #' + id + '?')) return; + const res = await window.apiClient.delete(`/api/v1/moderation/blocklist/${id}`, { headers: { 'If-Match': window._moderationManaged.version }}); + window._moderationManaged.version = res.version; + await moderationLoadManaged(); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Deleted'); + } catch (e) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to delete'); + } +} + +async function moderationLintManaged() { + try { + const line = (document.getElementById('moderationManaged_newLine')?.value || '').trim(); + if (!line) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a line'); return; } + const res = await window.apiClient.post('/api/v1/moderation/blocklist/lint', { line }); + const invalid = (res.items || []).filter(it => !it.ok); + const msg = `Lint: ${res.valid_count} valid, ${res.invalid_count} invalid`; + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (invalid.length === 0) { if (typeof Toast !== 'undefined' && Toast) Toast.success(msg); } else { if (typeof Toast !== 'undefined' && Toast) 
Toast.error(msg); } + } catch (e) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Lint failed'); + } +} + +async function moderationLintManagedAll() { + try { + const lines = (window._moderationManaged.items || []).map(it => it.line); + if (!lines.length) { window._moderationManagedLint = {}; return; } + const res = await window.apiClient.post('/api/v1/moderation/blocklist/lint', { lines }); + const map = {}; + // Key lint results by blocklist entry ID instead of array index + (res.items || []).forEach((it) => { map[String(it.id)] = it; }); + window._moderationManagedLint = map; + renderManagedBlocklist(); + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = 'Linted'; + } catch (e) { + const pre = document.getElementById('moderationManaged_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + } +} + +// Raw Blocklist +window._moderationBlocklistLastLint = null; + +async function moderationLoadBlocklist() { + try { + const lines = await window.apiClient.get('/api/v1/moderation/blocklist'); + const ta = document.getElementById('moderationBlocklist_text'); if (ta) ta.value = (lines || []).join('\n'); + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = 'Loaded'; + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded blocklist'); + } catch (e) { + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load blocklist'); + } +} + +async function moderationSaveBlocklist() { + try { + const raw = document.getElementById('moderationBlocklist_text')?.value || ''; + const lines = raw.split(/\r?\n/); + const res = await window.apiClient.put('/api/v1/moderation/blocklist', { lines }); + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Blocklist saved'); + } catch (e) { + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save blocklist'); + } +} + +async function moderationLintBlocklist() { + try { + const raw = document.getElementById('moderationBlocklist_text')?.value || ''; + const lines = raw.split(/\r?\n/); + const res = await window.apiClient.post('/api/v1/moderation/blocklist/lint', { lines }); + const invalid = (res.items || []).filter(it => !it.ok); + const msg = `Lint: ${res.valid_count} valid, ${res.invalid_count} invalid`; + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + window._moderationBlocklistLastLint = res; + renderBlocklistInvalidList(); + if (invalid.length === 0) { if (typeof Toast !== 'undefined' && Toast) Toast.success(msg); } else { if (typeof Toast !== 'undefined' && Toast) Toast.error(msg); } + } catch (e) { + const pre = document.getElementById('moderationBlocklist_status'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Lint failed'); + } +} + +function renderBlocklistInvalidList() { + const container = 
document.getElementById('moderationBlocklist_invalidList'); if (!container) return; + const onlyInvalid = !!document.getElementById('moderationBlocklist_onlyInvalid')?.checked; + const actions = document.getElementById('moderationBlocklist_invalidActions'); + if (!onlyInvalid) { container.innerHTML = ''; if (actions) actions.style.display = 'none'; return; } + const res = window._moderationBlocklistLastLint; + if (!res || !Array.isArray(res.items)) { container.innerHTML = 'No lint results yet'; return; } + const invalid = (res.items || []).filter(it => it && it.ok === false); + if (!invalid.length) { container.innerHTML = 'No invalid items'; if (actions) actions.style.display = 'none'; return; } + let html = ''; + for (const it of invalid) { + const idx = typeof it.index === 'number' ? it.index : ''; + const type = it.pattern_type || ''; + const err = it.error || 'invalid'; + const line = (it.line || '').slice(0, 120); + html += ` + + + + + `; + } + html += '
#TypeErrorLine
${idx}${Utils.escapeHtml(String(type))}${Utils.escapeHtml(String(err))}${Utils.escapeHtml(String(line))}
'; + container.innerHTML = html; + if (actions) actions.style.display = 'block'; +} + +async function moderationCopyInvalidBlocklist() { + try { + const res = window._moderationBlocklistLastLint ? (window._moderationBlocklistLastLint.items || []).filter(it => !it.ok).map(it => String(it.line || '')).join('\n') : ''; + if (!res) { if (typeof Toast !== 'undefined' && Toast) Toast.error('No invalid items to copy'); return; } + const ok = await Utils.copyToClipboard(res); + if (ok) { if (typeof Toast !== 'undefined' && Toast) Toast.success('Copied invalid lines'); } else { if (typeof Toast !== 'undefined' && Toast) Toast.error('Copy failed'); } + } catch (_) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Copy failed'); } +} + +// Overrides + Tester +function _buildOverridePayload() { + const v = (id) => (document.getElementById(id)?.value ?? '').trim(); + const maybeBool = (x) => x === '' ? undefined : (x === 'true'); + const payload = {}; + const enabled = maybeBool(v('modEnabled')); + const inp = maybeBool(v('modInputEnabled')); + const outp = maybeBool(v('modOutputEnabled')); + const ia = v('modInputAction'); + const oa = v('modOutputAction'); + const rr = v('modRedact'); + const cat = v('modUserCategories'); + if (enabled !== undefined) payload.enabled = enabled; + if (inp !== undefined) payload.input_enabled = inp; + if (outp !== undefined) payload.output_enabled = outp; + if (ia) payload.input_action = ia; + if (oa) payload.output_action = oa; + if (rr) payload.redact_replacement = rr; + if (cat) payload.categories_enabled = cat; + return payload; +} + +async function loadUserOverride() { + try { + const uid = (document.getElementById('modUserId')?.value || '').trim(); + if (!uid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a user ID'); return; } + const res = await window.apiClient.get(`/api/v1/moderation/users/${uid}`); + const pre = document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded override'); + } catch (e) { + const pre = document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load override'); + } +} + +async function saveUserOverride() { + try { + const uid = (document.getElementById('modUserId')?.value || '').trim(); + if (!uid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a user ID'); return; } + const payload = _buildOverridePayload(); + const res = await window.apiClient.put(`/api/v1/moderation/users/${uid}`, payload); + const pre = document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Saved override'); + } catch (e) { + const pre = document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save override'); + } +} + +async function deleteUserOverride() { + try { + const uid = (document.getElementById('modUserId')?.value || '').trim(); + if (!uid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a user ID'); return; } + if (!confirm('Delete override for user ' + uid + '?')) return; + const res = await window.apiClient.delete(`/api/v1/moderation/users/${uid}`); + const pre = 
document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Deleted override'); + } catch (e) { + const pre = document.getElementById('moderationOverrides_result'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to delete override'); + } +} + +async function moderationListOverrides() { + try { + const res = await window.apiClient.get('/api/v1/moderation/users'); + const overrides = (res && res.overrides) || {}; + const rows = Object.entries(overrides).map(([uid, o]) => ({ uid, ...o })); + let html = ''; + for (const r of rows) { + html += ` + + + + + + + + + + `; + } + html += '
Userenabledinput_enabledoutput_enabledinput_actionoutput_actionredact_replacementcategories_enabledActions
${Utils.escapeHtml(String(r.uid))}${String(r.enabled ?? '')}${String(r.input_enabled ?? '')}${String(r.output_enabled ?? '')}${Utils.escapeHtml(String(r.input_action ?? ''))}${Utils.escapeHtml(String(r.output_action ?? ''))}${Utils.escapeHtml(String(r.redact_replacement ?? ''))}${Utils.escapeHtml(String(r.categories_enabled ?? ''))}
'; + const div = document.getElementById('moderationOverrides_table'); if (div) div.innerHTML = html; + } catch (e) { + const div = document.getElementById('moderationOverrides_table'); if (div) div.innerHTML = `
${Utils.escapeHtml(JSON.stringify(e.response || e, null, 2))}
`; + } +} + +function moderationLoadIntoEditor(uid) { + const id = document.getElementById('modUserId'); if (id) id.value = uid; + loadUserOverride(); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded override into editor'); +} + +async function moderationRunTest() { + try { + const user_id = (document.getElementById('modTest_user')?.value || '').trim() || null; + const phase = document.getElementById('modTest_phase')?.value; + const text = document.getElementById('modTest_text')?.value || ''; + const res = await window.apiClient.post('/api/v1/moderation/test', { user_id, phase, text }); + const pre = document.getElementById('moderationTester_result'); if (pre) pre.textContent = JSON.stringify(res, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Test completed'); + } catch (e) { + const pre = document.getElementById('moderationTester_result'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Test failed'); + } +} + +// ============================== +// Security Alerts (migrated) +// ============================== +async function loadSecurityAlertStatus() { + try { + const resp = await window.apiClient.makeRequest('GET', '/api/v1/admin/security/alert-status'); + const pre = document.getElementById('adminSecurityAlerts_response'); if (pre) pre.textContent = JSON.stringify(resp, null, 2); + const health = resp.health || 'unknown'; + const pill = document.getElementById('adminSecurityAlerts_health'); + if (pill) { + pill.textContent = `Health: ${health}`; + if (health === 'ok') { pill.style.backgroundColor = '#d1fae5'; pill.style.color = '#065f46'; } + else if (health === 'degraded') { pill.style.backgroundColor = '#fef3c7'; pill.style.color = '#92400e'; } + else { pill.style.backgroundColor = '#fee2e2'; pill.style.color = '#991b1b'; } + } + const tbody = document.querySelector('#adminSecurityAlerts_table tbody'); + if (tbody) { + tbody.innerHTML = ''; + (resp.sinks || []).forEach(sink => { + const row = document.createElement('tr'); + + const tdSink = document.createElement('td'); + tdSink.textContent = String(sink?.sink ?? ''); + row.appendChild(tdSink); + + const tdConfigured = document.createElement('td'); + tdConfigured.textContent = sink && sink.configured ? 'Yes' : 'No'; + row.appendChild(tdConfigured); + + const tdMinSeverity = document.createElement('td'); + tdMinSeverity.textContent = String((sink && sink.min_severity) || resp.min_severity || ''); + row.appendChild(tdMinSeverity); + + const tdLastStatus = document.createElement('td'); + const lastStatus = sink && sink.last_status === true ? 'success' : (sink && sink.last_status === false ? 
'failure' : 'n/a'); + tdLastStatus.textContent = lastStatus; + row.appendChild(tdLastStatus); + + const tdLastError = document.createElement('td'); + tdLastError.textContent = String((sink && sink.last_error) || ''); + row.appendChild(tdLastError); + + const tdBackoff = document.createElement('td'); + tdBackoff.textContent = String((sink && sink.backoff_until) || ''); + row.appendChild(tdBackoff); + + tbody.appendChild(row); + }); + } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Security alert status refreshed'); + } catch (e) { + const pre = document.getElementById('adminSecurityAlerts_response'); if (pre) pre.textContent = String(e?.message || e); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load security alert status: ' + (e?.message || e)); + } +} + +// ============================== +// Usage (migrated) +// ============================== +function _usageQS() { + const params = new URLSearchParams(); + const uid = parseInt(document.getElementById('usage_userId')?.value || ''); + const start = (document.getElementById('usage_start')?.value || '').trim(); + const end = (document.getElementById('usage_end')?.value || '').trim(); + const page = parseInt(document.getElementById('usage_page')?.value || '1', 10); + const limit = parseInt(document.getElementById('usage_limit')?.value || '50', 10); + if (!isNaN(uid)) params.set('user_id', String(uid)); + if (start) params.set('start', start); + if (end) params.set('end', end); + if (page) params.set('page', String(page)); + if (limit) params.set('limit', String(limit)); + return params.toString(); +} + +function _renderDailyTable(items) { + if (!Array.isArray(items) || items.length === 0) return '

No data yet.

'; + const showIn = !!document.getElementById('usage_show_bytes_in')?.checked; + let html = '' + (showIn ? '' : '') + ''; + for (const r of items) { + html += ` + + + + + + ${showIn ? `` : ''} + + `; + } + html += '
User IDDayRequestsErrorsBytesBytes InAvg Latency (ms)
${r.user_id}${r.day}${r.requests}${r.errors}${r.bytes_total}${r.bytes_in_total || 0}${r.avg_latency_ms || '-'}
'; + return html; +} + +async function adminLoadUsageDaily() { + const qs = _usageQS(); + const url = '/api/v1/admin/usage/daily' + (qs ? ('?' + qs) : ''); + const res = await window.apiClient.get(url); + const items = res && res.items ? res.items : []; + const summary = document.getElementById('adminUsageDaily_summary'); if (summary) summary.textContent = `Items: ${items.length}`; + const table = document.getElementById('adminUsageDaily_table'); if (table) table.innerHTML = _renderDailyTable(items); + const raw = document.getElementById('adminUsageDaily_raw'); if (raw) raw.textContent = JSON.stringify(res, null, 2); +} + +function adminDownloadUsageDailyCSV() { + const qs = _usageQS(); + const url = `/api/v1/admin/usage/daily.csv${qs ? ('?' + qs) : ''}`; + window.open(url, '_blank'); +} + +async function adminLoadUsageTop() { + const metric = (document.getElementById('usage_top_metric')?.value || 'requests'); + const topLimit = parseInt(document.getElementById('usage_top_limit')?.value || '10', 10); + const qsBase = _usageQS(); + const qs = new URLSearchParams(qsBase); + qs.set('metric', metric); + qs.set('top_limit', String(topLimit)); + const url = `/api/v1/admin/usage/top?${qs.toString()}`; + const res = await window.apiClient.get(url); + const items = res && res.items ? res.items : []; + const summary = document.getElementById('adminUsageTop_summary'); if (summary) summary.textContent = `Items: ${items.length}`; + let html = ''; + for (const r of items) { + html += ``; + } + html += '
User IDRequestsErrorsBytes Total
${r.user_id}${r.requests}${r.errors}${r.bytes_total}
'; + const table = document.getElementById('adminUsageTop_table'); if (table) table.innerHTML = html; + const raw = document.getElementById('adminUsageTop_raw'); if (raw) raw.textContent = JSON.stringify(res, null, 2); +} + +function adminDownloadUsageTopCSV() { + const metric = (document.getElementById('usage_top_metric')?.value || 'requests'); + const topLimit = parseInt(document.getElementById('usage_top_limit')?.value || '10', 10); + const qsBase = _usageQS(); + const qs = new URLSearchParams(qsBase); + qs.set('metric', metric); + qs.set('top_limit', String(topLimit)); + const url = `/api/v1/admin/usage/top.csv?${qs.toString()}`; + window.open(url, '_blank'); +} + +async function adminRunUsageAggregate() { + const day = (document.getElementById('usage_agg_day')?.value || '').trim(); + if (!day) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter a day'); return; } + const res = await window.apiClient.post('/api/v1/admin/usage/aggregate', { day }); + const pre = document.getElementById('adminUsageAgg_result'); if (pre) pre.textContent = JSON.stringify(res, null, 2); +} + function _colorFromLabel(label) { const h = Array.from(String(label || '')).reduce((a, c) => a + c.charCodeAt(0), 0) % 360; return { base: `hsl(${h}, 60%, 55%)`, light: `hsl(${h}, 65%, 75%)`, dark: `hsl(${h}, 55%, 40%)` }; @@ -496,10 +1018,10 @@ async function adminLoadLLMCharts() { _renderLegend('llmLegendProviderMix', provPairs, _colorForProvider); _attachLegendToggle('llmLegendProviderMix', 'llmChartProviderMix'); } - Toast.success('LLM charts loaded'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('LLM charts loaded'); } catch (e) { console.error('Failed to load LLM charts', e); - Toast.error('Failed to load LLM charts'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load LLM charts'); } } // ---------- Admin Users API Keys (row actions) ---------- @@ -508,13 +1030,13 @@ async function admUserKeyRotate(userId, keyId) { const res = await window.apiClient.post(`/api/v1/admin/users/${userId}/api-keys/${keyId}/rotate`, { expires_in_days: 365 }); const out = document.getElementById('adminUserApiKeys_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - if (res && res.key) Toast.success('API key rotated. Copy the new key now.'); - else Toast.success('API key rotated.'); + if (res && res.key) { if (typeof Toast !== 'undefined' && Toast) Toast.success('API key rotated. 
Copy the new key now.'); } + else { if (typeof Toast !== 'undefined' && Toast) Toast.success('API key rotated.'); } if (typeof window.adminListUserApiKeys === 'function') await window.adminListUserApiKeys(); } catch (e) { const out = document.getElementById('adminUserApiKeys_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to rotate key'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to rotate key'); } } @@ -524,12 +1046,12 @@ async function admUserKeyRevoke(userId, keyId) { const res = await window.apiClient.delete(`/api/v1/admin/users/${userId}/api-keys/${keyId}`); const out = document.getElementById('adminUserApiKeys_result'); if (out) out.textContent = JSON.stringify(res, null, 2); - Toast.success('API key revoked'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('API key revoked'); if (typeof window.adminListUserApiKeys === 'function') await window.adminListUserApiKeys(); } catch (e) { const out = document.getElementById('adminUserApiKeys_result'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to revoke key'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to revoke key'); } } @@ -543,14 +1065,14 @@ function tableHTML(rows, headers) { } async function admCreateOrg() { - const name = (document.getElementById('org_name')?.value || '').trim(); if (!name) { Toast.error('Name required'); return; } + const name = (document.getElementById('org_name')?.value || '').trim(); if (!name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Name required'); return; } const payload = { name, slug: (document.getElementById('org_slug')?.value || '').trim() || null, owner_user_id: document.getElementById('org_owner')?.value ? 
parseInt(document.getElementById('org_owner').value, 10) : null }; try { const res = await window.apiClient.post('/api/v1/admin/orgs', payload); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Org created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Org created'); await admListOrgs(); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Create failed'); } + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Create failed'); } } async function admListOrgs() { @@ -560,23 +1082,23 @@ async function admListOrgs() { const rows = items.map(x => ({ id: x.id, name: x.name, slug: x.slug, owner_user_id: x.owner_user_id })); document.getElementById('adminOrgs_list').innerHTML = tableHTML(rows, ['id','name','slug','owner_user_id']); document.getElementById('adminOrgsTeams_result').textContent = 'Loaded orgs'; - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List orgs failed'); } + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List orgs failed'); } } async function admCreateTeam() { - const orgId = parseInt(document.getElementById('team_org')?.value || '0', 10); if (!orgId) { Toast.error('Org ID required'); return; } - const name = (document.getElementById('team_name')?.value || '').trim(); if (!name) { Toast.error('Team name required'); return; } + const orgId = parseInt(document.getElementById('team_org')?.value || '0', 10); if (!orgId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID required'); return; } + const name = (document.getElementById('team_name')?.value || '').trim(); if (!name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Team name required'); return; } const payload = { name, slug: (document.getElementById('team_slug')?.value || '').trim() || null }; try { const res = await window.apiClient.post(`/api/v1/admin/orgs/${orgId}/teams`, payload); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Team created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Team created'); await admListTeams(); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Create team failed'); } + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Create team failed'); } } async function admListTeams() { - const orgId = parseInt(document.getElementById('team_org')?.value || '0', 10); if (!orgId) { Toast.error('Org ID required'); return; } + const orgId = parseInt(document.getElementById('team_org')?.value || '0', 10); if (!orgId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID required'); return; } try { const rows = await window.apiClient.get(`/api/v1/admin/orgs/${orgId}/teams`); const items = Array.isArray(rows) ? 
rows : []; @@ -588,13 +1110,13 @@ async function admListTeams() { async function admAddTeamMember() { const teamId = parseInt(document.getElementById('m_team')?.value || '0', 10); const userId = parseInt(document.getElementById('m_user')?.value || '0', 10); - if (!teamId || !userId) { Toast.error('Team ID and User ID required'); return; } + if (!teamId || !userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Team ID and User ID required'); return; } const role = (document.getElementById('m_role')?.value || '').trim() || 'member'; try { const res = await window.apiClient.post(`/api/v1/admin/teams/${teamId}/members`, { user_id: userId, role }); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Added team member'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Add member failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Added team member'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Add member failed'); } } async function admListTeamMembers() { @@ -608,44 +1130,44 @@ async function admListTeamMembers() { async function admRemoveTeamMember() { const teamId = parseInt(document.getElementById('m_team')?.value || '0', 10); const userId = parseInt(document.getElementById('m_user')?.value || '0', 10); - if (!teamId || !userId) { Toast.error('Team ID and User ID required'); return; } + if (!teamId || !userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Team ID and User ID required'); return; } if (!confirm('Remove user ' + userId + ' from team ' + teamId + '?')) return; try { const res = await window.apiClient.delete(`/api/v1/admin/teams/${teamId}/members/${userId}`); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Removed team member'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Remove failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Removed team member'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Remove failed'); } } async function admAddOrgMember() { const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); const userId = parseInt(document.getElementById('m_user')?.value || '0', 10); - if (!orgId || !userId) { Toast.error('Org ID and User ID required'); return; } + if (!orgId || !userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID and User ID required'); return; } const role = (document.getElementById('m_role')?.value || '').trim() || 'member'; try { const res = await window.apiClient.post(`/api/v1/admin/orgs/${orgId}/members`, { user_id: userId, role }); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Added org member'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Add org member failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Added org member'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, 
null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Add org member failed'); } } async function admListOrgMembers() { - const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { Toast.error('Org ID required'); return; } + const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID required'); return; } try { const rows = await window.apiClient.get(`/api/v1/admin/orgs/${orgId}/members`); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(rows, null, 2); - Toast.success('Listed org members'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List org members failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Listed org members'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List org members failed'); } } async function admUpdateOrgMemberRole() { const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); const userId = parseInt(document.getElementById('m_user')?.value || '0', 10); const role = (document.getElementById('m_role')?.value || '').trim(); - if (!orgId || !userId || !role) { Toast.error('Org ID, User ID, and new role required'); return; } + if (!orgId || !userId || !role) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID, User ID, and new role required'); return; } try { const res = await window.apiClient.patch(`/api/v1/admin/orgs/${orgId}/members/${userId}`, { role }); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Updated org member role'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Update role failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Updated org member role'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Update role failed'); } } async function admRemoveOrgMember() { @@ -660,23 +1182,23 @@ async function admRemoveOrgMember() { } async function admGetOrgWatchCfg() { - const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { Toast.error('Org ID required'); return; } + const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID required'); return; } try { const res = await window.apiClient.get(`/api/v1/admin/orgs/${orgId}/watchlists/settings`); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Loaded org watchlists settings'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Get settings failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded org watchlists settings'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Get settings failed'); } } async function admSetOrgWatchCfg() { - const orgId = 
parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { Toast.error('Org ID required'); return; } + const orgId = parseInt(document.getElementById('m_org')?.value || '0', 10); if (!orgId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Org ID required'); return; } const val = document.getElementById('org_wl_require')?.value; const body = { require_include_default: val === '' ? null : (val === 'true') }; try { const res = await window.apiClient.patch(`/api/v1/admin/orgs/${orgId}/watchlists/settings`, body); document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Updated org watchlists settings'); - } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Update settings failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Updated org watchlists settings'); + } catch (e) { document.getElementById('adminOrgsTeams_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Update settings failed'); } } // ---------- Tool Permissions ---------- @@ -687,103 +1209,103 @@ async function tpListPerms() { const html = (list.length ? '
    ' + list.map(p => `
  • ${esc(p.name)} - ${esc(p.description || '')}
  • `).join('') + '
' : '

None

'); document.getElementById('adminToolPermissions_list').innerHTML = html; document.getElementById('adminToolPermissions_result').textContent = 'Loaded'; - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List failed'); } + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List failed'); } } async function tpCreatePerm() { - const tool_name = (document.getElementById('tp_name')?.value || '').trim(); if (!tool_name) { Toast.error('Tool name required'); return; } + const tool_name = (document.getElementById('tp_name')?.value || '').trim(); if (!tool_name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Tool name required'); return; } const description = (document.getElementById('tp_desc')?.value || '').trim() || null; try { const res = await window.apiClient.post('/api/v1/admin/permissions/tools', { tool_name, description }); document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Permission created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Permission created'); await tpListPerms(); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Create failed'); } + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Create failed'); } } async function tpDeletePerm() { - const name = (document.getElementById('tp_name')?.value || '').trim(); if (!name) { Toast.error('Enter permission name'); return; } + const name = (document.getElementById('tp_name')?.value || '').trim(); if (!name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter permission name'); return; } if (!confirm('Delete ' + name + '?')) return; try { const res = await window.apiClient.delete(`/api/v1/admin/permissions/tools/${encodeURIComponent(name)}`); document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Permission deleted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Permission deleted'); await tpListPerms(); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Delete failed'); } + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Delete failed'); } } async function tpGrantToRole() { - const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { Toast.error('Role ID required'); return; } - const tool = (document.getElementById('tp_tool')?.value || '').trim(); if (!tool) { Toast.error('Tool required'); return; } + const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID required'); return; } + const tool = (document.getElementById('tp_tool')?.value || '').trim(); if (!tool) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Tool required'); return; } try { const res = await window.apiClient.post(`/api/v1/admin/roles/${roleId}/permissions/tools`, { tool_name: tool }); 
document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Granted'); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Grant failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Granted'); + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Grant failed'); } } async function tpRevokeFromRole() { - const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { Toast.error('Role ID required'); return; } - const tool = (document.getElementById('tp_tool')?.value || '').trim(); if (!tool) { Toast.error('Tool required'); return; } + const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID required'); return; } + const tool = (document.getElementById('tp_tool')?.value || '').trim(); if (!tool) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Tool required'); return; } if (!confirm('Revoke ' + tool + ' from role ' + roleId + '?')) return; try { const res = await window.apiClient.delete(`/api/v1/admin/roles/${roleId}/permissions/tools/${encodeURIComponent(tool)}`); document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Revoked'); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Revoke failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Revoked'); + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Revoke failed'); } } async function tpListRoleToolPerms() { - const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { Toast.error('Role ID required'); return; } + const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); if (!roleId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID required'); return; } try { const rows = await window.apiClient.get(`/api/v1/admin/roles/${roleId}/permissions/tools`); document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(rows || [], null, 2); - Toast.success('Listed role tool perms'); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Listed role tool perms'); + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List failed'); } } async function tpGrantByPrefix() { const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); const prefix = (document.getElementById('tp_prefix')?.value || '').trim(); - if (!roleId || !prefix) { Toast.error('Role ID and prefix required'); return; } + if (!roleId || !prefix) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID and prefix required'); return; } try { const res = await window.apiClient.post(`/api/v1/admin/roles/${roleId}/permissions/tools/prefix/grant`, { prefix }); 
document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res || [], null, 2); - Toast.success('Granted by prefix'); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Grant by prefix failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Granted by prefix'); + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Grant by prefix failed'); } } async function tpRevokeByPrefix() { const roleId = parseInt(document.getElementById('tp_role')?.value || '0', 10); const prefix = (document.getElementById('tp_prefix')?.value || '').trim(); - if (!roleId || !prefix) { Toast.error('Role ID and prefix required'); return; } + if (!roleId || !prefix) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID and prefix required'); return; } if (!confirm('Revoke all tool permissions by prefix from role ' + roleId + '?')) return; try { const res = await window.apiClient.post(`/api/v1/admin/roles/${roleId}/permissions/tools/prefix/revoke`, { prefix }); document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(res || {}, null, 2); - Toast.success('Revoked by prefix'); - } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Revoke by prefix failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Revoked by prefix'); + } catch (e) { document.getElementById('adminToolPermissions_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Revoke by prefix failed'); } } // ---------- Rate Limits ---------- async function rlUpsertRole() { - const roleId = parseInt(document.getElementById('rl_role')?.value || '0', 10); if (!roleId) { Toast.error('Role ID required'); return; } + const roleId = parseInt(document.getElementById('rl_role')?.value || '0', 10); if (!roleId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Role ID required'); return; } const payload = { resource: (document.getElementById('rl_resource')?.value || '').trim(), limit_per_min: document.getElementById('rl_limit')?.value ? parseInt(document.getElementById('rl_limit').value, 10) : null, burst: document.getElementById('rl_burst')?.value ? 
parseInt(document.getElementById('rl_burst').value, 10) : null }; - if (!payload.resource) { Toast.error('Resource required'); return; } + if (!payload.resource) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Resource required'); return; } try { const res = await window.apiClient.post(`/api/v1/admin/roles/${roleId}/rate-limits`, payload); document.getElementById('adminRateLimits_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Role rate limit updated'); - } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Upsert failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Role rate limit updated'); + } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Upsert failed'); } } async function rlUpsertUser() { - const userId = parseInt(document.getElementById('rl_user')?.value || '0', 10); if (!userId) { Toast.error('User ID required'); return; } + const userId = parseInt(document.getElementById('rl_user')?.value || '0', 10); if (!userId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('User ID required'); return; } const payload = { resource: (document.getElementById('rl_u_resource')?.value || '').trim(), limit_per_min: document.getElementById('rl_u_limit')?.value ? parseInt(document.getElementById('rl_u_limit').value, 10) : null, burst: document.getElementById('rl_u_burst')?.value ? parseInt(document.getElementById('rl_u_burst').value, 10) : null }; - if (!payload.resource) { Toast.error('Resource required'); return; } + if (!payload.resource) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Resource required'); return; } try { const res = await window.apiClient.post(`/api/v1/admin/users/${userId}/rate-limits`, payload); document.getElementById('adminRateLimits_result').textContent = JSON.stringify(res, null, 2); - Toast.success('User rate limit updated'); - } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Upsert failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('User rate limit updated'); + } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Upsert failed'); } } async function rlReset() { @@ -797,8 +1319,8 @@ async function rlReset() { try { const res = await window.apiClient.post('/api/v1/admin/rate-limits/reset', payload); document.getElementById('adminRateLimits_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Rate limits reset'); - } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Reset failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Rate limits reset'); + } catch (e) { document.getElementById('adminRateLimits_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Reset failed'); } } // ---------- Tool Catalog (UI placeholder; HTML added separately) ---------- @@ -809,70 +1331,103 @@ async function tcList() { const list = document.getElementById('adminToolCatalog_list'); if (list) list.innerHTML = tableHTML(items.map(x => ({ id: x.id, name: x.name, org_id: x.org_id, team_id: x.team_id, is_active: x.is_active })), 
['id','name','org_id','team_id','is_active']); document.getElementById('adminToolCatalog_result').textContent = 'Loaded catalogs'; - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List catalogs failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List catalogs failed'); } } async function tcCreate() { - const name = (document.getElementById('tc_name')?.value || '').trim(); if (!name) { Toast.error('Name required'); return; } + const name = (document.getElementById('tc_name')?.value || '').trim(); if (!name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Name required'); return; } const description = (document.getElementById('tc_desc')?.value || '').trim() || null; const org_id = document.getElementById('tc_org')?.value ? parseInt(document.getElementById('tc_org').value, 10) : null; const team_id = document.getElementById('tc_team')?.value ? parseInt(document.getElementById('tc_team').value, 10) : null; try { const res = await window.apiClient.post('/api/v1/admin/mcp/tool_catalogs', { name, description, org_id, team_id }); document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Catalog created'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Catalog created'); await tcList(); - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Create catalog failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Create catalog failed'); } } async function tcDelete() { - const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { Toast.error('Catalog id required'); return; } + const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Catalog id required'); return; } if (!confirm('Delete catalog #' + id + '?')) return; try { const res = await window.apiClient.delete(`/api/v1/admin/mcp/tool_catalogs/${id}`); document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Catalog deleted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Catalog deleted'); await tcList(); - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Delete catalog failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Delete catalog failed'); } } async function tcListEntries() { - const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { Toast.error('Catalog id required'); return; } + const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Catalog id required'); return; } try { const rows = await window.apiClient.get(`/api/v1/admin/mcp/tool_catalogs/${id}/entries`); const items = Array.isArray(rows) ? 
rows : []; const entriesBox = document.getElementById('adminToolCatalog_entries'); if (entriesBox) entriesBox.innerHTML = tableHTML(items.map(x => ({ tool_name: x.tool_name, module_id: x.module_id ?? '' })), ['tool_name','module_id']); document.getElementById('adminToolCatalog_result').textContent = 'Loaded entries'; - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('List entries failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('List entries failed'); } } async function tcAddEntry() { - const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { Toast.error('Catalog id required'); return; } - const tool_name = (document.getElementById('tc_tool_name')?.value || '').trim(); if (!tool_name) { Toast.error('tool_name required'); return; } + const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Catalog id required'); return; } + const tool_name = (document.getElementById('tc_tool_name')?.value || '').trim(); if (!tool_name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('tool_name required'); return; } const module_id = (document.getElementById('tc_module_id')?.value || '').trim() || null; try { const res = await window.apiClient.post(`/api/v1/admin/mcp/tool_catalogs/${id}/entries`, { tool_name, module_id }); document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Entry added'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Entry added'); await tcListEntries(); - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Add entry failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Add entry failed'); } } async function tcDeleteEntry() { - const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { Toast.error('Catalog id required'); return; } - const tool_name = (document.getElementById('tc_tool_name')?.value || '').trim(); if (!tool_name) { Toast.error('tool_name required'); return; } + const id = parseInt(document.getElementById('tc_catalog_id')?.value || '0', 10); if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Catalog id required'); return; } + const tool_name = (document.getElementById('tc_tool_name')?.value || '').trim(); if (!tool_name) { if (typeof Toast !== 'undefined' && Toast) Toast.error('tool_name required'); return; } if (!confirm('Remove tool ' + tool_name + ' from catalog?')) return; try { const res = await window.apiClient.delete(`/api/v1/admin/mcp/tool_catalogs/${id}/entries/${encodeURIComponent(tool_name)}`); document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Entry deleted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Entry deleted'); await tcListEntries(); - } catch (e) { document.getElementById('adminToolCatalog_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Delete entry failed'); } + } catch (e) { document.getElementById('adminToolCatalog_result').textContent = 
JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Delete entry failed'); }
+}
+
+// ---------- Ephemeral Cleanup Settings ----------
+async function adminLoadCleanupSettings() {
+ try {
+ const resp = await window.apiClient.get('/api/v1/admin/cleanup-settings');
+ const enabledEl = document.getElementById('adminCleanup_enabled');
+ const intervalEl = document.getElementById('adminCleanup_interval');
+ if (enabledEl) enabledEl.checked = !!resp.enabled;
+ if (intervalEl) intervalEl.value = resp.interval_sec || 1800;
+ const out = document.getElementById('adminCleanupSettings_response');
+ if (out) out.textContent = JSON.stringify(resp, null, 2);
+ if (typeof Toast !== 'undefined' && Toast) Toast.success('Loaded cleanup settings');
+ } catch (e) {
+ if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to load cleanup settings: ' + (e?.message || e));
+ }
+}
+
+async function adminSaveCleanupSettings() {
+ try {
+ const enabled = !!document.getElementById('adminCleanup_enabled')?.checked;
+ const interval = parseInt(document.getElementById('adminCleanup_interval')?.value || '1800', 10);
+ const body = { enabled, interval_sec: interval };
+ const resp = await window.apiClient.post('/api/v1/admin/cleanup-settings', body);
+ const out = document.getElementById('adminCleanupSettings_response');
+ if (out) out.textContent = JSON.stringify(resp, null, 2);
+ if (typeof Toast !== 'undefined' && Toast) Toast.success('Saved cleanup settings');
+ } catch (e) {
+ if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save cleanup settings: ' + (e?.message || e));
+ }
}
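// A minimal usage sketch (not an existing WebUI helper): `setCleanupInterval` is a hypothetical
// name, and it assumes only the { enabled, interval_sec } request/response shape that
// adminLoadCleanupSettings()/adminSaveCleanupSettings() above read and write via window.apiClient.
async function setCleanupInterval(seconds, enabled = true) {
  // Coerce the input and fall back to the same 1800-second default the form uses.
  const body = { enabled: !!enabled, interval_sec: parseInt(seconds, 10) || 1800 };
  const saved = await window.apiClient.post('/api/v1/admin/cleanup-settings', body);
  // Read the settings back so callers see what the server actually persisted.
  const current = await window.apiClient.get('/api/v1/admin/cleanup-settings');
  return { saved, current };
}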
// ---------- Bindings ----------
function bindAdminAdvanced() {
+ // Users: basic list/create in User Management section
+ document.getElementById('btnAdminUsersList')?.addEventListener('click', () => window.makeRequest && window.makeRequest('adminUsersList', 'GET', '/api/v1/admin/users', 'query'));
+ document.getElementById('btnAdminCreateUser')?.addEventListener('click', adminCreateUser);
// Virtual keys
document.getElementById('btnAdmVKList')?.addEventListener('click', admVKList);
document.getElementById('btnAdmVKCreate')?.addEventListener('click', admVKCreate);
@@ -969,6 +1524,72 @@ function bindAdminAdvanced() {
document.getElementById('btnTCEntries')?.addEventListener('click', tcListEntries);
document.getElementById('btnTCAddEntry')?.addEventListener('click', tcAddEntry);
document.getElementById('btnTCDeleteEntry')?.addEventListener('click', tcDeleteEntry);
+
+ // Moderation: Settings
+ document.getElementById('btnModSettingsLoad')?.addEventListener('click', moderationLoadSettings);
+ document.getElementById('btnModSettingsSave')?.addEventListener('click', moderationSaveSettings);
+
+ // Moderation: Managed
+ document.getElementById('btnModerationLoadManaged')?.addEventListener('click', moderationLoadManaged);
+ document.getElementById('btnModerationRefreshManaged')?.addEventListener('click', moderationRefreshManaged);
+ document.getElementById('btnModerationAppendManaged')?.addEventListener('click', moderationAppendManaged);
+ document.getElementById('btnModerationLintManaged')?.addEventListener('click', moderationLintManaged);
+ document.getElementById('moderationManaged_filter')?.addEventListener('input', renderManagedBlocklist);
+ document.getElementById('moderationManaged_onlyInvalid')?.addEventListener('change', renderManagedBlocklist);
+ document.getElementById('moderationManaged_table')?.addEventListener('click', (e) => {
+ const t = e.target;
+ if (t && t.classList?.contains('mod-managed-del')) {
+ const id = parseInt(t.getAttribute('data-id') || '0', 10);
+ if (id) moderationDeleteManaged(id);
+ }
+ });
+
+ // Moderation: Raw blocklist
+ document.getElementById('btnModerationLoadBlocklist')?.addEventListener('click', moderationLoadBlocklist);
+ document.getElementById('btnModerationLintBlocklist')?.addEventListener('click', moderationLintBlocklist);
+ document.getElementById('btnModerationSaveBlocklist')?.addEventListener('click', moderationSaveBlocklist);
+ document.getElementById('btnModerationCopyInvalidBlocklist')?.addEventListener('click', moderationCopyInvalidBlocklist);
+ document.getElementById('moderationBlocklist_onlyInvalid')?.addEventListener('change', renderBlocklistInvalidList);
+
+ // Moderation: Overrides + Tester
+ document.getElementById('btnModOverrideLoad')?.addEventListener('click', loadUserOverride);
+ document.getElementById('btnModOverrideSave')?.addEventListener('click', saveUserOverride);
+ document.getElementById('btnModOverrideDelete')?.addEventListener('click', deleteUserOverride);
+ document.getElementById('btnModerationListOverrides')?.addEventListener('click', moderationListOverrides);
+ document.getElementById('btnModerationRunTest')?.addEventListener('click', moderationRunTest);
+ document.getElementById('moderationOverrides_list')?.addEventListener('click', (e) => {
+ const t = e.target;
+ if (t && t.classList?.contains('mod-load-editor')) {
+ const uid = t.getAttribute('data-uid');
+ if (uid) moderationLoadIntoEditor(uid);
+ }
+ });
+
+ // Health panel
+ document.getElementById('btnHealthMain')?.addEventListener('click', () => window.makeRequest && window.makeRequest('healthMain','GET','/health','none'));
+ document.getElementById('btnHealthRAG')?.addEventListener('click', () => window.makeRequest && window.makeRequest('healthRAG','GET','/api/v1/rag/health','none'));
+ document.getElementById('btnHealthEmbeddings')?.addEventListener('click', () => window.makeRequest && window.makeRequest('healthEmbeddings','GET','/api/v1/embeddings/health','none'));
+ document.getElementById('btnHealthWebScraping')?.addEventListener('click', () => window.makeRequest && window.makeRequest('healthWebScraping','GET','/api/v1/web-scraping/status','none'));
+
+ // Ephemeral Cleanup Settings
+ document.getElementById('btnAdminCleanupLoad')?.addEventListener('click', adminLoadCleanupSettings);
+ document.getElementById('btnAdminCleanupSave')?.addEventListener('click', adminSaveCleanupSettings);
+
+ // Security alerts
+ document.getElementById('btnSecAlertRefresh')?.addEventListener('click', loadSecurityAlertStatus);
+ setTimeout(() => { try { if (document.getElementById('btnSecAlertRefresh')) loadSecurityAlertStatus(); } catch (_) {} }, 300);
+
+ // Usage
+ document.getElementById('btnUsageLoadDaily')?.addEventListener('click', adminLoadUsageDaily);
+ document.getElementById('btnUsageDownloadDailyCSV')?.addEventListener('click', adminDownloadUsageDailyCSV);
+ document.getElementById('btnUsageTop')?.addEventListener('click', adminLoadUsageTop);
+ document.getElementById('btnUsageDownloadTopCSV')?.addEventListener('click', adminDownloadUsageTopCSV);
+ document.getElementById('btnUsageAggregate')?.addEventListener('click', adminRunUsageAggregate);
+
+ // Admin user simple ops
+ document.getElementById('btnAdminUserGet')?.addEventListener('click', () => window.makeRequest && window.makeRequest('adminUserGet', 'GET', '/api/v1/admin/users/{id}', 'none'));
+ document.getElementById('btnAdminUserUpdate')?.addEventListener('click', ()
=> window.makeRequest && window.makeRequest('adminUserUpdate', 'PUT', '/api/v1/admin/users/{id}', 'json')); + document.getElementById('btnAdminUserDelete')?.addEventListener('click', () => { if (confirm('Are you sure you want to delete this user?')) window.makeRequest && window.makeRequest('adminUserDelete','DELETE','/api/v1/admin/users/{id}','none'); }); } if (typeof document !== 'undefined') { @@ -990,6 +1611,8 @@ export default { adminAuditDownload, adminAuditDownloadLast24hHighRisk, adminAuditDownloadApiEventsCSV, adminAuditPreviewJSON, adminLoadLLMCharts, admUserKeyRotate, admUserKeyRevoke, + adminLoadCleanupSettings: adminLoadCleanupSettings, + adminSaveCleanupSettings: adminSaveCleanupSettings, tcList, tcCreate, tcDelete, tcListEntries, tcAddEntry, tcDeleteEntry, bindAdminAdvanced, }; diff --git a/tldw_Server_API/WebUI/js/admin-rbac-monitoring.js b/tldw_Server_API/WebUI/js/admin-rbac-monitoring.js index d17ea3af9..316e1e75c 100644 --- a/tldw_Server_API/WebUI/js/admin-rbac-monitoring.js +++ b/tldw_Server_API/WebUI/js/admin-rbac-monitoring.js @@ -53,27 +53,27 @@ async function monListWatchlists() { listEl.innerHTML = html; } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to list watchlists'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to list watchlists'); } } async function monApplyDefaultsToScope(scopeType, scopeId) { try { - if (!scopeType || !scopeId) { Toast.error('Missing scope'); return; } + if (!scopeType || !scopeId) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing scope'); return; } const listed = await window.apiClient.get('/api/v1/monitoring/watchlists'); const wls = (listed && listed.watchlists) || []; const defaults = wls.filter(w => (w.scope_type === 'global' || w.scope_type === 'all') && ((w.name || '').startsWith('Kid-Safe Defaults'))); - if (defaults.length === 0) { Toast.error('No default watchlists found'); return; } + if (defaults.length === 0) { if (typeof Toast !== 'undefined' && Toast) Toast.error('No default watchlists found'); return; } let created = 0; for (const wl of defaults) { const payload = { id: null, name: `${wl.name} [${scopeType}:${scopeId}]`, description: wl.description || '', enabled: true, scope_type: scopeType, scope_id: scopeId, rules: wl.rules || [] }; try { await window.apiClient.post('/api/v1/monitoring/watchlists', payload); created += 1; } catch (_) {} } - Toast.success(`Applied ${created} default watchlists to ${scopeType}:${scopeId}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Applied ${created} default watchlists to ${scopeType}:${scopeId}`); await monListWatchlists(); } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to apply defaults'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to apply defaults'); } } @@ -84,7 +84,7 @@ async function monReloadWatchlists() { await monListWatchlists(); } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to reload'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to reload'); } } @@ -98,36 +98,36 @@ async function monUpsertWatchlist() { const scope_id = (document.getElementById('monWl_scope_id')?.value || '') || null; const rules_raw = document.getElementById('monWl_rules')?.value || '[]'; let rules; - try { rules = 
JSON.parse(rules_raw); } catch (e) { Toast.error('Rules must be JSON'); return; } + try { rules = JSON.parse(rules_raw); } catch (e) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Rules must be JSON'); return; } const body = { id, name, description, enabled, scope_type, scope_id, rules }; const res = await window.apiClient.post('/api/v1/monitoring/watchlists', body); document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Saved'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Saved'); await monListWatchlists(); } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to save watchlist'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save watchlist'); } } async function monDeleteWatchlist() { try { const id = (document.getElementById('monWl_id')?.value || '').trim(); - if (!id) { Toast.error('Enter watchlist ID to delete'); return; } + if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter watchlist ID to delete'); return; } if (!confirm('Delete watchlist ' + id + '?')) return; const res = await window.apiClient.delete(`/api/v1/monitoring/watchlists/${encodeURIComponent(id)}`); document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Deleted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Deleted'); await monListWatchlists(); } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to delete watchlist'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to delete watchlist'); } } async function monQuickApplyDefaults(scopeType) { const id = (scopeType === 'team') ? 
(document.getElementById('monQuick_team')?.value || '').trim() : (document.getElementById('monQuick_org')?.value || '').trim(); - if (!id) { Toast.error(`Enter a ${scopeType} id`); return; } + if (!id) { if (typeof Toast !== 'undefined' && Toast) Toast.error(`Enter a ${scopeType} id`); return; } await monApplyDefaultsToScope(scopeType, id); } @@ -135,13 +135,13 @@ async function monBulkApplyDefaults() { try { const scopeType = document.getElementById('monBulk_scope')?.value || 'team'; const raw = (document.getElementById('monBulk_ids')?.value || '').trim(); - if (!raw) { Toast.error('Enter at least one ID'); return; } + if (!raw) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Enter at least one ID'); return; } const parts = raw.split(/\n|,/).map(s => s.trim()).filter(Boolean); - if (parts.length === 0) { Toast.error('No valid IDs found'); return; } + if (parts.length === 0) { if (typeof Toast !== 'undefined' && Toast) Toast.error('No valid IDs found'); return; } const listed = await window.apiClient.get('/api/v1/monitoring/watchlists'); const wls = (listed && listed.watchlists) || []; const defaults = wls.filter(w => (w.scope_type === 'global' || w.scope_type === 'all') && ((w.name || '').startsWith('Kid-Safe Defaults'))); - if (defaults.length === 0) { Toast.error('No default watchlists found'); return; } + if (defaults.length === 0) { if (typeof Toast !== 'undefined' && Toast) Toast.error('No default watchlists found'); return; } let totalCreated = 0; for (const sid of parts) { for (const wl of defaults) { @@ -149,10 +149,10 @@ async function monBulkApplyDefaults() { try { await window.apiClient.post('/api/v1/monitoring/watchlists', payload); totalCreated += 1; } catch (_) {} } } - Toast.success(`Applied ${totalCreated} watchlists to ${parts.length} ${scopeType} id(s)`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Applied ${totalCreated} watchlists to ${parts.length} ${scopeType} id(s)`); } catch (e) { document.getElementById('monitoringWatchlists_result').textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Bulk apply failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Bulk apply failed'); } } @@ -189,7 +189,7 @@ async function monListAlerts() { box.innerHTML = html; } catch (e) { document.getElementById('monitoringAlerts_list').innerHTML = `
${esc(JSON.stringify(e.response || e, null, 2))}
`; - Toast.error('Failed to list alerts'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to list alerts'); } } @@ -198,9 +198,9 @@ async function monMarkAlertRead(id) { if (!id) return; const safeId = encodeURIComponent(id); await window.apiClient.post(`/api/v1/monitoring/alerts/${safeId}/read`, {}); - Toast.success('Marked read'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Marked read'); await monListAlerts(); - } catch (e) { Toast.error('Mark read failed'); } + } catch (e) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Mark read failed'); } } async function monLoadRecentAlerts() { @@ -256,8 +256,8 @@ async function monSaveNotifSettings() { }; const res = await window.apiClient.put('/api/v1/monitoring/notifications/settings', body); document.getElementById('monitoringNotif_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Saved'); - } catch (e) { document.getElementById('monitoringNotif_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Failed to save settings'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Saved'); + } catch (e) { document.getElementById('monitoringNotif_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to save settings'); } } function monClearNotifDrafts() { @@ -278,12 +278,12 @@ function monClearNotifDrafts() { setVal('monNotif_smtp_user', ''); setVal('monNotif_smtp_pass', ''); } catch (_) { /* ignore */ } - Toast.success('Drafts cleared'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Drafts cleared'); } async function monRestoreNotifDefaults() { await monLoadNotifSettings(); - Toast.success('Defaults loaded'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Defaults loaded'); } async function monSendNotifTest() { @@ -292,8 +292,8 @@ async function monSendNotifTest() { const message = document.getElementById('monNotif_test_msg')?.value || 'Test notification'; const res = await window.apiClient.post('/api/v1/monitoring/notifications/test', { severity, message }); document.getElementById('monitoringNotif_result').textContent = JSON.stringify(res, null, 2); - Toast.success('Sent test'); - } catch (e) { document.getElementById('monitoringNotif_result').textContent = JSON.stringify(e.response || e, null, 2); Toast.error('Test failed'); } + if (typeof Toast !== 'undefined' && Toast) Toast.success('Sent test'); + } catch (e) { document.getElementById('monitoringNotif_result').textContent = JSON.stringify(e.response || e, null, 2); if (typeof Toast !== 'undefined' && Toast) Toast.error('Test failed'); } } async function monLoadRecentNotifications() { @@ -334,7 +334,7 @@ function monResetAllMonitoringUI() { document.getElementById('monitoringAlerts_list')?.replaceChildren(); document.getElementById('monitoringAlerts_recent')?.replaceChildren(); document.getElementById('monitoringNotif_recent')?.replaceChildren(); - Toast.success('Monitoring UI reset'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Monitoring UI reset'); } // -------- RBAC: Bindings (call inline impl if present) -------- diff --git a/tldw_Server_API/WebUI/js/admin-rbac.js b/tldw_Server_API/WebUI/js/admin-rbac.js index 162f8746e..67cd896b5 100644 --- a/tldw_Server_API/WebUI/js/admin-rbac.js +++ b/tldw_Server_API/WebUI/js/admin-rbac.js @@ -163,11 +163,11 @@ async function loadRbacMatrixList() { renderRbacMatrixList(); _updateRbacRolesInfo(Array.isArray(data.roles) ? 
data.roles.length : 0); _saveRbacFilterState(); - Toast?.success && Toast.success('Loaded RBAC role→permissions list'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Loaded RBAC role→permissions list'); } catch (e) { const el = document.getElementById('rbacMatrixList'); if (el) el.innerHTML = `
${_escapeHtml(JSON.stringify(e.response || e, null, 2))}
`; - Toast?.error && Toast.error('Failed to load matrix'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to load matrix'); } } @@ -198,11 +198,11 @@ async function loadRbacMatrixBoolean() { renderRbacMatrixBoolean(); _updateRbacRolesInfo(Array.isArray(data.roles) ? data.roles.length : 0); _saveRbacFilterState(); - Toast?.success && Toast.success('Loaded RBAC boolean grid'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Loaded RBAC boolean grid'); } catch (e) { const el = document.getElementById('rbacMatrixBoolean'); if (el) el.innerHTML = `
${_escapeHtml(JSON.stringify(e.response || e, null, 2))}
`; - Toast?.error && Toast.error('Failed to load boolean grid'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to load boolean grid'); } } @@ -310,8 +310,8 @@ async function exportRbacMatrixCsv() { a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); - Toast?.success && Toast.success('Matrix CSV downloaded'); - } catch (e) { console.error(e); Toast?.error && Toast.error('Failed to export CSV'); } + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Matrix CSV downloaded'); + } catch (e) { console.error(e); if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to export CSV'); } } async function exportRbacListCsv() { @@ -324,7 +324,7 @@ async function exportRbacListCsv() { const roles = Array.isArray(data.roles) ? data.roles : []; const perms = Array.isArray(data.permissions) ? data.permissions : []; const grants = new Set((data.grants || []).map(g => `${g.role_id}:${g.permission_id}`)); - if (!roles.length) { Toast?.error && Toast.error('No roles to export'); return; } + if (!roles.length) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('No roles to export'); return; } const permIdToName = {}; for (const p of perms) permIdToName[p.id] = p.name; const csvEscape = (v) => '"' + String(v).replace(/"/g, '""') + '"'; let csv = ''; @@ -343,8 +343,8 @@ async function exportRbacListCsv() { a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); - Toast?.success && Toast.success('List CSV downloaded'); - } catch (e) { console.error(e); Toast?.error && Toast.error('Failed to export list CSV'); } + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('List CSV downloaded'); + } catch (e) { console.error(e); if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to export list CSV'); } } async function copyRbacSummary() { @@ -365,8 +365,8 @@ async function copyRbacSummary() { text += `${role.name}: ${names.join(', ')}` + '\n'; } await navigator.clipboard.writeText(text); - Toast?.success && Toast.success('Summary copied to clipboard'); - } catch (e) { console.error(e); Toast?.error && Toast.error('Failed to copy summary'); } + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Summary copied to clipboard'); + } catch (e) { console.error(e); if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to copy summary'); } } // Rendering @@ -451,18 +451,18 @@ function _rbacUserId() { async function rbacGetRoleEffective() { const roleIdRaw = (document.getElementById('rbacEffRoleId')?.value || '').trim(); - if (!roleIdRaw) return Toast?.error && Toast.error('Role ID is required'); + if (!roleIdRaw) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Role ID is required'); return; } const roleId = parseInt(roleIdRaw, 10); - if (isNaN(roleId) || roleId <= 0) return Toast?.error && Toast.error('Enter a valid Role ID'); + if (isNaN(roleId) || roleId <= 0) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Enter a valid Role ID'); return; } try { const data = await window.apiClient.get(`/api/v1/admin/roles/${roleId}/permissions/effective`); const out = document.getElementById('rbacRoleEffOut'); if (out) out.textContent = JSON.stringify(data, null, 2); - Toast?.success && Toast.success('Loaded role effective permissions'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Loaded role effective permissions'); } catch (e) { const out 
= document.getElementById('rbacRoleEffOut'); if (out) out.textContent = JSON.stringify(e.response || e, null, 2); - Toast?.error && Toast.error('Failed to load role effective permissions'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to load role effective permissions'); } } @@ -485,9 +485,9 @@ async function rbacListRoles() { async function rbacCreateRole() { const name = (document.getElementById('rbacRoleName')?.value || '').trim(); const description = (document.getElementById('rbacRoleDesc')?.value || '').trim() || null; - if (!name) return Toast?.error && Toast.error('Role name required'); + if (!name) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Role name required'); return; } const res = await window.apiClient.post('/api/v1/admin/roles', { name, description }); - Toast?.success && Toast.success('Role created'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Role created'); const el = document.getElementById('rbacRolesOut'); if (el) el.textContent = JSON.stringify(res, null, 2); } @@ -501,9 +501,9 @@ async function rbacListPermissions() { async function rbacCreatePermission() { const name = (document.getElementById('rbacPermName')?.value || '').trim(); const category = (document.getElementById('rbacPermCat')?.value || '').trim() || null; - if (!name) return Toast?.error && Toast.error('Permission name required'); + if (!name) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Permission name required'); return; } const res = await window.apiClient.post('/api/v1/admin/permissions', { name, category }); - Toast?.success && Toast.success('Permission created'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Permission created'); const el = document.getElementById('rbacPermsOut'); if (el) el.textContent = JSON.stringify(res, null, 2); } @@ -518,9 +518,9 @@ async function rbacGetUserRoles() { async function rbacAssignRole() { const uid = _rbacUserId(); const rid = parseInt(document.getElementById('rbacAssignRoleId')?.value || 'NaN', 10); - if (!rid) return Toast?.error && Toast.error('Role ID required'); + if (!rid) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Role ID required'); return; } const res = await window.apiClient.post(`/api/v1/admin/users/${uid}/roles/${rid}`, {}); - Toast?.success && Toast.success('Role assigned'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Role assigned'); const el = document.getElementById('rbacUserRolesOut'); if (el) el.textContent = JSON.stringify(res, null, 2); } @@ -528,9 +528,9 @@ async function rbacAssignRole() { async function rbacRemoveRole() { const uid = _rbacUserId(); const rid = parseInt(document.getElementById('rbacAssignRoleId')?.value || 'NaN', 10); - if (!rid) return Toast?.error && Toast.error('Role ID required'); + if (!rid) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Role ID required'); return; } const res = await window.apiClient.delete(`/api/v1/admin/users/${uid}/roles/${rid}`); - Toast?.success && Toast.success('Role removed'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Role removed'); const el = document.getElementById('rbacUserRolesOut'); if (el) el.textContent = JSON.stringify(res, null, 2); } @@ -546,12 +546,12 @@ async function rbacUpsertOverride() { const uid = _rbacUserId(); const permField = (document.getElementById('rbacOverridePerm')?.value || '').trim(); const 
effect = document.getElementById('rbacOverrideEffect')?.value;
- if (!permField) return Toast?.error && Toast.error('Permission required');
+ if (!permField) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Permission required'); return; }
let body = { effect };
if (/^\d+$/.test(permField)) body.permission_id = parseInt(permField, 10); else body.permission_name = permField;
const res = await window.apiClient.post(`/api/v1/admin/users/${uid}/overrides`, body);
- Toast?.success && Toast.success('Override saved');
+ if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Override saved');
const el = document.getElementById('rbacOverridesOut'); if (el) el.textContent = JSON.stringify(res, null, 2);
}
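// The hunks above repeat the same guard inline; a small helper along these lines could centralize
// it. `notify` is a hypothetical name (not part of the WebUI code) and assumes only that a global
// `Toast` object with success/error methods may or may not be present on the page.
function notify(kind, message) {
  // The typeof check avoids a ReferenceError when the toast library is not loaded at all.
  if (typeof Toast !== 'undefined' && Toast && typeof Toast[kind] === 'function') {
    Toast[kind](message);
  } else if (typeof console !== 'undefined') {
    // Fall back to the console so failures are not silently swallowed.
    (kind === 'error' ? console.error : console.log)(message);
  }
}
// e.g. notify('success', 'Override saved'); notify('error', 'Failed to export CSV');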
diff --git a/tldw_Server_API/WebUI/js/admin-user-permissions.js b/tldw_Server_API/WebUI/js/admin-user-permissions.js
index 2f53daa62..d0e20d246 100644
--- a/tldw_Server_API/WebUI/js/admin-user-permissions.js
+++ b/tldw_Server_API/WebUI/js/admin-user-permissions.js
@@ -55,7 +55,7 @@ async function searchUsers() {
} catch (e) {
document.getElementById('userPermSearchResults').innerHTML = `
${_esc(JSON.stringify(e.response || e, null, 2))}
`; document.getElementById('userPermSearchResults').style.display = 'block'; - Toast?.error && Toast.error('Failed to search users'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to search users'); } } @@ -173,9 +173,9 @@ async function _applyRoleToggle(roleId, checked) { if (checked) await _apiPost(`/api/v1/admin/users/${uid}/roles/${roleId}`, {}); else await _apiDelete(`/api/v1/admin/users/${uid}/roles/${roleId}`); if (checked) UP_STATE.userRoles.add(roleId); else UP_STATE.userRoles.delete(roleId); - Toast?.success && Toast.success(checked ? 'Role assigned' : 'Role removed'); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success(checked ? 'Role assigned' : 'Role removed'); } catch (e) { - Toast?.error && Toast.error('Failed to update role'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to update role'); } } @@ -199,9 +199,9 @@ async function _applyOverrideChange(pid, action, opts = {}) { renderOverridesTable(); renderEffectiveOut(); } - if (!opts.silent) Toast?.success && Toast.success('Override updated'); + if (!opts.silent) { if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Override updated'); } } catch (e) { - Toast?.error && Toast.error('Failed to update override'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to update override'); } } @@ -210,7 +210,7 @@ async function bulkApplyOverrides(action, which = 'all') { const { tools, std } = _splitVisibleByTool(); let all; if (which === 'tools') all = tools; else if (which === 'std') all = std; else all = [...tools, ...std]; - if (!all.length) return Toast?.error && Toast.error('No filtered permissions to update'); + if (!all.length) { if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('No filtered permissions to update'); return; } const actionLabel = action === 'allow' ? 'Allow' : action === 'deny' ? 'Deny' : 'Inherit'; const sectionLabel = which === 'tools' ? 'tool permissions' : which === 'std' ? 'standard permissions' : 'filtered permissions'; const confirmed = window.confirm(`${actionLabel} ${all.length} ${sectionLabel}?`); @@ -240,10 +240,10 @@ async function bulkApplyOverrides(action, which = 'all') { UP_STATE.effective = new Set(Array.isArray(effRes?.permissions) ? 
effRes.permissions : []); renderOverridesTable(); renderEffectiveOut(); - Toast?.success && Toast.success(`Applied '${action}' to ${all.length} ${sectionLabel}`); + if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success(`Applied '${action}' to ${all.length} ${sectionLabel}`); try { if (loaderId && container) Loading.hide(container); } catch (_) { /* ignore */ } } catch (e) { - Toast?.error && Toast.error('Bulk update failed'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Bulk update failed'); } } @@ -264,7 +264,7 @@ async function loadUserPermissionsEditor(user) { renderOverridesTable(); renderEffectiveOut(); } catch (e) { - Toast?.error && Toast.error('Failed to load user data'); + if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to load user data'); } } diff --git a/tldw_Server_API/WebUI/js/api-client.js b/tldw_Server_API/WebUI/js/api-client.js index 93f4c1572..8760a2ad6 100644 --- a/tldw_Server_API/WebUI/js/api-client.js +++ b/tldw_Server_API/WebUI/js/api-client.js @@ -15,6 +15,7 @@ class APIClient { this.activeRequests = new Map(); // Track active fetch requests this.csrfToken = null; // Cached CSRF token (double-submit pattern) this.includeTokenInCurl = false; // UI preference for cURL token masking + this.apiEndpoints = null; // Server-provided endpoint catalog this.init(); } @@ -85,6 +86,10 @@ class APIClient { // Store the loaded config for later use (includes LLM providers) this.loadedConfig = config; + // Capture server-provided endpoint map (if present) + if (config && config.api_endpoints) { + this.apiEndpoints = config.api_endpoints; + } // Use apiUrl if provided, otherwise keep same origin if (config.apiUrl) { @@ -153,6 +158,9 @@ class APIClient { this.configLoaded = true; console.log('Loaded API configuration from webui-config.json'); } + if (config && config.api_endpoints) { + this.apiEndpoints = config.api_endpoints; + } } } catch (error) { // Config file not found or error reading it, that's okay @@ -195,6 +203,40 @@ class APIClient { } catch (e) { /* ignore */ } } + // Resolve endpoint path from server-provided catalog. Falls back to known defaults when absent. + endpoint(category, name, params = {}) { + try { + let path = null; + if (this.apiEndpoints && this.apiEndpoints[category] && this.apiEndpoints[category][name]) { + path = this.apiEndpoints[category][name]; + } else { + // Fallback table for core endpoints + const defaults = { + llm: { + providers: '/api/v1/llm/providers', + provider: '/api/v1/llm/providers/{provider}', + models: '/api/v1/llm/models' + }, + chat: { completions: '/api/v1/chat/completions' }, + audio: { voices_catalog: '/api/v1/audio/voices/catalog' }, + embeddings: { + models: '/api/v1/embeddings/models', + providers_config: '/api/v1/embeddings/providers-config' + } + }; + path = (((defaults[category] || {})[name]) || null); + } + if (!path) return null; + // Replace simple placeholders like {provider} + Object.entries(params || {}).forEach(([k, v]) => { + path = path.replace(new RegExp(`{${k}}`, 'g'), encodeURIComponent(String(v))); + }); + return path; + } catch (e) { + return null; + } + } + setBaseUrl(url) { this.baseUrl = url; this.saveConfig(); @@ -339,6 +381,12 @@ class APIClient { } }; + // Always send a correlation request id + try { + const rid = (typeof Utils !== 'undefined' && Utils.uuidv4) ? 
Utils.uuidv4() : `${Date.now()}`; + fetchOptions.headers['X-Request-ID'] = rid; + } catch (e) { /* ignore */ } + const credsMode = this._determineCredentialsMode(); if (credsMode) { fetchOptions.credentials = credsMode; @@ -401,6 +449,17 @@ class APIClient { const duration = Date.now() - startTime; + // Capture correlation headers for UI surfacing + try { + const reqId = response.headers.get('X-Request-ID') || response.headers.get('x-request-id') || null; + const traceparent = response.headers.get('traceparent') || response.headers.get('Traceparent') || null; + const traceId = response.headers.get('X-Trace-Id') || response.headers.get('x-trace-id') || null; + this.lastCorrelation = { requestId: reqId, traceparent, traceId }; + if (window && window.webUI && typeof window.webUI.updateCorrelationBadge === 'function') { + window.webUI.updateCorrelationBadge(this.lastCorrelation); + } + } catch (_) { /* ignore */ } + this._syncCsrfFromResponse(response); // Save to history @@ -670,6 +729,10 @@ class APIClient { const ctrl = new AbortController(); const fetchHeaders = { 'Accept': 'text/event-stream', ...headers }; + try { + const rid = (typeof Utils !== 'undefined' && Utils.uuidv4) ? Utils.uuidv4() : `${Date.now()}`; + fetchHeaders['X-Request-ID'] = rid; + } catch (e) { /* ignore */ } const credsMode = this._determineCredentialsMode(); if (this.token) { @@ -694,6 +757,15 @@ class APIClient { const done = (async () => { const response = await fetch(url.toString(), fetchOptions); + try { + const reqId = response.headers.get('X-Request-ID') || response.headers.get('x-request-id') || null; + const traceparent = response.headers.get('traceparent') || response.headers.get('Traceparent') || null; + const traceId = response.headers.get('X-Trace-Id') || response.headers.get('x-trace-id') || null; + this.lastCorrelation = { requestId: reqId, traceparent, traceId }; + if (window && window.webUI && typeof window.webUI.updateCorrelationBadge === 'function') { + window.webUI.updateCorrelationBadge(this.lastCorrelation); + } + } catch (_) { /* ignore */ } if (!response.ok || !response.body) throw new Error(`HTTP ${response.status}`); const reader = response.body.getReader(); const decoder = new TextDecoder(); @@ -878,11 +950,49 @@ class APIClient { return config.llm_providers; } - // Fallback to API endpoint - const response = await this.get('/api/v1/llm/providers'); - this.cachedProviders = response; - this.cacheTimestamp = Date.now(); - return response; + // Prefer providers endpoint + try { + const ep = this.endpoint('llm', 'providers') || '/api/v1/llm/providers'; + const response = await this.get(ep); + this.cachedProviders = response; + this.cacheTimestamp = Date.now(); + return response; + } catch (e) { + // Fallback to flat models endpoint and synthesize provider mapping + try { + const modelsEp = this.endpoint('llm', 'models') || '/api/v1/llm/models'; + const models = await this.get(modelsEp); + const byProvider = {}; + (models || []).forEach((m) => { + const parts = String(m).split('/'); + if (parts.length >= 2) { + const prov = parts.shift(); + const model = parts.join('/'); + byProvider[prov] = byProvider[prov] || []; + byProvider[prov].push(model); + } + }); + const providers = Object.keys(byProvider).map((name) => ({ + name, + display_name: name, + type: 'unknown', + models: byProvider[name], + default_model: byProvider[name] && byProvider[name][0], + is_configured: true, + })); + const synthesized = { + providers, + default_provider: providers[0] ? 
providers[0].name : null, + total_configured: providers.length, + synthesized: true, + }; + this.cachedProviders = synthesized; + this.cacheTimestamp = Date.now(); + return synthesized; + } catch (e2) { + throw e2; + } + } } catch (error) { console.error('Failed to get LLM providers:', error); // Return empty providers list as fallback @@ -902,7 +1012,9 @@ class APIClient { */ async getProviderDetails(providerName) { try { - const response = await this.get(`/api/v1/llm/providers/${providerName}`); + const ep = this.endpoint('llm', 'provider', { provider: providerName }) + || `/api/v1/llm/providers/${providerName}`; + const response = await this.get(ep); return response; } catch (error) { console.error(`Failed to get provider details for ${providerName}:`, error); @@ -916,7 +1028,8 @@ class APIClient { */ async getAllAvailableModels() { try { - const response = await this.get('/api/v1/llm/models'); + const ep = this.endpoint('llm', 'models') || '/api/v1/llm/models'; + const response = await this.get(ep); return response; } catch (error) { console.error('Failed to get all models:', error); diff --git a/tldw_Server_API/WebUI/js/auth-basic.js b/tldw_Server_API/WebUI/js/auth-basic.js index 6037c2e1d..c8ca068f4 100644 --- a/tldw_Server_API/WebUI/js/auth-basic.js +++ b/tldw_Server_API/WebUI/js/auth-basic.js @@ -5,18 +5,18 @@ async function performLogin() { const username = document.getElementById('authLogin_username')?.value; const password = document.getElementById('authLogin_password')?.value; const remember = document.getElementById('authLogin_remember')?.checked; - if (!username || !password) { Toast.error('Username and password are required'); return; } + if (!username || !password) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Username and password are required'); return; } try { const response = await window.apiClient.post('/api/v1/auth/login', { username, password, remember_me: !!remember }); const pre = document.getElementById('authLogin_response'); if (pre) pre.textContent = JSON.stringify(response, null, 2); if (response && response.access_token) { window.apiClient.setToken(response.access_token); - Toast.success('Login successful!'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Login successful!'); if (response.refresh_token) { try { Utils.saveToStorage('refresh_token', response.refresh_token); } catch (e) {} } } } catch (error) { const pre = document.getElementById('authLogin_response'); if (pre) pre.textContent = JSON.stringify(error.response || error, null, 2); - Toast.error('Login failed: ' + (error.message || 'Unknown error')); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Login failed: ' + (error.message || 'Unknown error')); } } @@ -26,16 +26,16 @@ async function performRegistration() { const password = document.getElementById('authRegister_password')?.value; const confirmPassword = document.getElementById('authRegister_confirmPassword')?.value; const full_name = document.getElementById('authRegister_fullName')?.value; - if (!username || !email || !password) { Toast.error('Username, email, and password are required'); return; } - if (password !== confirmPassword) { Toast.error('Passwords do not match'); return; } + if (!username || !email || !password) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Username, email, and password are required'); return; } + if (password !== confirmPassword) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Passwords do not match'); return; } try { const response = await 
window.apiClient.post('/api/v1/auth/register', { username, email, password, full_name }); const pre = document.getElementById('authRegister_response'); if (pre) pre.textContent = JSON.stringify(response, null, 2); - if (response && response.api_key) Toast.success('Registration successful. API key created and shown below. Copy it now.'); - else Toast.success('Registration successful! Please login.'); + if (response && response.api_key) { if (typeof Toast !== 'undefined' && Toast) Toast.success('Registration successful. API key created and shown below. Copy it now.'); } + else { if (typeof Toast !== 'undefined' && Toast) Toast.success('Registration successful! Please login.'); } } catch (error) { const pre = document.getElementById('authRegister_response'); if (pre) pre.textContent = JSON.stringify(error.response || error, null, 2); - Toast.error('Registration failed: ' + (error.message || 'Unknown error')); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Registration failed: ' + (error.message || 'Unknown error')); } } @@ -46,27 +46,27 @@ async function performLogout() { const pre = document.getElementById('authLogout_response'); if (pre) pre.textContent = JSON.stringify(response, null, 2); window.apiClient.setToken(''); try { Utils.removeFromStorage('refresh_token'); } catch (e) {} - Toast.success(all_devices ? 'Logged out from all devices' : 'Logged out successfully'); + if (typeof Toast !== 'undefined' && Toast) Toast.success(all_devices ? 'Logged out from all devices' : 'Logged out successfully'); } catch (error) { const pre = document.getElementById('authLogout_response'); if (pre) pre.textContent = JSON.stringify(error.response || error, null, 2); - Toast.error('Logout failed: ' + (error.message || 'Unknown error')); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Logout failed: ' + (error.message || 'Unknown error')); } } async function performTokenRefresh() { const refresh_token = document.getElementById('authRefresh_token')?.value; - if (!refresh_token) { Toast.error('Refresh token is required'); return; } + if (!refresh_token) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Refresh token is required'); return; } try { const response = await window.apiClient.post('/api/v1/auth/refresh', { refresh_token }); const pre = document.getElementById('authRefresh_response'); if (pre) pre.textContent = JSON.stringify(response, null, 2); if (response && response.access_token) { window.apiClient.setToken(response.access_token); - Toast.success('Token refreshed successfully'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Token refreshed successfully'); } if (response && response.refresh_token) { try { Utils.saveToStorage('refresh_token', response.refresh_token); } catch (e) {} } } catch (error) { const pre = document.getElementById('authRefresh_response'); if (pre) pre.textContent = JSON.stringify(error.response || error, null, 2); - Toast.error('Token refresh failed: ' + (error.message || 'Unknown error')); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Token refresh failed: ' + (error.message || 'Unknown error')); } } @@ -76,7 +76,7 @@ async function getCurrentUser() { const pre = document.getElementById('authCurrentUser_response'); if (pre) pre.textContent = JSON.stringify(response, null, 2); } catch (error) { const pre = document.getElementById('authCurrentUser_response'); if (pre) pre.textContent = JSON.stringify(error.response || error, null, 2); - Toast.error('Failed to get user info: ' + (error.message || 'Unknown error')); + if 
(typeof Toast !== 'undefined' && Toast) Toast.error('Failed to get user info: ' + (error.message || 'Unknown error')); } } diff --git a/tldw_Server_API/WebUI/js/auth-keys.js b/tldw_Server_API/WebUI/js/auth-keys.js index 159d07435..47e71b14c 100644 --- a/tldw_Server_API/WebUI/js/auth-keys.js +++ b/tldw_Server_API/WebUI/js/auth-keys.js @@ -10,7 +10,7 @@ export async function listMyApiKeys() { } catch (e) { const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to list API keys'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to list API keys'); } } @@ -24,12 +24,12 @@ export async function createMyApiKey() { const res = await window.apiClient.post('/api/v1/users/api-keys', payload); const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(res || {}, null, 2); - Toast.success('API key created. Copy it now; it is shown only once.'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('API key created. Copy it now; it is shown only once.'); await listMyApiKeys(); } catch (e) { const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to create API key'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to create API key'); } } @@ -38,12 +38,12 @@ export async function rotateMyApiKey(id) { const res = await window.apiClient.post(`/api/v1/users/api-keys/${id}/rotate`, { expires_in_days: 365 }); const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(res || {}, null, 2); - Toast.success('API key rotated. Copy the new key now.'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('API key rotated. Copy the new key now.'); await listMyApiKeys(); } catch (e) { const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to rotate API key'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to rotate API key'); } } @@ -52,12 +52,12 @@ export async function revokeMyApiKey(id) { const res = await window.apiClient.delete(`/api/v1/users/api-keys/${id}`); const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(res || {}, null, 2); - Toast.success('API key revoked'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('API key revoked'); await listMyApiKeys(); } catch (e) { const pre = document.getElementById('authApiKeys_response'); if (pre) pre.textContent = JSON.stringify(e.response || e, null, 2); - Toast.error('Failed to revoke API key'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Failed to revoke API key'); } } diff --git a/tldw_Server_API/WebUI/js/auth-page.js b/tldw_Server_API/WebUI/js/auth-page.js new file mode 100644 index 000000000..eabf0787c --- /dev/null +++ b/tldw_Server_API/WebUI/js/auth-page.js @@ -0,0 +1,98 @@ +(() => { + async function getConfig() { + try { + const r = await fetch('/webui/config.json', { cache: 'no-store' }); + return await r.json(); + } catch { + return { mode: 'unknown' }; + } + } + + function setOutput(elId, data, klass) { + const el = document.getElementById(elId); + if (!el) return; + el.textContent = typeof data === 'string' ? 
data : JSON.stringify(data, null, 2); + el.className = klass || ''; + } + + async function handleRegister(ev) { + ev.preventDefault(); + const username = document.getElementById('reg_username').value.trim(); + const email = document.getElementById('reg_email').value.trim(); + const password = document.getElementById('reg_password').value; + const registration_code = document.getElementById('reg_code').value.trim() || null; + try { + const r = await fetch('/api/v1/auth/register', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ username, email, password, registration_code }) + }); + const data = await r.json(); + if (!r.ok) throw new Error(data.detail || r.statusText); + setOutput('reg_result', data, 'ok'); + if (data && data.api_key) { + alert('Registration successful. An API key was generated; copy it from the result and store it securely.'); + } + } catch (e) { + setOutput('reg_result', String(e), 'err'); + } + } + + async function handleLogin(ev) { + ev.preventDefault(); + const username = document.getElementById('login_username').value.trim(); + const password = document.getElementById('login_password').value; + const form = new URLSearchParams(); + form.set('username', username); + form.set('password', password); + try { + const r = await fetch('/api/v1/auth/login', { + method: 'POST', + headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + body: form.toString() + }); + const data = await r.json(); + if (!r.ok) throw new Error(data.detail || r.statusText); + if (data.access_token) { + try { localStorage.setItem('tldw_access_token', data.access_token); } catch (_) {} + } + setOutput('login_result', data, 'ok'); + } catch (e) { + setOutput('login_result', String(e), 'err'); + } + } + + function handleCopyToken() { + const pre = document.getElementById('login_result'); + try { + const obj = JSON.parse(pre.textContent || ''); + if (obj && obj.access_token) { + navigator.clipboard.writeText(obj.access_token); + alert('Access token copied to clipboard'); + } else { + alert('No token found'); + } + } catch { + alert('No token found'); + } + } + + window.addEventListener('DOMContentLoaded', async () => { + // Mode banner + try { + const cfg = await getConfig(); + const modeEl = document.getElementById('mode'); + if (modeEl) modeEl.textContent = (cfg && cfg.mode) ? cfg.mode : 'unknown'; + if (cfg && cfg.mode === 'single-user') { + document.getElementById('mu_hint')?.classList.remove('hidden'); + document.getElementById('forms')?.classList.add('hidden'); + } + } catch (_) {} + + // Bind forms + document.getElementById('reg-form')?.addEventListener('submit', handleRegister); + document.getElementById('login-form')?.addEventListener('submit', handleLogin); + document.getElementById('copy-token-btn')?.addEventListener('click', handleCopyToken); + }); +})(); + diff --git a/tldw_Server_API/WebUI/js/auth-permissions.js b/tldw_Server_API/WebUI/js/auth-permissions.js index 6db5ed514..6c3eb49e6 100644 --- a/tldw_Server_API/WebUI/js/auth-permissions.js +++ b/tldw_Server_API/WebUI/js/auth-permissions.js @@ -43,14 +43,18 @@ function _ap_renderSummary() { const items = _ap_filteredItems(); const { total, allowed, blocked } = _ap_summarize(items); const ts = AUTH_PERM_CACHE.lastLoadedAt ? 
new Date(AUTH_PERM_CACHE.lastLoadedAt).toLocaleString() : '-';
-    el.innerHTML = `Items: ${total} · Allowed: ${allowed} · Blocked: ${blocked} · Loaded: ${_ap_escape(ts)}`;
+    if (window.SafeDOM && window.SafeDOM.setHTML) {
+        window.SafeDOM.setHTML(el, `Items: ${total} · Allowed: ${allowed} · Blocked: ${blocked} · Loaded: ${_ap_escape(ts)}`);
+    } else {
+        el.innerHTML = `Items: ${total} · Allowed: ${allowed} · Blocked: ${blocked} · Loaded: ${_ap_escape(ts)}`;
+    }
 }
 
 function _ap_renderMatrixByScope() {
     const container = document.getElementById('authPermMatrixByScope');
     if (!container) return;
     const items = _ap_filteredItems();
-    if (!items.length) { container.innerHTML = 'No data. Click Refresh to load.'; return; }
+    if (!items.length) { if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, 'No data. Click Refresh to load.'); } else { container.innerHTML = 'No data. Click Refresh to load.'; } return; }
     // Build unique sets
     const scopes = Array.from(new Set(items.map(it => it.privilege_scope_id))).sort();
     const endpoints = Array.from(new Set(items.map(it => `${(it.method || '').toUpperCase()} ${it.endpoint}`))).sort();
@@ -79,14 +83,14 @@ function _ap_renderMatrixByScope() {
         html += '';
     }
     html += '';
-    container.innerHTML = html;
+    if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, html); } else { container.innerHTML = html; }
 }
 
 function _ap_renderList() {
     const container = document.getElementById('authPermList');
     if (!container) return;
     const items = _ap_filteredItems();
-    if (!items.length) { container.innerHTML = 'No permission entries.'; return; }
+    if (!items.length) { if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, 'No permission entries.'); } else { container.innerHTML = 'No permission entries.'; } return; }
     let html = '';
     html += '' +
         'MethodEndpointScopeStatusSensitivityFeature FlagRate Class' +
@@ -104,7 +108,7 @@ function _ap_renderList() {
         '';
     }
     html += '';
-    container.innerHTML = html;
+    if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, html); } else { container.innerHTML = html; }
 }
 
 function _ap_renderAll() {
@@ -121,15 +125,17 @@ async function loadSelfPermissions() {
         AUTH_PERM_CACHE.self = data || { items: [] };
         AUTH_PERM_CACHE.lastLoadedAt = new Date().toISOString();
         _ap_renderAll();
-        Toast?.success && Toast.success('Loaded permissions');
+        if (typeof Toast !== 'undefined' && Toast && Toast.success) Toast.success('Loaded permissions');
     } catch (e) {
         const container = document.getElementById('authPermList');
-        if (container) container.innerHTML = `${_ap_escape(JSON.stringify(e.response || e, null, 2))}`;
+        if (container) {
+            if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, `${_ap_escape(JSON.stringify(e.response || e, null, 2))}`); } else { container.innerHTML = `${_ap_escape(JSON.stringify(e.response || e, null, 2))}`; }
+        }
         const matrix = document.getElementById('authPermMatrixByScope');
         if (matrix) matrix.innerHTML = '';
         const summary = document.getElementById('authPermSummary');
         if (summary) summary.textContent = 'Failed to load permissions';
-        Toast?.error && Toast.error('Failed to load permissions');
+        if (typeof Toast !== 'undefined' && Toast && Toast.error) Toast.error('Failed to load permissions');
     }
 }
diff --git a/tldw_Server_API/WebUI/js/chat-ui.js b/tldw_Server_API/WebUI/js/chat-ui.js
index 5d6c3103e..557978491 100644
--- a/tldw_Server_API/WebUI/js/chat-ui.js
+++ b/tldw_Server_API/WebUI/js/chat-ui.js
@@ -101,17 +101,15 @@ class ChatUI {
         messageDiv.id = `${prefix}_message_entry_${id}`;
         messageDiv.dataset.messageId = id;
-        messageDiv.innerHTML = `
+        const __markup = `
                ${role}
-
+
@@ -127,15 +125,10 @@ class ChatUI {
                     class="message-content-area"
                     rows="3"
                     placeholder="Enter message content..."
-                    oninput="chatUI.handleContentChange('${prefix}', ${id})"
                 >${content}
-
-
+
+
@@ -144,7 +137,7 @@ class ChatUI {
-
+
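The hunks above and below all apply the same guarded pattern: use window.SafeDOM.setHTML when it is available and fall back to plain innerHTML otherwise. A minimal standalone helper in the spirit of the setSafeHTML wrapper this patch adds to components.js might look like the sketch below (the function name is illustrative, not part of the patch; it assumes SafeDOM.setHTML sanitizes before assignment):

function applySafeHTML(el, html) {
  // Prefer the sanitizing SafeDOM path when it is loaded; otherwise fall back to innerHTML.
  if (!el) return;
  if (window.SafeDOM && typeof window.SafeDOM.setHTML === 'function') {
    window.SafeDOM.setHTML(el, html);
  } else {
    el.innerHTML = html;
  }
}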
@@ -172,9 +165,49 @@ class ChatUI { `; + if (window.SafeDOM && typeof window.SafeDOM.setHTML === 'function') { + window.SafeDOM.setHTML(messageDiv, __markup); + } else { + messageDiv.innerHTML = __markup; + } container.appendChild(messageDiv); + // Bind events programmatically (no inline handlers) + try { + const removeBtn = messageDiv.querySelector('.remove-message-btn[data-action="remove-message"]'); + if (removeBtn && !removeBtn._b) { removeBtn._b = true; removeBtn.addEventListener('click', () => this.removeMessage(prefix, id)); } + + const roleSelect = messageDiv.querySelector(`#${prefix}_message_role_${id}`); + if (roleSelect && !roleSelect._b) { roleSelect._b = true; roleSelect.addEventListener('change', () => this.handleRoleChange(prefix, id)); } + + const contentArea = messageDiv.querySelector(`#${prefix}_message_content_${id}`); + if (contentArea && !contentArea._b) { contentArea._b = true; contentArea.addEventListener('input', () => this.handleContentChange(prefix, id)); } + + const toolbar = messageDiv.querySelector('.message-content-toolbar'); + if (toolbar && !toolbar._b) { + toolbar._b = true; + toolbar.addEventListener('click', (ev) => { + const t = ev.target.closest('button[data-action]'); + if (!t) return; + const action = t.getAttribute('data-action'); + if (action === 'format-json') { + this.formatJSON(prefix, id); + } else if (action === 'clear-content') { + this.clearContent(prefix, id); + } + }); + } + + const fileInput = messageDiv.querySelector(`#${prefix}_message_image_${id}`); + if (fileInput && !fileInput._b) { fileInput._b = true; fileInput.addEventListener('change', () => this.handleImageUpload(prefix, id)); } + + const removeImageBtn = messageDiv.querySelector('[data-action="remove-image"]'); + if (removeImageBtn && !removeImageBtn._b) { removeImageBtn._b = true; removeImageBtn.addEventListener('click', () => this.clearImage(prefix, id)); } + } catch (e) { + console.debug('Failed to bind chat message handlers', e); + } + // Initialize drag and drop for the image input this.initImageDragDrop(prefix, id); @@ -199,7 +232,7 @@ class ChatUI { messageDiv.remove(); this.autoSaveMessages(prefix); - Toast.success('Message removed'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Message removed'); } } @@ -331,9 +364,9 @@ class ChatUI { try { const json = JSON.parse(textarea.value); textarea.value = JSON.stringify(json, null, 2); - Toast.success('JSON formatted'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('JSON formatted'); } catch (e) { - Toast.error('Invalid JSON'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Invalid JSON'); } } @@ -528,8 +561,8 @@ class ChatUI { // Generate and display cURL command const curlCommand = (typeof apiClient.generateCurlV2 === 'function' - ? apiClient.generateCurlV2('POST', '/api/v1/chat/completions', { body: payload }) - : apiClient.generateCurl('POST', '/api/v1/chat/completions', { body: payload })); + ? 
apiClient.generateCurlV2('POST', (apiClient.endpoint('chat','completions') || '/api/v1/chat/completions'), { body: payload }) + : apiClient.generateCurl('POST', (apiClient.endpoint('chat','completions') || '/api/v1/chat/completions'), { body: payload })); const curlEl = document.getElementById('chatCompletions_curl'); if (curlEl) { curlEl.textContent = curlCommand; @@ -539,7 +572,7 @@ class ChatUI { if (payload.stream) { await this.handleStreamingResponse(responseArea, payload); } else { - const response = await apiClient.post('/api/v1/chat/completions', payload); + const response = await apiClient.post((apiClient.endpoint('chat','completions') || '/api/v1/chat/completions'), payload); // Display response with JSON viewer const viewer = new JSONViewer(responseArea, response, { @@ -554,16 +587,16 @@ class ChatUI { const convIdEl = document.getElementById(`${prefix}_conversation_id`); if (convIdEl) { convIdEl.value = response.tldw_conversation_id; - Toast.info(`Conversation ID: ${response.tldw_conversation_id}`); + if (typeof Toast !== 'undefined' && Toast) Toast.info(`Conversation ID: ${response.tldw_conversation_id}`); } } } - Toast.success('Request completed successfully'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Request completed successfully'); } catch (error) { console.error('Chat request error:', error); responseArea.textContent = `Error: ${error.message}`; - Toast.error(`Request failed: ${error.message}`); + if (typeof Toast !== 'undefined' && Toast) Toast.error(`Request failed: ${error.message}`); } finally { Loading.hide(responseArea.parentElement); } @@ -595,7 +628,7 @@ class ChatUI { responseArea.scrollTop = responseArea.scrollHeight; }; - await apiClient.post('/api/v1/chat/completions', payload, { + await apiClient.post((apiClient.endpoint('chat','completions') || '/api/v1/chat/completions'), payload, { streaming: true, onProgress }); @@ -637,7 +670,7 @@ class ChatUI { if (maxTokensEl) maxTokensEl.value = preset.max_tokens; } - Toast.success(`Loaded preset: ${presetName}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Loaded preset: ${presetName}`); } saveCurrentAsPreset(name) { @@ -661,9 +694,9 @@ class ChatUI { } this.savePresets(); - Toast.success(`Saved preset: ${name}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Saved preset: ${name}`); } catch (error) { - Toast.error(`Failed to save preset: ${error.message}`); + if (typeof Toast !== 'undefined' && Toast) Toast.error(`Failed to save preset: ${error.message}`); } } } @@ -718,6 +751,94 @@ function initializeChatCompletionsTab() { } catch (e) { console.debug('Could not set default save_to_db from config:', e?.message || e); } + + // Ensure a default model is selected if none chosen yet + try { + // Populate dropdowns first + const maybePopulate = (window.apiClient && typeof window.apiClient.populateModelDropdowns === 'function') + ? window.apiClient.populateModelDropdowns() + : (typeof window.populateModelDropdowns === 'function' ? window.populateModelDropdowns() : null); + if (maybePopulate && typeof maybePopulate.then === 'function') { + maybePopulate.then(() => { + try { + const sel = document.getElementById(`${prefix}_model`); + if (sel && (!sel.value || sel.value === '')) { + const info = (window.apiClient && typeof window.apiClient.getAvailableProviders === 'function') + ? 
window.apiClient.cachedProviders || null + : null; + const providersInfo = info || {}; + const dp = providersInfo.default_provider; + if (dp && Array.isArray(providersInfo.providers)) { + const p = providersInfo.providers.find(x => x && x.name === dp); + const dm = p && p.default_model ? `${p.name}/${p.default_model}` : null; + if (dm) sel.value = dm; + } + } + } catch (_) { /* ignore */ } + }); + } + } catch (_) { /* ignore */ } + + // Bind top-level controls (remove inline handlers) + try { + const logprobsCb = document.getElementById(`${prefix}_logprobs`); + if (logprobsCb && !logprobsCb._b) { logprobsCb._b = true; logprobsCb.addEventListener('change', () => { try { if (typeof window.toggleLogprobs === 'function') window.toggleLogprobs(); } catch(_){} }); } + + const toolChoiceSel = document.getElementById(`${prefix}_tool_choice`); + if (toolChoiceSel && !toolChoiceSel._b) { toolChoiceSel._b = true; toolChoiceSel.addEventListener('change', () => { try { if (typeof window.toggleToolChoiceJSON === 'function') window.toggleToolChoiceJSON(); } catch(_){} }); } + + const sendReqBtn = document.getElementById(`${prefix}_send_request`); + if (sendReqBtn && !sendReqBtn._b) { sendReqBtn._b = true; sendReqBtn.addEventListener('click', () => { try { if (typeof window.makeChatCompletionsRequest === 'function') window.makeChatCompletionsRequest(); } catch(_){} }); } + + const updSysBtn = document.getElementById('chat-update-system'); + if (updSysBtn && !updSysBtn._b) { updSysBtn._b = true; updSysBtn.addEventListener('click', () => { try { if (typeof window.updateSystemPrompt === 'function') window.updateSystemPrompt(); } catch(_){} }); } + + const resetConvBtn = document.getElementById('chat-reset-conv'); + if (resetConvBtn && !resetConvBtn._b) { resetConvBtn._b = true; resetConvBtn.addEventListener('click', () => { try { if (typeof window.resetChatConversation === 'function') window.resetChatConversation(); } catch(_){} }); } + + const sendBtn = document.getElementById('chat-send-btn'); + if (sendBtn && !sendBtn._b) { sendBtn._b = true; sendBtn.addEventListener('click', () => { try { if (typeof window.sendChatMessage === 'function') window.sendChatMessage(); } catch(_){} }); } + + const stopBtn = document.getElementById('chat-stop-btn'); + if (stopBtn && !stopBtn._b) { stopBtn._b = true; stopBtn.addEventListener('click', () => { try { if (typeof window.stopChatStream === 'function') window.stopChatStream(); } catch(_){} }); } + + const clearBtn = document.getElementById('chat-clear-btn'); + if (clearBtn && !clearBtn._b) { clearBtn._b = true; clearBtn.addEventListener('click', () => { try { if (typeof window.clearChat === 'function') window.clearChat(); } catch(_){} }); } + + const copyLastBtn = document.getElementById('chat-copy-last-btn'); + if (copyLastBtn && !copyLastBtn._b) { copyLastBtn._b = true; copyLastBtn.addEventListener('click', () => { try { if (typeof window.copyLastAssistantMessage === 'function') window.copyLastAssistantMessage(); } catch(_){} }); } + + const retryBtn = document.getElementById('chat-retry-btn'); + if (retryBtn && !retryBtn._b) { retryBtn._b = true; retryBtn.addEventListener('click', () => { try { if (typeof window.retryLastUserMessage === 'function') window.retryLastUserMessage(); } catch(_){} }); } + + const editBtn = document.getElementById('chat-edit-last-btn'); + if (editBtn && !editBtn._b) { editBtn._b = true; editBtn.addEventListener('click', () => { try { if (typeof window.editLastUserMessage === 'function') window.editLastUserMessage(); } catch(_){} }); } + + 
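The bindings above replace inline onclick/onchange attributes with addEventListener calls guarded by a one-shot `_b` flag so repeated initialization does not double-bind. A delegated variant of the same idea is sketched below; it is illustrative only (the data-prefix/data-id attributes and the handler map are assumptions, not part of the patch), but it shows how a single listener can also serve buttons inserted after initialization:

document.addEventListener('click', (ev) => {
  // One delegated listener dispatches any button that carries a data-action attribute.
  const btn = ev.target && ev.target.closest ? ev.target.closest('button[data-action]') : null;
  if (!btn) return;
  const prefix = btn.dataset.prefix;   // assumed attribute, for illustration only
  const id = Number(btn.dataset.id);   // assumed attribute, for illustration only
  const handlers = {
    'format-json': () => window.chatUI && window.chatUI.formatJSON(prefix, id),
    'clear-content': () => window.chatUI && window.chatUI.clearContent(prefix, id),
  };
  const fn = handlers[btn.getAttribute('data-action')];
  if (fn) fn();
});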
// Characters/Conversations endpoint helpers + const bindCmd = (id, fn, needsConfirm=false) => { + const el = document.getElementById(id); if (!el || el._b) return; el._b = true; + el.addEventListener('click', () => { + if (needsConfirm) { + const msg = el.getAttribute('data-confirm') || 'Are you sure?'; + if (!confirm(msg)) return; + } + try { if (typeof window[fn] === 'function') window[fn](); } catch(_){} + }); + }; + bindCmd('btn_createCharacter', 'createCharacter'); + bindCmd('btn_listCharacters', 'listCharacters'); + bindCmd('btn_getCharacter', 'getCharacter'); + bindCmd('btn_updateCharacter', 'updateCharacter'); + bindCmd('btn_deleteCharacter', 'deleteCharacter', true); + bindCmd('btn_createConversation', 'createConversation'); + bindCmd('btn_listConversations', 'listConversations'); + bindCmd('btn_sendConversationMessage', 'sendConversationMessage'); + bindCmd('btn_updateConversation', 'updateConversation'); + bindCmd('btn_deleteConversation', 'deleteConversation', true); + bindCmd('btn_exportConversation', 'exportConversation'); + bindCmd('btn_exportCharacter', 'exportCharacter'); + bindCmd('btn_getConversationDetails', 'getConversationDetails'); + } catch(_) { /* ignore */ } } // Export for use in other modules diff --git a/tldw_Server_API/WebUI/js/components.js b/tldw_Server_API/WebUI/js/components.js index f865fc1a5..465a941d6 100644 --- a/tldw_Server_API/WebUI/js/components.js +++ b/tldw_Server_API/WebUI/js/components.js @@ -96,6 +96,20 @@ class ToastManager { } } +// Local SafeDOM helper for this module +function setSafeHTML(el, html) { + if (!el) return; + try { + if (window.SafeDOM && typeof window.SafeDOM.setHTML === 'function') { + window.SafeDOM.setHTML(el, html); + } else { + el.innerHTML = html; + } + } catch (_) { + try { el.innerHTML = html; } catch (_) {} + } +} + class LoadingIndicator { constructor() { this.activeLoaders = new Map(); @@ -183,7 +197,7 @@ class Modal { // Create modal this.modal = document.createElement('div'); this.modal.className = `modal modal-${this.options.size}`; - this.modal.innerHTML = ` + const __modalMarkup = ` ${this.options.footer ? 
`` : ''} `; + setSafeHTML(this.modal, __modalMarkup); // ARIA roles and labelling try { @@ -275,9 +290,7 @@ class Modal { setContent(content) { const body = this.modal.querySelector('.modal-body'); - if (body) { - body.innerHTML = content; - } + if (body) setSafeHTML(body, content); } } @@ -300,23 +313,30 @@ class JSONViewer { const wrapper = document.createElement('div'); wrapper.className = `json-viewer json-viewer-${this.options.theme}`; - if (this.options.enableCopy) { - const toolbar = document.createElement('div'); - toolbar.className = 'json-viewer-toolbar'; - toolbar.innerHTML = ` - - - `; - wrapper.appendChild(toolbar); - } + if (this.options.enableCopy) { + const toolbar = document.createElement('div'); + toolbar.className = 'json-viewer-toolbar'; + // Build toolbar buttons programmatically to avoid inline handlers + const copyBtn = document.createElement('button'); + copyBtn.className = 'btn btn-sm'; + copyBtn.textContent = 'Copy JSON'; + copyBtn.addEventListener('click', async () => { + try { await Utils.copyToClipboard(JSON.stringify(this.json, null, 2)); } catch (_) {} + }); + const dlBtn = document.createElement('button'); + dlBtn.className = 'btn btn-sm'; + dlBtn.textContent = 'Download'; + dlBtn.addEventListener('click', () => { + try { Utils.downloadData(this.json, 'data.json'); } catch (_) {} + }); + toolbar.appendChild(copyBtn); + toolbar.appendChild(dlBtn); + wrapper.appendChild(toolbar); + } const content = document.createElement('div'); content.className = 'json-viewer-content'; - content.innerHTML = this.renderValue(this.json, 0); + setSafeHTML(content, this.renderValue(this.json, 0)); wrapper.appendChild(content); this.container.appendChild(wrapper); @@ -325,6 +345,8 @@ class JSONViewer { if (this.options.enableCollapse) { this.attachCollapseHandlers(); } + // Bind any quick-action buttons that were rendered + this.attachQuickActionHandlers(content); } renderValue(value, depth) { @@ -408,31 +430,31 @@ class JSONViewer { html += `
`; if (batchItem) { const payload = encodeURIComponent(JSON.stringify(batchItem)); - html += ``; + html += ``; } if (pmcItem) { const payloadPmc = encodeURIComponent(JSON.stringify(pmcItem)); - html += ` `; + html += ` `; } if (zenodoItem) { const payloadZen = encodeURIComponent(JSON.stringify(zenodoItem)); - html += ` `; + html += ` `; } if (vixraItem) { const payloadVix = encodeURIComponent(JSON.stringify(vixraItem)); - html += ` `; + html += ` `; } if (figshareItem) { const payloadFig = encodeURIComponent(JSON.stringify(figshareItem)); - html += ` `; + html += ` `; } if (halItem) { const payloadHal = encodeURIComponent(JSON.stringify(halItem)); - html += ` `; + html += ` `; } if (osfItem) { const payloadOsf = encodeURIComponent(JSON.stringify(osfItem)); - html += ` `; + html += ` `; } html += `
`; } @@ -599,6 +621,31 @@ class JSONViewer { return null; } } + + attachQuickActionHandlers(rootEl) { + try { + const root = rootEl || this.container; + if (!root) return; + const map = { + 'add-batch': (btn) => { try { window.addSearchItemToBatchFromPayload && window.addSearchItemToBatchFromPayload(btn); } catch(_){} }, + 'add-pmc': (btn) => { try { window.addPmcItemToBatchFromPayload && window.addPmcItemToBatchFromPayload(btn); } catch(_){} }, + 'ingest-zenodo': (btn) => { try { window.ingestZenodoFromPayload && window.ingestZenodoFromPayload(btn); } catch(_){} }, + 'ingest-vixra': (btn) => { try { window.ingestVixraFromPayload && window.ingestVixraFromPayload(btn); } catch(_){} }, + 'ingest-figshare': (btn) => { try { window.ingestFigshareFromPayload && window.ingestFigshareFromPayload(btn); } catch(_){} }, + 'ingest-hal': (btn) => { try { window.ingestHalFromPayload && window.ingestHalFromPayload(btn); } catch(_){} }, + 'ingest-osf': (btn) => { try { window.ingestOsfFromPayload && window.ingestOsfFromPayload(btn); } catch(_){} }, + }; + root.querySelectorAll('button.json-qa[data-action]') + .forEach((btn) => { + if (btn._qaBound) return; btn._qaBound = true; + btn.addEventListener('click', () => { + const action = btn.getAttribute('data-action'); + const fn = map[action]; + if (typeof fn === 'function') fn(btn); + }); + }); + } catch (_) {} + } } // Initialize global instances @@ -609,7 +656,7 @@ const Loading = new LoadingIndicator(); function addSearchItemToBatch(item) { try { const ta = document.getElementById('oaIngestBatch_payload'); - if (!ta) { Toast.warning('Open OA Ingest Batch panel to collect selections.'); return; } + if (!ta) { if (typeof Toast !== 'undefined' && Toast) Toast.warning('Open OA Ingest Batch panel to collect selections.'); return; } let arr = []; const current = (ta.value || '').trim(); if (current.startsWith('[')) { @@ -619,7 +666,7 @@ function addSearchItemToBatch(item) { if (!Array.isArray(arr)) arr = []; arr.push(item); ta.value = JSON.stringify(arr, null, 2); - Toast.success('Added to batch'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Added to batch'); } catch (e) { console.error('addSearchItemToBatch failed', e); alert('Failed to add to batch: ' + (e?.message || e)); @@ -642,7 +689,7 @@ function addSearchItemToBatchFromPayload(el) { function addPmcItemToBatch(item) { try { const ta = document.getElementById('pmcBatchIngest_payload'); - if (!ta) { Toast.warning('Open PMC Batch Ingest panel to collect selections.'); return; } + if (!ta) { if (typeof Toast !== 'undefined' && Toast) Toast.warning('Open PMC Batch Ingest panel to collect selections.'); return; } let arr = []; const current = (ta.value || '').trim(); if (current.startsWith('[')) { @@ -652,10 +699,10 @@ function addPmcItemToBatch(item) { if (!Array.isArray(arr)) arr = []; // Normalize to minimal { pmcid, title?, author? 
} const pmcid = String(item.pmcid || item.PMCID || '').trim(); - if (!pmcid) { Toast.error('Invalid PMCID payload'); return; } + if (!pmcid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Invalid PMCID payload'); return; } arr.push({ pmcid, title: item.title || undefined, author: item.author || undefined, keywords: item.keywords || undefined }); ta.value = JSON.stringify(arr, null, 2); - Toast.success('Added to PMC batch'); + if (typeof Toast !== 'undefined' && Toast) Toast.success('Added to PMC batch'); } catch (e) { console.error('addPmcItemToBatch failed', e); alert('Failed to add to PMC batch: ' + (e?.message || e)); @@ -681,7 +728,7 @@ async function ingestZenodoFromPayload(el) { if (!payloadStr) return; const item = JSON.parse(decodeURIComponent(payloadStr)); const record_id = item.record_id; - if (!record_id) { Toast.error('Missing Zenodo record_id'); return; } + if (!record_id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing Zenodo record_id'); return; } // Use defaults; advanced users can use the panel to customize const body = { perform_chunking: true, @@ -692,10 +739,10 @@ async function ingestZenodoFromPayload(el) { perform_analysis: true }; const res = await apiClient.post('/api/v1/paper-search/zenodo/ingest', body, { query: { record_id } }); - Toast.success(`Zenodo ingested: media_id ${res?.media_id ?? ''}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Zenodo ingested: media_id ${res?.media_id ?? ''}`); } catch (e) { console.error('ingestZenodoFromPayload failed', e); - Toast.error('Zenodo ingest failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Zenodo ingest failed'); } } @@ -706,7 +753,7 @@ async function ingestVixraFromPayload(el) { if (!payloadStr) return; const item = JSON.parse(decodeURIComponent(payloadStr)); const vid = item.vid; - if (!vid) { Toast.error('Missing viXra ID'); return; } + if (!vid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing viXra ID'); return; } const body = { perform_chunking: true, parser: 'pymupdf4llm', @@ -716,10 +763,10 @@ async function ingestVixraFromPayload(el) { perform_analysis: true }; const res = await apiClient.post('/api/v1/paper-search/vixra/ingest', body, { query: { vid } }); - Toast.success(`viXra ingested: media_id ${res?.media_id ?? ''}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`viXra ingested: media_id ${res?.media_id ?? ''}`); } catch (e) { console.error('ingestVixraFromPayload failed', e); - Toast.error('viXra ingest failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('viXra ingest failed'); } } @@ -730,7 +777,7 @@ async function ingestFigshareFromPayload(el) { if (!payloadStr) return; const item = JSON.parse(decodeURIComponent(payloadStr)); const article_id = item.article_id; - if (!article_id) { Toast.error('Missing Figshare article_id'); return; } + if (!article_id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing Figshare article_id'); return; } const body = { perform_chunking: true, parser: 'pymupdf4llm', @@ -740,10 +787,10 @@ async function ingestFigshareFromPayload(el) { perform_analysis: true }; const res = await apiClient.post('/api/v1/paper-search/figshare/ingest', body, { query: { article_id } }); - Toast.success(`Figshare ingested: media_id ${res?.media_id ?? ''}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`Figshare ingested: media_id ${res?.media_id ?? 
''}`); } catch (e) { console.error('ingestFigshareFromPayload failed', e); - Toast.error('Figshare ingest failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('Figshare ingest failed'); } } @@ -754,7 +801,7 @@ async function ingestHalFromPayload(el) { if (!payloadStr) return; const item = JSON.parse(decodeURIComponent(payloadStr)); const docid = item.docid; - if (!docid) { Toast.error('Missing HAL docid'); return; } + if (!docid) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing HAL docid'); return; } const body = { perform_chunking: true, parser: 'pymupdf4llm', @@ -764,10 +811,10 @@ async function ingestHalFromPayload(el) { perform_analysis: true }; const res = await apiClient.post('/api/v1/paper-search/hal/ingest', body, { query: { docid } }); - Toast.success(`HAL ingested: media_id ${res?.media_id ?? ''}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`HAL ingested: media_id ${res?.media_id ?? ''}`); } catch (e) { console.error('ingestHalFromPayload failed', e); - Toast.error('HAL ingest failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('HAL ingest failed'); } } @@ -778,7 +825,7 @@ async function ingestOsfFromPayload(el) { if (!payloadStr) return; const item = JSON.parse(decodeURIComponent(payloadStr)); const osf_id = item.osf_id; - if (!osf_id) { Toast.error('Missing OSF ID'); return; } + if (!osf_id) { if (typeof Toast !== 'undefined' && Toast) Toast.error('Missing OSF ID'); return; } const body = { perform_chunking: true, parser: 'pymupdf4llm', @@ -788,10 +835,10 @@ async function ingestOsfFromPayload(el) { perform_analysis: true }; const res = await apiClient.post('/api/v1/paper-search/osf/ingest', body, { query: { osf_id } }); - Toast.success(`OSF ingested: media_id ${res?.media_id ?? ''}`); + if (typeof Toast !== 'undefined' && Toast) Toast.success(`OSF ingested: media_id ${res?.media_id ?? ''}`); } catch (e) { console.error('ingestOsfFromPayload failed', e); - Toast.error('OSF ingest failed'); + if (typeof Toast !== 'undefined' && Toast) Toast.error('OSF ingest failed'); } } diff --git a/tldw_Server_API/WebUI/js/dictionaries.js b/tldw_Server_API/WebUI/js/dictionaries.js index 5a0cf1aaa..7b68ce4ae 100644 --- a/tldw_Server_API/WebUI/js/dictionaries.js +++ b/tldw_Server_API/WebUI/js/dictionaries.js @@ -17,7 +17,7 @@ const DictionariesUI = (() => { const container = el('dictsList'); if (!container) return; if (!Array.isArray(list) || list.length === 0) { - container.innerHTML = '
No dictionaries found.';
+            if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, 'No dictionaries found.'); } else { container.innerHTML = 'No dictionaries found.'; }
             return;
         }
         const rows = list.map(d => {
                 entries: ${d.entry_count ?? '-'}
             `;
         }).join('');
-        container.innerHTML = rows;
+        if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(container, rows); } else { container.innerHTML = rows; }
         // click handlers
         container.querySelectorAll('.list-item').forEach(item => {
             item.addEventListener('click', async () => {
@@ -119,7 +119,7 @@ const DictionariesUI = (() => {
             await apiClient.delete(`/api/v1/chat/dictionaries/${selected.id}`);
             selected = null;
             updateSelectedMeta();
-            el('entriesList').innerHTML = '';
+            const listEl = el('entriesList'); if (listEl) listEl.innerHTML = '';
             await refreshDictionaries();
             if (typeof Toast !== 'undefined') Toast.success('Dictionary deleted');
         } catch (e) {
@@ -147,7 +147,7 @@ const DictionariesUI = (() => {
         }
 
         if (!filtered || filtered.length === 0) {
-            listEl.innerHTML = 'No entries';
+            if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(listEl, 'No entries'); } else { listEl.innerHTML = 'No entries'; }
             return;
         }
         const renderRow = (e) => `
@@ -189,9 +189,10 @@ const DictionariesUI = (() => {
                 `;
                 html += byGroup[g].map(renderRow).join('');
             });
-            listEl.innerHTML = html;
+            if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(listEl, html); } else { listEl.innerHTML = html; }
         } else {
-            listEl.innerHTML = filtered.map(renderRow).join('');
+            const html2 = filtered.map(renderRow).join('');
+            if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(listEl, html2); } else { listEl.innerHTML = html2; }
         }
         listEl.querySelectorAll('button[data-del]').forEach(btn => {
             btn.addEventListener('click', async () => {
@@ -775,7 +776,8 @@ const DictionariesUI = (() => {
         if (!dd) return;
         const groups = Array.from(new Set(currentEntries.map(e => e.group || '').filter(Boolean))).sort();
         const cur = dd.value;
-        dd.innerHTML = '' + groups.map(g => ``).join('');
+        const opts = '' + groups.map(g => ``).join('');
+        if (window.SafeDOM && window.SafeDOM.setHTML) { window.SafeDOM.setHTML(dd, opts); } else { dd.innerHTML = opts; }
         if (groups.includes(cur)) dd.value = cur;
     }
diff --git a/tldw_Server_API/WebUI/js/endpoint-helper.js b/tldw_Server_API/WebUI/js/endpoint-helper.js
index 1ed8b86b2..2949c643f 100644
--- a/tldw_Server_API/WebUI/js/endpoint-helper.js
+++ b/tldw_Server_API/WebUI/js/endpoint-helper.js
@@ -51,23 +51,60 @@ class EndpointHelper {
             html += this.createFormField(field, id);
         });
 
-        // Add request button
+        // Add request + cURL buttons (no inline handlers)
         const buttonClass = method === 'DELETE' ? 'btn-danger' : '';
-        const confirmDelete = method === 'DELETE' ? `if(confirm('Are you sure?')) ` : '';
-        html += `
-
+
`; - section.innerHTML = html; + if (window.SafeDOM && typeof window.SafeDOM.setHTML === 'function') { + window.SafeDOM.setHTML(section, html); + } else { + section.innerHTML = html; + } + // Bind actions + try { + const execBtn = section.querySelector('button[data-action="exec"]'); + if (execBtn && !execBtn._bound) { + execBtn._bound = true; + execBtn.addEventListener('click', (e) => { + e.preventDefault(); + const m = execBtn.getAttribute('data-method'); + if (m === 'DELETE') { + if (!confirm('Are you sure?')) return; + } + this.executeRequest( + execBtn.getAttribute('data-id'), + m, + execBtn.getAttribute('data-path'), + execBtn.getAttribute('data-body'), + execBtn.getAttribute('data-timeout') || 'default' + ); + }); + } + const curlBtn = section.querySelector('button[data-action="curl"]'); + if (curlBtn && !curlBtn._bound) { + curlBtn._bound = true; + curlBtn.addEventListener('click', (e) => { + e.preventDefault(); + this.showCurl( + curlBtn.getAttribute('data-id'), + curlBtn.getAttribute('data-method'), + curlBtn.getAttribute('data-path'), + curlBtn.getAttribute('data-body') + ); + }); + } + } catch (_) {} return section; } @@ -393,6 +430,11 @@ class EndpointHelper { // Add success/error styling element.className = success ? 'response-success' : 'response-error'; + + // Update correlation snippet if present + try { + this.updateCorrelationSnippet(element); + } catch (e) { /* ignore */ } } /** @@ -419,6 +461,54 @@ class EndpointHelper { this.displayResponse(element, errorInfo, false); } + /** + * Update correlation snippet next to the given response element. + */ + updateCorrelationSnippet(responseEl) { + if (!responseEl) return; + // Derive endpoint id from responseEl.id if possible + const id = (responseEl.id || '').replace(/_response$/, ''); + const corrId = id ? `${id}_correlation` : ''; + let box = (corrId && document.getElementById(corrId)) || null; + if (!box) { + // Create after response element if not found + box = document.createElement('div'); + box.className = 'correlation-snippet'; + box.style.marginTop = '6px'; + box.style.color = 'var(--color-text-muted)'; + box.style.fontSize = '0.85em'; + try { box.setAttribute('aria-live', 'polite'); } catch(_){} + responseEl.parentNode.insertBefore(box, responseEl.nextSibling); + } + const meta = (window.apiClient && window.apiClient.lastCorrelation) || {}; + const reqId = meta.requestId || '-'; + const trace = meta.traceparent || meta.traceId || '-'; + const shortReq = String(reqId).length > 12 ? `${String(reqId).slice(0, 12)}…` : reqId; + const shortTr = String(trace).length > 24 ? `${String(trace).slice(0, 24)}…` : trace; + box.textContent = `Correlation: X-Request-ID=${shortReq} trace=${shortTr}`; + box.title = `X-Request-ID=${reqId} traceparent/X-Trace-Id=${trace}`; + + // Tiny help hint explaining how to use curl -I with X-Request-ID + const section = responseEl.closest('.endpoint-section'); + const pathEl = section ? section.querySelector('.endpoint-path') : null; + const path = pathEl ? (pathEl.textContent || '').trim() : ''; + const base = (window.apiClient && window.apiClient.baseUrl) ? window.apiClient.baseUrl : window.location.origin; + const exampleRid = reqId && reqId !== '-' ? reqId : 'YOUR-RID'; + const exampleUrl = `${base}${path || ''}`; + const hintId = id ? 
`${id}_correlation_help` : ''; + let hint = (hintId && document.getElementById(hintId)) || null; + if (!hint) { + hint = document.createElement('div'); + if (hintId) hint.id = hintId; + hint.className = 'correlation-help'; + hint.style.marginTop = '4px'; + hint.style.color = 'var(--color-text-muted)'; + hint.style.fontSize = '0.8em'; + box.parentNode.insertBefore(hint, box.nextSibling); + } + hint.textContent = `Tip: copy X-Request-ID and correlate in server logs; you can also echo headers using: curl -s -I -H 'X-Request-ID: ${exampleRid}' '${exampleUrl}'`; + } + /** * Show cURL command for request */ @@ -564,6 +654,7 @@ class EndpointHelper { // Create global instance const endpointHelper = new EndpointHelper(); +try { window.endpointHelper = endpointHelper; } catch (_) {} // Export for use in other modules if (typeof module !== 'undefined' && module.exports) { diff --git a/tldw_Server_API/WebUI/js/inline-handler-shim.js b/tldw_Server_API/WebUI/js/inline-handler-shim.js deleted file mode 100644 index 948f9409e..000000000 --- a/tldw_Server_API/WebUI/js/inline-handler-shim.js +++ /dev/null @@ -1,99 +0,0 @@ -// Inline Handler Shim -// Replaces inline event handler attributes (onclick=, onchange=, etc.) with -// addEventListener-based handlers so CSP can disallow script-src-attr. -// -// Security note: This translates attribute code strings into Functions, which -// requires 'unsafe-eval' in CSP. We already allow 'unsafe-eval' for legacy UI. -// This is a transitional measure to reduce reliance on inline attributes. - -(function () { - 'use strict'; - - const ATTR_PREFIX = 'on'; - const HANDLER_ATTRS = new Set([ - 'onclick', 'onchange', 'onsubmit', 'oninput', 'onkeydown', 'onkeyup', - 'onkeypress', 'onload', 'onerror', 'onmouseover', 'onmouseout', 'onfocus', - 'onblur', 'onmouseenter', 'onmouseleave', 'onmousedown', 'onmouseup', - 'onwheel', 'oncontextmenu', 'ondblclick', 'onpaste', 'oncopy', 'oncut', - 'ondrag', 'ondragstart', 'ondragend', 'ondragenter', 'ondragleave', - 'ondragover', 'ondrop', 'onpointerdown', 'onpointerup', 'onpointermove' - ]); - - function rewireElement(el) { - if (!(el && el.getAttribute)) return; - // Iterate attributes snapshot because we may remove during iteration - const attrs = el.attributes ? Array.from(el.attributes) : []; - for (const attr of attrs) { - const name = attr.name.toLowerCase(); - if (!name.startsWith(ATTR_PREFIX)) continue; - // Limit to known handlers to avoid grabbing unrelated attributes - if (!HANDLER_ATTRS.has(name)) continue; - const code = attr.value || ''; - const evt = name.slice(2); // strip 'on' - try { - // Wrap attribute code into a function taking 'event' - // Use Function constructor to preserve global references (window). 
- const fn = new Function('event', code); - el.addEventListener(evt, function (event) { - try { - return fn.call(el, event); - } catch (e) { - // eslint-disable-next-line no-console - console.error('Inline handler shim error for', name, 'on', el, e); - } - }, false); - } catch (e) { - // eslint-disable-next-line no-console - console.warn('Failed to convert inline handler', name, 'on', el, e); - } finally { - // Remove attribute to prevent blocked inline execution and duplicate firing - try { el.removeAttribute(name); } catch (_e) {} - } - } - } - - function rewireTree(root) { - if (!root) return; - if (root.nodeType === 1) { // Element - rewireElement(root); - const children = root.querySelectorAll('[onload], [onerror], [onclick], [onchange], [onsubmit], [oninput], [onkeydown], [onkeyup], [onkeypress], [onmouseover], [onmouseout], [onfocus], [onblur], [onmouseenter], [onmouseleave], [onmousedown], [onmouseup], [onwheel], [oncontextmenu], [ondblclick], [onpaste], [oncopy], [oncut], [ondrag], [ondragstart], [ondragend], [ondragenter], [ondragleave], [ondragover], [ondrop], [onpointerdown], [onpointerup], [onpointermove]'); - for (const el of children) rewireElement(el); - } - } - - function installObserver() { - try { - const mo = new MutationObserver((mutations) => { - for (const m of mutations) { - if (m.type === 'childList') { - for (const node of m.addedNodes) { - rewireTree(node); - } - } else if (m.type === 'attributes' && typeof m.target?.getAttribute === 'function') { - const name = m.attributeName?.toLowerCase?.() || ''; - if (name && name.startsWith(ATTR_PREFIX)) rewireElement(m.target); - } - } - }); - mo.observe(document.documentElement || document.body, { - subtree: true, - childList: true, - attributes: true, - attributeFilter: Array.from(HANDLER_ATTRS), - }); - } catch (e) { - // eslint-disable-next-line no-console - console.warn('Inline handler shim observer failed:', e); - } - } - - if (document.readyState === 'loading') { - document.addEventListener('DOMContentLoaded', () => { - rewireTree(document); - installObserver(); - }); - } else { - rewireTree(document); - installObserver(); - } -})(); diff --git a/tldw_Server_API/WebUI/js/keywords.js b/tldw_Server_API/WebUI/js/keywords.js index e18814007..e72f7cdc8 100644 --- a/tldw_Server_API/WebUI/js/keywords.js +++ b/tldw_Server_API/WebUI/js/keywords.js @@ -9,11 +9,12 @@ try { // Show cURL (auth-aware and masked by default) - const curl = apiClient.generateCurlV2('POST', '/api/v1/prompts/keywords/', { body: { keyword_text: keywordText } }); + const ep = (apiClient.endpoint('prompts','keywords') || '/api/v1/prompts/keywords/'); + const curl = apiClient.generateCurlV2('POST', ep, { body: { keyword_text: keywordText } }); const curlEl = document.getElementById('keywordAdd_curl'); if (curlEl) curlEl.textContent = curl; - const response = await apiClient.post('/api/v1/prompts/keywords/', { keyword_text: keywordText }); + const response = await apiClient.post((apiClient.endpoint('prompts','keywords') || '/api/v1/prompts/keywords/'), { keyword_text: keywordText }); const respEl = document.getElementById('keywordAdd_response'); if (respEl) respEl.textContent = JSON.stringify(response, null, 2); if (input) input.value = ''; @@ -26,11 +27,11 @@ async function listKeywords() { try { - const curl = apiClient.generateCurlV2('GET', '/api/v1/prompts/keywords/'); + const curl = apiClient.generateCurlV2('GET', (apiClient.endpoint('prompts','keywords') || '/api/v1/prompts/keywords/')); const curlEl = document.getElementById('keywordsList_curl'); 
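Both keyword helpers here resolve their path through apiClient.endpoint('prompts', 'keywords') and keep the literal '/api/v1/prompts/keywords/' as a fallback when the registry has no entry. A small wrapper expressing that convention is sketched below (the wrapper name is illustrative and not part of the patch; apiClient.endpoint and its optional params argument are taken from the calls in this diff):

function endpointOr(group, name, fallbackPath, params) {
  // Ask the client-side endpoint registry first; fall back to the hard-coded path on any failure.
  try {
    const resolved = (window.apiClient && typeof window.apiClient.endpoint === 'function')
      ? window.apiClient.endpoint(group, name, params)
      : null;
    return resolved || fallbackPath;
  } catch (_) {
    return fallbackPath;
  }
}
// e.g. endpointOr('prompts', 'keywords', '/api/v1/prompts/keywords/')
// e.g. endpointOr('prompts', 'keyword_delete', `/api/v1/prompts/keywords/${encodeURIComponent(keyword)}`, { keyword })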
if (curlEl) curlEl.textContent = curl; - const keywords = await apiClient.get('/api/v1/prompts/keywords/'); + const keywords = await apiClient.get((apiClient.endpoint('prompts','keywords') || '/api/v1/prompts/keywords/')); const respEl = document.getElementById('keywordsList_response'); if (respEl) respEl.textContent = JSON.stringify(keywords, null, 2); } catch (error) { @@ -45,7 +46,7 @@ if (!keywordText) { alert('Please enter a keyword to delete'); return; } try { - const path = `/api/v1/prompts/keywords/${encodeURIComponent(keywordText)}`; + const path = (apiClient.endpoint('prompts','keyword_delete', { keyword: keywordText }) || `/api/v1/prompts/keywords/${encodeURIComponent(keywordText)}`); const curl = apiClient.generateCurlV2('DELETE', path); const curlEl = document.getElementById('keywordDelete_curl'); if (curlEl) curlEl.textContent = curl; @@ -66,7 +67,7 @@ const container = document.getElementById('keywords-list'); if (!container) return; try { - const keywords = await apiClient.get('/api/v1/prompts/keywords/'); + const keywords = await apiClient.get((apiClient.endpoint('prompts','keywords') || '/api/v1/prompts/keywords/')); container.innerHTML = ''; if (!Array.isArray(keywords) || keywords.length === 0) { const p = document.createElement('p'); @@ -107,7 +108,8 @@ async function deleteKeywordFromList(keyword) { if (!confirm(`Delete keyword "${keyword}"?`)) return; - await apiClient.delete(`/api/v1/prompts/keywords/${encodeURIComponent(keyword)}`); + const path = (apiClient.endpoint('prompts','keyword_delete', { keyword }) || `/api/v1/prompts/keywords/${encodeURIComponent(keyword)}`); + await apiClient.delete(path); loadAllKeywords(); } diff --git a/tldw_Server_API/WebUI/js/legacy-helpers.js b/tldw_Server_API/WebUI/js/legacy-helpers.js index 90bb9dbff..905456dc4 100644 --- a/tldw_Server_API/WebUI/js/legacy-helpers.js +++ b/tldw_Server_API/WebUI/js/legacy-helpers.js @@ -42,6 +42,75 @@ } } + // ------------------------------ + // Inline Jobs feedback for long-running requests + // ------------------------------ + const __lrJobStreams = new Map(); // endpointId -> { handle, timer } + function startJobFeedbackFor(endpointId, containerEl) { + try { + const host = containerEl && containerEl.parentElement ? containerEl.parentElement : document.body; + let box = document.getElementById(`${endpointId}_job_inline`); + if (!box) { + box = document.createElement('div'); + box.id = `${endpointId}_job_inline`; + box.className = 'text-small'; + box.style.marginTop = '6px'; + box.innerHTML = '
' +
+                    'Live job activity' +
+                    '' +
+                    '
'; + host.appendChild(box); + } + const listId = `${endpointId}_je`; + const statsId = `${endpointId}_js`; + const domainWhitelist = new Set(['media','webscrape','web_scrape','webscraping']); + const handle = apiClient.streamSSE('/api/v1/jobs/events/stream', { + onEvent: (obj) => { + if (!obj || !domainWhitelist.has(String(obj.domain))) return; + const list = document.getElementById(listId); + if (!list) return; + const line = document.createElement('div'); + const dqt = [obj.domain, obj.queue, obj.job_type].filter(Boolean).join('/'); + const jid = obj.job_id || '-'; + const ev = obj.event || ''; + line.textContent = `${new Date().toLocaleTimeString()} · ${ev} · ${dqt} · id:${jid}`; + list.appendChild(line); + while (list.children.length > 20) list.removeChild(list.firstChild); + list.scrollTop = list.scrollHeight; + }, + timeout: 600000 + }); + const timer = setInterval(async () => { + try { + // Sum media and webscrape domains only + const agg = { processing: 0, queued: 0 }; + const domains = ['media','webscrape','web_scrape','webscraping']; + for (const dom of domains) { + try { + const res = await apiClient.get('/api/v1/jobs/stats', { domain: dom }); + const arr = Array.isArray(res) ? res : (res && res.data) ? res.data : []; + agg.processing += arr.reduce((a, r) => a + (r.processing || 0), 0); + agg.queued += arr.reduce((a, r) => a + (r.queued || 0), 0); + } catch (_) { /* ignore per-domain errors */ } + } + const el = document.getElementById(statsId); + if (el) el.textContent = `processing=${agg.processing} queued=${agg.queued}`; + } catch (_) { /* ignore */ } + }, 10000); + __lrJobStreams.set(endpointId, { handle, timer }); + } catch (_) { /* ignore */ } + } + function stopJobFeedbackFor(endpointId) { + try { + const rec = __lrJobStreams.get(endpointId); + if (rec) { + try { if (rec.handle && rec.handle.abort) rec.handle.abort(); } catch (_) {} + try { if (rec.timer) clearInterval(rec.timer); } catch (_) {} + __lrJobStreams.delete(endpointId); + } + } catch (_) { /* ignore */ } + } + async function makeRequest(endpointId, method, path, bodyType = 'none', queryParams = {}) { const responseArea = document.getElementById(`${endpointId}_response`); const curlEl = document.getElementById(`${endpointId}_curl`); @@ -60,7 +129,7 @@ const longRunningPaths = [ 'process-videos', 'process-audios', 'process-ebooks', 'process-documents', 'process-pdfs', 'mediawiki/ingest-dump', - 'mediawiki/process-dump', 'ingest-web-content' + 'mediawiki/process-dump', 'ingest-web-content', 'media/add' ]; const isLongRunning = longRunningPaths.some((p) => path.includes(p)); @@ -71,6 +140,8 @@ + 'This operation may take several minutes depending on the file size and processing options.
' + 'Please do not refresh the page or close this tab.' + ''; + // Hook job feedback inline + startJobFeedbackFor(endpointId, responseArea); } else { Loading.show(responseArea.parentElement, 'Sending request...'); responseArea.textContent = ''; @@ -351,6 +422,7 @@ } } finally { Loading.hide(responseArea.parentElement); + try { stopJobFeedbackFor(endpointId); } catch (_) {} } } @@ -363,7 +435,8 @@ let allowedProviders = null; let allowedModels = null; try { - const lists = await apiClient.makeRequest('GET', '/api/v1/embeddings/models'); + const ep = (window.apiClient && window.apiClient.endpoint('embeddings','models')) || '/api/v1/embeddings/models'; + const lists = await apiClient.makeRequest('GET', ep); allowedProviders = lists?.allowed_providers ?? null; allowedModels = lists?.allowed_models ?? null; } catch (e) { @@ -401,7 +474,8 @@ async function notesExportDownload(params, filenameBase) { try { const baseUrl = (window.apiClient && window.apiClient.baseUrl) ? window.apiClient.baseUrl : window.location.origin; - const url = new URL(`${baseUrl}/api/v1/notes/export`); + const ep = (window.apiClient && window.apiClient.endpoint('notes','export')) || '/api/v1/notes/export'; + const url = new URL(`${baseUrl}${ep}`); Object.entries(params || {}).forEach(([k, v]) => { if (v !== undefined && v !== null && v !== '') url.searchParams.append(k, String(v)); }); @@ -480,7 +554,8 @@ async function populateEmbeddingsCreateModelDropdown() { const baseUrl = (window.apiClient && window.apiClient.baseUrl) ? window.apiClient.baseUrl : window.location.origin; const token = (window.apiClient && window.apiClient.token) ? window.apiClient.token : ''; - const res = await fetch(`${baseUrl}/api/v1/embeddings/models`, { + const ep2 = (window.apiClient && window.apiClient.endpoint('embeddings','models')) || '/api/v1/embeddings/models'; + const res = await fetch(`${baseUrl}${ep2}`, { headers: { ...(token ? { 'X-API-KEY': token } : {}), } diff --git a/tldw_Server_API/WebUI/js/main.js b/tldw_Server_API/WebUI/js/main.js index 19e9243b2..b15b8d78e 100644 --- a/tldw_Server_API/WebUI/js/main.js +++ b/tldw_Server_API/WebUI/js/main.js @@ -1,7 +1,20 @@ /** * Main JavaScript file for API WebUI + * NOTE: Sanitization and inline-handler migration are delegated to WebUISanitizer (js/sanitizer.js) + * to keep a single source of truth. Avoid duplicating sanitizer logic here. */ +// Ensure sanitizer.js is present (loaded before this file in index.html). +// For tests/CommonJS, attempt a soft require so sanitizer becomes a hard dependency. +let WebUISanitizerRef = (typeof window !== 'undefined' && window.WebUISanitizer) || null; +try { + if (!WebUISanitizerRef && typeof require !== 'undefined') { + // Attempt to load in non-browser test environments + require('./sanitizer.js'); + WebUISanitizerRef = (typeof window !== 'undefined' && window.WebUISanitizer) || (typeof globalThis !== 'undefined' ? 
globalThis.WebUISanitizer : null); + } +} catch (_) { /* ignore; browser script tags handle this path */ } + class WebUI { constructor() { this.loadedContentGroups = new Set(); @@ -11,6 +24,8 @@ class WebUI { this.searchPreloaded = false; this.theme = 'light'; this.apiStatusCheckInterval = null; + // Prevent the startup fallback from overriding a user selection race + this._defaultTabSettled = false; this.init(); } @@ -43,6 +58,19 @@ class WebUI { // Apply capability-based visibility (hide experimental tabs dynamically) this.applyFeatureVisibilityFromServer(); + // Initialize Simple/Advanced mode toggle and default visibility + this.initSimpleAdvancedToggle(); + + // Force-hide correlation badges unless user has explicitly enabled them + try { + if (String(localStorage.getItem('WEBUI_SHOW_CORRELATION')||'') !== '1') { + const ridEl0 = document.getElementById('reqid-badge'); + const trEl0 = document.getElementById('trace-badge'); + if (ridEl0) ridEl0.style.display = 'none'; + if (trEl0) trEl0.style.display = 'none'; + } + } catch(_){} + // If opened via file://, show guidance banner if (window.location.protocol === 'file:') { try { @@ -54,9 +82,195 @@ class WebUI { } catch (e) { /* ignore */ } } + // Proactively migrate any inline handlers present in base HTML + try { this.migrateInlineHandlers(document.body || document); } catch (_) {} + // Install CSP guard to sanitize/migrate inline handlers for any dynamic insertions + try { this.installCSPGuard(); } catch (_) {} + + // Bind generic endpoint exec buttons across the app (no inline handlers) + try { + document.addEventListener('click', async (e) => { + const btn = e.target && e.target.closest('button[data-action="exec-endpoint"]'); + if (!btn) return; + e.preventDefault(); + const id = btn.getAttribute('data-id'); + const method = btn.getAttribute('data-method') || 'GET'; + const path = btn.getAttribute('data-path') || ''; + const bodyType = btn.getAttribute('data-body') || 'none'; + const confirmMsg = btn.getAttribute('data-confirm') || ''; + if (confirmMsg && !confirm(confirmMsg)) return; + const responseEl = document.getElementById(`${id}_response`); + try { + if (responseEl) responseEl.textContent = ''; + // Try global endpointHelper instance if available + if (window.endpointHelper && typeof window.endpointHelper.executeRequest === 'function') { + await window.endpointHelper.executeRequest(id, method, path, bodyType); + return; + } + const body = (bodyType === 'json') ? (function(){ const ta = document.getElementById(`${id}_payload`); try { return ta && ta.value ? JSON.parse(ta.value) : {}; } catch(_) { return {}; } })() : null; + const res = await apiClient.makeRequest(method, path, { body }); + if (responseEl) responseEl.textContent = (typeof res === 'string') ? res : JSON.stringify(res, null, 2); + } catch(err) { + if (responseEl) responseEl.textContent = `Error: ${err.message}`; + } + }, true); + } catch(_) {} + console.log('WebUI initialized successfully'); } + updateCorrelationBadge(meta) { + try { + // Respect user pref to show/hide correlation badges; default: hidden + let show = false; + try { show = String(localStorage.getItem('WEBUI_SHOW_CORRELATION')||'') === '1'; } catch(_) {} + if (!show) return; + const rid = (meta && meta.requestId) ? String(meta.requestId) : ''; + const trace = (meta && (meta.traceparent || meta.traceId)) ? 
String(meta.traceparent || meta.traceId) : ''; + const ridEl = document.getElementById('reqid-badge'); + const trEl = document.getElementById('trace-badge'); + if (ridEl) { + if (rid) { + const short = rid.length > 8 ? rid.slice(0, 8) : rid; + ridEl.textContent = `RID: ${short}`; + ridEl.title = `Last X-Request-ID: ${rid}`; + ridEl.style.display = ''; + } else { + ridEl.style.display = 'none'; + } + } + if (trEl) { + if (trace) { + const shortT = trace.length > 12 ? trace.slice(0, 12) + '…' : trace; + trEl.textContent = `Trace: ${shortT}`; + trEl.title = `Last traceparent/X-Trace-Id: ${trace}`; + trEl.style.display = ''; + } else { + trEl.style.display = 'none'; + } + } + // Also update correlation snippets in endpoint sections + try { + const preEls = document.querySelectorAll('.endpoint-section pre[id$="_response"]'); + preEls.forEach((pre) => { + let box = pre.nextElementSibling; + if (!(box && box.classList && box.classList.contains('correlation-snippet'))) { + box = document.createElement('div'); + box.className = 'correlation-snippet'; + box.style.marginTop = '6px'; + box.style.color = 'var(--color-text-muted)'; + box.style.fontSize = '0.85em'; + try { box.setAttribute('aria-live', 'polite'); } catch(_){} + const textSpan = document.createElement('span'); + textSpan.className = 'corr-text'; + const copyRidBtn = document.createElement('button'); + copyRidBtn.type = 'button'; + copyRidBtn.className = 'btn btn-compact corr-copy-btn'; + copyRidBtn.textContent = 'Copy RID'; + copyRidBtn.style.marginLeft = '8px'; + copyRidBtn.addEventListener('click', async (e) => { + e.preventDefault(); + try { + const ok = await Utils.copyToClipboard(String(rid || '')); + if (ok && typeof Toast !== 'undefined' && Toast) Toast.success('Copied X-Request-ID'); + } catch (_) {} + }); + const copyTraceBtn = document.createElement('button'); + copyTraceBtn.type = 'button'; + copyTraceBtn.className = 'btn btn-compact corr-copy-btn'; + copyTraceBtn.textContent = 'Copy Trace'; + copyTraceBtn.style.marginLeft = '6px'; + copyTraceBtn.addEventListener('click', async (e) => { + e.preventDefault(); + try { + const ok = await Utils.copyToClipboard(String(trace || '')); + if (ok && typeof Toast !== 'undefined' && Toast) Toast.success('Copied trace'); + } catch (_) {} + }); + box.appendChild(textSpan); + box.appendChild(copyRidBtn); + box.appendChild(copyTraceBtn); + pre.parentNode.insertBefore(box, pre.nextSibling); + } + const shortReq = rid && rid.length > 12 ? rid.slice(0, 12) + '…' : (rid || '-'); + const shortTr = trace && trace.length > 24 ? 
trace.slice(0, 24) + '…' : (trace || '-'); + // Update text span if present; else fallback to textContent + const textNode = box.querySelector('.corr-text'); + const content = `Correlation: X-Request-ID=${shortReq} trace=${shortTr}`; + if (textNode) textNode.textContent = content; else box.textContent = content; + box.title = `X-Request-ID=${rid || '-'} traceparent/X-Trace-Id=${trace || '-'}`; + }); + } catch (_) { /* ignore */ } + } catch (e) { /* ignore */ } + } + + // Observe DOM insertions and migrate inline handlers quickly to avoid CSP blocks + installCSPGuard() { + // Track already-migrated elements to avoid repeated work + const migratedElements = new WeakSet(); + + const migrateNode = (node, force = false) => { + try { + if (!node || node.nodeType !== 1) return; + if (!force && migratedElements.has(node)) return; + if (window.WebUISanitizer && typeof window.WebUISanitizer.migrateInlineHandlers === 'function') { + window.WebUISanitizer.migrateInlineHandlers(node); + } else { + this.migrateInlineHandlers(node); + } + migratedElements.add(node); + } catch (_) {} + }; + try { + const target = document.querySelector('.content-container') || document.getElementById('main-content-area') || document.body; + if (!target) return; + const mo = new MutationObserver((mutations) => { + for (const m of mutations) { + if (m.type === 'childList') { + m.addedNodes && m.addedNodes.forEach((n) => { if (n && n.nodeType === 1) migrateNode(n); }); + } else if (m.type === 'attributes') { + if (m.attributeName && m.attributeName.startsWith('on')) { + // Force re-migration when inline handlers change + migrateNode(m.target, true); + } + } + } + }); + mo.observe(target, { subtree: true, childList: true, attributes: true, attributeFilter: ['onclick','onchange','oninput','onsubmit','onkeydown','onkeyup','onload','onerror'] }); + this._cspGuardObserver = mo; + } catch (_) {} + // Bubble-phase guard for essential interactions only + const essentialEvents = ['click', 'change', 'submit', 'input', 'keydown', 'keyup']; + + const handleEvent = (e) => { + const path = (e.composedPath && e.composedPath()) || []; + for (const el of path) { + if (el && el.nodeType === 1) migrateNode(el); + } + }; + + // Simple debounce for high-frequency events (typing/input) + const makeDebounced = (fn, wait = 60) => { + let t; + return (e) => { + if (t) clearTimeout(t); + t = setTimeout(() => fn(e), wait); + }; + }; + const debouncedInput = makeDebounced(handleEvent, 60); + const debouncedKeydown = makeDebounced(handleEvent, 60); + + essentialEvents.forEach((evt) => { + try { + const handler = (evt === 'input') ? debouncedInput + : (evt === 'keydown') ? debouncedKeydown + : handleEvent; + // Use bubble phase to reduce overhead vs capture + document.addEventListener(evt, handler, false); + } catch (_) {} + }); + } + async applyFeatureVisibilityFromServer() { try { const base = (window.apiClient && window.apiClient.baseUrl) ? 
window.apiClient.baseUrl : window.location.origin; @@ -77,10 +291,25 @@ class WebUI { ], }; - const hide = (selector) => { const el = document.querySelector(selector); if (el) el.style.display = 'none'; }; + const applyHiddenState = (selector, hidden) => { + const elements = document.querySelectorAll(selector); + elements.forEach((el) => { + if (!el) return; + if (hidden) { + try { el.dataset.capabilityHidden = 'true'; } catch (_) { el.setAttribute('data-capability-hidden', 'true'); } + el.style.display = 'none'; + } else { + if (el.dataset) { + delete el.dataset.capabilityHidden; + } + el.removeAttribute('data-capability-hidden'); + el.style.display = ''; + } + }); + }; Object.entries(capabilityToSelectors).forEach(([cap, selectors]) => { const enabled = !!caps[cap]; - if (!enabled) selectors.forEach(hide); + selectors.forEach((selector) => applyHiddenState(selector, !enabled)); }); } catch (e) { // Non-fatal @@ -88,6 +317,72 @@ class WebUI { } } + initSimpleAdvancedToggle() { + try { + const toggle = document.getElementById('toggle-advanced'); + const label = document.getElementById('advanced-toggle-label'); + if (!toggle || !label) return; + + // Determine default visibility: single-user -> hide advanced by default + let saved = Utils.getFromStorage('show-advanced-panels'); + let defaultShow = true; + try { + if (window.apiClient && (window.apiClient.authMode === 'single-user')) { + defaultShow = false; + } + } catch (_) {} + const show = (typeof saved === 'boolean') ? saved : defaultShow; + toggle.checked = !!show; + + const apply = () => { + const wantShow = !!toggle.checked; + this.setAdvancedPanelsVisible(wantShow); + Utils.saveToStorage('show-advanced-panels', wantShow); + if (!wantShow) { + const allowed = new Set(['simple', 'general']); + const current = this.activeTopTabButton ? this.activeTopTabButton.dataset.toptab : ''; + if (!allowed.has(current || '')) { + const btn = document.getElementById('top-tab-simple'); + if (btn) this.activateTopTab(btn); + } + } + }; + + toggle.addEventListener('change', apply); + apply(); + } catch (e) { /* ignore */ } + } + + setAdvancedPanelsVisible(visible) { + try { + const allowed = new Set(['simple', 'general']); + document.querySelectorAll('.top-tab-button').forEach((btn) => { + const t = btn.dataset.toptab; + if (!t) return; + if (allowed.has(t)) { btn.style.display = ''; return; } + if (btn.getAttribute('data-capability-hidden') === 'true') { + btn.style.display = 'none'; + return; + } + btn.style.display = visible ? '' : 'none'; + }); + // Hide corresponding subtab rows when advanced hidden + const rows = document.querySelectorAll('.sub-tab-row'); + const advancedTargets = new Set(['chat', 'media', 'rag', 'workflows', 'prompts', 'notes', 'watchlists', 'persona', 'personalization', 'evaluations', 'keywords', 'embeddings', 'research', 'chatbooks', 'audio', 'admin', 'mcp']); + rows.forEach((row) => { + const id = row.id || ''; + if (!id) return; + const t = id.endsWith('-subtabs') ? id.slice(0, -8) : id; + if (!advancedTargets.has(t)) return; + if (row.getAttribute('data-capability-hidden') === 'true') { + row.style.display = 'none'; + return; + } + row.style.display = visible ? 
'' : 'none'; + }); + } catch (e) { /* ignore */ } + } + loadTheme() { const savedTheme = Utils.getFromStorage('theme') || 'light'; this.setTheme(savedTheme); @@ -157,8 +452,29 @@ class WebUI { await this.activateSubTab(firstSubTab); } } else { - // Handle tabs without sub-tabs (like Global Settings) - this.showContent(topTabName); + // Handle tabs without sub-tabs + // Map known top-level tabs to their content IDs + let contentId = topTabName; + if (topTabName === 'simple') { + // The Simple page uses 'tabSimpleLanding' as its content container + contentId = 'tabSimpleLanding'; + // Ensure Simple group scripts are loaded so its initializer is available + try { + if (window.ModuleLoader && typeof window.ModuleLoader.ensureGroupScriptsLoaded === 'function') { + await window.ModuleLoader.ensureGroupScriptsLoaded('simple'); + } + } catch (e) { + console.debug('ModuleLoader failed to load simple group scripts', e); + } + } + + this.showContent(contentId); + + // When showing Simple landing directly, run its initializer and mount shared chat + if (contentId === 'tabSimpleLanding') { + try { if (typeof window.initializeSimpleLanding === 'function') window.initializeSimpleLanding(); } catch (_) {} + try { if (window.SharedChatPortal && typeof window.SharedChatPortal.mount === 'function') window.SharedChatPortal.mount('simple'); } catch (_) {} + } } // Save active tab to storage @@ -192,6 +508,21 @@ class WebUI { // Get content ID and load group const contentId = tabButton.dataset.contentId; const loadGroup = tabButton.dataset.loadGroup; + // Infer group for loader when tabs have no explicit loadGroup + let loaderGroup = loadGroup; + if (!loaderGroup && contentId) { + if (contentId.startsWith('tabSimple')) loaderGroup = 'simple'; + else if (contentId.startsWith('tabChat')) loaderGroup = 'chat'; + else if (contentId.startsWith('tabAudio')) loaderGroup = 'audio'; + else if (contentId.startsWith('tabPrompts')) loaderGroup = 'prompts'; + else if (contentId.startsWith('tabRAG')) loaderGroup = 'rag'; + else if (contentId.startsWith('tabEvals') || contentId.startsWith('tabEvaluations')) loaderGroup = 'evaluations'; + else if (contentId.startsWith('tabKeywords')) loaderGroup = 'keywords'; + else if (contentId.startsWith('tabJobs')) loaderGroup = 'jobs'; + else if (contentId.startsWith('tabMedia')) loaderGroup = 'media'; + else if (contentId.startsWith('tabMaintenance')) loaderGroup = 'maintenance'; + else if (contentId.startsWith('tabAuth')) loaderGroup = 'auth'; + } try { if (contentId) this.activeSubTabButton.setAttribute('aria-controls', contentId); } catch (e) { /* ignore */ } // Load content if not already loaded @@ -217,9 +548,26 @@ class WebUI { } } + // Ensure per-group scripts are loaded on demand (keeps initial bundle small) + try { + if (loaderGroup && window.ModuleLoader && typeof window.ModuleLoader.ensureGroupScriptsLoaded === 'function') { + await window.ModuleLoader.ensureGroupScriptsLoaded(loaderGroup); + } + } catch (e) { + console.debug('ModuleLoader ensureGroupScriptsLoaded failed for', loaderGroup, e); + try { + if (typeof Toast !== 'undefined' && Toast) { + Toast.warning(`Some features may be unavailable for ${loaderGroup} (script load failed)`); + } + } catch (_) {} + } + // Show the content this.showContent(contentId); + // Re-initialize form handlers for any newly injected content (e.g., file inputs) + try { this.initFormHandlers(); } catch (_) {} + // Save active sub-tab to storage Utils.saveToStorage('active-sub-tab', contentId); @@ -231,6 +579,15 @@ class WebUI { if (contentId 
=== 'tabChatCompletions' && typeof initializeChatCompletionsTab === 'function') { initializeChatCompletionsTab(); } + if (contentId === 'tabSimpleLanding' && typeof window.initializeSimpleLanding === 'function') { + window.initializeSimpleLanding(); + } + if (contentId === 'tabChatCompletions' && window.SharedChatPortal && typeof window.SharedChatPortal.mount === 'function') { + window.SharedChatPortal.mount('advanced'); + } + if (contentId === 'tabSimpleLanding' && window.SharedChatPortal && typeof window.SharedChatPortal.mount === 'function') { + window.SharedChatPortal.mount('simple'); + } if (contentId === 'tabWebScrapingIngest' && typeof initializeWebScrapingIngestTab === 'function') { initializeWebScrapingIngestTab(); } @@ -248,13 +605,52 @@ class WebUI { initializeDictionariesTab(); } + if (contentId && (contentId.startsWith('tabAudio') || contentId === 'tabTranscriptSeg') && typeof bindAudioTabHandlers === 'function') { + bindAudioTabHandlers(); + } + if (contentId === 'tabAudioStreaming' && window.initializeAudioStreamingTab) { + try { window.initializeAudioStreamingTab(); } catch (_) {} + } + + // Metrics tab(s) + if (contentId && contentId.startsWith('tabMetrics') && typeof window.initializeMetricsTab === 'function') { + try { window.initializeMetricsTab(contentId); } catch (_) {} + } + + // Flashcards tab + if (contentId && contentId.startsWith('tabFlashcards') && typeof initializeFlashcardsTab === 'function') { + initializeFlashcardsTab(contentId); + } + if (contentId && contentId.startsWith('tabMedia') && typeof bindMediaCommonHandlers === 'function') { + bindMediaCommonHandlers(); + } + + // Vector Stores tab + if (contentId === 'tabVectorStores' && window.initializeVectorStoresTab) { + try { window.initializeVectorStoresTab(); } catch (_) {} + } + + // Personalization tab + if (contentId === 'tabPersonalization' && typeof window.initializePersonalizationTab === 'function') { + window.initializePersonalizationTab(); + } + + // Workflows tab(s) + if (contentId && contentId.startsWith('tabWorkflows') && typeof window.initializeWorkflowsTab === 'function') { + try { window.initializeWorkflowsTab(contentId); } catch (_) {} + } + // Initialize model dropdowns for tabs that have LLM selection // This includes chat, media processing, and evaluation tabs const tabsWithModelSelection = [ 'tabChatCompletions', 'tabCharacterChat', 'tabConversations', 'tabMediaIngestion', 'tabMediaProcessingNoDB', 'tabEvalsOpenAI', 'tabEvalsGEval', - 'tabWebScrapingIngest', 'tabMultiItemAnalysis' + 'tabWebScrapingIngest', 'tabMultiItemAnalysis', + // Flashcards Import panel includes a model selector for generation + 'tabFlashcardsImport', + // Simple landing has model selects + 'tabSimpleLanding' ]; if (tabsWithModelSelection.includes(contentId)) { @@ -277,46 +673,33 @@ class WebUI { } async loadContentGroup(groupName, targetContentId) { - const response = await fetch(`tabs/${groupName}_content.html`); + // Resolve relative to current page to avoid base-path issues + const url = new URL(`tabs/${groupName}_content.html`, window.location.href).toString(); + const response = await fetch(url); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status} for tabs/${groupName}_content.html`); } const html = await response.text(); const mainContentArea = document.getElementById('main-content-area'); - // Ensure inline scripts inside tab HTML are executed + // Sanitize string first so the browser never sees inline handler attributes + // during parsing (avoids CSP script-src-attr violations). 
+ const sanitizedHtml = this.sanitizeInlineHandlersAndScripts(html); const temp = document.createElement('div'); - temp.innerHTML = html; - const scripts = Array.from(temp.querySelectorAll('script')); - scripts.forEach(s => s.parentNode && s.parentNode.removeChild(s)); - mainContentArea.insertAdjacentHTML('beforeend', temp.innerHTML); - // For migrated groups, skip executing inline scripts (no eval) and only load external src scripts. - const MIGRATED_GROUPS = new Set(['keywords', 'jobs', 'rag', 'evaluations', 'admin']); - for (const s of scripts) { - try { - if (s.src) { - const newScript = document.createElement('script'); - if (s.type) newScript.type = s.type; - newScript.src = s.src; - document.body.appendChild(newScript); - document.body.removeChild(newScript); - } else { - // Inline script: only execute for non-migrated groups - if (!MIGRATED_GROUPS.has(groupName)) { - const code = s.textContent || ''; - (0, eval)(code); - } else { - console.debug(`Skipped inline script eval for migrated group: ${groupName}`); - } - } - } catch (e) { - console.error('Failed to execute inline script for group', groupName, e); + temp.innerHTML = sanitizedHtml; + // Convert preserved handler markers to listeners BEFORE insertion + try { + if (window.WebUISanitizer && typeof window.WebUISanitizer.migrateInlineHandlers === 'function') { + window.WebUISanitizer.migrateInlineHandlers(temp); + } else { + this.migrateInlineHandlers(temp); } + } catch (_) {} + // Move the sanitized nodes into the live DOM to preserve bound listeners + while (temp.firstChild) { + mainContentArea.appendChild(temp.firstChild); } - try { - window.__groupScriptEval = window.__groupScriptEval || {}; - window.__groupScriptEval[groupName] = (window.__groupScriptEval[groupName] || 0) + scripts.length; - } catch (e) { /* ignore */ } + // Group-specific scripts are loaded via ModuleLoader when the tab is activated. // Re-initialize form handlers for newly loaded content this.initFormHandlers(); @@ -339,6 +722,33 @@ class WebUI { } } + // Remove all +

@@ -529,6 +502,8 @@ Recent Alerts (Compact); GET/PUT/POST
@@ -1023,6 +999,8 @@ Recent Notifications
@@ -1387,8 +1366,8 @@
@@ -1397,6 +1376,8 @@ Status; GET/POST/DELETE
@@ -1448,10 +1430,10 @@
@@ -1463,15 +1445,15 @@
@@ -1483,6 +1465,8 @@ Status; GET/PUT
@@ -1614,9 +1599,9 @@
@@ -1626,7 +1611,7 @@
@@ -1635,10 +1620,12 @@ Status
@@ -1807,9 +1795,9 @@ Result
@@ -1823,11 +1811,13 @@ List all configured per-user overrides and load one into the editor above.
@@ -1890,12 +1881,14 @@ Result
@@ -1989,7 +1986,7 @@
@@ -2079,6 +2076,8 @@ Audit Log
@@ -2275,7 +2275,7 @@
@@ -2298,7 +2298,7 @@
@@ -2364,7 +2364,7 @@ AuthNZ Security Alerts: Inspect delivery configuration, per-sink thresholds, and recent dispatch health.
@@ -2388,56 +2388,7 @@ AuthNZ Security Alerts; Raw Response
@@ -2445,16 +2396,16 @@ System Status and Health Checks: Monitor system health and status across all services.
@@ -2481,37 +2432,12 @@ Ephemeral Cleanup Settings
@@ -2555,8 +2481,8 @@
@@ -2567,8 +2493,8 @@ Daily Usage
@@ -2592,12 +2518,14 @@ Manual Aggregate Day
diff --git a/tldw_Server_API/WebUI/tabs/audio_content.html b/tldw_Server_API/WebUI/tabs/audio_content.html
index eba30fcc3..da0c69f1a 100644
--- a/tldw_Server_API/WebUI/tabs/audio_content.html
+++ b/tldw_Server_API/WebUI/tabs/audio_content.html
@@ -32,11 +32,20 @@ Provider Status (Kokoro)
+ First time using TTS? Run the Setup Wizard to install dependencies and configure providers (OpenAI keys, local models like Kokoro).
+ See the quick guide: Getting Started — STT/TTS.
@@ -52,7 +61,7 @@ Provider Status
@@ -87,7 +96,7 @@ Provider Status
@@ -146,7 +155,7 @@ Voice Cloning
@@ -154,15 +163,15 @@ Voice Cloning; Idle (recording overrides file); Recording Settings
@@ -279,6 +287,14 @@ ElevenLabs Options; Response
@@ -211,12 +211,17 @@ Interactive Chat Interface
+ This same interface is now pinned to the Simple tab for quick access.
@@ -247,16 +252,16 @@ Interactive Chat Interface
@@ -545,7 +551,7 @@ POST /api/v1/characters/import - Import Character from File
@@ -562,7 +568,7 @@ GET /api/v1/characters/ - List All Characters
@@ -596,7 +602,7 @@ POST /api/v1/characters/ - Create Character (image_base64 should be a base64 encoded string of the image, without the 'data:image/...;base64,' prefix. List/Dict fields can be JSON strings or actual lists/dicts if using a client that sends structured JSON.)
@@ -609,7 +615,7 @@ GET /api/v1/characters/{character_id} - Get Character
@@ -631,7 +637,7 @@ PUT /api/v1/characters/{character_id} - Update Character (To remove an image, pass image_base64: null or an empty string.)
@@ -644,7 +650,7 @@ DELETE /api/v1/characters/{character_id} - Delete Character
@@ -666,7 +672,7 @@ GET /api/v1/characters/{character_id}/export - Export Character
@@ -1072,11 +1078,14 @@
     return a.display_name.localeCompare(b.display_name);
   });
-  // Group models by provider
+  // Group models by provider, highlighting configured providers and disabling unconfigured ones
   sortedProviders.forEach(provider => {
     if (provider.models && provider.models.length > 0) {
-      const safeGroup = Utils.escapeHtml(String(provider.display_name || provider.name || 'Provider'));
-      optionsHtml += ``;
+      const isConfigured = !!provider.is_configured;
+      const safeGroupBase = Utils.escapeHtml(String(provider.display_name || provider.name || 'Provider'));
+      const groupLabel = isConfigured ? `✅ ${safeGroupBase}` : `${safeGroupBase} (Not Configured)`;
+      const labelStyle = isConfigured ? '' : ' style="color:#888"';
+      optionsHtml += ``;
       provider.models.forEach(model => {
         const value = `${provider.name}/${model}`;
@@ -1088,7 +1097,9 @@
         defaultModel = value;
       }
-      optionsHtml += ``;
+      const disabled = isConfigured ? '' : ' disabled';
+      const suffix = isConfigured ? '' : ' (requires API key)';
+      optionsHtml += ``;
       });
       optionsHtml += '';
@@ -1213,18 +1224,22 @@
     return a.display_name.localeCompare(b.display_name);
   });
-  // Build groups and options via DOM APIs
+  // Build groups and options via DOM APIs, with configured highlighting
   sortedProviders.forEach(provider => {
     if (provider.models && provider.models.length > 0) {
       const group = document.createElement('optgroup');
-      group.label = String(provider.display_name || provider.name || 'Provider');
+      const isConfigured = !!provider.is_configured;
+      const base = String(provider.display_name || provider.name || 'Provider');
+      group.label = isConfigured ? `✅ ${base}` : `${base} (Not Configured)`;
+      if (!isConfigured) group.setAttribute('style', 'color:#888');
       provider.models.forEach(model => {
         const value = `${provider.name}/${model}`;
         const isDefault = provider.name === defaultProvider && provider.default_model === model;
         if (isDefault) defaultModel = value;
         const opt = document.createElement('option');
         opt.value = value;
-        opt.textContent = `${model}${isDefault ? ' (default)' : ''}`;
+        opt.textContent = `${model}${isDefault ? ' (default)' : ''}${isConfigured ? '' : ' (requires API key)'}`;
+        if (!isConfigured) opt.disabled = true;
         if (isDefault) opt.dataset.default = 'true';
         group.appendChild(opt);
       });
@@ -1561,7 +1576,7 @@

@@ -1576,7 +1591,7 @@ List all available characters.
@@ -1596,7 +1611,7 @@
@@ -1628,7 +1643,7 @@
@@ -1648,7 +1663,7 @@
@@ -1691,7 +1706,7 @@
@@ -1722,7 +1737,7 @@
@@ -1749,7 +1764,7 @@
@@ -1799,7 +1814,7 @@
@@ -1830,7 +1845,7 @@
@@ -1850,7 +1865,7 @@
@@ -1879,7 +1894,7 @@
diff --git a/tldw_Server_API/WebUI/tabs/conversations_content.html b/tldw_Server_API/WebUI/tabs/conversations_content.html
index 1174676d3..a96bdedb5 100644
--- a/tldw_Server_API/WebUI/tabs/conversations_content.html
+++ b/tldw_Server_API/WebUI/tabs/conversations_content.html
@@ -121,8 +121,8 @@
diff --git a/tldw_Server_API/WebUI/tabs/flashcards_content.html b/tldw_Server_API/WebUI/tabs/flashcards_content.html
new file mode 100644
index 000000000..186048ab6
--- /dev/null
+++ b/tldw_Server_API/WebUI/tabs/flashcards_content.html
@@ -0,0 +1,328 @@
+ FLASHCARDS / Decks & Cards: Decks; Cards ("If unchecked, tags are replaced."); Create Card; Results
+ FLASHCARDS / Review: Review Result
+ FLASHCARDS / Import & Export: Import (TSV/CSV-like); Export; Import (JSON / JSONL file); Create From Selection (Media/Notes)
+
diff --git a/tldw_Server_API/WebUI/tabs/llm_providers_content.html b/tldw_Server_API/WebUI/tabs/llm_providers_content.html
index 7d2bde815..434af3d71 100644
--- a/tldw_Server_API/WebUI/tabs/llm_providers_content.html
+++ b/tldw_Server_API/WebUI/tabs/llm_providers_content.html
@@ -5,7 +5,8 @@ GET /api/v1/llm/providers - List LLM Providers
- Get a list of all configured LLM providers with their available models.
+ Get a list of all available LLM providers and models (catalog + config).
+ Configured/usable providers are highlighted; others are shown for discovery.

diff --git a/tldw_Server_API/WebUI/tabs/metrics_content.html b/tldw_Server_API/WebUI/tabs/metrics_content.html
index cbea6e6de..c2f58cb19 100644
--- a/tldw_Server_API/WebUI/tabs/metrics_content.html
+++ b/tldw_Server_API/WebUI/tabs/metrics_content.html
@@ -5,13 +5,13 @@ System Metrics Dashboard: Real-time monitoring of system performance and health metrics.
@@ -80,15 +80,15 @@ disconnected
@@ -126,7 +126,7 @@ Get metrics in Prometheus format for scraping by monitoring systems.
@@ -156,7 +156,7 @@
@@ -174,7 +174,7 @@ Check the health status of the metrics collection service.
@@ -203,7 +203,7 @@
@@ -248,9 +248,9 @@
@@ -668,7 +668,7 @@ Metrics Analysis Tools
@@ -721,7 +721,7 @@ Alerts Configuration
diff --git a/tldw_Server_API/WebUI/tabs/notes_content.html b/tldw_Server_API/WebUI/tabs/notes_content.html
index 125d7e17a..62e0cae41 100644
--- a/tldw_Server_API/WebUI/tabs/notes_content.html
+++ b/tldw_Server_API/WebUI/tabs/notes_content.html
@@ -154,6 +154,9 @@
+
+ +
diff --git a/tldw_Server_API/WebUI/tabs/personalization_content.html b/tldw_Server_API/WebUI/tabs/personalization_content.html
index 3991dfb25..96d11af56 100644
--- a/tldw_Server_API/WebUI/tabs/personalization_content.html
+++ b/tldw_Server_API/WebUI/tabs/personalization_content.html
@@ -5,9 +5,9 @@ Personalization Dashboard (Preview): Opt-in and basic profile. This preview allows viewing and tweaking weights and adding a memory.
@@ -18,11 +18,7 @@ Weights; Add Memory
@@ -30,13 +26,8 @@ Add Memory

diff --git a/tldw_Server_API/WebUI/tabs/vector_stores_content.html b/tldw_Server_API/WebUI/tabs/vector_stores_content.html
index 123e57f8e..80436db1b 100644
--- a/tldw_Server_API/WebUI/tabs/vector_stores_content.html
+++ b/tldw_Server_API/WebUI/tabs/vector_stores_content.html
@@ -22,18 +22,18 @@ Create Vector Store; Vector Stores
@@ -53,9 +53,9 @@ Edit Store
@@ -90,14 +90,14 @@ Create/Update From Media
@@ -132,22 +132,22 @@ Vectors
@@ -158,21 +158,21 @@ Vectors
@@ -187,14 +187,14 @@ Admin (Index & Tuning); Note: ef_search applies to pgvector only; ignored for Chroma.
@@ -208,7 +208,7 @@ Admin (Index & Tuning)
@@ -667,7 +667,7 @@ Admin: Discover Users
@@ -689,7 +689,7 @@ Admin: Discover Users
    ---
diff --git a/tldw_Server_API/WebUI/tabs/webscraping_content.html b/tldw_Server_API/WebUI/tabs/webscraping_content.html
index 7ab3cb903..607308f1f 100644
--- a/tldw_Server_API/WebUI/tabs/webscraping_content.html
+++ b/tldw_Server_API/WebUI/tabs/webscraping_content.html
@@ -234,8 +234,8 @@ Other Options
@@ -309,7 +309,7 @@
@@ -348,7 +348,7 @@
@@ -367,7 +367,7 @@ Get web scraping service status and active jobs
@@ -388,9 +388,7 @@
@@ -407,9 +405,7 @@
@@ -426,9 +422,7 @@
@@ -443,9 +437,7 @@ Initialize the web scraping service. No request body required. Uses server configuration.
@@ -458,9 +450,7 @@ Shutdown the web scraping service. No request body required. Shuts down gracefully.
@@ -479,9 +469,7 @@
@@ -512,9 +500,7 @@
@@ -533,21 +519,8 @@
         
     
    -
    -
    diff --git a/tldw_Server_API/WebUI/tabs/workflows_content.html b/tldw_Server_API/WebUI/tabs/workflows_content.html
    index e556755dc..9f90c0613 100644
    --- a/tldw_Server_API/WebUI/tabs/workflows_content.html
    +++ b/tldw_Server_API/WebUI/tabs/workflows_content.html
    @@ -271,57 +271,57 @@ 

    Configuration Cheat Sheet

    Read-only snapshot of effective Workflows settings
    - +
    - +
    - + - - - - - - - - + + + + + + + + - +
    -