AllenInstitute · BrianWhitneyAI · May 21, 2026 · Apr 9, 2026 · Apr 15, 2026 · Apr 15, 2026
@@ -0,0 +1,113 @@
+# Manually dispatched workflow that benchmarks one branch against a base branch.
+name: Query Benchmark
+
+on:
+  workflow_dispatch:
+    inputs:
+      base_branch:
+        description: "Base branch to compare against"
+        required: false
+        type: string
+        default: "main"
+      compare_branch:
+        description: "Branch to benchmark"
+        required: true
+        type: string
+      # The two counts are declared as numbers rather than free-form strings so the
+      # dispatch form treats them as numeric input.
+      iterations:
+        description: "Timed iterations per task (default 5)"
+        required: false
+        type: number
+        default: 5
+      warmup:
+        description: "Warmup rounds before timing (default 1)"
+        required: false
+        type: number
+        default: 1
+
+# Least-privilege token: the workflow only needs to read repository contents for checkout.
+permissions:
+  contents: read
+
+jobs:
+  benchmark:
+    name: >-
+      Regression (${{ github.event.inputs.base_branch }} vs ${{ github.event.inputs.compare_branch }})
+    # Single job on a single VM, on purpose. If each branch ran in its own job, GitHub
+    # could schedule them on different physical machines with different CPU speeds,
+    # cache sizes, or competing workloads. A ~15% hardware variance between VMs would
+    # mask the small regressions we actually care about. Running the branches
+    # back-to-back on the same VM gives both measurements the same hardware baseline,
+    # so deltas reflect code differences only.
+    runs-on: ubuntu-latest
+    # 180 minutes: fixture download + full task suite (including change_grouping on
+    # the 10m fixture) for each of the 2 branches.
+    timeout-minutes: 180
+
+    steps:
+      # The compare branch is checked out and benchmarked first.
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.compare_branch }}
+
+      # Fixtures cached by version; downloaded once, reused by both branch runs.
+      # Must come after checkout so git clean -ffdx does not wipe them.
+      # When the fixture set changes, bump the cache key suffix together with the
+      # version segment of the download URL below.
+      - name: Cache benchmark fixtures
+        id: fixture-cache
+        uses: actions/cache@v4
+        with:
+          path: packages/web/fixtures
+          key: benchmark-fixtures-v1
+
+      - name: Download benchmark fixtures
+        if: steps.fixture-cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p packages/web/fixtures
+          BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1
+          for scale in 100k 1m 10m; do
+            # Retry transient network/S3 errors instead of failing the whole run on one blip.
+            curl -fL --retry 3 --retry-delay 5 \
+              "$BASE/synthetic-$scale.parquet" \
+              -o "packages/web/fixtures/synthetic-$scale.parquet"
+          done
+
+      # Node 20 toolchain with setup-node's built-in npm cache enabled.
+      - uses: actions/setup-node@v4
+        with:
+          cache: npm
+          node-version: '20'
+
+      # Install dependencies for the checked-out (compare) branch.
+      - name: Install dependencies
+        run: npm ci
+
+      # Chromium plus its OS-level dependencies, used for the headless benchmark runs.
+      - name: Install Playwright Chromium
+        working-directory: packages/web
+        run: npx playwright install chromium --with-deps
+
+      # User-supplied inputs reach the shell through environment variables rather than
+      # `${{ }}` interpolation inside `run:`. Interpolation pastes the raw text into the
+      # script before the shell parses it, so a crafted value could inject commands.
+      - name: Run benchmark (${{ github.event.inputs.compare_branch }})
+        run: node scripts/run-regression.js --iterations "$INPUT_ITERATIONS" --warmup "$INPUT_WARMUP"
+        working-directory: packages/web
+        env:
+          BENCHMARK_BRANCH: ${{ github.event.inputs.compare_branch }}
+          INPUT_ITERATIONS: ${{ github.event.inputs.iterations }}
+          INPUT_WARMUP: ${{ github.event.inputs.warmup }}
+
+      # Move the compare result out of the workspace so that, after the base run, the
+      # benchmark-results-*.json glob in packages/web matches only the base result.
+      - name: Save compare branch results
+        run: mv packages/web/benchmark-results-*.json /tmp/benchmark-compare.json
+
+      # clean: false keeps untracked files (the downloaded fixtures) across the branch switch.
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.base_branch }}
+          clean: false
+
+      - name: Install dependencies (base branch)
+        run: npm ci
+
+      # The base branch may resolve a different Playwright version than the compare
+      # branch; re-running the install fetches the matching browser build if needed.
+      # OS-level dependencies were already installed by the earlier --with-deps run.
+      - name: Install Playwright Chromium (base branch)
+        run: npx playwright install chromium
+        working-directory: packages/web
+
+      # Deliberately no --skip-build: because the checkout above does not clean the
+      # workspace, the compare branch's build output is still on disk. Skipping the
+      # build would presumably benchmark that stale bundle a second time instead of
+      # the base branch's code.
+      # Inputs are passed via env vars (not interpolated into `run:`) to avoid shell injection.
+      - name: Run benchmark (${{ github.event.inputs.base_branch }})
+        run: node scripts/run-regression.js --iterations "$INPUT_ITERATIONS" --warmup "$INPUT_WARMUP"
+        working-directory: packages/web
+        env:
+          BENCHMARK_BRANCH: ${{ github.event.inputs.base_branch }}
+          INPUT_ITERATIONS: ${{ github.event.inputs.iterations }}
+          INPUT_WARMUP: ${{ github.event.inputs.warmup }}
+
+      - name: Generate comparison
+        run: |
+          # The compare result was moved to /tmp by an earlier step, so any result file
+          # still in packages/web belongs to the base branch. Fail with a clear message
+          # if the base run produced none, rather than passing an empty path along.
+          shopt -s nullglob
+          base_files=(packages/web/benchmark-results-*.json)
+          if [ "${#base_files[@]}" -eq 0 ]; then
+            echo "::error::No base-branch benchmark results found in packages/web"
+            exit 1
+          fi
+          node packages/web/scripts/compare-results.js "${base_files[0]}" /tmp/benchmark-compare.json >> "$GITHUB_STEP_SUMMARY"
+
+      # always(): keep whatever result files exist even when an earlier step failed.
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          path: |
+            packages/web/benchmark-results-*.json
+            /tmp/benchmark-compare.json
+          retention-days: 7
@@ -14,3 +14,8 @@ build
 *.tgz
 .env
 mise.toml
+
+# Benchmark runner output — generated by local and CI benchmark runs, not source
+packages/web/benchmark-results*.json
+# Generated parquet fixtures — produce with scripts/generate-fixtures.py
+packages/web/fixtures/
@@ -0,0 +1,118 @@
+Query benchmarking
+==================
+
+Three tools for measuring and monitoring DuckDB-WASM query performance.
+
+---
+
+Tool 1 — Local benchmark runner
+--------------------------------
+
+Runs the full task suite in headless Chromium against parquet fixtures, prints a p50/p95 timing table, and writes a result JSON for later comparison.
+
+**First-time setup**
+
+```bash
+# Run from the repository root; the subshell keeps later commands in the root directory
+(cd packages/web && npx playwright install chromium --with-deps)
+```
+
+**Download local fixtures** (one time; ~500 MB total)
+
+```bash
+BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1
+mkdir -p packages/web/fixtures
+curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet
+curl -fL "$BASE/synthetic-1m.parquet"   -o packages/web/fixtures/synthetic-1m.parquet
+curl -fL "$BASE/synthetic-10m.parquet"  -o packages/web/fixtures/synthetic-10m.parquet
+```
+
+**Run against local fixtures**
+
+```bash
+# All scales
+npm run benchmark --prefix packages/web -- --local
+
+# Single scale
+npm run benchmark --prefix packages/web -- --local --scale 100k
+
+# Override iteration/warmup counts
+npm run benchmark --prefix packages/web -- --local --scale 1m --iterations 10 --warmup 3
+```
+
+**Run against remote S3 parquets**
+
+```bash
+BENCHMARK_REAL_1M_URL=s3://your-bucket/file.parquet \
+  npm run benchmark --prefix packages/web -- --scale 1m
+```
+
+**Compare two result files**
+
+```bash
+npm run benchmark:compare --prefix packages/web -- \
+  packages/web/benchmark-results-main.json \
+  packages/web/benchmark-results-local.json
+```
+
+This prints a Markdown table with p50 deltas and regression/improvement badges (⚠️ ≥25% slower, ❌ ≥50% slower, ✅ ≥25% faster). Badges are suppressed for queries where either branch is under 500 ms, because percentage deltas on such fast queries are mostly noise.
+
+**Flags**
+
+| Flag | Description |
+|---|---|
+| `--local` | Use fixtures from `packages/web/fixtures/` instead of S3 URLs |
+| `--scale 100k\|1m\|10m` | Run a single fixture size |
+| `--full` | Run all scales with both cloud and local sources side-by-side |
+| `--iterations N` | Timed iterations per task (default 5) |
+| `--warmup N` | Warmup rounds before timing (default 1) |
+| `--skip-build` | Skip the webpack build step |
+| `--chromium` | Use Playwright's bundled Chromium instead of system Chrome |
+
+---
+
+Tool 2 — CI regression workflow
+---------------------------------
+
+`benchmark.yml` is a `workflow_dispatch` workflow that benchmarks two branches sequentially on the same VM and posts a Markdown comparison table to the workflow summary.
+
+Both branches run on the same machine to eliminate hardware variance — a ~15% CPU speed difference between VMs would mask the small regressions the tool is designed to catch.
+
+**Trigger it** from the Actions tab: select **Query Benchmark**, enter a `compare_branch` (your PR branch) and optionally override `base_branch` (default: `main`), `iterations`, and `warmup`.
+
+The workflow:
+1. Checks out the compare branch and downloads fixtures from S3 (cached by version)
+2. Runs `run-regression.js` → writes `benchmark-results-<compare>.json`
+3. Checks out the base branch (without wiping fixtures)
+4. Runs `run-regression.js` → writes `benchmark-results-<base>.json`
+5. Runs `compare-results.js` → posts the Markdown table to the step summary
+
+---
+
+Tool 3 — Dev console query timing
+-----------------------------------
+
+Enables per-query DuckDB timing in the running app without any build step.
+
+**Enable**
+
+In the browser DevTools console:
+
+```js
+localStorage.setItem("bff_query_timing", "1")
+```
+
+Then reload the page. Each DuckDB query will log its elapsed time to the console as it runs:
+
+```
+[duckdb] 12.3ms — [fetchAnnotations] SELECT DISTINCT ...
+[duckdb]  4.1ms — [getFiles] SELECT * FROM ...
+```
+
+**Disable**
+
+```js
+localStorage.removeItem("bff_query_timing")
+```
+
+Then reload.
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>BFF Benchmark</title>
+</head>
+<body>
+    <!-- Minimal page with no scripts of its own; presumably the benchmark bundle is
+         injected or loaded by the runner (TODO confirm). -->
+    <!-- Status placeholder showing the initial "Starting..." text; presumably updated
+         by the benchmark harness as it progresses (TODO confirm). -->
+    <p id="status">Starting...</p>
+</body>
+</html>