diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 000000000..4a3d8e957
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,113 @@
+name: Query Benchmark
+
+on:
+  workflow_dispatch:
+    inputs:
+      base_branch:
+        description: "Base branch to compare against"
+        required: false
+        type: string
+        default: "main"
+      compare_branch:
+        description: "Branch to benchmark"
+        required: true
+        type: string
+      iterations:
+        description: "Timed iterations per task (default 5)"
+        required: false
+        type: string
+        default: "5"
+      warmup:
+        description: "Warmup rounds before timing (default 1)"
+        required: false
+        type: string
+        default: "1"
+
+permissions:
+  contents: read
+
+jobs:
+  benchmark:
+    name: "Regression (${{ github.event.inputs.base_branch }} vs ${{ github.event.inputs.compare_branch }})"
+    runs-on: ubuntu-latest
+    # Both branches run sequentially in a single job on the same VM. This is intentional:
+    # if each branch ran in its own job, GitHub could schedule them on different physical
+    # machines with different CPU speeds, cache sizes, or competing workloads. A ~15%
+    # hardware variance between VMs would mask the small regressions we actually care about.
+    # Running back-to-back on the same VM ensures both measurements share the same hardware
+    # baseline, so deltas reflect code differences only.
+    # 180 minutes: fixture download + full task suite (including change_grouping on 10m) × 2 branches
+    timeout-minutes: 180
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.compare_branch }}
+
+      # Fixtures cached by version; downloaded once, reused by both branch runs.
+      # Must come after checkout so git clean -ffdx does not wipe them.
+      - name: Cache benchmark fixtures
+        id: fixture-cache
+        uses: actions/cache@v4
+        with:
+          path: packages/web/fixtures
+          key: benchmark-fixtures-v1
+
+      - name: Download benchmark fixtures
+        if: steps.fixture-cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p packages/web/fixtures
+          BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1
+          curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet
+          curl -fL "$BASE/synthetic-1m.parquet"   -o packages/web/fixtures/synthetic-1m.parquet
+          curl -fL "$BASE/synthetic-10m.parquet"  -o packages/web/fixtures/synthetic-10m.parquet
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: "npm"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Install Playwright Chromium
+        run: npx playwright install chromium --with-deps
+        working-directory: packages/web
+
+      # Numeric inputs reach the shell as env vars rather than by expression interpolation, so they cannot inject commands.
+      - name: Run benchmark (${{ github.event.inputs.compare_branch }})
+        run: node scripts/run-regression.js --iterations "$INPUT_ITERATIONS" --warmup "$INPUT_WARMUP"
+        working-directory: packages/web
+        env: { BENCHMARK_BRANCH: "${{ github.event.inputs.compare_branch }}", INPUT_ITERATIONS: "${{ github.event.inputs.iterations }}", INPUT_WARMUP: "${{ github.event.inputs.warmup }}" }
+
+      - name: Save compare branch results
+        run: mv packages/web/benchmark-results-*.json /tmp/benchmark-compare.json
+
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.base_branch }}
+          clean: false
+
+      - name: Install dependencies (base branch)
+        run: npm ci
+
+      # No --skip-build here: the bundle left on disk was built from the compare branch, so the base must rebuild or both runs time the same code.
+      - name: Run benchmark (${{ github.event.inputs.base_branch }})
+        run: node scripts/run-regression.js --iterations "$INPUT_ITERATIONS" --warmup "$INPUT_WARMUP"
+        working-directory: packages/web
+        env: { BENCHMARK_BRANCH: "${{ github.event.inputs.base_branch }}", INPUT_ITERATIONS: "${{ github.event.inputs.iterations }}", INPUT_WARMUP: "${{ github.event.inputs.warmup }}" }
+
+      - name: Generate comparison
+        run: |
+          BASE_FILE=$(ls packages/web/benchmark-results-*.json | head -1)
+          node packages/web/scripts/compare-results.js "$BASE_FILE" /tmp/benchmark-compare.json >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          path: |
+            packages/web/benchmark-results-*.json
+            /tmp/benchmark-compare.json
+          retention-days: 7
diff --git a/.gitignore b/.gitignore
index b718d871a..86e656de9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,8 @@ build
 *.tgz
 .env
 mise.toml
+
+# Benchmark runner output — generated by CI, not source
+packages/web/benchmark-results*.json
+# Generated parquet fixtures — produce with scripts/generate-fixtures.py
+packages/web/fixtures/
diff --git a/dev-docs/07-query-benchmarking.md b/dev-docs/07-query-benchmarking.md
new file mode 100644
index 000000000..66c79d513
--- /dev/null
+++ b/dev-docs/07-query-benchmarking.md
@@ -0,0 +1,118 @@
+Query benchmarking
+==================
+
+Three tools for measuring and monitoring DuckDB-WASM query performance.
+
+---
+
+Tool 1 — Local benchmark runner
+--------------------------------
+
+Runs the full task suite in headless Chromium against parquet fixtures, prints a p50/p95 timing table, and writes a result JSON for later comparison.
+
+**First-time setup**
+
+```bash
+# Run from the repository root — every command in this guide assumes that working directory
+(cd packages/web && npx playwright install chromium --with-deps)
+```
+
+**Download local fixtures** (one time; ~500 MB total)
+
+```bash
+BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1
+mkdir -p packages/web/fixtures
+curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet
+curl -fL "$BASE/synthetic-1m.parquet"   -o packages/web/fixtures/synthetic-1m.parquet
+curl -fL "$BASE/synthetic-10m.parquet"  -o packages/web/fixtures/synthetic-10m.parquet
+```
+
+**Run against local fixtures**
+
+```bash
+# All scales
+npm run benchmark --prefix packages/web -- --local
+
+# Single scale
+npm run benchmark --prefix packages/web -- --local --scale 100k
+
+# Override iteration/warmup counts
+npm run benchmark --prefix packages/web -- --local --scale 1m --iterations 10 --warmup 3
+```
+
+**Run against remote S3 parquets**
+
+```bash
+BENCHMARK_REAL_1M_URL=s3://your-bucket/file.parquet \
+  npm run benchmark --prefix packages/web -- --scale 1m
+```
+
+**Compare two result files**
+
+```bash
+npm run benchmark:compare --prefix packages/web -- \
+  packages/web/benchmark-results-main.json \
+  packages/web/benchmark-results-local.json
+```
+
+This prints a Markdown table with p50 deltas and regression/improvement badges (⚠️ ≥25% slower, ❌ ≥50% slower, ✅ ≥25% faster). Badges are suppressed for queries where either branch is under 500ms — percentage deltas on fast queries are noise.
+
+**Flags**
+
+| Flag | Description |
+|---|---|
+| `--local` | Use fixtures from `packages/web/fixtures/` instead of S3 URLs |
+| `--scale 100k\|1m\|10m` | Run a single fixture size |
+| `--full` | Run all scales with both cloud and local sources side-by-side |
+| `--iterations N` | Timed iterations per task (default 5) |
+| `--warmup N` | Warmup rounds before timing (default 1) |
+| `--skip-build` | Skip the webpack build step |
+| `--chromium` | Use Playwright's bundled Chromium instead of system Chrome |
+
+---
+
+Tool 2 — CI regression workflow
+---------------------------------
+
+`benchmark.yml` is a `workflow_dispatch` workflow that benchmarks two branches sequentially on the same VM and posts a Markdown comparison table to the workflow summary.
+
+Both branches run on the same machine to eliminate hardware variance — a ~15% CPU speed difference between VMs would mask the small regressions the tool is designed to catch.
+
+**Trigger it** from the Actions tab: select **Query Benchmark**, enter a `compare_branch` (your PR branch) and optionally override `base_branch` (default: `main`), `iterations`, and `warmup`.
+
+The workflow:
+1. Checks out the compare branch and downloads fixtures from S3 (cached by version)
+2. Runs `run-regression.js` → writes `benchmark-results-<compare>.json`
+3. Checks out the base branch (without wiping fixtures)
+4. Runs `run-regression.js` → writes `benchmark-results-<base>.json`
+5. Runs `compare-results.js` → posts the Markdown table to the step summary
+
+---
+
+Tool 3 — Dev console query timing
+-----------------------------------
+
+Enables per-query DuckDB timing in the running app without any build step.
+
+**Enable**
+
+In the browser DevTools console:
+
+```js
+localStorage.setItem("bff_query_timing", "1")
+```
+
+Then reload the page. Each DuckDB query will log its elapsed time to the console as it runs:
+
+```
+[duckdb] 12.3ms — [fetchAnnotations] SELECT DISTINCT ...
+[duckdb]  4.1ms — [getFiles] SELECT * FROM ...
+```
+
+**Disable**
+
+```js
+localStorage.removeItem("bff_query_timing")
+```
+
+Then reload.
diff --git a/package-lock.json b/package-lock.json
index 1494be715..ff3c50ca1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16140,6 +16140,53 @@
         "node": ">=4"
       }
     },
+    "node_modules/playwright": {
+      "version": "1.59.1",
+      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
+      "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright-core": "1.59.1"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "fsevents": "2.3.2"
+      }
+    },
+    "node_modules/playwright-core": {
+      "version": "1.59.1",
+      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
+      "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "playwright-core": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/playwright/node_modules/fsevents": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
     "node_modules/please-upgrade-node": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/please-upgrade-node/-/please-upgrade-node-3.2.0.tgz",
@@ -21516,6 +21563,7 @@
         "fork-ts-checker-webpack-plugin": "6.x",
         "html-webpack-plugin": "5.x",
         "mini-css-extract-plugin": "2.x",
+        "playwright": "^1.59.1",
         "postcss-loader": "6.x",
         "postcss-preset-env": "7.x",
         "react-svg-loader": "^3.0.3",
diff --git a/packages/web/benchmark/index.html b/packages/web/benchmark/index.html
new file mode 100644
index 000000000..e56cde507
--- /dev/null
+++ b/packages/web/benchmark/index.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>BFF Benchmark</title>
+</head>
+<body>
+    <p id="status">Starting...</p>
+</body>
+</html>
diff --git a/packages/web/benchmark/src/index.ts b/packages/web/benchmark/src/index.ts
new file mode 100644
index 000000000..baba1d835
--- /dev/null
+++ b/packages/web/benchmark/src/index.ts
@@ -0,0 +1,191 @@
+import { BENCHMARK_TASKS, createServices } from "./tasks";
+import { BenchmarkConfig, BenchmarkResults, QueryResult, SourceResult } from "./types";
+import DatabaseServiceWebWorker from "../../src/services/DatabaseServiceWeb/duckdb-worker.worker";
+
+const DEFAULT_ITERATIONS = 5;
+const DEFAULT_WARMUP_ROUNDS = 1;
+
+// Shows a progress message in the page's #status element (when it exists) and echoes it
+// to the console. Under headless Playwright in CI there is no browser UI to watch, so the
+// console line is the only visible progress signal.
+function setStatus(msg: string) {
+    const statusNode = document.getElementById("status");
+    if (statusNode !== null) statusNode.textContent = msg;
+    console.log("[benchmark]", msg);
+}
+
+// Nearest-rank percentile over a pre-sorted array (p50/p95/p99 of the timed iterations; the upper
+// percentiles surface GC pauses and DuckDB cache misses the median would hide). NaN when empty.
+function percentile(sorted: number[], p: number): number {
+    if (sorted.length === 0) return NaN;
+    const rank = Math.ceil((p / 100) * sorted.length) - 1;
+    return sorted[Math.min(sorted.length - 1, Math.max(0, rank))];
+}
+
+// Fisher-Yates shuffle returning a new array (the input is left untouched). Task order is
+// re-randomized every timed iteration so a consistently slow task doesn't inflate whatever
+// follows it (DuckDB buffer pool and OS page cache warm up over repeated runs).
+function shuffle<T>(arr: T[]): T[] {
+    const deck = [...arr];
+    for (let hi = deck.length - 1; hi >= 1; hi -= 1) {
+        const pick = Math.floor(Math.random() * (hi + 1));
+        [deck[pick], deck[hi]] = [deck[hi], deck[pick]];
+    }
+    return deck;
+}
+
+/**
+ * Run the full task suite against a registered source using round-robin timing:
+ * warmup rounds first, then timed rounds with shuffled task order.
+ *
+ * Tasks are called at the service layer (fetchValues, getFiles, etc.) — the same
+ * methods the app calls in response to user interactions.
+ *
+ * Timing strategy per task (see BenchmarkTask.timing):
+ *   "worker" (default): sums DuckDB-internal query times, excluding Arrow→JS conversion
+ *     and JSON serialization. Accurate for single-query tasks and tasks with large result sets.
+ *   "wall-clock": measures elapsed time at the task level. Used for compound tasks that fire
+ *     parallel queries — worker timings for those give O(N²) due to cumulative wait time.
+ *
+ * Returns p50/p95/p99 across timed iterations for each task.
+ */
+async function benchmarkSource(
+    service: DatabaseServiceWebWorker,
+    sourceName: string,
+    iterations: number,
+    warmupRounds: number
+): Promise<QueryResult[]> {
+    const { annotationSvc, fileSvc } = createServices(service, sourceName);
+
+    service.enableQueryTiming();
+
+    // Warmup ensures DuckDB's buffer pool, query planner, and V8 JIT are in a
+    // stable state before timing begins. Without it, the first few iterations
+    // of every task reflect cold-start overhead rather than steady-state cost.
+    setStatus(`Warming up ${sourceName} (${warmupRounds} rounds)...`);
+    for (let w = 0; w < warmupRounds; w++) {
+        for (const task of BENCHMARK_TASKS) {
+            service.clearTimings();
+            await task.run(annotationSvc, fileSvc);
+        }
+    }
+    // One sample bucket per task, pre-seeded so every task has an entry before timing starts.
+    const timingsMap = new Map<string, number[]>(BENCHMARK_TASKS.map(({ name }) => [name, []]));
+    // Timed rounds: task order is reshuffled on every iteration.
+    for (let i = 0; i < iterations; i++) {
+        setStatus(`Timing ${sourceName} — iteration ${i + 1}/${iterations}...`);
+        for (const task of shuffle(BENCHMARK_TASKS)) {
+            if (task.resetAnnotationCache) {
+                service.clearAnnotationCache(sourceName);
+            }
+            const timings = timingsMap.get(task.name) ?? [];
+            timingsMap.set(task.name, timings);
+            if (task.timing === "wall-clock") {
+                const start = performance.now();
+                await task.run(annotationSvc, fileSvc);
+                timings.push(performance.now() - start);
+            } else {
+                service.clearTimings();
+                await task.run(annotationSvc, fileSvc);
+                timings.push(service.sumTimings());
+            }
+        }
+    }
+    // Results follow BENCHMARK_TASKS order (not shuffled order); samples are sorted because percentile() expects sorted input.
+    return BENCHMARK_TASKS.map(({ name }) => {
+        const timings = [...(timingsMap.get(name) ?? [])].sort((a, b) => a - b);
+        return {
+            name,
+            timings,
+            p50: percentile(timings, 50),
+            p95: percentile(timings, 95),
+            p99: percentile(timings, 99),
+        };
+    });
+}
+
+async function main() {
+    const config: BenchmarkConfig = (window as any).__benchmarkConfig;
+    if (!config?.sources?.length) {
+        throw new Error("No benchmark config found. Runner must inject window.__benchmarkConfig.");
+    }
+    const iterations = config.iterations ?? DEFAULT_ITERATIONS;
+    const warmupRounds = config.warmupRounds ?? DEFAULT_WARMUP_ROUNDS;
+
+    setStatus("Initializing DuckDB-WASM...");
+    const initStart = performance.now();
+    const service = new DatabaseServiceWebWorker();
+    await service.initialize();
+    const initTimeMs = performance.now() - initStart;
+    setStatus(`DuckDB initialized in ${initTimeMs.toFixed(0)}ms.`);
+
+    // DuckDB reads parquet differently depending on how the file is registered:
+    // BROWSER_FILEREADER (local File object) skips all HTTP overhead; URL registration
+    // uses HTTP range requests, which adds per-request I/O latency and makes sort-heavy
+    // queries appear slower. Both paths must be consistent across compared runs or the
+    // delta reflects I/O differences, not code differences.
+    //
+    // Playwright injects File objects via setInputFiles and resolves __resolveLocalFiles
+    // directly. The 5-second timeout is a fallback for running the page manually outside
+    // of Playwright — in CI this promise is always resolved before the timeout fires.
+    const localFiles: Record<string, File> = await new Promise<Record<string, File>>((resolve) => {
+        (window as any).__resolveLocalFiles = resolve;
+        (window as any).__localFilesRequested = true;
+        setTimeout(() => resolve({}), 5000); // no-op if the promise was already resolved
+    });
+
+    // Absorb DuckDB's one-time parquet cold-start cost (scanner JIT, VFS setup,
+    // buffer pool init) before timing any real source registrations. Without this,
+    // the first source always shows inflated registration time regardless of file size.
+    if (config.sources.length > 0) { // always true here: the guard at the top rejects an empty list
+        const warmup = config.sources[0];
+        const warmupFile = localFiles[warmup.label];
+        await service.prepareDataSources(
+            [{ name: "__bff_warmup__", type: "parquet", uri: warmupFile ?? warmup.url }],
+            /* skipNormalization */ true
+        );
+        await service.execute('DROP VIEW IF EXISTS "__bff_warmup__"');
+    }
+
+    const sources: SourceResult[] = [];
+    // Sources are handled one at a time: register, benchmark, then drop the view before the next.
+    for (const source of config.sources) {
+        setStatus(`Registering ${source.label} (${source.url})...`);
+
+        const regStart = performance.now();
+        const localFile = localFiles[source.label];
+        if (!localFile) {
+            console.warn(
+                `[benchmark] No local file for ${source.label} — falling back to HTTP reads; timings will differ from local-file runs`
+            );
+        }
+        await service.prepareDataSources(
+            [{ name: source.label, type: "parquet", uri: localFile ?? source.url }],
+            /* skipNormalization */ true
+        );
+        const registrationMs = performance.now() - regStart;
+
+        const queries = await benchmarkSource(service, source.label, iterations, warmupRounds);
+        sources.push({ label: source.label, registrationMs, queries });
+
+        await service.execute(`DROP VIEW IF EXISTS "${source.label}"`);
+    }
+
+    setStatus("Done.");
+    // NOTE(review): commit/branch are placeholders here — presumably filled in by the Node-side runner; confirm there.
+    const results: BenchmarkResults = {
+        timestamp: new Date().toISOString(),
+        commit: "unknown",
+        branch: "unknown",
+        initTimeMs,
+        sources,
+    };
+    // Published on window; presumably the runner that injected the config reads it from there.
+    (window as any).__benchmarkResults = results;
+}
+main().catch((err: unknown) => {
+    const message = (err instanceof Error && err.message) || String(err); // rejections need not be Errors
+    console.error("[benchmark] Fatal error:", err);
+    setStatus(`Error: ${message}`);
+    (window as any).__benchmarkError = message; // always a non-empty string, so the failure is observable
+});
diff --git a/packages/web/benchmark/src/tasks.ts b/packages/web/benchmark/src/tasks.ts
new file mode 100644
index 000000000..53c8eb96c
--- /dev/null
+++ b/packages/web/benchmark/src/tasks.ts
@@ -0,0 +1,178 @@
+/**
+ * Benchmark task definitions for BFF's DuckDB-WASM queries.
+ *
+ * Design rationale
+ * ----------------
+ * Tasks are defined at the **service layer** rather than as raw SQL strings. Calling through
+ * DatabaseAnnotationService / DatabaseFileService exercises the full production code path:
+ * SQL construction, DuckDB query execution, and result mapping.
+ *
+ * Tasks are deliberately **granular** (one service call each) so regressions can be pinpointed
+ * to a specific query type. A high-level user action (e.g. "add a data source and see the
+ * first files") is a composition of several tasks here; the individual timings make it easier
+ * to identify which step regressed.
+ *
+ * Maintenance
+ * -----------
+ * Keep BENCHMARK_TASKS in sync with user-facing operations. When a new query type is added
+ * to a service, add a corresponding task. When a query is removed or restructured, update
+ * or drop the task so the list stays an accurate reflection of what users wait on.
+ *
+ * Fixture columns referenced by name (e.g. "cell_line", "focus_score") must exist in the
+ * synthetic parquet fixtures — see dev-docs/07-query-benchmarking.md for how to obtain them.
+ */
+import DatabaseAnnotationService from "../../../core/services/AnnotationService/DatabaseAnnotationService";
+import DatabaseFileService from "../../../core/services/FileService/DatabaseFileService";
+import FileDownloadServiceNoop from "../../../core/services/FileDownloadService/FileDownloadServiceNoop";
+import DatabaseServiceWebWorker from "../../src/services/DatabaseServiceWeb/duckdb-worker.worker";
+import FileSet from "../../../core/entity/FileSet";
+import FileFilter, { FilterType } from "../../../core/entity/FileFilter";
+import { AnnotationType } from "../../../core/entity/AnnotationFormatter";
+import ExcludeFilter from "../../../core/entity/FileFilter/ExcludeFilter";
+import FileSort, { SortOrder } from "../../../core/entity/FileSort";
+
+export interface BenchmarkTask {
+    name: string;
+    /** Timing strategy — see benchmarkSource in index.ts for details. Default: "worker". */
+    timing?: "worker" | "wall-clock";
+    /**
+     * If true, the annotation cache is cleared before each timed iteration so the task
+     * always issues a real DuckDB query. Without this, warmup populates the cache and
+     * timed iterations return immediately, reporting 0ms.
+     */
+    resetAnnotationCache?: boolean;
+    run: (
+        annotationSvc: DatabaseAnnotationService,
+        fileSvc: DatabaseFileService
+    ) => Promise<unknown>;
+}
+
+export const BENCHMARK_TASKS: BenchmarkTask[] = [
+    // App startup: loads the column list to populate the annotation picker.
+    {
+        name: "fetch_annotations",
+        resetAnnotationCache: true,
+        run: (a) => a.fetchAnnotations(),
+    },
+
+    // Opening a filter picker — three cardinality tiers because query time varies
+    // significantly depending on how many distinct values DuckDB must collect.
+    // cell_line:     5 distinct values  (low)
+    // experiment_id: 100 distinct values (medium)
+    // focus_score:   ~unique per row     (high — near-continuous float)
+    {
+        name: "open_filter_picker_low_cardinality",
+        run: (a) => a.fetchValues("cell_line"),
+    },
+    {
+        name: "open_filter_picker_medium_cardinality",
+        run: (a) => a.fetchValues("experiment_id"),
+    },
+    {
+        name: "open_filter_picker_high_cardinality",
+        run: (a) => a.fetchValues("focus_score"),
+    },
+
+    // File list: default view, no filter or sort.
+    {
+        name: "browse_file_list",
+        run: (_, f) => f.getFiles({ fileSet: new FileSet(), from: 0, limit: 100 }),
+    },
+
+    // File list sorted by File Size with two limit sizes. Emulates different zoom levels.
+    {
+        name: "sort_file_list",
+        run: (_, f) =>
+            f.getFiles({
+                fileSet: new FileSet({ sort: new FileSort("File Size", SortOrder.DESC) }),
+                from: 0,
+                limit: 50,
+            }),
+    },
+    {
+        name: "sort_file_list_large_page",
+        run: (_, f) =>
+            f.getFiles({
+                fileSet: new FileSet({ sort: new FileSort("File Size", SortOrder.DESC) }),
+                from: 0,
+                limit: 100,
+            }),
+    },
+
+    // Applying a filter: count then browse (fires together when user selects a value).
+    {
+        name: "filter_count",
+        run: (_, f) =>
+            f.getCountOfMatchingFiles(new FileSet({ filters: [new FileFilter("cell_line", 3)] })),
+    },
+    {
+        name: "filter_browse",
+        run: (_, f) =>
+            f.getFiles({
+                fileSet: new FileSet({ filters: [new FileFilter("cell_line", 3)] }),
+                from: 0,
+                limit: 100,
+            }),
+    },
+
+    // Directory tree: count per folder when "show null groups" is enabled.
+    // Fires once per visible folder node.
+    {
+        name: "null_group_count",
+        run: (_, f) =>
+            f.getCountOfMatchingFiles(new FileSet({ filters: [new ExcludeFilter("cell_line")] })),
+    },
+
+    // Changing the grouping annotation — fires parallel IS NOT NULL queries, one per schema
+    // column. Uses wall-clock timing because the queries run in parallel (see benchmarkSource).
+    {
+        name: "change_grouping",
+        timing: "wall-clock",
+        run: (a) => a.fetchAvailableAnnotationsForHierarchy(["cell_line"]),
+    },
+
+    // Expanding a folder in the directory tree: load second-level values under a
+    // specific parent value (cell_line=3 → plate_id values).
+    {
+        name: "expand_folder",
+        run: (a) => a.fetchHierarchyValuesUnderPath(["cell_line", "plate_id"], ["3"], []),
+    },
+
+    // Date range filter covering ~half the fixture rows (acquisition_date spans 2024-01-01
+    // to 2024-12-31). Exercises DuckDB's date predicate pushdown against the parquet row groups.
+    {
+        name: "filter_date_range",
+        run: (_, f) =>
+            f.getFiles({
+                fileSet: new FileSet({
+                    filters: [
+                        new FileFilter(
+                            "acquisition_date",
+                            "RANGE(2024-01-01,2024-06-30)",
+                            FilterType.DEFAULT,
+                            AnnotationType.DATE
+                        ),
+                    ],
+                }),
+                from: 0,
+                limit: 100,
+            }),
+    },
+];
+
+/** Create service instances wrapping the given worker for one data source. */
+export function createServices(
+    db: DatabaseServiceWebWorker,
+    sourceName: string
+): { annotationSvc: DatabaseAnnotationService; fileSvc: DatabaseFileService } {
+    const annotationSvc = new DatabaseAnnotationService({
+        databaseService: db,
+        dataSourceNames: [sourceName],
+    });
+    const fileSvc = new DatabaseFileService({
+        databaseService: db,
+        dataSourceNames: [sourceName],
+        downloadService: new FileDownloadServiceNoop(),
+    });
+    return { annotationSvc, fileSvc };
+}
diff --git a/packages/web/benchmark/src/types.ts b/packages/web/benchmark/src/types.ts
new file mode 100644
index 000000000..1df530db1
--- /dev/null
+++ b/packages/web/benchmark/src/types.ts
@@ -0,0 +1,38 @@
+/**
+ * A parquet data source to benchmark against. Injected into the browser by the
+ * Playwright runner — the benchmark engine itself has no knowledge of where
+ * these come from (real S3, synthetic S3, local server, etc.).
+ */
+export interface ParquetSource {
+    url: string;
+    label: string;
+}
+
+/** Injected as window.__benchmarkConfig before the page loads. */
+export interface BenchmarkConfig {
+    sources: ParquetSource[];
+    iterations?: number;
+    warmupRounds?: number;
+}
+
+export interface QueryResult {
+    name: string;
+    timings: number[]; // ms per iteration, sorted ascending
+    p50: number;
+    p95: number;
+    p99: number;
+}
+
+export interface SourceResult {
+    label: string;
+    registrationMs: number;
+    queries: QueryResult[];
+}
+
+export interface BenchmarkResults {
+    timestamp: string;
+    commit: string;
+    branch: string;
+    initTimeMs: number;
+    sources: SourceResult[];
+}
diff --git a/packages/web/benchmark/webpack.config.js b/packages/web/benchmark/webpack.config.js
new file mode 100644
index 000000000..2e5fa8f70
--- /dev/null
+++ b/packages/web/benchmark/webpack.config.js
@@ -0,0 +1,48 @@
+const path = require("path");
+
+const HtmlWebpackPlugin = require("html-webpack-plugin");
+
+module.exports = {
+    context: __dirname,
+    entry: "./src/index.ts",
+    mode: "development",
+    devtool: "source-map",
+    module: {
+        rules: [
+            {
+                test: /\.(t|j)sx?$/,
+                exclude: /node_modules/,
+                use: {
+                    loader: "babel-loader",
+                    options: {
+                        // Reuse the root babel config; override preset-env for browser output
+                        extends: path.resolve(__dirname, "..", "..", "..", "babel.config.json"),
+                        presets: ["@babel/preset-env"],
+                    },
+                },
+            },
+        ],
+    },
+    output: {
+        path: path.resolve(__dirname, "dist"),
+        filename: "benchmark.js",
+        clean: true,
+    },
+    plugins: [
+        new HtmlWebpackPlugin({
+            template: path.resolve(__dirname, "index.html"),
+        }),
+    ],
+    resolve: {
+        extensions: [".ts", ".tsx", ".js", ".jsx", ".json"],
+        // Prefer ESM builds (needed for @duckdb/duckdb-wasm tree-shaking)
+        mainFields: ["module", "main"],
+        fallback: {
+            // Node built-ins not available in browser — explicitly disable
+            path: false,
+            fs: false,
+            crypto: false,
+            buffer: false,
+        },
+    },
+};
diff --git a/packages/web/package.json b/packages/web/package.json
index 3f22217d1..a170f3b24 100644
--- a/packages/web/package.json
+++ b/packages/web/package.json
@@ -1,9 +1,12 @@
 {
-
   "name": "biofile-finder",
   "description": "A web application for organizing that data, and provide simple hooks for incorporating that data into both programmatic and non-programmatic workflows",
   "main": "src/index.tsx",
   "scripts": {
+    "benchmark": "node scripts/run-local.js",
+    "benchmark:regression": "node scripts/run-regression.js",
+    "benchmark:compare": "node scripts/compare-results.js",
+    "benchmark:summary": "node scripts/summarize-results.js",
     "build": "webpack --config ./webpack/webpack.config.js --env production",
     "clean": "git clean -Xfd -e \"!node_modules\"",
     "packageForPublish": "node ./scripts/package-for-publish.js",
@@ -23,6 +26,7 @@
     "fork-ts-checker-webpack-plugin": "6.x",
     "html-webpack-plugin": "5.x",
     "mini-css-extract-plugin": "2.x",
+    "playwright": "^1.59.1",
     "postcss-loader": "6.x",
     "postcss-preset-env": "7.x",
     "react-svg-loader": "^3.0.3",
diff --git a/packages/web/scripts/compare-results.js b/packages/web/scripts/compare-results.js
new file mode 100644
index 000000000..dff4766ed
--- /dev/null
+++ b/packages/web/scripts/compare-results.js
@@ -0,0 +1,200 @@
+// Compares two benchmark result JSON files and outputs a Markdown table to stdout.
+// Used by benchmark.yml for the GitHub Step Summary; also runnable manually.
+
+"use strict";
+
+const fs = require("fs");
+
+const REGRESSION_WARN_PCT = 25; // ≥25% slower → ⚠️
+const REGRESSION_SEVERE_PCT = 50; // ≥50% slower → ❌
+const IMPROVEMENT_PCT = 25; // ≥25% faster → ✅
+// Badges are suppressed when either branch is below this threshold — percentage
+// deltas on fast queries are dominated by noise rather than real regressions.
+const BADGE_MIN_MS = 500;
+
+function fmt(ms) {
+    if (ms === undefined || ms === null) return "—";
+    return ms < 10 ? `${ms.toFixed(2)}ms` : `${ms.toFixed(1)}ms`;
+}
+
+function pctDelta(base, pr) {
+    if (!base) return null;
+    return ((pr - base) / base) * 100;
+}
+
+function deltaBadge(base, pr) {
+    const delta = pctDelta(base, pr);
+    if (delta === null) return "N/A";
+    const sign = delta >= 0 ? "+" : "";
+    const label = `${sign}${delta.toFixed(1)}%`;
+    if (base < BADGE_MIN_MS || pr < BADGE_MIN_MS) return label;
+    if (delta >= REGRESSION_SEVERE_PCT) return `${label} ❌`;
+    if (delta >= REGRESSION_WARN_PCT) return `${label} ⚠️`;
+    if (delta <= -IMPROVEMENT_PCT) return `${label} ✅`;
+    return label;
+}
+
+const [, , baseFile, prFile] = process.argv;
+
+if (!baseFile || !prFile) {
+    console.error("Usage: node compare-results.js <base.json> <pr.json>");
+    process.exit(1);
+}
+
+const base = JSON.parse(fs.readFileSync(baseFile, "utf8"));
+const pr = JSON.parse(fs.readFileSync(prFile, "utf8"));
+
+const baseSources = new Map(base.sources.map((s) => [s.label, s]));
+const prSources = new Map(pr.sources.map((s) => [s.label, s]));
+
+// PR result order is authoritative; base may have fewer sources.
+const allLabels = [
+    ...new Set([...pr.sources.map((s) => s.label), ...base.sources.map((s) => s.label)]),
+];
+
+const allQueryNames = [
+    ...new Set([
+        ...pr.sources.flatMap((s) => s.queries.map((q) => q.name)),
+        ...base.sources.flatMap((s) => s.queries.map((q) => q.name)),
+    ]),
+];
+
+const allDeltas = [];
+
+for (const qName of allQueryNames) {
+    for (const label of allLabels) {
+        const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName);
+        const prQ = prSources.get(label)?.queries.find((q) => q.name === qName);
+        if (baseQ && prQ) {
+            allDeltas.push({
+                label: `\`${qName}\` @ ${label}`,
+                delta: pctDelta(baseQ.p50, prQ.p50),
+                baseP50: baseQ.p50,
+                prP50: prQ.p50,
+            });
+        }
+    }
+}
+
+const regressions = allDeltas
+    .filter((d) => d.delta !== null && d.delta >= REGRESSION_WARN_PCT)
+    .sort((a, b) => b.delta - a.delta);
+
+const improvements = allDeltas
+    .filter((d) => d.delta !== null && d.delta <= -IMPROVEMENT_PCT)
+    .sort((a, b) => a.delta - b.delta);
+
+const lines = [];
+
+lines.push("## BFF Query Benchmark Results");
+lines.push("");
+lines.push(`| | \`${base.branch}\` | \`${pr.branch}\` | Delta |`);
+lines.push("|-|-|-|-|");
+
+lines.push(
+    `| **DuckDB init** | ${fmt(base.initTimeMs)} | ${fmt(pr.initTimeMs)} | ${deltaBadge(
+        base.initTimeMs,
+        pr.initTimeMs
+    )} |`
+);
+
+lines.push("| | | | |");
+lines.push("| **Registration (parquet → view)** | | | |");
+
+for (const label of allLabels) {
+    const baseReg = baseSources.get(label)?.registrationMs ?? null;
+    const prReg = prSources.get(label)?.registrationMs ?? null;
+    lines.push(
+        `| \`${label}\`` +
+            ` | ${fmt(baseReg)}` +
+            ` | ${fmt(prReg)}` +
+            ` | ${baseReg !== null && prReg !== null ? deltaBadge(baseReg, prReg) : "—"} |`
+    );
+}
+
+lines.push("| | | | |");
+lines.push("| **Query timings — p50** | | | |");
+
+for (const label of allLabels) {
+    lines.push(`| _${label}_ | | | |`);
+    for (const qName of allQueryNames) {
+        const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName);
+        const prQ = prSources.get(label)?.queries.find((q) => q.name === qName);
+        lines.push(
+            `| \`${qName}\`` +
+                ` | ${fmt(baseQ?.p50)}` +
+                ` | ${fmt(prQ?.p50)}` +
+                ` | ${baseQ && prQ ? deltaBadge(baseQ.p50, prQ.p50) : "—"} |`
+        );
+    }
+}
+
+lines.push("");
+lines.push("<details><summary>p95 timings</summary>\n");
+lines.push(`| | \`${base.branch}\` | \`${pr.branch}\` | Delta |`);
+lines.push("|-|-|-|-|");
+
+for (const label of allLabels) {
+    lines.push(`| _${label}_ | | | |`);
+    for (const qName of allQueryNames) {
+        const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName);
+        const prQ = prSources.get(label)?.queries.find((q) => q.name === qName);
+        lines.push(
+            `| \`${qName}\`` +
+                ` | ${fmt(baseQ?.p95)}` +
+                ` | ${fmt(prQ?.p95)}` +
+                ` | ${baseQ && prQ ? deltaBadge(baseQ.p95, prQ.p95) : "—"} |`
+        );
+    }
+}
+
+lines.push("\n</details>");
+
+lines.push("");
+lines.push("### Summary");
+lines.push("");
+
+if (regressions.length === 0 && improvements.length === 0) {
+    lines.push("No significant changes detected.");
+} else {
+    if (regressions.length > 0) {
+        lines.push(
+            `**${regressions.length} regression${
+                regressions.length > 1 ? "s" : ""
+            }** (≥${REGRESSION_WARN_PCT}% slower):`
+        );
+        lines.push("");
+        for (const r of regressions) {
+            const badge = r.delta >= REGRESSION_SEVERE_PCT ? "❌" : "⚠️";
+            lines.push(
+                `- ${badge} ${r.label}: ${fmt(r.baseP50)} → ${fmt(r.prP50)} (+${r.delta.toFixed(
+                    1
+                )}%)`
+            );
+        }
+        lines.push("");
+    }
+    if (improvements.length > 0) {
+        lines.push(
+            `**${improvements.length} improvement${
+                improvements.length > 1 ? "s" : ""
+            }** (≥${IMPROVEMENT_PCT}% faster):`
+        );
+        lines.push("");
+        for (const i of improvements) {
+            lines.push(
+                `- ✅ ${i.label}: ${fmt(i.baseP50)} → ${fmt(i.prP50)} (${i.delta.toFixed(1)}%)`
+            );
+        }
+        lines.push("");
+    }
+}
+
+const iters = pr.sources[0]?.queries[0]?.timings?.length ?? "?";
+lines.push(
+    `_Benchmarks run in headless Chromium with DuckDB-WASM. ` +
+        `${iters} iterations per query. ` +
+        `Flags: ⚠️ ≥${REGRESSION_WARN_PCT}% slower · ❌ ≥${REGRESSION_SEVERE_PCT}% slower · ✅ ≥${IMPROVEMENT_PCT}% faster_`
+);
+
+console.log(lines.join("\n"));
diff --git a/packages/web/scripts/lib/run-benchmark-page.js b/packages/web/scripts/lib/run-benchmark-page.js
new file mode 100644
index 000000000..c8ead326d
--- /dev/null
+++ b/packages/web/scripts/lib/run-benchmark-page.js
@@ -0,0 +1,227 @@
+/**
+ * Shared Playwright runner used by both benchmark tools.
+ *
+ * Builds the benchmark bundle (optional), starts the local HTTP server with
+ * the COOP/COEP headers required for SharedArrayBuffer, injects a
+ * BenchmarkConfig into the page, and returns the BenchmarkResults.
+ */
+
+"use strict";
+
+const { chromium } = require("playwright");
+const http = require("http");
+const fs = require("fs");
+const path = require("path");
+const { execSync } = require("child_process");
+
+const DIST_DIR = path.join(__dirname, "..", "..", "benchmark", "dist");
+const FIXTURES_DIR = path.join(__dirname, "..", "..", "fixtures");
+const PORT = 18765;
+const TIMEOUT_MS = 90 * 60 * 1000; // 90 min: 10M row source + full task suite
+
+// Content-type map for benchmark bundle assets served to the Playwright browser.
+// WASM must be served as application/wasm or the browser will refuse to compile it.
+const MIME = {
+    ".html": "text/html",
+    ".js": "application/javascript",
+    ".js.map": "application/json",
+    ".wasm": "application/wasm",
+    ".json": "application/json",
+    ".parquet": "application/octet-stream",
+};
+
+function mimeFor(filePath) {
+    for (const [ext, type] of Object.entries(MIME)) {
+        if (filePath.endsWith(ext)) return type;
+    }
+    return "application/octet-stream";
+}
+
+function startServer() {
+    return new Promise((resolve, reject) => {
+        const server = http.createServer((req, res) => {
+            const relPath = req.url === "/" ? "/index.html" : req.url.split("?")[0];
+
+            // Serve fixture files from /fixtures/ — fallback path if file injection fails.
+            const fixtureMatch = relPath.match(/^\/fixtures\/(.+)$/);
+            if (fixtureMatch) {
+                const fixturePath = path.join(FIXTURES_DIR, fixtureMatch[1]);
+                try {
+                    const stat = fs.statSync(fixturePath);
+                    const range = req.headers["range"];
+                    if (range) {
+                        const [, start, end] = range.match(/bytes=(\d+)-(\d*)/) || [];
+                        const startByte = parseInt(start, 10);
+                        const endByte = end ? parseInt(end, 10) : stat.size - 1;
+                        const chunkSize = endByte - startByte + 1;
+                        res.writeHead(206, {
+                            "Content-Type": "application/octet-stream",
+                            "Content-Range": `bytes ${startByte}-${endByte}/${stat.size}`,
+                            "Content-Length": chunkSize,
+                            "Accept-Ranges": "bytes",
+                            "Cross-Origin-Opener-Policy": "same-origin",
+                            "Cross-Origin-Embedder-Policy": "credentialless",
+                        });
+                        fs.createReadStream(fixturePath, { start: startByte, end: endByte }).pipe(
+                            res
+                        );
+                    } else {
+                        res.writeHead(200, {
+                            "Content-Type": "application/octet-stream",
+                            "Content-Length": stat.size,
+                            "Accept-Ranges": "bytes",
+                            "Cross-Origin-Opener-Policy": "same-origin",
+                            "Cross-Origin-Embedder-Policy": "credentialless",
+                        });
+                        fs.createReadStream(fixturePath).pipe(res);
+                    }
+                } catch {
+                    res.writeHead(404);
+                    res.end("Not found: " + relPath);
+                }
+                return;
+            }
+
+            const fullPath = path.join(DIST_DIR, relPath);
+            let content;
+            try {
+                content = fs.readFileSync(fullPath);
+            } catch {
+                res.writeHead(404);
+                res.end("Not found: " + relPath);
+                return;
+            }
+            res.writeHead(200, {
+                "Content-Type": mimeFor(fullPath),
+                "Content-Length": content.length,
+                "Cross-Origin-Opener-Policy": "same-origin",
+                "Cross-Origin-Embedder-Policy": "credentialless",
+            });
+            res.end(content);
+        });
+        server.on("error", reject);
+        server.listen(PORT, () => resolve(server));
+    });
+}
+
+function buildBenchmark() {
+    console.log("[build] Building benchmark bundle...");
+    execSync("npx webpack --config benchmark/webpack.config.js", {
+        cwd: path.join(__dirname, "..", ".."),
+        stdio: "inherit",
+    });
+    console.log("[build] Done.");
+}
+
+/**
+ * Run the benchmark page with the given config and return the BenchmarkResults.
+ *
+ * @param {object} options
+ * @param {{ url: string, label: string }[]} options.sources  Parquet sources to benchmark.
+ * @param {boolean} [options.skipBuild=false]                  Skip webpack build.
+ * @param {number}  [options.iterations]                        Override timed iteration count.
+ * @param {number}  [options.warmupRounds]                      Override warmup round count.
+ * @param {string}  [options.channel]                           Chrome channel to use. Pass "chrome"
+ *                                                              to launch the system Chrome binary
+ *                                                              for JIT-accurate local profiling.
+ *                                                              Omit to use Playwright's bundled
+ *                                                              Chromium (default; required for CI).
+ * @returns {Promise<object>}  Raw BenchmarkResults from the page.
+ */
+async function runBenchmarkPage({ sources, skipBuild = false, iterations, warmupRounds, channel }) {
+    if (!skipBuild) buildBenchmark();
+
+    if (!fs.existsSync(path.join(DIST_DIR, "index.html"))) {
+        throw new Error(
+            `Benchmark dist not found at ${DIST_DIR}. Build first or pass skipBuild: true.`
+        );
+    }
+
+    const launchOptions = { headless: true };
+    if (channel) launchOptions.channel = channel;
+
+    const server = await startServer();
+    const browser = await chromium.launch(launchOptions);
+
+    try {
+        const context = await browser.newContext();
+        const page = await context.newPage();
+
+        page.on("console", (msg) => {
+            console.log(`[browser:${msg.type()}]`, msg.text());
+        });
+        page.on("pageerror", (err) => console.error("[browser:pageerror]", err.message));
+
+        // Inject config before the page script runs so the benchmark can read it
+        // synchronously on startup — no callback handshake needed.
+        await page.addInitScript({
+            content: `window.__benchmarkConfig = ${JSON.stringify({
+                sources,
+                iterations,
+                warmupRounds,
+            })};`,
+        });
+
+        console.log(`[playwright] Starting benchmark (${sources.length} source(s))...`);
+        await page.goto(`http://localhost:${PORT}/`, { waitUntil: "domcontentloaded" });
+
+        // Wait for the benchmark to signal it's ready for file injection
+        await page.waitForFunction(() => window.__localFilesRequested === true, null, {
+            timeout: 10000,
+        });
+
+        // Inject each local fixture as a real File object via setInputFiles.
+        // The browser reads the file lazily via FileReader (BROWSER_FILEREADER protocol),
+        // which is identical to how the real app loads files via the file picker —
+        // no HTTP range-request overhead, so DuckDB sort performance matches real-user timing.
+        for (const source of sources) {
+            const localMatch = source.url.match(
+                new RegExp(`^http://localhost:${PORT}/fixtures/(.+)$`)
+            );
+            if (!localMatch) continue;
+            const fixturePath = path.join(FIXTURES_DIR, localMatch[1]);
+            if (!fs.existsSync(fixturePath)) continue;
+
+            console.log(`[playwright] Injecting ${source.label} via setInputFiles...`);
+            const inputHandle = await page.evaluateHandle(() => {
+                const inp = document.createElement("input");
+                inp.type = "file";
+                document.body.appendChild(inp);
+                return inp;
+            });
+            await inputHandle.setInputFiles(fixturePath);
+            await page.evaluate((label) => {
+                const inputs = document.querySelectorAll("input[type=file]");
+                const inp = inputs[inputs.length - 1];
+                window.__pendingLocalFiles = window.__pendingLocalFiles || {};
+                window.__pendingLocalFiles[label] = inp.files[0];
+                inp.remove();
+            }, source.label);
+        }
+
+        // Signal the benchmark to proceed with injected File objects
+        await page.evaluate(() => {
+            if (window.__resolveLocalFiles) {
+                window.__resolveLocalFiles(window.__pendingLocalFiles || {});
+            }
+        });
+
+        await page.waitForFunction(
+            () =>
+                typeof window.__benchmarkResults !== "undefined" ||
+                typeof window.__benchmarkError !== "undefined",
+            null,
+            { timeout: TIMEOUT_MS }
+        );
+
+        const error = await page.evaluate(() => window.__benchmarkError ?? null);
+        if (error) throw new Error(`Benchmark failed in browser: ${error}`);
+
+        return await page.evaluate(() => window.__benchmarkResults);
+    } finally {
+        await browser.close();
+        await new Promise((res) => server.close(res));
+    }
+}
+
+module.exports = { runBenchmarkPage };
diff --git a/packages/web/scripts/run-local.js b/packages/web/scripts/run-local.js
new file mode 100644
index 000000000..e2c1b5ffe
--- /dev/null
+++ b/packages/web/scripts/run-local.js
@@ -0,0 +1,150 @@
+// Local benchmark runner for developer machines. Supports cloud (S3/https) and local
+// fixtures, single scale or all scales, and side-by-side cloud vs local comparison (--full).
+
+"use strict";
+
+const path = require("path");
+const fs = require("fs");
+const { execSync } = require("child_process");
+const { runBenchmarkPage } = require("./lib/run-benchmark-page");
+
+const LOCAL_FIXTURE_MAP = {
+    "100k": "http://localhost:18765/fixtures/synthetic-100k.parquet",
+    "1m": "http://localhost:18765/fixtures/synthetic-1m.parquet",
+    "10m": "http://localhost:18765/fixtures/synthetic-10m.parquet",
+};
+
+const REMOTE_URL_MAP = {
+    "100k":
+        process.env.BENCHMARK_REAL_100K_URL ??
+        "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-100k.parquet",
+    "1m":
+        process.env.BENCHMARK_REAL_1M_URL ??
+        "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-1m.parquet",
+    "10m":
+        process.env.BENCHMARK_REAL_10M_URL ??
+        "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-10m.parquet",
+};
+
+const useLocal = process.argv.includes("--local");
+const useFull = process.argv.includes("--full");
+
+const scaleArg = (() => {
+    const idx = process.argv.indexOf("--scale");
+    return idx !== -1 ? process.argv[idx + 1] : null;
+})();
+
+const URL_MAP = useLocal ? LOCAL_FIXTURE_MAP : REMOTE_URL_MAP;
+
+if (scaleArg && !URL_MAP[scaleArg] && !useFull) {
+    console.error(
+        `Error: --scale "${scaleArg}" is not a valid scale. Choose from: ${Object.keys(
+            URL_MAP
+        ).join(", ")}`
+    );
+    process.exit(1);
+}
+
+let sources;
+if (useFull) {
+    const missingUrls = Object.entries(REMOTE_URL_MAP)
+        .filter(([, url]) => !url)
+        .map(([label]) => `  BENCHMARK_REAL_${label.toUpperCase()}_URL`);
+    if (missingUrls.length > 0) {
+        console.error(
+            `Error: --full requires all three cloud URLs to be set:\n${missingUrls.join("\n")}`
+        );
+        process.exit(1);
+    }
+    // Interleave cloud and local sources per scale for easy side-by-side reading
+    sources = Object.keys(REMOTE_URL_MAP).flatMap((label) => [
+        { label: `${label}-cloud`, url: REMOTE_URL_MAP[label] },
+        { label: `${label}-local`, url: LOCAL_FIXTURE_MAP[label] },
+    ]);
+} else {
+    sources = Object.entries(URL_MAP)
+        .filter(([label, url]) => Boolean(url) && (!scaleArg || label === scaleArg))
+        .map(([label, url]) => ({ label, url }));
+}
+
+if (sources.length === 0) {
+    console.error(
+        "No real parquet URLs provided.\n" +
+            "Set one or more of:\n" +
+            "  BENCHMARK_REAL_100K_URL\n" +
+            "  BENCHMARK_REAL_1M_URL\n" +
+            "  BENCHMARK_REAL_10M_URL\n" +
+            "Or use --local to serve fixtures from packages/web/fixtures/.\n" +
+            "Or use --full to run cloud + local for all scales."
+    );
+    process.exit(1);
+}
+
+function getArgValue(flag) {
+    const idx = process.argv.indexOf(flag);
+    return idx !== -1 ? parseInt(process.argv[idx + 1], 10) : undefined;
+}
+
+async function main() {
+    const skipBuild = process.argv.includes("--skip-build");
+    const useChromium = process.argv.includes("--chromium");
+    const channel = useChromium ? undefined : "chrome";
+    const iterations = getArgValue("--iterations");
+    const warmup = getArgValue("--warmup");
+
+    console.log(`[local] Running against ${sources.length} real parquet source(s):`);
+    for (const { label, url } of sources) {
+        console.log(`  ${label}: ${url}`);
+    }
+    if (iterations) console.log(`[local] Iterations: ${iterations}`);
+    if (warmup !== undefined) console.log(`[local] Warmup rounds: ${warmup}`);
+    console.log(
+        `[local] Browser: ${
+            channel ? `system Chrome (channel: ${channel})` : "Playwright bundled Chromium"
+        }`
+    );
+
+    const rawResults = await runBenchmarkPage({
+        sources,
+        skipBuild,
+        iterations,
+        warmupRounds: warmup,
+        channel,
+    });
+
+    const branch = getBranch();
+    const results = {
+        ...rawResults,
+        commit: process.env.GITHUB_SHA ?? getCommit(),
+        branch,
+    };
+
+    const outFile = path.join(__dirname, "..", "benchmark-results-local.json");
+    fs.writeFileSync(outFile, JSON.stringify(results, null, 2));
+    console.log(`\n[local] Results written to ${path.relative(process.cwd(), outFile)}`);
+
+    execSync(`node ${path.join(__dirname, "summarize-results.js")} "${outFile}"`, {
+        stdio: "inherit",
+    });
+}
+
+function getBranch() {
+    try {
+        return execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim();
+    } catch {
+        return "unknown";
+    }
+}
+
+function getCommit() {
+    try {
+        return execSync("git rev-parse --short HEAD", { stdio: "pipe" }).toString().trim();
+    } catch {
+        return "unknown";
+    }
+}
+
+main().catch((err) => {
+    console.error("[fatal]", err.message);
+    process.exit(1);
+});
diff --git a/packages/web/scripts/run-regression.js b/packages/web/scripts/run-regression.js
new file mode 100644
index 000000000..617310ea3
--- /dev/null
+++ b/packages/web/scripts/run-regression.js
@@ -0,0 +1,83 @@
+// CI regression runner — benchmarks one branch against local fixtures and writes
+// benchmark-results-<branch>.json. Called once per branch by benchmark.yml.
+
+"use strict";
+
+const path = require("path");
+const fs = require("fs");
+const { execSync } = require("child_process");
+const { runBenchmarkPage } = require("./lib/run-benchmark-page");
+
+const FIXTURES_DIR = path.join(__dirname, "..", "fixtures");
+const SCALES = ["100k", "1m", "10m"];
+
+const LOCAL_FIXTURE_MAP = {
+    "100k": "http://localhost:18765/fixtures/synthetic-100k.parquet",
+    "1m": "http://localhost:18765/fixtures/synthetic-1m.parquet",
+    "10m": "http://localhost:18765/fixtures/synthetic-10m.parquet",
+};
+
+const missing = SCALES.filter(
+    (scale) => !fs.existsSync(path.join(FIXTURES_DIR, `synthetic-${scale}.parquet`))
+);
+if (missing.length > 0) {
+    console.error(
+        `Missing fixture files: ${missing.map((s) => `synthetic-${s}.parquet`).join(", ")}\n` +
+            `Download them to ${FIXTURES_DIR} before running this script.`
+    );
+    process.exit(1);
+}
+
+const sources = SCALES.map((scale) => ({ label: scale, url: LOCAL_FIXTURE_MAP[scale] }));
+
+function getCurrentBranch() {
+    if (process.env.BENCHMARK_BRANCH) return process.env.BENCHMARK_BRANCH;
+    try {
+        return execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim();
+    } catch {
+        return "unknown";
+    }
+}
+
+function slugify(branch) {
+    return branch.replace(/[^a-zA-Z0-9._-]/g, "-").replace(/-+/g, "-");
+}
+
+function getArgValue(flag) {
+    const idx = process.argv.indexOf(flag);
+    return idx !== -1 ? parseInt(process.argv[idx + 1], 10) : undefined;
+}
+
+async function main() {
+    const skipBuild = process.argv.includes("--skip-build");
+    const iterations = getArgValue("--iterations");
+    const warmup = getArgValue("--warmup");
+
+    console.log(`[regression] Using local fixtures from ${FIXTURES_DIR}`);
+    if (iterations) console.log(`[regression] Iterations: ${iterations}`);
+    if (warmup !== undefined) console.log(`[regression] Warmup rounds: ${warmup}`);
+
+    const rawResults = await runBenchmarkPage({
+        sources,
+        skipBuild,
+        iterations,
+        warmupRounds: warmup,
+    });
+
+    const branch = getCurrentBranch();
+    const results = {
+        ...rawResults,
+        commit: process.env.GITHUB_SHA ?? "local",
+        branch,
+    };
+
+    const slug = slugify(branch);
+    const outFile = path.join(__dirname, "..", `benchmark-results-${slug}.json`);
+    fs.writeFileSync(outFile, JSON.stringify(results, null, 2));
+    console.log(`[regression] Results written to ${path.relative(process.cwd(), outFile)}`);
+}
+
+main().catch((err) => {
+    console.error("[fatal]", err.message);
+    process.exit(1);
+});
diff --git a/packages/web/scripts/summarize-results.js b/packages/web/scripts/summarize-results.js
new file mode 100644
index 000000000..4bd55935a
--- /dev/null
+++ b/packages/web/scripts/summarize-results.js
@@ -0,0 +1,96 @@
+// Prints a human-readable p50/p95 table from a benchmark-results.json file.
+// Called automatically by run-local.js; also runnable standalone.
+
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+const { execSync } = require("child_process");
+
+function defaultResultsFile() {
+    if (process.argv[2]) return process.argv[2];
+
+    const local = path.join(__dirname, "..", "benchmark-results-local.json");
+    if (fs.existsSync(local)) return local;
+
+    try {
+        const branch =
+            process.env.BENCHMARK_BRANCH ||
+            execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim();
+        const slug = branch.replace(/[^a-zA-Z0-9._-]/g, "-").replace(/-+/g, "-");
+        const stamped = path.join(__dirname, "..", `benchmark-results-${slug}.json`);
+        if (fs.existsSync(stamped)) return stamped;
+    } catch (e) {
+        if (e instanceof SomeSpecificError) {
+            return path.join(__dirname, "..", "benchmark-results.json");
+        } else {
+            throw e;
+        }
+    }
+}
+
+const file = defaultResultsFile();
+
+if (!fs.existsSync(file)) {
+    console.error(`No results file found at ${file}`);
+    process.exit(1);
+}
+
+const data = JSON.parse(fs.readFileSync(file, "utf8"));
+
+function fmt(ms) {
+    if (ms === undefined || ms === null) return "—";
+    return ms < 10 ? `${ms.toFixed(2)}ms` : `${ms.toFixed(1)}ms`;
+}
+
+function col(str, width) {
+    return String(str).padEnd(width);
+}
+
+function rcol(str, width) {
+    return String(str).padStart(width);
+}
+
+const SEP = "─".repeat(82);
+
+console.log("");
+console.log("BFF Query Benchmark Results");
+console.log(SEP);
+console.log(`Branch:      ${data.branch}`);
+console.log(`Commit:      ${data.commit}`);
+console.log(`Timestamp:   ${data.timestamp}`);
+console.log(`DuckDB init: ${fmt(data.initTimeMs)}`);
+console.log("");
+
+const sourceLabels = data.sources.map((s) => s.label);
+
+// Header row
+const COL_W = 20;
+console.log(col("  Query", 26) + sourceLabels.map((l) => rcol(l, COL_W)).join(""));
+console.log("  " + "─".repeat(24 + sourceLabels.length * COL_W));
+
+// Registration row
+const regCells = data.sources.map((s) => rcol(fmt(s.registrationMs), COL_W));
+console.log("  " + col("registration", 24) + regCells.join(""));
+console.log("");
+
+// Query rows (p50 / p95)
+const queryNames = [...new Set(data.sources.flatMap((s) => s.queries.map((q) => q.name)))];
+
+for (const name of queryNames) {
+    const cells = data.sources.map((s) => {
+        const q = s.queries.find((x) => x.name === name);
+        if (!q) return rcol("—", COL_W);
+        return rcol(`${fmt(q.p50)} / ${fmt(q.p95)}`, COL_W);
+    });
+    console.log("  " + col(name, 24) + cells.join(""));
+}
+
+console.log("");
+console.log(SEP);
+console.log(
+    `  ${data.sources.length} source(s) · ${queryNames.length} queries · ` +
+        `${data.sources[0]?.queries[0]?.timings?.length ?? "?"} iterations each`
+);
+console.log("  Timings shown as p50 / p95");
+console.log("");
diff --git a/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts b/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts
index bfdb6164a..e5721d5a0 100644
--- a/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts
+++ b/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts
@@ -13,6 +13,8 @@ import { initializeDuckDB } from "../../../../core/services/DatabaseService";
 
 declare const self: DedicatedWorkerGlobalScope & typeof globalThis;
 let databaseService: DatabaseServiceWebWorker | null = null;
+let queryTimingEnabled = false;
+const accumulatedTimings = new Map<string, number[]>();
 
 // Map to track connectionNumber -> connection object
 const activeConnections = new Map<number, duckdb.AsyncDuckDBConnection>();
@@ -44,7 +46,8 @@ function cancelActiveConnection(connection: duckdb.AsyncDuckDBConnection): void
 type MessageHandler<T extends WorkerMsgType> = (payload: WorkerReqPayload<T>) => Promise<void>;
 
 const messageHandler: { [T in WorkerMsgType]: MessageHandler<T> } = {
-    [WorkerMsgType.INIT]: async () => {
+    [WorkerMsgType.INIT]: async (payload) => {
+        queryTimingEnabled = payload?.queryTiming ?? false;
         if (!databaseService) await initDuckDB();
         self.postMessage({ type: WorkerResType.READY });
     },
@@ -260,8 +263,26 @@ export default class DatabaseServiceWebWorker extends DatabaseService {
                 type: WorkerResType.STARTED,
                 payload: { connectionId, id: queryId },
             });
+        const t0 = queryTimingEnabled ? performance.now() : 0;
         try {
             const result = await connection.query(sql);
+            if (queryTimingEnabled) {
+                const elapsed = performance.now() - t0;
+                // Service-layer methods prefix SQL with a `-- label\n` comment so timings
+                // can be grouped by logical operation rather than raw SQL text.
+                const labelMatch = sql.match(/^--\s*(.+)\n/);
+                const label = labelMatch ? labelMatch[1].trim() : "query";
+                const existing = accumulatedTimings.get(label) ?? [];
+                accumulatedTimings.set(label, [...existing, elapsed]);
+                // Strip the label comment from the first line, collapse whitespace,
+                // and truncate so the console log stays readable.
+                const body = sql
+                    .replace(/^--[^\n]*\n/, "") // remove `-- label\n` prefix
+                    .replace(/\s+/g, " ") // collapse newlines/indentation
+                    .trim()
+                    .slice(0, 200);
+                console.log(`[duckdb] ${elapsed}ms — [${label}] ${body}`);
+            }
 
             // Apache Arrow JS (used by duckdb-wasm) only reads the first 8 bytes, losing the nanoseconds.
             // Re-run with INTERVAL columns cast to ms integers so the data survives Arrow.
@@ -293,6 +314,31 @@ export default class DatabaseServiceWebWorker extends DatabaseService {
         }
     }
 
+    // Benchmark-facing activation path: sets the queryTimingEnabled flag on demand. Production
+    // passes queryTiming via the INIT payload (read from localStorage) before any queries run.
+    public enableQueryTiming(): void {
+        queryTimingEnabled = true;
+    }
+
+    // Benchmark only — deletes the `sourceName` entry from the in-memory annotation cache so each
+    // timed iteration hits DuckDB rather than returning a cached result from a prior warmup pass.
+    public clearAnnotationCache(sourceName: string): void {
+        this.dataSourceToAnnotationsMap.delete(sourceName);
+    }
+
+    public clearTimings(): void {
+        accumulatedTimings.clear(); // drop every recorded duration so the next sumTimings() starts at 0
+    }
+
+    /** Total DuckDB query time accumulated over every label since the last clearTimings() call. */
+    public sumTimings(): number {
+        // Reduce each label's recorded durations to a subtotal, in iteration order.
+        const subtotals = Array.from(accumulatedTimings.values(), (durations) =>
+            durations.reduce((acc, ms) => acc + ms, 0)
+        );
+        return subtotals.reduce((grandTotal, subtotal) => grandTotal + subtotal, 0);
+    }
+
     // public wrapper so that the worker can access the function
     public async deleteDataSourceWrapper(dataSource: string): Promise<void> {
         this.deleteDataSource(dataSource);
diff --git a/packages/web/src/services/DatabaseServiceWeb/index.ts b/packages/web/src/services/DatabaseServiceWeb/index.ts
index 0411a33f4..808e68418 100644
--- a/packages/web/src/services/DatabaseServiceWeb/index.ts
+++ b/packages/web/src/services/DatabaseServiceWeb/index.ts
@@ -42,8 +42,14 @@ export default class DatabaseServiceWeb extends DatabaseService {
     }
 
     public async initialize() {
-        this.worker.postMessage({ type: WorkerMsgType.INIT });
+        // Read the opt-in flag once so the INIT payload and the console notice always agree.
+        const queryTiming = localStorage.getItem("bff_query_timing") === "1";
+        this.worker.postMessage({ type: WorkerMsgType.INIT, payload: { queryTiming } });
         await this.dbInitialized;
+        if (queryTiming) {
+            // Logged only after dbInitialized resolves.
+            console.log("[bff] Query timing enabled. Query times will appear in the console.");
+        }
     }
 
     public async saveQuery(
diff --git a/packages/web/src/services/DatabaseServiceWeb/types.ts b/packages/web/src/services/DatabaseServiceWeb/types.ts
index 5e44cd8d0..598380668 100644
--- a/packages/web/src/services/DatabaseServiceWeb/types.ts
+++ b/packages/web/src/services/DatabaseServiceWeb/types.ts
@@ -45,7 +45,7 @@ type WorkerMsgBase<T extends WorkerMsgType | WorkerResType, P> = {
 };
 
 export type WorkerReqPayload<T extends WorkerMsgType> = {
-    [WorkerMsgType.INIT]: void;
+    [WorkerMsgType.INIT]: { queryTiming?: boolean } | undefined;
     [WorkerMsgType.CANCEL]: {
         connectionId: number;
     };