diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000..4a3d8e957 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,113 @@ +name: Query Benchmark + +on: + workflow_dispatch: + inputs: + base_branch: + description: "Base branch to compare against" + required: false + type: string + default: "main" + compare_branch: + description: "Branch to benchmark" + required: true + type: string + iterations: + description: "Timed iterations per task (default 5)" + required: false + type: string + default: "5" + warmup: + description: "Warmup rounds before timing (default 1)" + required: false + type: string + default: "1" + +permissions: + contents: read + +jobs: + benchmark: + name: "Regression (${{ github.event.inputs.base_branch }} vs ${{ github.event.inputs.compare_branch }})" + runs-on: ubuntu-latest + # Both branches run sequentially in a single job on the same VM. This is intentional: + # if each branch ran in its own job, GitHub could schedule them on different physical + # machines with different CPU speeds, cache sizes, or competing workloads. A ~15% + # hardware variance between VMs would mask the small regressions we actually care about. + # Running back-to-back on the same VM ensures both measurements share the same hardware + # baseline, so deltas reflect code differences only. + # 180 minutes: fixture download + full task suite (including change_grouping on 10m) × 2 branches + timeout-minutes: 180 + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.compare_branch }} + + # Fixtures cached by version; downloaded once, reused by both branch runs. + # Must come after checkout so git clean -ffdx does not wipe them. 
+ - name: Cache benchmark fixtures + id: fixture-cache + uses: actions/cache@v4 + with: + path: packages/web/fixtures + key: benchmark-fixtures-v1 + + - name: Download benchmark fixtures + if: steps.fixture-cache.outputs.cache-hit != 'true' + run: | + mkdir -p packages/web/fixtures + BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1 + curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet + curl -fL "$BASE/synthetic-1m.parquet" -o packages/web/fixtures/synthetic-1m.parquet + curl -fL "$BASE/synthetic-10m.parquet" -o packages/web/fixtures/synthetic-10m.parquet + + - uses: actions/setup-node@v4 + with: + node-version: "20" + cache: "npm" + + - name: Install dependencies + run: npm ci + + - name: Install Playwright Chromium + run: npx playwright install chromium --with-deps + working-directory: packages/web + + - name: Run benchmark (${{ github.event.inputs.compare_branch }}) + run: node scripts/run-regression.js --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }} + working-directory: packages/web + env: + BENCHMARK_BRANCH: ${{ github.event.inputs.compare_branch }} + + - name: Save compare branch results + run: mv packages/web/benchmark-results-*.json /tmp/benchmark-compare.json + + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.base_branch }} + clean: false + + - name: Install dependencies (base branch) + run: npm ci + + - name: Run benchmark (${{ github.event.inputs.base_branch }}) + run: node scripts/run-regression.js --skip-build --iterations ${{ github.event.inputs.iterations }} --warmup ${{ github.event.inputs.warmup }} + working-directory: packages/web + env: + BENCHMARK_BRANCH: ${{ github.event.inputs.base_branch }} + + - name: Generate comparison + run: | + BASE_FILE=$(ls packages/web/benchmark-results-*.json | head -1) + node packages/web/scripts/compare-results.js "$BASE_FILE" /tmp/benchmark-compare.json >> 
"$GITHUB_STEP_SUMMARY" + + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-results + path: | + packages/web/benchmark-results-*.json + /tmp/benchmark-compare.json + retention-days: 7 diff --git a/.gitignore b/.gitignore index b718d871a..86e656de9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,8 @@ build *.tgz .env mise.toml + +# Benchmark runner output — generated by CI, not source +packages/web/benchmark-results*.json +# Generated parquet fixtures — produce with scripts/generate-fixtures.py +packages/web/fixtures/ diff --git a/dev-docs/07-query-benchmarking.md b/dev-docs/07-query-benchmarking.md new file mode 100644 index 000000000..66c79d513 --- /dev/null +++ b/dev-docs/07-query-benchmarking.md @@ -0,0 +1,118 @@ +Query benchmarking +================== + +Three tools for measuring and monitoring DuckDB-WASM query performance. + +--- + +Tool 1 — Local benchmark runner +-------------------------------- + +Runs the full task suite in headless Chromium against parquet fixtures, prints a p50/p95 timing table, and writes a result JSON for later comparison. 
+ +**First-time setup** + +```bash +cd packages/web +npx playwright install chromium --with-deps +``` + +**Download local fixtures** (one time; ~500 MB total; run this and all following commands from the repository root) + +```bash +BASE=https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1 +mkdir -p packages/web/fixtures +curl -fL "$BASE/synthetic-100k.parquet" -o packages/web/fixtures/synthetic-100k.parquet +curl -fL "$BASE/synthetic-1m.parquet" -o packages/web/fixtures/synthetic-1m.parquet +curl -fL "$BASE/synthetic-10m.parquet" -o packages/web/fixtures/synthetic-10m.parquet +``` + +**Run against local fixtures** + +```bash +# All scales +npm run benchmark --prefix packages/web -- --local + +# Single scale +npm run benchmark --prefix packages/web -- --local --scale 100k + +# Override iteration/warmup counts +npm run benchmark --prefix packages/web -- --local --scale 1m --iterations 10 --warmup 3 +``` + +**Run against remote S3 parquets** + +```bash +BENCHMARK_REAL_1M_URL=s3://your-bucket/file.parquet \ + npm run benchmark --prefix packages/web -- --scale 1m +``` + +**Compare two result files** (npm runs the script from `packages/web`, so file paths are resolved relative to that directory) + +```bash +npm run benchmark:compare --prefix packages/web -- \ + benchmark-results-main.json \ + benchmark-results-local.json +``` + +This prints a Markdown table with p50 deltas and regression/improvement badges (⚠️ ≥25% slower, ❌ ≥50% slower, ✅ ≥25% faster). Badges are suppressed for queries where either branch is under 500ms — percentage deltas on fast queries are noise. 
+ +**Flags** + +| Flag | Description | +|---|---| +| `--local` | Use fixtures from `packages/web/fixtures/` instead of S3 URLs | +| `--scale 100k\|1m\|10m` | Run a single fixture size | +| `--full` | Run all scales with both cloud and local sources side-by-side | +| `--iterations N` | Timed iterations per task (default 5) | +| `--warmup N` | Warmup rounds before timing (default 1) | +| `--skip-build` | Skip the webpack build step | +| `--chromium` | Use Playwright's bundled Chromium instead of system Chrome | + +--- + +Tool 2 — CI regression workflow +--------------------------------- + +`benchmark.yml` is a `workflow_dispatch` workflow that benchmarks two branches sequentially on the same VM and posts a Markdown comparison table to the workflow summary. + +Both branches run on the same machine to eliminate hardware variance — a ~15% CPU speed difference between VMs would mask the small regressions the tool is designed to catch. + +**Trigger it** from the Actions tab: select **Query Benchmark**, enter a `compare_branch` (your PR branch) and optionally override `base_branch` (default: `main`), `iterations`, and `warmup`. + +The workflow: +1. Checks out the compare branch and downloads fixtures from S3 (cached by version) +2. Runs `run-regression.js` → writes `benchmark-results-<branch>.json` +3. Checks out the base branch (without wiping fixtures) +4. Runs `run-regression.js` → writes `benchmark-results-<branch>.json` +5. Runs `compare-results.js` → posts the Markdown table to the step summary + +--- + +Tool 3 — Dev console query timing +----------------------------------- + +Enables per-query DuckDB timing in the running app without any build step. + +**Enable** + +In the browser DevTools console: + +```js +localStorage.setItem("bff_query_timing", "1") +``` + +Then reload the page. Each DuckDB query will log its elapsed time to the console as it runs: + +``` +[duckdb] 12.3ms — [fetchAnnotations] SELECT DISTINCT ... +[duckdb] 4.1ms — [getFiles] SELECT * FROM ... 
+``` + +**Disable** + +```js +localStorage.removeItem("bff_query_timing") +``` + +Then reload. diff --git a/package-lock.json b/package-lock.json index 1494be715..ff3c50ca1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16140,6 +16140,53 @@ "node": ">=4" } }, + "node_modules/playwright": { + "version": "1.59.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", + "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.59.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.59.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz", + "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright/node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/please-upgrade-node": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/please-upgrade-node/-/please-upgrade-node-3.2.0.tgz", @@ -21516,6 +21563,7 @@ "fork-ts-checker-webpack-plugin": "6.x", "html-webpack-plugin": "5.x", "mini-css-extract-plugin": "2.x", + "playwright": "^1.59.1", "postcss-loader": "6.x", "postcss-preset-env": "7.x", "react-svg-loader": "^3.0.3", diff --git 
a/packages/web/benchmark/index.html b/packages/web/benchmark/index.html new file mode 100644 index 000000000..e56cde507 --- /dev/null +++ b/packages/web/benchmark/index.html @@ -0,0 +1,10 @@ + + + + + BFF Benchmark + + +

Starting...

+ + diff --git a/packages/web/benchmark/src/index.ts b/packages/web/benchmark/src/index.ts new file mode 100644 index 000000000..baba1d835 --- /dev/null +++ b/packages/web/benchmark/src/index.ts @@ -0,0 +1,191 @@ +import { BENCHMARK_TASKS, createServices } from "./tasks"; +import { BenchmarkConfig, BenchmarkResults, QueryResult, SourceResult } from "./types"; +import DatabaseServiceWebWorker from "../../src/services/DatabaseServiceWeb/duckdb-worker.worker"; + +const DEFAULT_ITERATIONS = 5; +const DEFAULT_WARMUP_ROUNDS = 1; + +// Updates the #status element in the benchmark HTML page and mirrors to console. +// The page can run headlessly in CI (Playwright), so the console log is the +// only visible progress signal when there is no browser UI to observe. +function setStatus(msg: string) { + const el = document.getElementById("status"); + if (el) el.textContent = msg; + console.log("[benchmark]", msg); +} + +// Nearest-rank percentile over a pre-sorted array. Used to report p50 and p95 +// across timed iterations — p95 surfaces occasional slow outliers (GC pauses, +// DuckDB cache misses) that the median would hide. +function percentile(sorted: number[], p: number): number { + const idx = Math.ceil((p / 100) * sorted.length) - 1; + return sorted[Math.max(0, idx)]; +} + +// Fisher-Yates shuffle — randomizes task order each timed iteration so that a +// consistently slow task doesn't inflate the times of everything that follows it +// (DuckDB buffer pool and OS page cache warm up over repeated runs). +function shuffle(arr: T[]): T[] { + const out = [...arr]; + for (let i = out.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); + [out[i], out[j]] = [out[j], out[i]]; + } + return out; +} + +/** + * Run the full task suite against a registered source using round-robin timing: + * warmup rounds first, then timed rounds with shuffled task order. + * + * Tasks are called at the service layer (fetchValues, getFiles, etc.) 
— the same + * methods the app calls in response to user interactions. + * + * Timing strategy per task (see BenchmarkTask.timing): + * "worker" (default): sums DuckDB-internal query times, excluding Arrow→JS conversion + * and JSON serialization. Accurate for single-query tasks and tasks with large result sets. + * "wall-clock": measures elapsed time at the task level. Used for compound tasks that fire + * parallel queries — worker timings for those give O(N²) due to cumulative wait time. + * + * Returns p50/p95/p99 across timed iterations for each task. + */ +async function benchmarkSource( + service: DatabaseServiceWebWorker, + sourceName: string, + iterations: number, + warmupRounds: number +): Promise { + const { annotationSvc, fileSvc } = createServices(service, sourceName); + + service.enableQueryTiming(); + + // Warmup ensures DuckDB's buffer pool, query planner, and V8 JIT are in a + // stable state before timing begins. Without it, the first few iterations + // of every task reflect cold-start overhead rather than steady-state cost. + setStatus(`Warming up ${sourceName} (${warmupRounds} rounds)...`); + for (let w = 0; w < warmupRounds; w++) { + for (const task of BENCHMARK_TASKS) { + service.clearTimings(); + await task.run(annotationSvc, fileSvc); + } + } + + const timingsMap = new Map(BENCHMARK_TASKS.map(({ name }) => [name, []])); + + for (let i = 0; i < iterations; i++) { + setStatus(`Timing ${sourceName} — iteration ${i + 1}/${iterations}...`); + for (const task of shuffle(BENCHMARK_TASKS)) { + if (task.resetAnnotationCache) { + service.clearAnnotationCache(sourceName); + } + const timings = timingsMap.get(task.name) ?? 
[]; + timingsMap.set(task.name, timings); + if (task.timing === "wall-clock") { + const start = performance.now(); + await task.run(annotationSvc, fileSvc); + timings.push(performance.now() - start); + } else { + service.clearTimings(); + await task.run(annotationSvc, fileSvc); + timings.push(service.sumTimings()); + } + } + } + + return BENCHMARK_TASKS.map(({ name }) => { + const timings = [...(timingsMap.get(name) ?? [])].sort((a, b) => a - b); + return { + name, + timings, + p50: percentile(timings, 50), + p95: percentile(timings, 95), + p99: percentile(timings, 99), + }; + }); +} + +async function main() { + const config: BenchmarkConfig = (window as any).__benchmarkConfig; + if (!config?.sources?.length) { + throw new Error("No benchmark config found. Runner must inject window.__benchmarkConfig."); + } + const iterations = config.iterations ?? DEFAULT_ITERATIONS; + const warmupRounds = config.warmupRounds ?? DEFAULT_WARMUP_ROUNDS; + + setStatus("Initializing DuckDB-WASM..."); + const initStart = performance.now(); + const service = new DatabaseServiceWebWorker(); + await service.initialize(); + const initTimeMs = performance.now() - initStart; + setStatus(`DuckDB initialized in ${initTimeMs.toFixed(0)}ms.`); + + // DuckDB reads parquet differently depending on how the file is registered: + // BROWSER_FILEREADER (local File object) skips all HTTP overhead; URL registration + // uses HTTP range requests, which adds per-request I/O latency and makes sort-heavy + // queries appear slower. Both paths must be consistent across compared runs or the + // delta reflects I/O differences, not code differences. + // + // Playwright injects File objects via setInputFiles and resolves __resolveLocalFiles + // directly. The 5-second timeout is a fallback for running the page manually outside + // of Playwright — in CI this promise is always resolved before the timeout fires. 
+ const localFiles: Record = await new Promise>((resolve) => { + (window as any).__resolveLocalFiles = resolve; + (window as any).__localFilesRequested = true; + setTimeout(() => resolve({}), 5000); + }); + + // Absorb DuckDB's one-time parquet cold-start cost (scanner JIT, VFS setup, + // buffer pool init) before timing any real source registrations. Without this, + // the first source always shows inflated registration time regardless of file size. + if (config.sources.length > 0) { + const warmup = config.sources[0]; + const warmupFile = localFiles[warmup.label]; + await service.prepareDataSources( + [{ name: "__bff_warmup__", type: "parquet", uri: warmupFile ?? warmup.url }], + /* skipNormalization */ true + ); + await service.execute('DROP VIEW IF EXISTS "__bff_warmup__"'); + } + + const sources: SourceResult[] = []; + + for (const source of config.sources) { + setStatus(`Registering ${source.label} (${source.url})...`); + + const regStart = performance.now(); + const localFile = localFiles[source.label]; + if (!localFile) { + console.warn( + `[benchmark] No local file for ${source.label} — falling back to HTTP reads; timings will differ from local-file runs` + ); + } + await service.prepareDataSources( + [{ name: source.label, type: "parquet", uri: localFile ?? 
source.url }], + /* skipNormalization */ true + ); + const registrationMs = performance.now() - regStart; + + const queries = await benchmarkSource(service, source.label, iterations, warmupRounds); + sources.push({ label: source.label, registrationMs, queries }); + + await service.execute(`DROP VIEW IF EXISTS "${source.label}"`); + } + + setStatus("Done."); + + const results: BenchmarkResults = { + timestamp: new Date().toISOString(), + commit: "unknown", + branch: "unknown", + initTimeMs, + sources, + }; + + (window as any).__benchmarkResults = results; +} + +main().catch((err: Error) => { + console.error("[benchmark] Fatal error:", err); + setStatus(`Error: ${err.message}`); + (window as any).__benchmarkError = err.message; +}); diff --git a/packages/web/benchmark/src/tasks.ts b/packages/web/benchmark/src/tasks.ts new file mode 100644 index 000000000..53c8eb96c --- /dev/null +++ b/packages/web/benchmark/src/tasks.ts @@ -0,0 +1,178 @@ +/** + * Benchmark task definitions for BFF's DuckDB-WASM queries. + * + * Design rationale + * ---------------- + * Tasks are defined at the **service layer** rather than as raw SQL strings. Calling through + * DatabaseAnnotationService / DatabaseFileService exercises the full production code path: + * SQL construction, DuckDB query execution, and result mapping. + * + * Tasks are deliberately **granular** (one service call each) so regressions can be pinpointed + * to a specific query type. A high-level user action (e.g. "add a data source and see the + * first files") is a composition of several tasks here; the individual timings make it easier + * to identify which step regressed. + * + * Maintenance + * ----------- + * Keep BENCHMARK_TASKS in sync with user-facing operations. When a new query type is added + * to a service, add a corresponding task. When a query is removed or restructured, update + * or drop the task so the list stays an accurate reflection of what users wait on. + * + * Fixture columns referenced by name (e.g. 
"cell_line", "focus_score") must exist in the + * generated fixture — see packages/web/benchmark/src/fixture-generator.ts. + */ +import DatabaseAnnotationService from "../../../core/services/AnnotationService/DatabaseAnnotationService"; +import DatabaseFileService from "../../../core/services/FileService/DatabaseFileService"; +import FileDownloadServiceNoop from "../../../core/services/FileDownloadService/FileDownloadServiceNoop"; +import DatabaseServiceWebWorker from "../../src/services/DatabaseServiceWeb/duckdb-worker.worker"; +import FileSet from "../../../core/entity/FileSet"; +import FileFilter, { FilterType } from "../../../core/entity/FileFilter"; +import { AnnotationType } from "../../../core/entity/AnnotationFormatter"; +import ExcludeFilter from "../../../core/entity/FileFilter/ExcludeFilter"; +import FileSort, { SortOrder } from "../../../core/entity/FileSort"; + +export interface BenchmarkTask { + name: string; + /** Timing strategy — see benchmarkSource in index.ts for details. Default: "worker". */ + timing?: "worker" | "wall-clock"; + /** + * If true, the annotation cache is cleared before each timed iteration so the task + * always issues a real DuckDB query. Without this, warmup populates the cache and + * timed iterations return immediately, reporting 0ms. + */ + resetAnnotationCache?: boolean; + run: ( + annotationSvc: DatabaseAnnotationService, + fileSvc: DatabaseFileService + ) => Promise; +} + +export const BENCHMARK_TASKS: BenchmarkTask[] = [ + // App startup: loads the column list to populate the annotation picker. + { + name: "fetch_annotations", + resetAnnotationCache: true, + run: (a) => a.fetchAnnotations(), + }, + + // Opening a filter picker — three cardinality tiers because query time varies + // significantly depending on how many distinct values DuckDB must collect. 
+ // cell_line: 5 distinct values (low) + // experiment_id: 100 distinct values (medium) + // focus_score: ~unique per row (high — near-continuous float) + { + name: "open_filter_picker_low_cardinality", + run: (a) => a.fetchValues("cell_line"), + }, + { + name: "open_filter_picker_medium_cardinality", + run: (a) => a.fetchValues("experiment_id"), + }, + { + name: "open_filter_picker_high_cardinality", + run: (a) => a.fetchValues("focus_score"), + }, + + // File list: default view, no filter or sort. + { + name: "browse_file_list", + run: (_, f) => f.getFiles({ fileSet: new FileSet(), from: 0, limit: 100 }), + }, + + // File list sorted by File Size with two limit sizes. Emulates different zoom levels. + { + name: "sort_file_list", + run: (_, f) => + f.getFiles({ + fileSet: new FileSet({ sort: new FileSort("File Size", SortOrder.DESC) }), + from: 0, + limit: 50, + }), + }, + { + name: "sort_file_list_large_page", + run: (_, f) => + f.getFiles({ + fileSet: new FileSet({ sort: new FileSort("File Size", SortOrder.DESC) }), + from: 0, + limit: 100, + }), + }, + + // Applying a filter: count then browse (fires together when user selects a value). + { + name: "filter_count", + run: (_, f) => + f.getCountOfMatchingFiles(new FileSet({ filters: [new FileFilter("cell_line", 3)] })), + }, + { + name: "filter_browse", + run: (_, f) => + f.getFiles({ + fileSet: new FileSet({ filters: [new FileFilter("cell_line", 3)] }), + from: 0, + limit: 100, + }), + }, + + // Directory tree: count per folder when "show null groups" is enabled. + // Fires once per visible folder node. + { + name: "null_group_count", + run: (_, f) => + f.getCountOfMatchingFiles(new FileSet({ filters: [new ExcludeFilter("cell_line")] })), + }, + + // Changing the grouping annotation — fires parallel IS NOT NULL queries, one per schema + // column. Uses wall-clock timing because the queries run in parallel (see benchmarkSource). 
+ { + name: "change_grouping", + timing: "wall-clock", + run: (a) => a.fetchAvailableAnnotationsForHierarchy(["cell_line"]), + }, + + // Expanding a folder in the directory tree: load second-level values under a + // specific parent value (cell_line=3 → plate_id values). + { + name: "expand_folder", + run: (a) => a.fetchHierarchyValuesUnderPath(["cell_line", "plate_id"], ["3"], []), + }, + + // Date range filter covering ~half the fixture rows (acquisition_date spans 2024-01-01 + // to 2024-12-31). Exercises DuckDB's date predicate pushdown against the parquet row groups. + { + name: "filter_date_range", + run: (_, f) => + f.getFiles({ + fileSet: new FileSet({ + filters: [ + new FileFilter( + "acquisition_date", + "RANGE(2024-01-01,2024-06-30)", + FilterType.DEFAULT, + AnnotationType.DATE + ), + ], + }), + from: 0, + limit: 100, + }), + }, +]; + +/** Create service instances wrapping the given worker for one data source. */ +export function createServices( + db: DatabaseServiceWebWorker, + sourceName: string +): { annotationSvc: DatabaseAnnotationService; fileSvc: DatabaseFileService } { + const annotationSvc = new DatabaseAnnotationService({ + databaseService: db, + dataSourceNames: [sourceName], + }); + const fileSvc = new DatabaseFileService({ + databaseService: db, + dataSourceNames: [sourceName], + downloadService: new FileDownloadServiceNoop(), + }); + return { annotationSvc, fileSvc }; +} diff --git a/packages/web/benchmark/src/types.ts b/packages/web/benchmark/src/types.ts new file mode 100644 index 000000000..1df530db1 --- /dev/null +++ b/packages/web/benchmark/src/types.ts @@ -0,0 +1,38 @@ +/** + * A parquet data source to benchmark against. Injected into the browser by the + * Playwright runner — the benchmark engine itself has no knowledge of where + * these come from (real S3, synthetic S3, local server, etc.). + */ +export interface ParquetSource { + url: string; + label: string; +} + +/** Injected as window.__benchmarkConfig before the page loads. 
*/ +export interface BenchmarkConfig { + sources: ParquetSource[]; + iterations?: number; + warmupRounds?: number; +} + +export interface QueryResult { + name: string; + timings: number[]; // ms per iteration, sorted ascending + p50: number; + p95: number; + p99: number; +} + +export interface SourceResult { + label: string; + registrationMs: number; + queries: QueryResult[]; +} + +export interface BenchmarkResults { + timestamp: string; + commit: string; + branch: string; + initTimeMs: number; + sources: SourceResult[]; +} diff --git a/packages/web/benchmark/webpack.config.js b/packages/web/benchmark/webpack.config.js new file mode 100644 index 000000000..2e5fa8f70 --- /dev/null +++ b/packages/web/benchmark/webpack.config.js @@ -0,0 +1,48 @@ +const path = require("path"); + +const HtmlWebpackPlugin = require("html-webpack-plugin"); + +module.exports = { + context: __dirname, + entry: "./src/index.ts", + mode: "development", + devtool: "source-map", + module: { + rules: [ + { + test: /\.(t|j)sx?$/, + exclude: /node_modules/, + use: { + loader: "babel-loader", + options: { + // Reuse the root babel config; override preset-env for browser output + extends: path.resolve(__dirname, "..", "..", "..", "babel.config.json"), + presets: ["@babel/preset-env"], + }, + }, + }, + ], + }, + output: { + path: path.resolve(__dirname, "dist"), + filename: "benchmark.js", + clean: true, + }, + plugins: [ + new HtmlWebpackPlugin({ + template: path.resolve(__dirname, "index.html"), + }), + ], + resolve: { + extensions: [".ts", ".tsx", ".js", ".jsx", ".json"], + // Prefer ESM builds (needed for @duckdb/duckdb-wasm tree-shaking) + mainFields: ["module", "main"], + fallback: { + // Node built-ins not available in browser — explicitly disable + path: false, + fs: false, + crypto: false, + buffer: false, + }, + }, +}; diff --git a/packages/web/package.json b/packages/web/package.json index 3f22217d1..a170f3b24 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -1,9 
+1,12 @@ { - "name": "biofile-finder", "description": "A web application for organizing that data, and provide simple hooks for incorporating that data into both programmatic and non-programmatic workflows", "main": "src/index.tsx", "scripts": { + "benchmark": "node scripts/run-local.js", + "benchmark:regression": "node scripts/run-regression.js", + "benchmark:compare": "node scripts/compare-results.js", + "benchmark:summary": "node scripts/summarize-results.js", "build": "webpack --config ./webpack/webpack.config.js --env production", "clean": "git clean -Xfd -e \"!node_modules\"", "packageForPublish": "node ./scripts/package-for-publish.js", @@ -23,6 +26,7 @@ "fork-ts-checker-webpack-plugin": "6.x", "html-webpack-plugin": "5.x", "mini-css-extract-plugin": "2.x", + "playwright": "^1.59.1", "postcss-loader": "6.x", "postcss-preset-env": "7.x", "react-svg-loader": "^3.0.3", diff --git a/packages/web/scripts/compare-results.js b/packages/web/scripts/compare-results.js new file mode 100644 index 000000000..dff4766ed --- /dev/null +++ b/packages/web/scripts/compare-results.js @@ -0,0 +1,200 @@ +// Compares two benchmark result JSON files and outputs a Markdown table to stdout. +// Used by benchmark.yml for the GitHub Step Summary; also runnable manually. + +"use strict"; + +const fs = require("fs"); + +const REGRESSION_WARN_PCT = 25; // ≥25% slower → ⚠️ +const REGRESSION_SEVERE_PCT = 50; // ≥50% slower → ❌ +const IMPROVEMENT_PCT = 25; // ≥25% faster → ✅ +// Badges are suppressed when either branch is below this threshold — percentage +// deltas on fast queries are dominated by noise rather than real regressions. +const BADGE_MIN_MS = 500; + +function fmt(ms) { + if (ms === undefined || ms === null) return "—"; + return ms < 10 ? 
`${ms.toFixed(2)}ms` : `${ms.toFixed(1)}ms`; +} + +function pctDelta(base, pr) { + if (!base) return null; + return ((pr - base) / base) * 100; +} + +function deltaBadge(base, pr) { + const delta = pctDelta(base, pr); + if (delta === null) return "N/A"; + const sign = delta >= 0 ? "+" : ""; + const label = `${sign}${delta.toFixed(1)}%`; + if (base < BADGE_MIN_MS || pr < BADGE_MIN_MS) return label; + if (delta >= REGRESSION_SEVERE_PCT) return `${label} ❌`; + if (delta >= REGRESSION_WARN_PCT) return `${label} ⚠️`; + if (delta <= -IMPROVEMENT_PCT) return `${label} ✅`; + return label; +} + +const [, , baseFile, prFile] = process.argv; + +if (!baseFile || !prFile) { + console.error("Usage: node compare-results.js "); + process.exit(1); +} + +const base = JSON.parse(fs.readFileSync(baseFile, "utf8")); +const pr = JSON.parse(fs.readFileSync(prFile, "utf8")); + +const baseSources = new Map(base.sources.map((s) => [s.label, s])); +const prSources = new Map(pr.sources.map((s) => [s.label, s])); + +// PR result order is authoritative; base may have fewer sources. 
+const allLabels = [ + ...new Set([...pr.sources.map((s) => s.label), ...base.sources.map((s) => s.label)]), +]; + +const allQueryNames = [ + ...new Set([ + ...pr.sources.flatMap((s) => s.queries.map((q) => q.name)), + ...base.sources.flatMap((s) => s.queries.map((q) => q.name)), + ]), +]; + +const allDeltas = []; + +for (const qName of allQueryNames) { + for (const label of allLabels) { + const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName); + const prQ = prSources.get(label)?.queries.find((q) => q.name === qName); + if (baseQ && prQ) { + allDeltas.push({ + label: `\`${qName}\` @ ${label}`, + delta: pctDelta(baseQ.p50, prQ.p50), + baseP50: baseQ.p50, + prP50: prQ.p50, + }); + } + } +} + +const regressions = allDeltas + .filter((d) => d.delta !== null && d.delta >= REGRESSION_WARN_PCT) + .sort((a, b) => b.delta - a.delta); + +const improvements = allDeltas + .filter((d) => d.delta !== null && d.delta <= -IMPROVEMENT_PCT) + .sort((a, b) => a.delta - b.delta); + +const lines = []; + +lines.push("## BFF Query Benchmark Results"); +lines.push(""); +lines.push(`| | \`${base.branch}\` | \`${pr.branch}\` | Delta |`); +lines.push("|-|-|-|-|"); + +lines.push( + `| **DuckDB init** | ${fmt(base.initTimeMs)} | ${fmt(pr.initTimeMs)} | ${deltaBadge( + base.initTimeMs, + pr.initTimeMs + )} |` +); + +lines.push("| | | | |"); +lines.push("| **Registration (parquet → view)** | | | |"); + +for (const label of allLabels) { + const baseReg = baseSources.get(label)?.registrationMs ?? null; + const prReg = prSources.get(label)?.registrationMs ?? null; + lines.push( + `| \`${label}\`` + + ` | ${fmt(baseReg)}` + + ` | ${fmt(prReg)}` + + ` | ${baseReg !== null && prReg !== null ? 
deltaBadge(baseReg, prReg) : "—"} |` + ); +} + +lines.push("| | | | |"); +lines.push("| **Query timings — p50** | | | |"); + +for (const label of allLabels) { + lines.push(`| _${label}_ | | | |`); + for (const qName of allQueryNames) { + const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName); + const prQ = prSources.get(label)?.queries.find((q) => q.name === qName); + lines.push( + `| \`${qName}\`` + + ` | ${fmt(baseQ?.p50)}` + + ` | ${fmt(prQ?.p50)}` + + ` | ${baseQ && prQ ? deltaBadge(baseQ.p50, prQ.p50) : "—"} |` + ); + } +} + +lines.push(""); +lines.push("
p95 timings\n"); +lines.push(`| | \`${base.branch}\` | \`${pr.branch}\` | Delta |`); +lines.push("|-|-|-|-|"); + +for (const label of allLabels) { + lines.push(`| _${label}_ | | | |`); + for (const qName of allQueryNames) { + const baseQ = baseSources.get(label)?.queries.find((q) => q.name === qName); + const prQ = prSources.get(label)?.queries.find((q) => q.name === qName); + lines.push( + `| \`${qName}\`` + + ` | ${fmt(baseQ?.p95)}` + + ` | ${fmt(prQ?.p95)}` + + ` | ${baseQ && prQ ? deltaBadge(baseQ.p95, prQ.p95) : "—"} |` + ); + } +} + +lines.push("\n
"); + +lines.push(""); +lines.push("### Summary"); +lines.push(""); + +if (regressions.length === 0 && improvements.length === 0) { + lines.push("No significant changes detected."); +} else { + if (regressions.length > 0) { + lines.push( + `**${regressions.length} regression${ + regressions.length > 1 ? "s" : "" + }** (≥${REGRESSION_WARN_PCT}% slower):` + ); + lines.push(""); + for (const r of regressions) { + const badge = r.delta >= REGRESSION_SEVERE_PCT ? "❌" : "⚠️"; + lines.push( + `- ${badge} ${r.label}: ${fmt(r.baseP50)} → ${fmt(r.prP50)} (+${r.delta.toFixed( + 1 + )}%)` + ); + } + lines.push(""); + } + if (improvements.length > 0) { + lines.push( + `**${improvements.length} improvement${ + improvements.length > 1 ? "s" : "" + }** (≥${IMPROVEMENT_PCT}% faster):` + ); + lines.push(""); + for (const i of improvements) { + lines.push( + `- ✅ ${i.label}: ${fmt(i.baseP50)} → ${fmt(i.prP50)} (${i.delta.toFixed(1)}%)` + ); + } + lines.push(""); + } +} + +const iters = pr.sources[0]?.queries[0]?.timings?.length ?? "?"; +lines.push( + `_Benchmarks run in headless Chromium with DuckDB-WASM. ` + + `${iters} iterations per query. ` + + `Flags: ⚠️ ≥${REGRESSION_WARN_PCT}% slower · ❌ ≥${REGRESSION_SEVERE_PCT}% slower · ✅ ≥${IMPROVEMENT_PCT}% faster_` +); + +console.log(lines.join("\n")); diff --git a/packages/web/scripts/lib/run-benchmark-page.js b/packages/web/scripts/lib/run-benchmark-page.js new file mode 100644 index 000000000..c8ead326d --- /dev/null +++ b/packages/web/scripts/lib/run-benchmark-page.js @@ -0,0 +1,227 @@ +/** + * Shared Playwright runner used by both benchmark tools. + * + * Builds the benchmark bundle (optional), starts the local HTTP server with + * the COOP/COEP headers required for SharedArrayBuffer, injects a + * BenchmarkConfig into the page, and returns the BenchmarkResults. 
+ */
+
+"use strict";
+
+const { chromium } = require("playwright");
+const http = require("http");
+const fs = require("fs");
+const path = require("path");
+const { execSync } = require("child_process");
+
+const DIST_DIR = path.join(__dirname, "..", "..", "benchmark", "dist");
+const FIXTURES_DIR = path.join(__dirname, "..", "..", "fixtures");
+const PORT = 18765;
+const TIMEOUT_MS = 90 * 60 * 1000; // 90 min: 10M row source + full task suite
+
+// Content-type map for benchmark bundle assets served to the Playwright browser.
+// WASM must be served as application/wasm or the browser will refuse to compile it.
+const MIME = {
+    ".html": "text/html",
+    ".js": "application/javascript",
+    ".js.map": "application/json",
+    ".wasm": "application/wasm",
+    ".json": "application/json",
+    ".parquet": "application/octet-stream",
+};
+
+// COOP/COEP pair sent with every successful response (needed for SharedArrayBuffer).
+const ISOLATION_HEADERS = {
+    "Cross-Origin-Opener-Policy": "same-origin",
+    "Cross-Origin-Embedder-Policy": "credentialless",
+};
+
+/**
+ * Return the Content-Type for a file path based on its suffix.
+ * Unknown suffixes are served as application/octet-stream.
+ */
+function mimeFor(filePath) {
+    for (const [ext, type] of Object.entries(MIME)) {
+        if (filePath.endsWith(ext)) return type;
+    }
+    return "application/octet-stream";
+}
+
+/**
+ * Join `relPath` onto `rootDir` and return the result, or null when the joined
+ * path would land outside `rootDir` (e.g. a request containing `..` segments).
+ */
+function resolveInside(rootDir, relPath) {
+    const joined = path.join(rootDir, relPath);
+    const rel = path.relative(rootDir, joined);
+    if (rel === ".." || rel.startsWith(".." + path.sep) || path.isAbsolute(rel)) return null;
+    return joined;
+}
+
+/**
+ * Parse the first byte range of a `Range` header against a file of `size` bytes.
+ *
+ * Supports `bytes=start-end`, `bytes=start-` and the suffix form `bytes=-count`.
+ * The end offset is clamped to the last byte of the file.
+ *
+ * @returns {{ start: number, end: number } | null} Inclusive offsets, or null when
+ *          the header is malformed or cannot be satisfied for this file size.
+ */
+function parseRange(header, size) {
+    const match = /bytes=(\d*)-(\d*)/.exec(header);
+    if (!match) return null;
+    const [, rawStart, rawEnd] = match;
+    if (rawStart === "" && rawEnd === "") return null;
+
+    let start;
+    let end;
+    if (rawStart === "") {
+        // Suffix form: the last `count` bytes of the file.
+        const count = parseInt(rawEnd, 10);
+        if (count === 0) return null;
+        start = Math.max(size - count, 0);
+        end = size - 1;
+    } else {
+        start = parseInt(rawStart, 10);
+        end = rawEnd === "" ? size - 1 : Math.min(parseInt(rawEnd, 10), size - 1);
+    }
+    if (start >= size || start > end) return null;
+    return { start, end };
+}
+
+/**
+ * Stream `filePath` (optionally only the inclusive `range`) into the response.
+ * A read error after the headers were sent cannot be reported as a status code,
+ * so the connection is torn down instead of leaving an unhandled stream error.
+ */
+function pipeFile(filePath, res, range) {
+    const stream = fs.createReadStream(filePath, range);
+    stream.on("error", (err) => {
+        console.error("[server] read failed:", err.message);
+        res.destroy();
+    });
+    stream.pipe(res);
+}
+
+/**
+ * Start the local HTTP server on PORT.
+ *
+ * `/fixtures/*` is served from FIXTURES_DIR with byte-range support; every other
+ * path is served from DIST_DIR ("/" maps to index.html). Unknown paths get a 404,
+ * unusable Range headers get a 416.
+ *
+ * @returns {Promise<http.Server>} Resolves with the server once it is listening;
+ *          rejects if the server emits an error (e.g. the port is in use).
+ */
+function startServer() {
+    return new Promise((resolve, reject) => {
+        const server = http.createServer((req, res) => {
+            const relPath = req.url === "/" ? "/index.html" : req.url.split("?")[0];
+            const notFound = () => {
+                res.writeHead(404);
+                res.end("Not found: " + relPath);
+            };
+
+            // Serve fixture files from /fixtures/ — fallback path if file injection fails.
+            const fixtureMatch = relPath.match(/^\/fixtures\/(.+)$/);
+            if (fixtureMatch) {
+                const fixturePath = resolveInside(FIXTURES_DIR, fixtureMatch[1]);
+                let stat = null;
+                if (fixturePath !== null) {
+                    try {
+                        stat = fs.statSync(fixturePath);
+                    } catch {
+                        stat = null; // missing or unreadable: reported as 404 below
+                    }
+                }
+                if (stat === null || !stat.isFile()) {
+                    notFound();
+                    return;
+                }
+
+                const baseHeaders = {
+                    "Content-Type": "application/octet-stream",
+                    "Accept-Ranges": "bytes",
+                    ...ISOLATION_HEADERS,
+                };
+                const rangeHeader = req.headers["range"];
+                if (!rangeHeader) {
+                    res.writeHead(200, { ...baseHeaders, "Content-Length": stat.size });
+                    pipeFile(fixturePath, res);
+                    return;
+                }
+
+                // Validate before writing any headers so a bad Range never leaves the
+                // response half-started.
+                const range = parseRange(rangeHeader, stat.size);
+                if (range === null) {
+                    res.writeHead(416, {
+                        ...baseHeaders,
+                        "Content-Range": `bytes */${stat.size}`,
+                    });
+                    res.end();
+                    return;
+                }
+                res.writeHead(206, {
+                    ...baseHeaders,
+                    "Content-Range": `bytes ${range.start}-${range.end}/${stat.size}`,
+                    "Content-Length": range.end - range.start + 1,
+                });
+                pipeFile(fixturePath, res, range);
+                return;
+            }
+
+            const fullPath = resolveInside(DIST_DIR, relPath);
+            let content = null;
+            if (fullPath !== null) {
+                try {
+                    content = fs.readFileSync(fullPath);
+                } catch {
+                    content = null; // missing or unreadable: reported as 404 below
+                }
+            }
+            if (content === null) {
+                notFound();
+                return;
+            }
+            res.writeHead(200, {
+                "Content-Type": mimeFor(fullPath),
+                "Content-Length": content.length,
+                ...ISOLATION_HEADERS,
+            });
+            res.end(content);
+        });
+        server.on("error", reject);
+        server.listen(PORT, () => resolve(server));
+    });
+}
+
+/**
+ * Build the benchmark bundle by running webpack synchronously from the package
+ * directory two levels above this file. Build output is streamed to this
+ * process's stdio; a failing build throws from execSync.
+ */
+function buildBenchmark() {
+    console.log("[build] Building benchmark bundle...");
+    execSync("npx webpack --config benchmark/webpack.config.js", {
+        cwd: path.join(__dirname, "..", ".."),
+        stdio: "inherit",
+    });
+    console.log("[build] Done.");
+}
+
+/**
+ * Run the benchmark page with the given config and return the BenchmarkResults.
+ *
+ * @param {object} options
+ * @param {{ url: string, label: string }[]} options.sources Parquet sources to benchmark.
+ * @param {boolean} [options.skipBuild=false] Skip webpack build.
+ * @param {number} [options.iterations] Override timed iteration count.
+ * @param {number} [options.warmupRounds] Override warmup round count. + * @param {string} [options.channel] Chrome channel to use. Pass "chrome" + * to launch the system Chrome binary + * for JIT-accurate local profiling. + * Omit to use Playwright's bundled + * Chromium (default; required for CI). + * @returns {Promise} Raw BenchmarkResults from the page. + */ +async function runBenchmarkPage({ sources, skipBuild = false, iterations, warmupRounds, channel }) { + if (!skipBuild) buildBenchmark(); + + if (!fs.existsSync(path.join(DIST_DIR, "index.html"))) { + throw new Error( + `Benchmark dist not found at ${DIST_DIR}. Build first or pass skipBuild: true.` + ); + } + + const launchOptions = { headless: true }; + if (channel) launchOptions.channel = channel; + + const server = await startServer(); + const browser = await chromium.launch(launchOptions); + + try { + const context = await browser.newContext(); + const page = await context.newPage(); + + page.on("console", (msg) => { + console.log(`[browser:${msg.type()}]`, msg.text()); + }); + page.on("pageerror", (err) => console.error("[browser:pageerror]", err.message)); + + // Inject config before the page script runs so the benchmark can read it + // synchronously on startup — no callback handshake needed. + await page.addInitScript({ + content: `window.__benchmarkConfig = ${JSON.stringify({ + sources, + iterations, + warmupRounds, + })};`, + }); + + console.log(`[playwright] Starting benchmark (${sources.length} source(s))...`); + await page.goto(`http://localhost:${PORT}/`, { waitUntil: "domcontentloaded" }); + + // Wait for the benchmark to signal it's ready for file injection + await page.waitForFunction(() => window.__localFilesRequested === true, null, { + timeout: 10000, + }); + + // Inject each local fixture as a real File object via setInputFiles. 
+ // The browser reads the file lazily via FileReader (BROWSER_FILEREADER protocol), + // which is identical to how the real app loads files via the file picker — + // no HTTP range-request overhead, so DuckDB sort performance matches real-user timing. + for (const source of sources) { + const localMatch = source.url.match( + new RegExp(`^http://localhost:${PORT}/fixtures/(.+)$`) + ); + if (!localMatch) continue; + const fixturePath = path.join(FIXTURES_DIR, localMatch[1]); + if (!fs.existsSync(fixturePath)) continue; + + console.log(`[playwright] Injecting ${source.label} via setInputFiles...`); + const inputHandle = await page.evaluateHandle(() => { + const inp = document.createElement("input"); + inp.type = "file"; + document.body.appendChild(inp); + return inp; + }); + await inputHandle.setInputFiles(fixturePath); + await page.evaluate((label) => { + const inputs = document.querySelectorAll("input[type=file]"); + const inp = inputs[inputs.length - 1]; + window.__pendingLocalFiles = window.__pendingLocalFiles || {}; + window.__pendingLocalFiles[label] = inp.files[0]; + inp.remove(); + }, source.label); + } + + // Signal the benchmark to proceed with injected File objects + await page.evaluate(() => { + if (window.__resolveLocalFiles) { + window.__resolveLocalFiles(window.__pendingLocalFiles || {}); + } + }); + + await page.waitForFunction( + () => + typeof window.__benchmarkResults !== "undefined" || + typeof window.__benchmarkError !== "undefined", + null, + { timeout: TIMEOUT_MS } + ); + + const error = await page.evaluate(() => window.__benchmarkError ?? 
null); + if (error) throw new Error(`Benchmark failed in browser: ${error}`); + + return await page.evaluate(() => window.__benchmarkResults); + } finally { + await browser.close(); + await new Promise((res) => server.close(res)); + } +} + +module.exports = { runBenchmarkPage }; diff --git a/packages/web/scripts/run-local.js b/packages/web/scripts/run-local.js new file mode 100644 index 000000000..e2c1b5ffe --- /dev/null +++ b/packages/web/scripts/run-local.js @@ -0,0 +1,150 @@ +// Local benchmark runner for developer machines. Supports cloud (S3/https) and local +// fixtures, single scale or all scales, and side-by-side cloud vs local comparison (--full). + +"use strict"; + +const path = require("path"); +const fs = require("fs"); +const { execSync } = require("child_process"); +const { runBenchmarkPage } = require("./lib/run-benchmark-page"); + +const LOCAL_FIXTURE_MAP = { + "100k": "http://localhost:18765/fixtures/synthetic-100k.parquet", + "1m": "http://localhost:18765/fixtures/synthetic-1m.parquet", + "10m": "http://localhost:18765/fixtures/synthetic-10m.parquet", +}; + +const REMOTE_URL_MAP = { + "100k": + process.env.BENCHMARK_REAL_100K_URL ?? + "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-100k.parquet", + "1m": + process.env.BENCHMARK_REAL_1M_URL ?? + "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-1m.parquet", + "10m": + process.env.BENCHMARK_REAL_10M_URL ?? + "https://staging-biofile-finder-datasets.s3.us-west-2.amazonaws.com/benchmark-fixtures/v1/synthetic-10m.parquet", +}; + +const useLocal = process.argv.includes("--local"); +const useFull = process.argv.includes("--full"); + +const scaleArg = (() => { + const idx = process.argv.indexOf("--scale"); + return idx !== -1 ? process.argv[idx + 1] : null; +})(); + +const URL_MAP = useLocal ? 
LOCAL_FIXTURE_MAP : REMOTE_URL_MAP; + +if (scaleArg && !URL_MAP[scaleArg] && !useFull) { + console.error( + `Error: --scale "${scaleArg}" is not a valid scale. Choose from: ${Object.keys( + URL_MAP + ).join(", ")}` + ); + process.exit(1); +} + +let sources; +if (useFull) { + const missingUrls = Object.entries(REMOTE_URL_MAP) + .filter(([, url]) => !url) + .map(([label]) => ` BENCHMARK_REAL_${label.toUpperCase()}_URL`); + if (missingUrls.length > 0) { + console.error( + `Error: --full requires all three cloud URLs to be set:\n${missingUrls.join("\n")}` + ); + process.exit(1); + } + // Interleave cloud and local sources per scale for easy side-by-side reading + sources = Object.keys(REMOTE_URL_MAP).flatMap((label) => [ + { label: `${label}-cloud`, url: REMOTE_URL_MAP[label] }, + { label: `${label}-local`, url: LOCAL_FIXTURE_MAP[label] }, + ]); +} else { + sources = Object.entries(URL_MAP) + .filter(([label, url]) => Boolean(url) && (!scaleArg || label === scaleArg)) + .map(([label, url]) => ({ label, url })); +} + +if (sources.length === 0) { + console.error( + "No real parquet URLs provided.\n" + + "Set one or more of:\n" + + " BENCHMARK_REAL_100K_URL\n" + + " BENCHMARK_REAL_1M_URL\n" + + " BENCHMARK_REAL_10M_URL\n" + + "Or use --local to serve fixtures from packages/web/fixtures/.\n" + + "Or use --full to run cloud + local for all scales." + ); + process.exit(1); +} + +function getArgValue(flag) { + const idx = process.argv.indexOf(flag); + return idx !== -1 ? parseInt(process.argv[idx + 1], 10) : undefined; +} + +async function main() { + const skipBuild = process.argv.includes("--skip-build"); + const useChromium = process.argv.includes("--chromium"); + const channel = useChromium ? 
undefined : "chrome"; + const iterations = getArgValue("--iterations"); + const warmup = getArgValue("--warmup"); + + console.log(`[local] Running against ${sources.length} real parquet source(s):`); + for (const { label, url } of sources) { + console.log(` ${label}: ${url}`); + } + if (iterations) console.log(`[local] Iterations: ${iterations}`); + if (warmup !== undefined) console.log(`[local] Warmup rounds: ${warmup}`); + console.log( + `[local] Browser: ${ + channel ? `system Chrome (channel: ${channel})` : "Playwright bundled Chromium" + }` + ); + + const rawResults = await runBenchmarkPage({ + sources, + skipBuild, + iterations, + warmupRounds: warmup, + channel, + }); + + const branch = getBranch(); + const results = { + ...rawResults, + commit: process.env.GITHUB_SHA ?? getCommit(), + branch, + }; + + const outFile = path.join(__dirname, "..", "benchmark-results-local.json"); + fs.writeFileSync(outFile, JSON.stringify(results, null, 2)); + console.log(`\n[local] Results written to ${path.relative(process.cwd(), outFile)}`); + + execSync(`node ${path.join(__dirname, "summarize-results.js")} "${outFile}"`, { + stdio: "inherit", + }); +} + +function getBranch() { + try { + return execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim(); + } catch { + return "unknown"; + } +} + +function getCommit() { + try { + return execSync("git rev-parse --short HEAD", { stdio: "pipe" }).toString().trim(); + } catch { + return "unknown"; + } +} + +main().catch((err) => { + console.error("[fatal]", err.message); + process.exit(1); +}); diff --git a/packages/web/scripts/run-regression.js b/packages/web/scripts/run-regression.js new file mode 100644 index 000000000..617310ea3 --- /dev/null +++ b/packages/web/scripts/run-regression.js @@ -0,0 +1,83 @@ +// CI regression runner — benchmarks one branch against local fixtures and writes +// benchmark-results-.json. Called once per branch by benchmark.yml. 
+ +"use strict"; + +const path = require("path"); +const fs = require("fs"); +const { execSync } = require("child_process"); +const { runBenchmarkPage } = require("./lib/run-benchmark-page"); + +const FIXTURES_DIR = path.join(__dirname, "..", "fixtures"); +const SCALES = ["100k", "1m", "10m"]; + +const LOCAL_FIXTURE_MAP = { + "100k": "http://localhost:18765/fixtures/synthetic-100k.parquet", + "1m": "http://localhost:18765/fixtures/synthetic-1m.parquet", + "10m": "http://localhost:18765/fixtures/synthetic-10m.parquet", +}; + +const missing = SCALES.filter( + (scale) => !fs.existsSync(path.join(FIXTURES_DIR, `synthetic-${scale}.parquet`)) +); +if (missing.length > 0) { + console.error( + `Missing fixture files: ${missing.map((s) => `synthetic-${s}.parquet`).join(", ")}\n` + + `Download them to ${FIXTURES_DIR} before running this script.` + ); + process.exit(1); +} + +const sources = SCALES.map((scale) => ({ label: scale, url: LOCAL_FIXTURE_MAP[scale] })); + +function getCurrentBranch() { + if (process.env.BENCHMARK_BRANCH) return process.env.BENCHMARK_BRANCH; + try { + return execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim(); + } catch { + return "unknown"; + } +} + +function slugify(branch) { + return branch.replace(/[^a-zA-Z0-9._-]/g, "-").replace(/-+/g, "-"); +} + +function getArgValue(flag) { + const idx = process.argv.indexOf(flag); + return idx !== -1 ? 
parseInt(process.argv[idx + 1], 10) : undefined; +} + +async function main() { + const skipBuild = process.argv.includes("--skip-build"); + const iterations = getArgValue("--iterations"); + const warmup = getArgValue("--warmup"); + + console.log(`[regression] Using local fixtures from ${FIXTURES_DIR}`); + if (iterations) console.log(`[regression] Iterations: ${iterations}`); + if (warmup !== undefined) console.log(`[regression] Warmup rounds: ${warmup}`); + + const rawResults = await runBenchmarkPage({ + sources, + skipBuild, + iterations, + warmupRounds: warmup, + }); + + const branch = getCurrentBranch(); + const results = { + ...rawResults, + commit: process.env.GITHUB_SHA ?? "local", + branch, + }; + + const slug = slugify(branch); + const outFile = path.join(__dirname, "..", `benchmark-results-${slug}.json`); + fs.writeFileSync(outFile, JSON.stringify(results, null, 2)); + console.log(`[regression] Results written to ${path.relative(process.cwd(), outFile)}`); +} + +main().catch((err) => { + console.error("[fatal]", err.message); + process.exit(1); +}); diff --git a/packages/web/scripts/summarize-results.js b/packages/web/scripts/summarize-results.js new file mode 100644 index 000000000..4bd55935a --- /dev/null +++ b/packages/web/scripts/summarize-results.js @@ -0,0 +1,96 @@ +// Prints a human-readable p50/p95 table from a benchmark-results.json file. +// Called automatically by run-local.js; also runnable standalone. 
+
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+const { execSync } = require("child_process");
+
+/**
+ * Pick the results file to summarize.
+ *
+ * Resolution order:
+ *   1. An explicit path passed as the first CLI argument.
+ *   2. `benchmark-results-local.json` one directory above this script, if it exists.
+ *   3. `benchmark-results-{slug}.json` for the current branch (BENCHMARK_BRANCH,
+ *      else the git HEAD branch), if it exists.
+ *   4. `benchmark-results.json` as the final fallback.
+ *
+ * @returns {string} Always a path; the caller is responsible for checking that
+ *          the file actually exists.
+ */
+function defaultResultsFile() {
+    if (process.argv[2]) return process.argv[2];
+
+    const local = path.join(__dirname, "..", "benchmark-results-local.json");
+    if (fs.existsSync(local)) return local;
+
+    try {
+        const branch =
+            process.env.BENCHMARK_BRANCH ||
+            execSync("git rev-parse --abbrev-ref HEAD", { stdio: "pipe" }).toString().trim();
+        const slug = branch.replace(/[^a-zA-Z0-9._-]/g, "-").replace(/-+/g, "-");
+        const stamped = path.join(__dirname, "..", `benchmark-results-${slug}.json`);
+        if (fs.existsSync(stamped)) return stamped;
+    } catch {
+        // Branch detection failed (e.g. git is unavailable or this is not a
+        // repository); use the generic fallback below instead of aborting.
+    }
+
+    // Reached both when branch detection fails and when no branch-stamped file
+    // exists, so the function never returns undefined.
+    return path.join(__dirname, "..", "benchmark-results.json");
+}
+
+const file = defaultResultsFile();
+
+if (!fs.existsSync(file)) {
+    console.error(`No results file found at ${file}`);
+    process.exit(1);
+}
+
+const data = JSON.parse(fs.readFileSync(file, "utf8"));
+
+// Format a millisecond value for display; missing values render as an em dash.
+function fmt(ms) {
+    if (ms === undefined || ms === null) return "—";
+    return ms < 10 ? 
`${ms.toFixed(2)}ms` : `${ms.toFixed(1)}ms`; +} + +function col(str, width) { + return String(str).padEnd(width); +} + +function rcol(str, width) { + return String(str).padStart(width); +} + +const SEP = "─".repeat(82); + +console.log(""); +console.log("BFF Query Benchmark Results"); +console.log(SEP); +console.log(`Branch: ${data.branch}`); +console.log(`Commit: ${data.commit}`); +console.log(`Timestamp: ${data.timestamp}`); +console.log(`DuckDB init: ${fmt(data.initTimeMs)}`); +console.log(""); + +const sourceLabels = data.sources.map((s) => s.label); + +// Header row +const COL_W = 20; +console.log(col(" Query", 26) + sourceLabels.map((l) => rcol(l, COL_W)).join("")); +console.log(" " + "─".repeat(24 + sourceLabels.length * COL_W)); + +// Registration row +const regCells = data.sources.map((s) => rcol(fmt(s.registrationMs), COL_W)); +console.log(" " + col("registration", 24) + regCells.join("")); +console.log(""); + +// Query rows (p50 / p95) +const queryNames = [...new Set(data.sources.flatMap((s) => s.queries.map((q) => q.name)))]; + +for (const name of queryNames) { + const cells = data.sources.map((s) => { + const q = s.queries.find((x) => x.name === name); + if (!q) return rcol("—", COL_W); + return rcol(`${fmt(q.p50)} / ${fmt(q.p95)}`, COL_W); + }); + console.log(" " + col(name, 24) + cells.join("")); +} + +console.log(""); +console.log(SEP); +console.log( + ` ${data.sources.length} source(s) · ${queryNames.length} queries · ` + + `${data.sources[0]?.queries[0]?.timings?.length ?? 
"?"} iterations each` +); +console.log(" Timings shown as p50 / p95"); +console.log(""); diff --git a/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts b/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts index bfdb6164a..e5721d5a0 100644 --- a/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts +++ b/packages/web/src/services/DatabaseServiceWeb/duckdb-worker.worker.ts @@ -13,6 +13,8 @@ import { initializeDuckDB } from "../../../../core/services/DatabaseService"; declare const self: DedicatedWorkerGlobalScope & typeof globalThis; let databaseService: DatabaseServiceWebWorker | null = null; +let queryTimingEnabled = false; +const accumulatedTimings = new Map(); // Map to track connectionNumber -> connection object const activeConnections = new Map(); @@ -44,7 +46,8 @@ function cancelActiveConnection(connection: duckdb.AsyncDuckDBConnection): void type MessageHandler = (payload: WorkerReqPayload) => Promise; const messageHandler: { [T in WorkerMsgType]: MessageHandler } = { - [WorkerMsgType.INIT]: async () => { + [WorkerMsgType.INIT]: async (payload) => { + queryTimingEnabled = payload?.queryTiming ?? false; if (!databaseService) await initDuckDB(); self.postMessage({ type: WorkerResType.READY }); }, @@ -260,8 +263,26 @@ export default class DatabaseServiceWebWorker extends DatabaseService { type: WorkerResType.STARTED, payload: { connectionId, id: queryId }, }); + const t0 = queryTimingEnabled ? performance.now() : 0; try { const result = await connection.query(sql); + if (queryTimingEnabled) { + const elapsed = performance.now() - t0; + // Service-layer methods prefix SQL with a `-- label\n` comment so timings + // can be grouped by logical operation rather than raw SQL text. + const labelMatch = sql.match(/^--\s*(.+)\n/); + const label = labelMatch ? labelMatch[1].trim() : "query"; + const existing = accumulatedTimings.get(label) ?? 
[]; + accumulatedTimings.set(label, [...existing, elapsed]); + // Strip the label comment from the first line, collapse whitespace, + // and truncate so the console log stays readable. + const body = sql + .replace(/^--[^\n]*\n/, "") // remove `-- label\n` prefix + .replace(/\s+/g, " ") // collapse newlines/indentation + .trim() + .slice(0, 200); + console.log(`[duckdb] ${elapsed}ms — [${label}] ${body}`); + } // Apache Arrow JS (used by duckdb-wasm) only reads the first 8 bytes, losing the nanoseconds. // Re-run with INTERVAL columns cast to ms integers so the data survives Arrow. @@ -293,6 +314,31 @@ export default class DatabaseServiceWebWorker extends DatabaseService { } } + // Benchmark-facing activation path. The production path passes queryTiming via the + // INIT payload (read from localStorage) so the flag is set before any queries run. + public enableQueryTiming(): void { + queryTimingEnabled = true; + } + + // Benchmark only — clears the in-memory annotation cache so each timed iteration + // hits DuckDB rather than returning a cached result from a prior warmup pass. + public clearAnnotationCache(sourceName: string): void { + this.dataSourceToAnnotationsMap.delete(sourceName); + } + + public clearTimings(): void { + accumulatedTimings.clear(); + } + + /** Sum of all accumulated DuckDB query times across all labels since last clearTimings(). 
*/ + public sumTimings(): number { + let total = 0; + accumulatedTimings.forEach((values) => { + total += values.reduce((a, b) => a + b, 0); + }); + return total; + } + // public wrapper so that the worker can access the function public async deleteDataSourceWrapper(dataSource: string): Promise { this.deleteDataSource(dataSource); diff --git a/packages/web/src/services/DatabaseServiceWeb/index.ts b/packages/web/src/services/DatabaseServiceWeb/index.ts index 0411a33f4..808e68418 100644 --- a/packages/web/src/services/DatabaseServiceWeb/index.ts +++ b/packages/web/src/services/DatabaseServiceWeb/index.ts @@ -42,8 +42,14 @@ export default class DatabaseServiceWeb extends DatabaseService { } public async initialize() { - this.worker.postMessage({ type: WorkerMsgType.INIT }); + this.worker.postMessage({ + type: WorkerMsgType.INIT, + payload: { queryTiming: localStorage.getItem("bff_query_timing") === "1" }, + }); await this.dbInitialized; + if (localStorage.getItem("bff_query_timing") === "1") { + console.log("[bff] Query timing enabled. Query times will appear in the console."); + } } public async saveQuery( diff --git a/packages/web/src/services/DatabaseServiceWeb/types.ts b/packages/web/src/services/DatabaseServiceWeb/types.ts index 5e44cd8d0..598380668 100644 --- a/packages/web/src/services/DatabaseServiceWeb/types.ts +++ b/packages/web/src/services/DatabaseServiceWeb/types.ts @@ -45,7 +45,7 @@ type WorkerMsgBase = { }; export type WorkerReqPayload = { - [WorkerMsgType.INIT]: void; + [WorkerMsgType.INIT]: { queryTiming?: boolean } | undefined; [WorkerMsgType.CANCEL]: { connectionId: number; };