Skip to content
54 changes: 28 additions & 26 deletions packages/web/benchmark/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import { BENCHMARK_TASKS, createServices } from "./tasks";
import { BenchmarkConfig, BenchmarkResults, QueryResult, SourceResult } from "./types";
import { DEFAULT_ITERATIONS, DEFAULT_WARMUP_ROUNDS, buildQueryResult } from "./stats";
import DatabaseServiceWebWorker from "../../src/services/DatabaseServiceWeb/duckdb-worker.worker";

// Number of timed iterations used when the benchmark config does not set `iterations`.
const DEFAULT_ITERATIONS = 5;
// Number of warmup rounds used when the benchmark config does not set `warmupRounds`.
const DEFAULT_WARMUP_ROUNDS = 1;

// Updates the #status element in the benchmark HTML page and mirrors to console.
// The page can run headlessly in CI (Playwright), so the console log is the
// only visible progress signal when there is no browser UI to observe.
Expand All @@ -14,14 +12,6 @@ function setStatus(msg: string) {
console.log("[benchmark]", msg);
}

// Nearest-rank percentile over a pre-sorted (ascending) array. Used to report
// p50 and p95 across timed iterations — p95 surfaces occasional slow outliers
// (GC pauses, DuckDB cache misses) that the median would hide.
//
// The computed rank is clamped to the array bounds, so p <= 0 yields the
// smallest sample and p >= 100 yields the largest. When there is nothing to
// rank (empty array) or p is not a finite number, NaN is returned rather than
// letting `undefined` escape through the `number` return type.
function percentile(sorted: number[], p: number): number {
    if (sorted.length === 0) return NaN;
    const rank = Math.ceil((p / 100) * sorted.length) - 1;
    // Clamp on both ends: the lower bound covers p = 0, the upper bound p > 100.
    const idx = Math.min(sorted.length - 1, Math.max(0, rank));
    return sorted[idx] ?? NaN;
}

// Fisher-Yates shuffle — randomizes task order each timed iteration so that a
// consistently slow task doesn't inflate the times of everything that follows it
// (DuckDB buffer pool and OS page cache warm up over repeated runs).
Expand Down Expand Up @@ -53,7 +43,8 @@ async function benchmarkSource(
service: DatabaseServiceWebWorker,
sourceNames: string[],
iterations: number,
warmupRounds: number
warmupRounds: number,
tasks: typeof BENCHMARK_TASKS
): Promise<QueryResult[]> {
const { annotationSvc, fileSvc } = createServices(service, sourceNames);

Expand All @@ -64,17 +55,17 @@ async function benchmarkSource(
// of every task reflect cold-start overhead rather than steady-state cost.
setStatus(`Warming up ${sourceNames.join(", ")} (${warmupRounds} rounds)...`);
for (let w = 0; w < warmupRounds; w++) {
for (const task of BENCHMARK_TASKS) {
for (const task of tasks) {
service.clearTimings();
await task.run(annotationSvc, fileSvc);
}
}

const timingsMap = new Map<string, number[]>(BENCHMARK_TASKS.map(({ name }) => [name, []]));
const timingsMap = new Map<string, number[]>(tasks.map(({ name }) => [name, []]));

for (let i = 0; i < iterations; i++) {
setStatus(`Timing ${sourceNames.join(", ")} — iteration ${i + 1}/${iterations}...`);
for (const task of shuffle(BENCHMARK_TASKS)) {
for (const task of shuffle(tasks)) {
if (task.resetAnnotationCache) {
for (const sourceName of sourceNames) {
service.clearAnnotationCache(sourceName);
Expand All @@ -94,16 +85,7 @@ async function benchmarkSource(
}
}

return BENCHMARK_TASKS.map(({ name }) => {
const timings = [...(timingsMap.get(name) ?? [])].sort((a, b) => a - b);
return {
name,
timings,
p50: percentile(timings, 50),
p95: percentile(timings, 95),
p99: percentile(timings, 99),
};
});
return tasks.map(({ name }) => buildQueryResult(name, timingsMap.get(name) ?? []));
}

async function main() {
Expand All @@ -113,6 +95,16 @@ async function main() {
}
const iterations = config.iterations ?? DEFAULT_ITERATIONS;
const warmupRounds = config.warmupRounds ?? DEFAULT_WARMUP_ROUNDS;
const taskFilter = config.taskFilter;

// When a taskFilter is provided, only run the requested tasks.
if (taskFilter) {
const validNames = new Set(BENCHMARK_TASKS.map((t) => t.name));
const invalid = taskFilter.filter((n) => !validNames.has(n));
if (invalid.length) {
throw new Error(`Unknown task(s) in taskFilter: ${invalid.join(", ")}`);
}
}

setStatus("Initializing DuckDB-WASM...");
const initStart = performance.now();
Expand Down Expand Up @@ -149,6 +141,10 @@ async function main() {
await service.execute('DROP VIEW IF EXISTS "__bff_warmup__"');
}

const activeTasks = taskFilter
? BENCHMARK_TASKS.filter((t) => taskFilter.includes(t.name))
: BENCHMARK_TASKS;

const sourceResults: SourceResult[] = [];

for (const sources of config.testCases) {
Expand All @@ -171,7 +167,13 @@ async function main() {
const registrationMs = performance.now() - regStart;

const labels = sources.map((source) => source.label);
const queries = await benchmarkSource(service, labels, iterations, warmupRounds);
const queries = await benchmarkSource(
service,
labels,
iterations,
warmupRounds,
activeTasks
);
sourceResults.push({ labels, registrationMs, queries });

for (const source of sources) {
Expand Down
20 changes: 20 additions & 0 deletions packages/web/benchmark/src/stats.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { QueryResult } from "./types";

/** Number of timed iterations used when the benchmark config does not set `iterations`. */
export const DEFAULT_ITERATIONS = 5;
/** Number of warmup rounds used when the benchmark config does not set `warmupRounds`. */
export const DEFAULT_WARMUP_ROUNDS = 1;

/**
 * Nearest-rank percentile over an array that is already sorted ascending.
 *
 * The computed rank is clamped to the array bounds, so `p <= 0` yields the
 * smallest sample and `p >= 100` yields the largest.
 *
 * @param sorted - Samples sorted in ascending order; not modified.
 * @param p - Percentile to look up, on a 0–100 scale.
 * @returns The sample at the nearest rank, or `NaN` when `sorted` is empty or
 *   `p` is not a finite number (instead of leaking `undefined` through the
 *   `number` return type).
 */
export function percentile(sorted: number[], p: number): number {
    if (sorted.length === 0) return NaN;
    const rank = Math.ceil((p / 100) * sorted.length) - 1;
    // Clamp on both ends: the lower bound covers p = 0, the upper bound p > 100.
    const idx = Math.min(sorted.length - 1, Math.max(0, rank));
    return sorted[idx] ?? NaN;
}

/**
 * Assembles the result record for one benchmark task from its raw timings.
 *
 * @param name - Task name stored on the result.
 * @param rawTimings - Timings in collection order; left unmodified.
 * @returns The name, the timings sorted ascending, and the p50/p95/p99 values
 *   computed from the sorted timings.
 */
export function buildQueryResult(name: string, rawTimings: number[]): QueryResult {
    // Sort a copy so the caller's array keeps its original order.
    const timings = rawTimings.slice();
    timings.sort((lhs, rhs) => lhs - rhs);

    const p50 = percentile(timings, 50);
    const p95 = percentile(timings, 95);
    const p99 = percentile(timings, 99);

    return { name, timings, p50, p95, p99 };
}
1 change: 1 addition & 0 deletions packages/web/benchmark/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export interface BenchmarkConfig {
testCases: TestCase[];
iterations?: number;
warmupRounds?: number;
taskFilter?: string[];
}

export interface QueryResult {
Expand Down
128 changes: 120 additions & 8 deletions packages/web/scripts/lib/run-benchmark-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
import fs from "fs";
import path from "path";
import { execSync } from "child_process";
import { BenchmarkResults, TestCase } from "../../benchmark/src/types";

import { BenchmarkResults, QueryResult, SourceResult, TestCase } from "../../benchmark/src/types";
import { BENCHMARK_TASKS } from "../../benchmark/src/tasks";
import { DEFAULT_ITERATIONS, buildQueryResult } from "../../benchmark/src/stats";
const DIST_DIR = path.join(__dirname, "..", "..", "benchmark", "dist");
const FIXTURES_DIR = path.join(__dirname, "..", "..", "fixtures");
const PORT = 18765;
Expand Down Expand Up @@ -157,7 +158,7 @@
iterations?: number;
warmupRounds?: number;
channel?: string;
}): Promise<BenchmarkResults> {
}) {
if (!skipBuild) buildBenchmark();

if (!fs.existsSync(path.join(DIST_DIR, "index.html"))) {
Expand All @@ -167,6 +168,109 @@
}

const server = await startServer();

try {
const allTaskNames = BENCHMARK_TASKS.map((task) => task.name);
const iterationCount = iterations ?? DEFAULT_ITERATIONS;

let initTimeMs = 0;
const sourceResults: SourceResult[] = [];

for (const testCase of testCases) {
let registrationMs = 0;
let queries: QueryResult[];

if (warmupRounds === 0) {
// Each (task, iteration) pair gets a fresh browser so every
// measurement is a cold start.
console.log(
`[playwright] warmupRounds=0: running ${allTaskNames.length} task(s) × ` +
`${iterationCount} iteration(s) in separate browser instances`
);

const timingsMap = new Map<string, number[]>(
allTaskNames.map((name) => [name, []])
);

for (const taskName of allTaskNames) {
for (let i = 0; i < iterationCount; i++) {
console.log(
`[playwright] Launching browser for "${taskName}" ` +
`iteration ${i + 1}/${iterationCount} ` +
`(${testCase.map((source) => source.label).join(", ")})`
);
const run = await runSingleBenchmark({
testCase,
iterations: 1,
warmupRounds: 0,
channel,
taskFilter: [taskName],
});

if (initTimeMs === 0) initTimeMs = run.initTimeMs;
if (registrationMs === 0) registrationMs = run.registrationMs;

const timing = run.queries[0]?.timings[0];
if (timing !== undefined) {
timingsMap.get(taskName)!.push(timing);

Check warning on line 215 in packages/web/scripts/lib/run-benchmark-page.ts

View workflow job for this annotation

GitHub Actions / lint

Forbidden non-null assertion
}
}
}

queries = allTaskNames.map((name) =>
buildQueryResult(name, timingsMap.get(name) ?? [])
);
} else {
// Warmups > 0: all tasks share a single browser instance.
console.log(
`[playwright] Launching browser for task(s): ${allTaskNames.join(", ")} ` +
`(${testCase.map((source) => source.label).join(", ")})`
);
const run = await runSingleBenchmark({
testCase,
iterations: iterationCount,
warmupRounds,
channel,
taskFilter: allTaskNames,
});

initTimeMs = initTimeMs || run.initTimeMs;
registrationMs = run.registrationMs;
queries = run.queries;
}

sourceResults.push({
labels: testCase.map((source) => source.label),
registrationMs,
queries,
});
}

return {
timestamp: new Date().toISOString(),
commit: "unknown",
branch: "unknown",
initTimeMs,
results: sourceResults,
} as BenchmarkResults;
} finally {
await new Promise((res) => server.close(res));
}
}

async function runSingleBenchmark({
testCase,
iterations,
warmupRounds,
channel,
taskFilter,
}: {
testCase: TestCase;
iterations: number;
warmupRounds?: number;
channel?: string;
taskFilter: string[];
}): Promise<{ initTimeMs: number; registrationMs: number; queries: QueryResult[] }> {
const browser = await chromium.launch({
channel,
headless: true,
Expand All @@ -185,13 +289,14 @@
// synchronously on startup — no callback handshake needed.
await page.addInitScript({
content: `window.__benchmarkConfig = ${JSON.stringify({
testCases,
testCases: [testCase],
iterations,
warmupRounds,
taskFilter,
})};`,
});

console.log(`[playwright] Starting benchmark (${testCases.length} test case(s))...`);
console.log(`[playwright] Starting benchmark...`);
await page.goto(`http://localhost:${PORT}/`, { waitUntil: "domcontentloaded" });

// Wait for the benchmark to signal it's ready for file injection
Expand All @@ -204,7 +309,7 @@
// which is identical to how the real app loads files via the file picker —
// no HTTP range-request overhead, so DuckDB sort performance matches real-user timing.
const loaded = new Set();
for (const source of testCases.flat()) {
for (const source of testCase) {
if (source.label in loaded) continue; // Don't add duplicate sources
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

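This whole section was moved verbatim to `injectFixtures`, with one exception: I changed this condition to `loaded.has(source.label)` to fix a bug. (The `in` operator tests whether a property exists on an object; it does not check `Set` membership, so the old condition did not detect sources that had already been added.)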

const localMatch = source.url.match(
new RegExp(`^http://localhost:${PORT}/fixtures/(.+)$`)
Expand Down Expand Up @@ -254,10 +359,17 @@
const error = await page.evaluate(() => window.__benchmarkError ?? null);
if (error) throw new Error(`Benchmark failed in browser: ${error}`);

return await page.evaluate(() => window.__benchmarkResults);
const benchmarkResults: BenchmarkResults = await page.evaluate(
() => window.__benchmarkResults
);
const result = benchmarkResults.results[0];
return {
initTimeMs: benchmarkResults.initTimeMs,
registrationMs: result.registrationMs,
queries: result.queries,
};
} finally {
await browser.close();
await new Promise((res) => server.close(res));
Copy link
Copy Markdown
Contributor Author

@pgarrison pgarrison May 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was moved to line 182

}
}

Expand Down
Loading