-
Notifications
You must be signed in to change notification settings - Fork 7
New browser instance per benchmark task #808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
869a629
When --warmups=0, use a new browser for each task
pgarrison 5b15db0
New browser per iteration, not just per task
pgarrison d07858f
Remove verbose LLM comments
pgarrison 7a7e5f9
Merge branch 'feature/benchmark-aggregate-parquet' into feature/brows…
pgarrison efc2483
Restore type of runBenchmarkPage
pgarrison 4a857ce
When warmups>0, continue using one browser for all test cases
pgarrison 9e1c2a5
Retain previous comment and formatting
pgarrison e28acc1
Merge branch 'main' into feature/browser-per-benchmark-task
pgarrison 4576877
Avoid non-null assertion
pgarrison File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| import { QueryResult } from "./types"; | ||
|
|
||
| export const DEFAULT_ITERATIONS = 5; | ||
| export const DEFAULT_WARMUP_ROUNDS = 1; | ||
|
|
||
/**
 * Looks up the p-th percentile of an ascending-sorted sample using the
 * nearest-rank method: the element at 1-based rank `ceil(p / 100 * n)`.
 *
 * @param sorted - Sample values, already sorted ascending. Not mutated.
 * @param p - Percentile to look up, normally within [0, 100].
 * @returns The element at the computed rank. The rank is clamped to the
 *   valid index range, so `p <= 0` yields the first element and `p >= 100`
 *   yields the last. Returns `NaN` when `sorted` is empty or `p` is NaN,
 *   so the declared `number` return type always holds.
 */
export function percentile(sorted: number[], p: number): number {
  const count = sorted.length;
  // No meaningful percentile exists for an empty sample or an undefined rank.
  if (count === 0 || Number.isNaN(p)) return Number.NaN;

  const rank = Math.ceil((p / 100) * count) - 1;
  // Clamp on both sides: p <= 0 would go below index 0, p > 100 past the end.
  const idx = Math.min(count - 1, Math.max(0, rank));
  return sorted[idx] ?? Number.NaN;
}
|
|
||
/**
 * Assembles the result record for one benchmark query.
 *
 * The timings are copied and sorted ascending (the caller's array is left
 * untouched), and the p50 / p95 / p99 values are read from that sorted copy
 * via `percentile`.
 *
 * @param name - Name stored on the result.
 * @param rawTimings - Timings in any order.
 * @returns A record holding the name, the sorted timings and the three
 *   percentile values.
 */
export function buildQueryResult(name: string, rawTimings: number[]): QueryResult {
  const ascending = rawTimings.slice();
  ascending.sort((left, right) => left - right);

  const quantile = (pct: number): number => percentile(ascending, pct);

  return {
    name,
    timings: ascending,
    p50: quantile(50),
    p95: quantile(95),
    p99: quantile(99),
  };
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,8 +26,9 @@ | |
| import fs from "fs"; | ||
| import path from "path"; | ||
| import { execSync } from "child_process"; | ||
| import { BenchmarkResults, TestCase } from "../../benchmark/src/types"; | ||
|
|
||
| import { BenchmarkResults, QueryResult, SourceResult, TestCase } from "../../benchmark/src/types"; | ||
| import { BENCHMARK_TASKS } from "../../benchmark/src/tasks"; | ||
| import { DEFAULT_ITERATIONS, buildQueryResult } from "../../benchmark/src/stats"; | ||
| const DIST_DIR = path.join(__dirname, "..", "..", "benchmark", "dist"); | ||
| const FIXTURES_DIR = path.join(__dirname, "..", "..", "fixtures"); | ||
| const PORT = 18765; | ||
|
|
@@ -157,7 +158,7 @@ | |
| iterations?: number; | ||
| warmupRounds?: number; | ||
| channel?: string; | ||
| }): Promise<BenchmarkResults> { | ||
| }) { | ||
| if (!skipBuild) buildBenchmark(); | ||
|
|
||
| if (!fs.existsSync(path.join(DIST_DIR, "index.html"))) { | ||
|
|
@@ -167,6 +168,109 @@ | |
| } | ||
|
|
||
| const server = await startServer(); | ||
|
|
||
| try { | ||
| const allTaskNames = BENCHMARK_TASKS.map((task) => task.name); | ||
| const iterationCount = iterations ?? DEFAULT_ITERATIONS; | ||
|
|
||
| let initTimeMs = 0; | ||
| const sourceResults: SourceResult[] = []; | ||
|
|
||
| for (const testCase of testCases) { | ||
| let registrationMs = 0; | ||
| let queries: QueryResult[]; | ||
|
|
||
| if (warmupRounds === 0) { | ||
| // Each (task, iteration) pair gets a fresh browser so every | ||
| // measurement is a cold start. | ||
| console.log( | ||
| `[playwright] warmupRounds=0: running ${allTaskNames.length} task(s) × ` + | ||
| `${iterationCount} iteration(s) in separate browser instances` | ||
| ); | ||
|
|
||
| const timingsMap = new Map<string, number[]>( | ||
| allTaskNames.map((name) => [name, []]) | ||
| ); | ||
|
|
||
| for (const taskName of allTaskNames) { | ||
| for (let i = 0; i < iterationCount; i++) { | ||
| console.log( | ||
| `[playwright] Launching browser for "${taskName}" ` + | ||
| `iteration ${i + 1}/${iterationCount} ` + | ||
| `(${testCase.map((source) => source.label).join(", ")})` | ||
| ); | ||
| const run = await runSingleBenchmark({ | ||
| testCase, | ||
| iterations: 1, | ||
| warmupRounds: 0, | ||
| channel, | ||
| taskFilter: [taskName], | ||
| }); | ||
|
|
||
| if (initTimeMs === 0) initTimeMs = run.initTimeMs; | ||
| if (registrationMs === 0) registrationMs = run.registrationMs; | ||
|
|
||
| const timing = run.queries[0]?.timings[0]; | ||
| if (timing !== undefined) { | ||
| timingsMap.get(taskName)!.push(timing); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| queries = allTaskNames.map((name) => | ||
| buildQueryResult(name, timingsMap.get(name) ?? []) | ||
| ); | ||
| } else { | ||
| // Warmups > 0: all tasks share a single browser instance. | ||
| console.log( | ||
| `[playwright] Launching browser for task(s): ${allTaskNames.join(", ")} ` + | ||
| `(${testCase.map((source) => source.label).join(", ")})` | ||
| ); | ||
| const run = await runSingleBenchmark({ | ||
| testCase, | ||
| iterations: iterationCount, | ||
| warmupRounds, | ||
| channel, | ||
| taskFilter: allTaskNames, | ||
| }); | ||
|
|
||
| initTimeMs = initTimeMs || run.initTimeMs; | ||
| registrationMs = run.registrationMs; | ||
| queries = run.queries; | ||
| } | ||
|
|
||
| sourceResults.push({ | ||
| labels: testCase.map((source) => source.label), | ||
| registrationMs, | ||
| queries, | ||
| }); | ||
| } | ||
|
|
||
| return { | ||
| timestamp: new Date().toISOString(), | ||
| commit: "unknown", | ||
| branch: "unknown", | ||
| initTimeMs, | ||
| results: sourceResults, | ||
| } as BenchmarkResults; | ||
| } finally { | ||
| await new Promise((res) => server.close(res)); | ||
| } | ||
| } | ||
|
|
||
| async function runSingleBenchmark({ | ||
| testCase, | ||
| iterations, | ||
| warmupRounds, | ||
| channel, | ||
| taskFilter, | ||
| }: { | ||
| testCase: TestCase; | ||
| iterations: number; | ||
| warmupRounds?: number; | ||
| channel?: string; | ||
| taskFilter: string[]; | ||
| }): Promise<{ initTimeMs: number; registrationMs: number; queries: QueryResult[] }> { | ||
| const browser = await chromium.launch({ | ||
| channel, | ||
| headless: true, | ||
|
|
@@ -185,13 +289,14 @@ | |
| // synchronously on startup — no callback handshake needed. | ||
| await page.addInitScript({ | ||
| content: `window.__benchmarkConfig = ${JSON.stringify({ | ||
| testCases, | ||
| testCases: [testCase], | ||
| iterations, | ||
| warmupRounds, | ||
| taskFilter, | ||
| })};`, | ||
| }); | ||
|
|
||
| console.log(`[playwright] Starting benchmark (${testCases.length} test case(s))...`); | ||
| console.log(`[playwright] Starting benchmark...`); | ||
| await page.goto(`http://localhost:${PORT}/`, { waitUntil: "domcontentloaded" }); | ||
|
|
||
| // Wait for the benchmark to signal it's ready for file injection | ||
|
|
@@ -204,7 +309,7 @@ | |
| // which is identical to how the real app loads files via the file picker — | ||
| // no HTTP range-request overhead, so DuckDB sort performance matches real-user timing. | ||
| const loaded = new Set(); | ||
| for (const source of testCases.flat()) { | ||
| for (const source of testCase) { | ||
| if (source.label in loaded) continue; // Don't add duplicate sources | ||
| const localMatch = source.url.match( | ||
| new RegExp(`^http://localhost:${PORT}/fixtures/(.+)$`) | ||
|
|
@@ -254,10 +359,17 @@ | |
| const error = await page.evaluate(() => window.__benchmarkError ?? null); | ||
| if (error) throw new Error(`Benchmark failed in browser: ${error}`); | ||
|
|
||
| return await page.evaluate(() => window.__benchmarkResults); | ||
| const benchmarkResults: BenchmarkResults = await page.evaluate( | ||
| () => window.__benchmarkResults | ||
| ); | ||
| const result = benchmarkResults.results[0]; | ||
| return { | ||
| initTimeMs: benchmarkResults.initTimeMs, | ||
| registrationMs: result.registrationMs, | ||
| queries: result.queries, | ||
| }; | ||
| } finally { | ||
| await browser.close(); | ||
| await new Promise((res) => server.close(res)); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was moved to line 182 |
||
| } | ||
| } | ||
|
|
||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole section was moved verbatim to `injectFixtures`, with one exception: I changed this condition to `loaded.has(source.label)` to fix a bug. (`in` does not check set membership!)