Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 43 additions & 9 deletions .github/workflows/perf-probe-baseline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- "packages/app/e2e/perf/**"
- "packages/app/e2e/fixtures.ts"
- "packages/app/package.json"
- "packages/app/script/compare-perf.ts"
- "packages/app/script/e2e-local.ts"
- "packages/app/src/testing/perf-metrics*"
workflow_dispatch:
Expand All @@ -18,11 +19,20 @@ jobs:
perf-probe-baseline:
runs-on: ubuntu-latest
timeout-minutes: 30
env:
PERF_ARTIFACT_DIR: ${{ github.workspace }}/perf-artifacts
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # actions/checkout@v6
with:
path: head
persist-credentials: false

- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # actions/checkout@v6
with:
path: base
persist-credentials: false
ref: ${{ github.event.pull_request.base.sha || github.sha }}

- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # actions/[email protected]
with:
node-version: "24"
Expand All @@ -34,25 +44,47 @@ jobs:
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # actions/cache@v5
with:
path: ~/.bun/install/cache
key: bun-${{ runner.os }}-${{ hashFiles('bun.lock') }}
key: bun-${{ runner.os }}-${{ hashFiles('head/bun.lock', 'base/bun.lock') }}
restore-keys: |
bun-${{ runner.os }}-

- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # actions/cache@v5
with:
path: ${{ github.workspace }}/.playwright-browsers
key: playwright-${{ runner.os }}-${{ hashFiles('packages/app/package.json', 'bun.lock') }}
key: playwright-${{ runner.os }}-${{ hashFiles('head/packages/app/package.json', 'head/bun.lock', 'base/packages/app/package.json', 'base/bun.lock') }}

- name: Install head dependencies
working-directory: head
run: bun install --frozen-lockfile

- run: bun install --frozen-lockfile
- name: Install base dependencies
working-directory: base
run: bun install --frozen-lockfile

- name: Install Playwright browsers
working-directory: packages/app
working-directory: head/packages/app
run: bunx playwright install --with-deps chromium

- name: Run perf probe baseline
- name: Run perf probe baseline (base)
env:
CI: "true"
run: bun --cwd packages/app test:e2e:local:perf
PAWWORK_PERF_BRANCH: base
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-base.json
run: bun --cwd base/packages/app test:e2e:local:perf

- name: Run perf probe baseline (head)
env:
CI: "true"
PAWWORK_PERF_BRANCH: head
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head.json
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Compare base and head perf baselines
run: >
bun head/packages/app/script/compare-perf.ts
--base "${PERF_ARTIFACT_DIR}/perf-base.json"
--head "${PERF_ARTIFACT_DIR}/perf-head.json"
--output "${PERF_ARTIFACT_DIR}/perf-compare.json"

- name: Upload perf probe artifacts
if: always()
Expand All @@ -62,6 +94,8 @@ jobs:
if-no-files-found: ignore
retention-days: 7
path: |
packages/app/e2e/perf-results
packages/app/e2e/playwright-report
packages/app/e2e/test-results
perf-artifacts
base/packages/app/e2e/playwright-report
base/packages/app/e2e/test-results
head/packages/app/e2e/playwright-report
head/packages/app/e2e/test-results
18 changes: 14 additions & 4 deletions packages/app/e2e/perf/perf-probe.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { sessionPath } from "../utils"
import { installPerfProbe, resetPerfProbe, snapshotPerfProbe, summarizeScenarioRuns } from "./probe"

const outputPath = process.env.PAWWORK_PERF_OUTPUT ?? path.join(process.cwd(), "e2e", "perf-results", "pr0.1-baseline.json")
const perfBranch = process.env.PAWWORK_PERF_BRANCH ?? "dev"

const longMarkdown = [
"# Baseline stream",
Expand Down Expand Up @@ -73,6 +74,11 @@ async function settleFrames(page: Parameters<typeof snapshotPerfProbe>[0], count
}, count)
}

/**
 * Pause taken after a measured run, before the next run begins.
 *
 * Awaits `settleFrames` with a count of 6 and then waits a further 250 ms
 * through `page.waitForTimeout`.
 */
async function cooldownAfterRun(page: Parameters<typeof snapshotPerfProbe>[0]) {
  const frameCount = 6
  const idleMs = 250

  await settleFrames(page, frameCount)
  await page.waitForTimeout(idleMs)
}

async function navigateProjectHome(page: Parameters<typeof snapshotPerfProbe>[0], directory: string) {
await page.goto(sessionPath(directory))
await expect(page.locator('[data-component="session-new-home"]')).toBeVisible()
Expand Down Expand Up @@ -148,9 +154,10 @@ test.describe("PR0.1 perf probe baseline", () => {
await settleFrames(page, 3)
await page.keyboard.press("Escape")
runs.push(await snapshotPerfProbe(page))
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: "dev", scenario: "homepage-cold", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "homepage-cold", runs }))
})

test("session-streaming-long emits a 3-run JSON baseline", async ({ page, project, llm }) => {
Expand Down Expand Up @@ -192,9 +199,10 @@ test.describe("PR0.1 perf probe baseline", () => {
secondWave.resolve()
await send
runs.push(await snapshotPerfProbe(page))
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: "dev", scenario: "session-streaming-long", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "session-streaming-long", runs }))
})

test("tool-call-expand emits a 3-run JSON baseline", async ({ page, project, llm }) => {
Expand All @@ -221,9 +229,10 @@ test.describe("PR0.1 perf probe baseline", () => {
await expect(trigger).toHaveAttribute("aria-expanded", "true")
await settleFrames(page, 4)
runs.push(await snapshotPerfProbe(page))
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: "dev", scenario: "tool-call-expand", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "tool-call-expand", runs }))
})

test("session-scroll-reading emits a 3-run JSON baseline", async ({ page, project }) => {
Expand Down Expand Up @@ -258,9 +267,10 @@ test.describe("PR0.1 perf probe baseline", () => {
await page.mouse.wheel(0, 3600)
await settleFrames(page, 4)
runs.push(await snapshotPerfProbe(page))
if (run < 2) await cooldownAfterRun(page)
})
}

scenarioResults.push(summarizeScenarioRuns({ branch: "dev", scenario: "session-scroll-reading", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "session-scroll-reading", runs }))
})
})
48 changes: 48 additions & 0 deletions packages/app/script/compare-perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import fs from "node:fs/promises"
import path from "node:path"
import { comparePerfBaselines, type PerfScenarioSummary } from "../src/testing/perf-metrics"

/**
 * Looks up the value of a command-line flag in `process.argv`.
 *
 * Both `--flag value` and `--flag=value` are accepted. The first occurrence
 * of the flag (in either form) decides the result.
 *
 * @param flag - The flag to search for, including its leading dashes (e.g. `"--base"`).
 * @returns The flag's value, or `undefined` when the flag is absent, is the
 *   last argument, or is immediately followed by another `--` option.
 */
function readArg(flag: string) {
  const inlinePrefix = `${flag}=`
  for (let index = 0; index < process.argv.length; index++) {
    const token = process.argv[index]
    if (token === undefined) continue
    if (token.startsWith(inlinePrefix)) return token.slice(inlinePrefix.length)
    if (token !== flag) continue

    const value = process.argv[index + 1]
    // A following `--option` means this flag's value was omitted; do not
    // swallow the next flag as if it were a path.
    if (value === undefined || value.startsWith("--")) return undefined
    return value
  }
  return undefined
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Loads a perf results file and checks that its top-level JSON value is an array.
 *
 * Only the top-level shape is verified; individual entries are not validated here.
 *
 * @param filePath - Path of the JSON file to read.
 * @returns The parsed array, typed as perf scenario summaries.
 * @throws Error when the file content is not valid JSON or its top-level value
 *   is not an array. Failures from `fs.readFile` propagate unchanged.
 */
async function readPerfFile(filePath: string): Promise<PerfScenarioSummary[]> {
  const raw = await fs.readFile(filePath, "utf8")

  // Parse into `unknown` so the array check below is what establishes the type,
  // and attach the file path so base/head parse failures are distinguishable.
  let payload: unknown
  try {
    payload = JSON.parse(raw)
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to parse perf JSON in ${filePath}: ${detail}`)
  }

  if (!Array.isArray(payload)) {
    throw new Error(`Expected an array of perf scenarios in ${filePath}`)
  }
  return payload
}

/**
 * Script body: reads the `--base`, `--head` and optional `--output` arguments,
 * loads both perf files, runs `comparePerfBaselines` on them, optionally writes
 * the full comparison as JSON, prints a pass/failures/warnings summary, and
 * sets a non-zero exit code when the comparison does not pass.
 *
 * @throws Error with a usage message when `--base` or `--head` is missing.
 */
async function main() {
  const basePath = readArg("--base")
  const headPath = readArg("--head")
  const outputPath = readArg("--output")

  if (!(basePath && headPath)) {
    throw new Error("Usage: bun script/compare-perf.ts --base <perf-base.json> --head <perf-head.json> [--output <path>]")
  }

  // Both files are loaded concurrently; order in the tuple is base, then head.
  const [base, head] = await Promise.all([readPerfFile(basePath), readPerfFile(headPath)])
  const comparison = comparePerfBaselines({ base, head })

  if (outputPath) {
    // Create the destination directory first so a fresh artifact path works.
    const outputDir = path.dirname(outputPath)
    await fs.mkdir(outputDir, { recursive: true })
    await fs.writeFile(outputPath, `${JSON.stringify(comparison, null, 2)}\n`)
  }

  const { pass, failures, warnings } = comparison
  console.log(JSON.stringify({ pass, failures, warnings }, null, 2))

  // Signal failure through the exit code rather than throwing.
  if (!pass) {
    process.exitCode = 1
  }
}

// Entry point: run the comparison via top-level await when this script is executed.
await main()
Loading
Loading