Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 145 additions & 4 deletions .github/workflows/perf-probe-baseline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
- "packages/app/package.json"
- "packages/app/playwright.config.ts"
- "packages/app/script/compare-perf.ts"
- "packages/app/script/merge-perf-artifacts.ts"
- "packages/app/script/e2e-local.ts"
- "packages/app/src/testing/perf-metrics*"
workflow_dispatch:
Expand All @@ -31,11 +32,13 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # actions/checkout@v6
with:
fetch-depth: 0
path: head
persist-credentials: false

- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # actions/checkout@v6
with:
fetch-depth: 0
path: base
persist-credentials: false
ref: ${{ github.event.pull_request.base.sha || github.sha }}
Expand All @@ -59,6 +62,8 @@ jobs:
with:
path: ${{ github.workspace }}/.playwright-browsers
key: playwright-${{ runner.os }}-${{ hashFiles('head/packages/app/package.json', 'head/bun.lock', 'base/packages/app/package.json', 'base/bun.lock') }}
restore-keys: |
playwright-${{ runner.os }}-

- name: Install head dependencies
working-directory: head
Expand All @@ -78,6 +83,56 @@ jobs:
cp head/packages/app/playwright.config.ts base/packages/app/playwright.config.ts
cp head/packages/app/src/testing/perf-metrics.ts base/packages/app/src/testing/perf-metrics.ts

- name: Detect low-end perf scope
id: low_end_scope
working-directory: head
env:
EVENT_NAME: ${{ github.event_name }}
BASE_SHA: ${{ github.event.pull_request.base.sha || github.event.before }}
HEAD_SHA: ${{ github.sha }}
run: |
set -euo pipefail

is_low_end_path() {
case "$1" in
packages/app/src/pages/session/*|packages/app/src/pages/session/**|packages/app/src/pages/session.tsx|packages/ui/src/components/message-part.tsx|packages/ui/src/components/session-turn.tsx|packages/ui/src/components/markdown.tsx|packages/app/e2e/perf/*|packages/app/e2e/perf/**|packages/app/src/testing/perf-metrics*|packages/app/script/compare-perf.ts|packages/app/script/merge-perf-artifacts.ts|.github/workflows/perf-probe-baseline.yml)
return 0
;;
*)
return 1
;;
esac
}

if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
echo "run_low_end=true" >> "$GITHUB_OUTPUT"
exit 0
fi

if [ -z "$BASE_SHA" ] || [ "$BASE_SHA" = "0000000000000000000000000000000000000000" ]; then
BASE_SHA="$(git rev-list --max-parents=0 HEAD | tail -n 1)"
fi

run_low_end=false
mapfile -t changes < <(git diff --name-status --find-renames --find-copies "$BASE_SHA" "$HEAD_SHA" --)
for change in "${changes[@]}"; do
IFS=$'\t' read -r status path1 path2 <<< "$change"
case "$status" in
A*|M*|T*|D*)
if is_low_end_path "$path1"; then run_low_end=true; break; fi
;;
R*|C*)
if is_low_end_path "$path1" || is_low_end_path "$path2"; then run_low_end=true; break; fi
;;
*)
run_low_end=true
break
;;
esac
done

echo "run_low_end=$run_low_end" >> "$GITHUB_OUTPUT"

- name: Run perf probe baseline (base)
env:
CI: "true"
Expand All @@ -92,13 +147,45 @@ jobs:
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head.json
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Run low-end perf probe baseline (base)
if: steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: base
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-base-low-end.json
run: bun --cwd base/packages/app test:e2e:local:perf

- name: Run low-end perf probe baseline (head)
if: steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: head
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head-low-end.json
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Merge perf profile artifacts (base)
run: >
bun head/packages/app/script/merge-perf-artifacts.ts
--required "${PERF_ARTIFACT_DIR}/perf-base.json"
--optional "${PERF_ARTIFACT_DIR}/perf-base-low-end.json"
--output "${PERF_ARTIFACT_DIR}/perf-base-combined.json"

- name: Merge perf profile artifacts (head)
run: >
bun head/packages/app/script/merge-perf-artifacts.ts
--required "${PERF_ARTIFACT_DIR}/perf-head.json"
--optional "${PERF_ARTIFACT_DIR}/perf-head-low-end.json"
--output "${PERF_ARTIFACT_DIR}/perf-head-combined.json"

- name: Compare base and head perf baselines
id: compare
continue-on-error: true
run: >
bun head/packages/app/script/compare-perf.ts
--base "${PERF_ARTIFACT_DIR}/perf-base.json"
--head "${PERF_ARTIFACT_DIR}/perf-head.json"
--base "${PERF_ARTIFACT_DIR}/perf-base-combined.json"
--head "${PERF_ARTIFACT_DIR}/perf-head-combined.json"
--output "${PERF_ARTIFACT_DIR}/perf-compare.json"
--comment-output "${PERF_ARTIFACT_DIR}/perf-comment.md"

Expand All @@ -118,14 +205,48 @@ jobs:
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head-confirm.json
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Confirm low-end perf regression (base)
if: steps.compare.outcome == 'failure' && steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: base
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-base-low-end-confirm.json
run: bun --cwd base/packages/app test:e2e:local:perf

- name: Confirm low-end perf regression (head)
if: steps.compare.outcome == 'failure' && steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: head
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head-low-end-confirm.json
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Merge confirmed perf profile artifacts (base)
if: steps.compare.outcome == 'failure'
run: >
bun head/packages/app/script/merge-perf-artifacts.ts
--required "${PERF_ARTIFACT_DIR}/perf-base-confirm.json"
--optional "${PERF_ARTIFACT_DIR}/perf-base-low-end-confirm.json"
--output "${PERF_ARTIFACT_DIR}/perf-base-confirm-combined.json"

- name: Merge confirmed perf profile artifacts (head)
if: steps.compare.outcome == 'failure'
run: >
bun head/packages/app/script/merge-perf-artifacts.ts
--required "${PERF_ARTIFACT_DIR}/perf-head-confirm.json"
--optional "${PERF_ARTIFACT_DIR}/perf-head-low-end-confirm.json"
--output "${PERF_ARTIFACT_DIR}/perf-head-confirm-combined.json"

- name: Compare confirmed perf baselines
id: compare_confirmed
if: steps.compare.outcome == 'failure'
continue-on-error: true
run: >
bun head/packages/app/script/compare-perf.ts
--base "${PERF_ARTIFACT_DIR}/perf-base-confirm.json"
--head "${PERF_ARTIFACT_DIR}/perf-head-confirm.json"
--base "${PERF_ARTIFACT_DIR}/perf-base-confirm-combined.json"
--head "${PERF_ARTIFACT_DIR}/perf-head-confirm-combined.json"
--output "${PERF_ARTIFACT_DIR}/perf-compare-confirm.json"
--comment-output "${PERF_ARTIFACT_DIR}/perf-comment.md"

Expand Down Expand Up @@ -188,6 +309,26 @@ jobs:
PAWWORK_PERF_TRACE: "1"
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Capture low-end perf diagnostic trace (base)
if: steps.compare.outcome == 'failure' && steps.compare_confirmed.outcome == 'failure' && steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: base
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-base-low-end-trace.json
PAWWORK_PERF_TRACE: "1"
run: bun --cwd base/packages/app test:e2e:local:perf

- name: Capture low-end perf diagnostic trace (head)
if: steps.compare.outcome == 'failure' && steps.compare_confirmed.outcome == 'failure' && steps.low_end_scope.outputs.run_low_end == 'true'
env:
CI: "true"
PAWWORK_PERF_BRANCH: head
PAWWORK_PERF_PROFILE: low-end
PAWWORK_PERF_OUTPUT: ${{ github.workspace }}/perf-artifacts/perf-head-low-end-trace.json
PAWWORK_PERF_TRACE: "1"
run: bun --cwd head/packages/app test:e2e:local:perf

- name: Fail job on comparator regression
if: steps.compare.outcome == 'failure' && steps.compare_confirmed.outcome == 'failure'
run: exit 1
Expand Down
56 changes: 51 additions & 5 deletions packages/app/e2e/perf/perf-probe.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ import {
} from "../selectors"
import { sessionPath, terminalToggleKey } from "../utils"
import { installPerfProbe, resetPerfProbe, snapshotPerfProbe, summarizeScenarioRuns } from "./probe"
import { applyPerfProfile, readPerfProfile, shouldRunScenario, type PerfScenarioName } from "./profiles"
import { seedTimelineRecomputeSession } from "./timeline-fixture"

const outputPath = process.env.PAWWORK_PERF_OUTPUT ?? path.join(process.cwd(), "e2e", "perf-results", "pr0.1-baseline.json")
const perfBranch = process.env.PAWWORK_PERF_BRANCH ?? "dev"
const PERF_PROFILE = readPerfProfile()

const longMarkdown = [
"# Baseline stream",
Expand Down Expand Up @@ -146,6 +149,10 @@ async function scrollTimelineTo(page: Parameters<typeof snapshotPerfProbe>[0], t
expect(found).toBe(true)
}

// Skips the current test when the active perf profile does not include the
// named scenario; otherwise the test proceeds normally.
function skipUnlessScenario(name: PerfScenarioName) {
  const enabled = shouldRunScenario(PERF_PROFILE, name)
  test.skip(!enabled, `${PERF_PROFILE} profile does not run ${name}`)
}

test.describe("PR0.1 perf probe baseline", () => {
test.describe.configure({ mode: "serial" })

Expand All @@ -155,7 +162,9 @@ test.describe("PR0.1 perf probe baseline", () => {
})

test("homepage-cold emits a 3-run JSON baseline", async ({ page, project }) => {
skipUnlessScenario("homepage-cold")
await installPerfProbe(page)
await applyPerfProfile(page, PERF_PROFILE)
await project.open()

const runs = []
Expand All @@ -171,11 +180,13 @@ test.describe("PR0.1 perf probe baseline", () => {
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "homepage-cold", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "homepage-cold", runs }))
})

test("session-streaming-long emits a 3-run JSON baseline", async ({ page, project, llm }) => {
skipUnlessScenario("session-streaming-long")
await installPerfProbe(page)
await applyPerfProfile(page, PERF_PROFILE)
await project.open()

const runs = []
Expand Down Expand Up @@ -216,11 +227,13 @@ test.describe("PR0.1 perf probe baseline", () => {
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "session-streaming-long", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "session-streaming-long", runs }))
})

test("tool-call-expand emits a 3-run JSON baseline", async ({ page, project, llm }) => {
skipUnlessScenario("tool-call-expand")
await installPerfProbe(page)
await applyPerfProfile(page, PERF_PROFILE)
await project.open()

const runs = []
Expand All @@ -246,11 +259,13 @@ test.describe("PR0.1 perf probe baseline", () => {
if (run < 2) await cooldownAfterRun(page)
}

scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "tool-call-expand", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "tool-call-expand", runs }))
})

test("terminal-side-panel-open emits a 3-run JSON baseline", async ({ page, project }) => {
skipUnlessScenario("terminal-side-panel-open")
await installPerfProbe(page)
await applyPerfProfile(page, PERF_PROFILE)
await project.open()

const runs = []
Expand All @@ -273,11 +288,13 @@ test.describe("PR0.1 perf probe baseline", () => {
})
}

scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "terminal-side-panel-open", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "terminal-side-panel-open", runs }))
})

test("session-scroll-reading emits a 3-run JSON baseline", async ({ page, project }) => {
skipUnlessScenario("session-scroll-reading")
await installPerfProbe(page)
await applyPerfProfile(page, PERF_PROFILE)
await project.open()

const runs = []
Expand Down Expand Up @@ -312,6 +329,35 @@ test.describe("PR0.1 perf probe baseline", () => {
})
}

scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, scenario: "session-scroll-reading", runs }))
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "session-scroll-reading", runs }))
})

// Measures timeline recompute cost while scrolling back and forth through a
// seeded, message-heavy session. Runs three times and pushes the aggregate
// onto scenarioResults for the JSON baseline output.
test("session-timeline-recompute emits a 3-run low-end JSON baseline", async ({ page, project }) => {
  // Only runs when the active perf profile includes this scenario
  // (presumably the low-end profile, per the test title — confirm in profiles).
  skipUnlessScenario("session-timeline-recompute")
  await installPerfProbe(page)
  await applyPerfProfile(page, PERF_PROFILE)
  await project.open()

  const runs = []
  // Three measured runs; a fresh session is created per run so one run's
  // rendering cannot warm state for the next.
  for (let run = 0; run < 3; run += 1) {
    await withSession(project.sdk, `perf timeline recompute ${Date.now()}-${run}`, async (session) => {
      await seedTimelineRecomputeSession(project, session.id)
      await page.goto(sessionPath(project.directory, session.id))
      await expect(page.locator(sessionMessageItemSelector).first()).toBeVisible({ timeout: 30_000 })
      // Wait until the seeded fixture has rendered enough messages to make
      // the scroll workload below meaningful.
      await expect.poll(async () => page.locator(sessionMessageItemSelector).count()).toBeGreaterThanOrEqual(8)
      // Reset probe counters so only the scroll workload below is measured.
      await resetPerfProbe(page)
      await page.locator(scrollViewportSelector).first().hover()
      // Alternate wheel direction and jump targets to exercise repeated
      // timeline layout/recompute work in both scroll directions.
      for (let index = 0; index < 4; index += 1) {
        await page.mouse.wheel(0, index % 2 === 0 ? 2400 : -2400)
        await settleFrames(page, 2)
        await scrollTimelineTo(page, index % 2 === 0 ? 0 : 1200)
        await settleFrames(page, 2)
      }
      runs.push(await snapshotPerfProbe(page))
      // Cool down between runs (skipped after the last one).
      if (run < 2) await cooldownAfterRun(page)
    })
  }

  scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "session-timeline-recompute", runs }))
})
})
10 changes: 8 additions & 2 deletions packages/app/e2e/perf/probe.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { Page } from "@playwright/test"
import { aggregatePerfRuns, summarizePerfRun, type PerfRunSummary } from "../../src/testing/perf-metrics"
import { aggregatePerfRuns, summarizePerfRun, type PerfProfile, type PerfRunSummary } from "../../src/testing/perf-metrics"
import type { PerfScenarioName } from "./profiles"

type BrowserPerfSample = {
startedAt: number
Expand Down Expand Up @@ -158,6 +159,11 @@ export async function snapshotPerfProbe(page: Page) {
return summarizePerfRun(sample)
}

export function summarizeScenarioRuns(input: { branch: string; scenario: string; runs: PerfRunSummary[] }) {
/**
 * Aggregates the per-run probe summaries for a single scenario into one
 * scenario-level result via `aggregatePerfRuns`.
 *
 * @param input - Branch label, optional perf profile, scenario name, and the
 *   individual run summaries to aggregate.
 * @returns The aggregate produced by `aggregatePerfRuns` for this scenario.
 */
export function summarizeScenarioRuns(input: {
  branch: string
  profile?: PerfProfile
  scenario: PerfScenarioName
  runs: PerfRunSummary[]
}) {
  const aggregated = aggregatePerfRuns(input)
  return aggregated
}
Loading
Loading