Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion packages/app/e2e/perf/perf-probe.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { sessionPath, terminalToggleKey } from "../utils"
import type { createSdk } from "../utils"
import { installPerfProbe, resetPerfProbe, snapshotPerfProbe, summarizeScenarioRuns } from "./probe"
import { applyPerfProfile, readPerfProfile, shouldRunScenario, type PerfScenarioName } from "./profiles"
import { seedTimelineRecomputeSession } from "./timeline-fixture"
import { TIMELINE_RECOMPUTE_SEED_TURN_COUNT, seedTimelineRecomputeSession } from "./timeline-fixture"

const outputPath = process.env.PAWWORK_PERF_OUTPUT ?? path.join(process.cwd(), "e2e", "perf-results", "pr0.1-baseline.json")
const perfBranch = process.env.PAWWORK_PERF_BRANCH ?? "dev"
Expand All @@ -41,6 +41,12 @@ const longMarkdown = [
const heavyBashCommand =
'node -e \'for (let i = 0; i < 900; i++) console.log(String(i).padStart(4, "0") + " " + "heavy bash output ".repeat(8))\''

// Fixed draft typed into the composer for each input-lag run; keeping the text
// constant makes latency numbers comparable across runs.
const inputLagSentences = [
  "Long session input lag probe.",
  "Typing remains responsive while a realistic message history is mounted.",
  "This fixed draft protects the composer path from timeline render regressions.",
]
const inputLagText = inputLagSentences.join(" ")

const scenarioResults: ReturnType<typeof summarizeScenarioRuns>[] = []

type PerfSdk = ReturnType<typeof createSdk>
Expand Down Expand Up @@ -149,6 +155,25 @@ async function submitVisiblePrompt(page: Parameters<typeof snapshotPerfProbe>[0]
await expect.poll(async () => (await readPromptSend(page)).started, { timeout: 10_000 }).toBeGreaterThan(previous.started)
}

// Reads the composer's current visible text, with zero-width spaces stripped
// and surrounding whitespace trimmed.
async function readPromptText(page: Parameters<typeof snapshotPerfProbe>[0]) {
  const prompt = page.locator(promptSelector).first()
  return prompt.evaluate((el) => {
    const raw = el.textContent ?? ""
    return raw.replace(/\u200B/g, "").trim()
  })
}

// Ensures all `expectedCount` seeded messages are mounted before a measurement runs.
// A cached session may initially render fewer messages than the seed, so scroll to
// the top of the timeline and, if needed, load the earlier history.
async function revealCachedSessionMessages(page: Parameters<typeof snapshotPerfProbe>[0], expectedCount: number) {
  const messages = page.locator(sessionMessageItemSelector)
  if ((await messages.count()) < expectedCount) {
    // Hover first so the wheel event targets the timeline viewport, then scroll upward.
    await page.locator(scrollViewportSelector).first().hover()
    await page.mouse.wheel(0, -2400)
    await settleFrames(page, 2)
    await scrollTimelineTo(page, 0)
    await settleFrames(page, 2)
    // Button label is matched in both English and Chinese locales.
    const loadEarlier = page.getByRole("button", { name: /Load earlier messages|加载更早的消息/i }).first()
    await expect(loadEarlier).toBeVisible({ timeout: 30_000 })
    await loadEarlier.click()
  }
  // NOTE(review): assumes a single "load earlier" click mounts the full remainder —
  // confirm this holds if the seed count grows beyond one page of history.
  await expect(messages).toHaveCount(expectedCount, { timeout: 30_000 })
}

async function scrollTimelineTo(page: Parameters<typeof snapshotPerfProbe>[0], top: number) {
const found = await page.evaluate(
({ top, scrollViewportSelector, turnListSelector }) => {
Expand Down Expand Up @@ -274,6 +299,37 @@ test.describe("PR0.1 perf probe baseline", () => {
scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "homepage-cold", runs }))
})

// Measures composer typing responsiveness while the full seeded message history
// is mounted, producing a 3-run baseline for the long-session-input-lag scenario.
test("long-session-input-lag emits a 3-run JSON baseline", async ({ page, project }) => {
  skipUnlessScenario("long-session-input-lag")
  await installPerfProbe(page)
  await applyPerfProfile(page, PERF_PROFILE)
  await project.open()

  // Explicitly typed (consistent with scenarioResults) instead of relying on an
  // implicitly-typed evolving any[].
  const runs: Awaited<ReturnType<typeof snapshotPerfProbe>>[] = []
  for (let run = 0; run < 3; run += 1) {
    await withSession(project.sdk, `perf input lag ${Date.now()}-${run}`, async (session) => {
      await seedTimelineRecomputeSession(project, session.id)
      await page.goto(sessionPath(project.directory, session.id))
      await expect(page.locator(sessionMessageItemSelector).first()).toBeVisible({ timeout: 30_000 })
      await expect(page.locator(promptSelector).first()).toBeVisible({ timeout: 30_000 })
      // Make sure every seeded turn is mounted so the measurement reflects a long session.
      await revealCachedSessionMessages(page, TIMELINE_RECOMPUTE_SEED_TURN_COUNT)

      const prompt = page.locator(promptSelector).first()
      await prompt.click()
      await prompt.fill("")
      await expect(page.locator(sessionMessageItemSelector)).toHaveCount(TIMELINE_RECOMPUTE_SEED_TURN_COUNT)
      // Reset probe counters so the snapshot only covers the typing below.
      await resetPerfProbe(page)
      await page.keyboard.type(`${inputLagText} run ${run + 1}.`)
      await expect.poll(() => readPromptText(page)).toBe(`${inputLagText} run ${run + 1}.`)
      await settleFrames(page, 4)
      runs.push(await snapshotPerfProbe(page))
      if (run < 2) await cooldownAfterRun(page)
    })
  }

  scenarioResults.push(summarizeScenarioRuns({ branch: perfBranch, profile: PERF_PROFILE, scenario: "long-session-input-lag", runs }))
})

test("session-streaming-long emits a 3-run JSON baseline", async ({ page, project, llm }) => {
skipUnlessScenario("session-streaming-long")
await installPerfProbe(page)
Expand Down
2 changes: 2 additions & 0 deletions packages/app/e2e/perf/profiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { PerfProfile } from "../../src/testing/perf-metrics"

export type PerfScenarioName =
| "homepage-cold"
| "long-session-input-lag"
| "session-streaming-long"
| "tool-call-expand"
| "tool-default-open-heavy-bash"
Expand All @@ -12,6 +13,7 @@ export type PerfScenarioName =

const defaultScenarios = new Set<PerfScenarioName>([
"homepage-cold",
"long-session-input-lag",
"session-streaming-long",
"tool-call-expand",
"tool-default-open-heavy-bash",
Expand Down
7 changes: 7 additions & 0 deletions packages/app/e2e/perf/profiles.unit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,10 @@ test("default profile runs heavy default-open bash perf coverage", () => {
expect(shouldRunScenario("default", scenario)).toBe(true)
expect(shouldRunScenario("low-end", scenario)).toBe(false)
})

// Verifies the long-session-input-lag scenario is enabled in the default perf
// profile and disabled for the low-end profile.
test("default profile runs long-session input lag coverage", () => {
  // Typed declaration instead of an `as` assertion: the compiler verifies the
  // literal is a valid PerfScenarioName rather than being told to trust it.
  const scenario: PerfScenarioName = "long-session-input-lag"

  expect(shouldRunScenario("default", scenario)).toBe(true)
  expect(shouldRunScenario("low-end", scenario)).toBe(false)
})
4 changes: 3 additions & 1 deletion packages/app/e2e/perf/timeline-fixture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ type TimelineProject = {
}
}

export const TIMELINE_RECOMPUTE_SEED_TURN_COUNT = 36

export async function seedTimelineRecomputeSession(project: TimelineProject, sessionID: string) {
for (let turn = 0; turn < 36; turn += 1) {
for (let turn = 0; turn < TIMELINE_RECOMPUTE_SEED_TURN_COUNT; turn += 1) {
await project.sdk.session.promptAsync({
sessionID,
noReply: true,
Expand Down
Loading