diff --git a/AGENTS.md b/AGENTS.md index b7ed2d64cd..bfb5638146 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -145,7 +145,7 @@ For the full tag dictionary, operational playbook (direct merge / duplicate-titl ## Chat UI conventions - `apps/web/src/components/file-viewer-render-mode.ts` decides URL-load vs srcDoc for HTML previews. Bridges (deck, comment/inspect selection, palette, edit, tweaks) can ONLY inject through the srcDoc path. Add a new disqualifier to `UrlLoadDecision` whenever a feature needs a srcDoc-only bridge; pass it from `FileViewer.tsx` based on a source-content heuristic where appropriate (e.g. `hasTweaksTemplate`). The host keeps both iframes mounted simultaneously and swaps CSS visibility so toggling render mode does not cause an iframe reload flash; `iframeRef.current` stays aligned with the active iframe via `useEffect`. Receive filters use `isOurIframe(ev.source)` to accept messages from either iframe but signals that should ONLY come from the active iframe (e.g. `od:tweaks-available`) re-check `ev.source === iframeRef.current?.contentWindow`. -- TodoWrite UI pins one canonical task list above the chat composer via `PinnedTodoSlot` in `ChatPane.tsx`. The slot reads the latest TodoWrite snapshot across the conversation through `latestTodoWriteInputFromMessages` (`apps/web/src/runtime/todos.ts`). `AssistantMessage.stripTodoToolGroups` removes any TodoWrite tool groups from per message rendering so there is exactly one TodoCard on screen. The progress count includes both `completed` and `in_progress` items (1/4 reads "one underway" not "zero finished"). Dismissal via the Done button is keyed on the snapshot's JSON, so a fresh TodoWrite from the agent automatically re shows the card. +- TodoWrite UI pins one canonical task list above the chat composer via `PinnedTodoSlot` in `ChatPane.tsx`. The slot reads the latest TodoWrite snapshot across the conversation through `latestTodoWriteInputFromMessages` (`apps/web/src/runtime/todos.ts`). 
`AssistantMessage.stripTodoToolGroups` removes any TodoWrite tool groups from per-message rendering so there is exactly one TodoCard on screen. The progress count includes both `completed` and `in_progress` items (1/4 reads "one underway" not "zero finished"). Dismissal via the Done button is keyed on the snapshot's JSON, so a fresh TodoWrite from the agent automatically re-shows the card. `PinnedTodoSlot` sits OUTSIDE the `.chat-log` scroll container, so auto-scroll requires explicit coverage: `ChatPane` passes a `containerRef` into `PinnedTodoSlot` and its `ResizeObserver` observes that element directly, and a pane-level `MutationObserver` (`childList: true` on the chat pane ancestor) re-syncs that observation whenever the slot mounts or unmounts as new TodoWrite snapshots arrive. - `AskUserQuestionCard` (in `ToolCard.tsx`) prefers the live `onAnswerToolUse(toolUseId, content)` route (POSTs to `/api/runs/:id/tool-result`) and falls back to the legacy `onSubmitForm(text)` path when the run has already terminated. Selected chips persist across reloads by parsing the stored `tool_result.content` back into the selections shape. - Tool group rendering uses `dedupeSnapshotToolRetries` to collapse identical `AskUserQuestion` retries (one card per unique input, keeping the latest tool_use_id) and `TodoWrite` snapshots (only the most recent call, since each call is a state replace). diff --git a/apps/web/next.config.ts b/apps/web/next.config.ts index f8c6ff3845..33c787290e 100644 --- a/apps/web/next.config.ts +++ b/apps/web/next.config.ts @@ -1,6 +1,7 @@ import type { NextConfig } from 'next'; +import { existsSync, realpathSync } from 'node:fs'; import { networkInterfaces } from 'node:os'; -import { dirname, isAbsolute, relative } from 'node:path'; +import { dirname, isAbsolute, relative, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; // Daemon port the local Express server binds to (see apps/daemon/src/cli.ts). 
The @@ -23,7 +24,52 @@ const isServerOutput = webOutputMode === 'server' || webOutputMode === 'standalo const shouldStaticExport = isProd && !isServerOutput; const WEB_ROOT = dirname(fileURLToPath(import.meta.url)); -const WORKSPACE_ROOT = dirname(dirname(WEB_ROOT)); + +function resolveWorkspaceRoot(): string { + const computed = dirname(dirname(WEB_ROOT)); + const override = process.env.OD_WORKSPACE_ROOT; + if (override && override.trim()) { + const resolved = isAbsolute(override.trim()) ? override.trim() : resolve(WEB_ROOT, override.trim()); + if (!existsSync(resolved)) { + throw new Error( + `OD_WORKSPACE_ROOT="${override}" resolved to "${resolved}" which does not exist. ` + + `Fix the path or unset the variable to use the computed default.`, + ); + } + // Canonicalize via realpathSync so that symlinked paths (e.g. macOS + // /tmp → /private/tmp) compare correctly against WEB_ROOT. + const canonicalResolved = realpathSync(resolved); + const canonicalWebRoot = realpathSync(WEB_ROOT); + const rel = relative(canonicalResolved, canonicalWebRoot); + // rel.startsWith('..') catches the non-ancestor case on POSIX. + // isAbsolute(rel) catches the Windows cross-drive case where relative() + // returns an absolute path (e.g. C:\repo\apps\web) instead of a ..-path. + if (rel.startsWith('..') || isAbsolute(rel)) { + throw new Error( + `OD_WORKSPACE_ROOT="${override}" resolved to "${canonicalResolved}" but WEB_ROOT "${canonicalWebRoot}" ` + + `is not inside it (relative path "${rel}"). ` + + `The override must be an ancestor of apps/web.`, + ); + } + // Require the resolved path to be a real pnpm workspace root. Without this, + // an ancestor like `/apps` would pass the relative-path check but + // miss the sibling `packages/*` directory that `apps/web` imports from + // (for example `@open-design/contracts`), and Next would later fail deep + // inside file tracing / Turbopack with a much harder-to-diagnose error. 
+ if (!existsSync(resolve(canonicalResolved, 'pnpm-workspace.yaml'))) { + throw new Error( + `OD_WORKSPACE_ROOT="${override}" resolved to "${canonicalResolved}" but no ` + + `pnpm-workspace.yaml was found there. The override must point at the ` + + `pnpm workspace root so outputFileTracingRoot and turbopack.root can ` + + `resolve sibling packages.`, + ); + } + return canonicalResolved; + } + return computed; +} + +const WORKSPACE_ROOT = resolveWorkspaceRoot(); const toPosixPath = (value: string) => value.replaceAll('\\', '/'); function resolveDistDir(defaultValue: string) { diff --git a/apps/web/src/components/ChatPane.tsx b/apps/web/src/components/ChatPane.tsx index 5fb292cce6..229841f3e6 100644 --- a/apps/web/src/components/ChatPane.tsx +++ b/apps/web/src/components/ChatPane.tsx @@ -1,4 +1,4 @@ -import { Fragment, useEffect, useRef, useState, type ReactNode } from 'react'; +import { Fragment, useEffect, useRef, useState, type MutableRefObject, type ReactNode } from 'react'; import { useAnalytics } from '../analytics/provider'; import { trackChatPanelClick } from '../analytics/events'; import { useT } from '../i18n'; @@ -361,6 +361,7 @@ export function ChatPane({ const logRef = useRef(null); const historyWrapRef = useRef(null); const composerRef = useRef(null); + const pinnedTodoRef = useRef(null); const didInitialScrollRef = useRef(false); // Tracks whether the user is glued close enough to the bottom that // streamed content should auto-follow. Distinct from the jump-button @@ -609,12 +610,32 @@ export function ChatPane({ } }; + // The PinnedTodoSlot renders outside the scroll container. When the todo + // card grows, the chat-log's clientHeight shrinks (flex layout) and the + // user drifts away from the bottom. Observe the pinned-todo div so + // followLatestIfPinned fires whenever the card changes height. 
+ let observedPinnedTodo: Element | null = null; + const syncPinnedTodo = () => { + if (!resizeObserver) return; + const pinnedEl = pinnedTodoRef.current; + if (pinnedEl && observedPinnedTodo !== pinnedEl) { + if (observedPinnedTodo) resizeObserver.unobserve(observedPinnedTodo); + resizeObserver.observe(pinnedEl); + observedPinnedTodo = pinnedEl; + } else if (!pinnedEl && observedPinnedTodo) { + resizeObserver.unobserve(observedPinnedTodo); + observedPinnedTodo = null; + } + }; + syncObservedChildren(); + syncPinnedTodo(); const mutationObserver = typeof MutationObserver !== 'undefined' ? new MutationObserver(() => { syncObservedChildren(); + syncPinnedTodo(); followLatestIfPinned(); }) : null; @@ -623,6 +644,15 @@ export function ChatPane({ subtree: true, characterData: true, }); + // PinnedTodoSlot lives outside the chat-log subtree (it is a sibling of + // .chat-log-wrap inside .pane). The MutationObserver above only fires for + // changes inside el, so it cannot detect the slot mounting or unmounting. + // Watch the nearest common ancestor (.pane) with childList-only to catch + // those transitions and keep syncPinnedTodo current. + const paneEl = el.parentElement?.parentElement ?? null; + if (paneEl && mutationObserver) { + mutationObserver.observe(paneEl, { childList: true }); + } return () => { if (followFrame !== null) cancelAnimationFrame(followFrame); @@ -929,6 +959,7 @@ export function ChatPane({ streaming={streaming} dismissedKey={dismissedPinnedTodoKey} onDismiss={setDismissedPinnedTodoKey} + containerRef={pinnedTodoRef} /> void; + containerRef?: MutableRefObject; }) { // `exiting` lets the dismiss click play a slide-down transition before // the slot tears down. Without it React would unmount immediately and @@ -1001,7 +1034,7 @@ function PinnedTodoSlot({ } if (snapshotKey === dismissedKey) return null; return ( -
+
; +let originalResizeObserver: typeof ResizeObserver | undefined; + +function isChatLog(el: HTMLElement): boolean { + return typeof el?.classList?.contains === 'function' && el.classList.contains('chat-log'); +} + +beforeEach(() => { + geom = { scrollHeight: 1000, clientHeight: 400, scrollTop: 1000 }; + rafCallbacks = []; + resizeCallbacks = []; + observedElements = []; + + vi.spyOn(window, 'requestAnimationFrame').mockImplementation((callback) => { + rafCallbacks.push(callback); + return rafCallbacks.length; + }); + + originalResizeObserver = globalThis.ResizeObserver; + class MockResizeObserver { + constructor(callback: ResizeObserverCallback) { + resizeCallbacks.push(callback); + } + observe = vi.fn((el: Element) => { + observedElements.push(el); + }); + unobserve = vi.fn(); + disconnect = vi.fn(); + } + Object.defineProperty(globalThis, 'ResizeObserver', { + configurable: true, + writable: true, + value: MockResizeObserver, + }); + + savedDescriptors = { + scrollTop: Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'scrollTop'), + scrollHeight: Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'scrollHeight'), + clientHeight: Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'clientHeight'), + }; + Object.defineProperty(HTMLElement.prototype, 'scrollTop', { + configurable: true, + get(this: HTMLElement) { + return isChatLog(this) ? geom.scrollTop : 0; + }, + set(this: HTMLElement, v: number) { + if (isChatLog(this)) geom.scrollTop = v; + }, + }); + Object.defineProperty(HTMLElement.prototype, 'scrollHeight', { + configurable: true, + get(this: HTMLElement) { + return isChatLog(this) ? geom.scrollHeight : 0; + }, + }); + Object.defineProperty(HTMLElement.prototype, 'clientHeight', { + configurable: true, + get(this: HTMLElement) { + return isChatLog(this) ? 
geom.clientHeight : 0; + }, + }); +}); + +afterEach(() => { + cleanup(); + vi.restoreAllMocks(); + rafCallbacks = []; + resizeCallbacks = []; + observedElements = []; + if (originalResizeObserver) { + Object.defineProperty(globalThis, 'ResizeObserver', { + configurable: true, + writable: true, + value: originalResizeObserver, + }); + } else { + delete (globalThis as unknown as { ResizeObserver?: unknown }).ResizeObserver; + } + for (const key of ['scrollTop', 'scrollHeight', 'clientHeight'] as const) { + const original = savedDescriptors[key]; + if (original) { + Object.defineProperty(HTMLElement.prototype, key, original); + } else { + delete (HTMLElement.prototype as unknown as Record)[key]; + } + } +}); + +async function flushFrames() { + await act(async () => { + const callbacks = rafCallbacks.splice(0); + callbacks.forEach((callback) => callback(performance.now())); + await Promise.resolve(); + }); +} + +// Build a message set that includes a TodoWrite event so PinnedTodoSlot renders. +function messagesWithTodo(taskCount: number): ChatMessage[] { + const todos = Array.from({ length: taskCount }, (_, i) => ({ + content: `Task ${i + 1}`, + status: 'pending', + })); + return [ + { id: 'u1', role: 'user' as const, content: 'build something', createdAt: Date.now() }, + { + id: 'a1', + role: 'assistant' as const, + content: 'on it', + createdAt: Date.now(), + events: [ + { + kind: 'tool_use' as const, + id: 'tw-1', + name: 'TodoWrite', + input: { todos }, + }, + ], + }, + ]; +} + +function chatPaneEl(messages: ChatMessage[]) { + return ( + 'project-1'} + onSend={() => {}} + onStop={() => {}} + conversations={[]} + activeConversationId={null} + onSelectConversation={() => {}} + onDeleteConversation={() => {}} + /> + ); +} + +describe('chat-log autoscroll when pinned todo card grows', () => { + it('observes the pinned-todo element so its resize triggers the bottom-pin follow', async () => { + // The PinnedTodoSlot lives outside the chat-log scroll container. 
+ // When the todo card grows, the chat-log viewport (clientHeight) + // shrinks. The ResizeObserver must observe the pinned-todo div so + // `followLatestIfPinned` fires and corrects the scroll position. + render(chatPaneEl(messagesWithTodo(3))); + await flushFrames(); + + const pinnedTodoEl = document.querySelector('.chat-pinned-todo'); + expect(pinnedTodoEl, 'PinnedTodoSlot should render with a TodoWrite message').not.toBeNull(); + + // The pinned-todo element must be registered with the ResizeObserver + // so that real-browser growth of the todo card triggers followLatestIfPinned. + expect(observedElements).toContain(pinnedTodoEl); + }); + + it('re-observes the pinned-todo element when a TodoWrite snapshot first mounts', async () => { + // Start with no TodoWrite — PinnedTodoSlot should be absent. + const { rerender } = render(chatPaneEl([])); + await flushFrames(); + expect(document.querySelector('.chat-pinned-todo')).toBeNull(); + + // Add messages with a TodoWrite — PinnedTodoSlot mounts for the first time. + await act(async () => { + rerender(chatPaneEl(messagesWithTodo(2))); + await Promise.resolve(); + }); + await flushFrames(); + + const pinnedTodoEl = document.querySelector('.chat-pinned-todo'); + expect(pinnedTodoEl, 'PinnedTodoSlot should render when messages include a TodoWrite').not.toBeNull(); + + // The pane-level MutationObserver re-syncs the ResizeObserver when + // PinnedTodoSlot mounts. The new element must be registered so real-browser + // growth of the card triggers followLatestIfPinned. + expect(observedElements).toContain(pinnedTodoEl); + }); + + it('scrolls to the bottom when pinned and the todo card grows', async () => { + // Start pinned: scrollTop == scrollHeight (user is at the very bottom). + geom = { scrollHeight: 1000, clientHeight: 400, scrollTop: 1000 }; + render(chatPaneEl(messagesWithTodo(2))); + await flushFrames(); + + // The initial-bottom-scroll effect fires and confirms pinnedToBottomRef = true. 
+ // Now simulate the todo card growing: the viewport (clientHeight) shrinks, + // which means the user can no longer see the latest content even though + // scrollTop is still at its old value. The ResizeObserver callback should + // fire followLatestIfPinned, which snaps scrollTop back to scrollHeight. + geom = { ...geom, clientHeight: 300, scrollHeight: 1000, scrollTop: 600 }; + + await act(async () => { + const callbacks = [...resizeCallbacks]; + callbacks.forEach((callback) => callback([], {} as ResizeObserver)); + await Promise.resolve(); + }); + await flushFrames(); + + // followLatestIfPinned fires from the shared callback and snaps scrollTop + // to scrollHeight (1000). The structural guarantee that the pinned-todo + // element is observed (tested separately above) ensures this path runs in + // the real browser when the card grows. + expect(geom.scrollTop).toBe(1000); + }); +}); diff --git a/e2e/ui/chat-todo-autoscroll.test.ts b/e2e/ui/chat-todo-autoscroll.test.ts new file mode 100644 index 0000000000..6b5a241183 --- /dev/null +++ b/e2e/ui/chat-todo-autoscroll.test.ts @@ -0,0 +1,410 @@ +import { expect, test } from '@playwright/test'; +import type { Page } from '@playwright/test'; + +// Verifies that the chat-log stays pinned to the bottom when the PinnedTodoSlot +// grows (scenario A) and that a deliberate scroll-up is not overridden by a +// subsequent TodoWrite snapshot (scenario B). +// +// jsdom cannot exercise ResizeObserver or real flex-layout geometry, so these +// assertions must run in a real browser via Playwright. The Vitest unit spec +// in apps/web/tests/components/chat-todo-autoscroll.test.tsx confirms that +// the pinned-todo element is observed; this spec confirms that the resulting +// scroll behaviour is correct end-to-end. + +const STORAGE_KEY = 'open-design:config'; + +// Reusable app-config seed: skip onboarding, mock agent, no real model calls. 
+async function seedAppConfig(page: Page) { + await page.addInitScript((key) => { + window.localStorage.setItem( + key, + JSON.stringify({ + mode: 'daemon', + apiKey: '', + baseUrl: 'https://api.anthropic.com', + model: 'claude-sonnet-4-5', + agentId: 'mock', + skillId: null, + designSystemId: null, + onboardingCompleted: true, + agentModels: {}, + privacyDecisionAt: 1, + telemetry: { metrics: false, content: false, artifactManifest: false }, + }), + ); + }, STORAGE_KEY); + + await page.route('**/api/app-config', async (route) => { + if (route.request().method() !== 'GET') { + await route.continue(); + return; + } + await route.fulfill({ + json: { + config: { + onboardingCompleted: true, + agentId: 'mock', + skillId: null, + designSystemId: null, + agentModels: {}, + privacyDecisionAt: 1, + telemetry: { metrics: false, content: false, artifactManifest: false }, + }, + }, + }); + }); + + await page.route('**/api/agents', async (route) => { + await route.fulfill({ + json: { + agents: [ + { + id: 'mock', + name: 'Mock Agent', + bin: 'mock-agent', + available: true, + version: 'test', + models: [{ id: 'default', label: 'Default' }], + }, + ], + }, + }); + }); +} + +// Seed a project + conversation + messages via the daemon HTTP API, then +// navigate to the project/conversation URL. Returns ids needed for follow-up +// API calls. +// +// To guarantee the chat-log is scrollable (needed to exercise the autoscroll +// invariant), we seed FILLER_MSG_COUNT pairs of short user/assistant messages +// before the final TodoWrite assistant message. Each message pair is roughly +// 80 px tall; 12 pairs easily exceed a 600 px viewport so the chat-log always +// has overflow to scroll. 
+const FILLER_MSG_COUNT = 12; + +async function seedProjectWithTodos( + page: Page, + opts: { projectSuffix: string; todoCount: number }, +): Promise<{ projectId: string; conversationId: string }> { + const projectId = `todo-scroll-${opts.projectSuffix}-${Date.now()}`.replace(/[^A-Za-z0-9._-]/g, '-'); + + const projectRes = await page.request.post('/api/projects', { + data: { + id: projectId, + name: `Todo Scroll ${opts.projectSuffix}`, + skillId: null, + designSystemId: null, + pendingPrompt: null, + metadata: { kind: 'prototype' }, + }, + }); + expect(projectRes.ok(), `create project: ${await projectRes.text()}`).toBeTruthy(); + const { conversationId } = (await projectRes.json()) as { conversationId: string }; + expect(conversationId).toBeTruthy(); + + // Seed several filler message pairs so the chat-log has scrollable overflow. + for (let i = 0; i < FILLER_MSG_COUNT; i += 1) { + const base = Date.now() - (FILLER_MSG_COUNT - i + 2) * 1000; + const uRes = await page.request.put( + `/api/projects/${projectId}/conversations/${conversationId}/messages/u-fill-${i}-${projectId}`, + { + data: { + role: 'user', + content: `Filler question ${i + 1}: what is step ${i + 1}?`, + createdAt: base, + }, + }, + ); + expect(uRes.ok(), `upsert filler user msg ${i}: ${await uRes.text()}`).toBeTruthy(); + + const aRes = await page.request.put( + `/api/projects/${projectId}/conversations/${conversationId}/messages/a-fill-${i}-${projectId}`, + { + data: { + role: 'assistant', + content: `Filler answer ${i + 1}: step ${i + 1} involves doing the work carefully.`, + runStatus: 'succeeded', + createdAt: base + 500, + }, + }, + ); + expect(aRes.ok(), `upsert filler assistant msg ${i}: ${await aRes.text()}`).toBeTruthy(); + } + + // Seed the final user message. 
+ const userMsgId = `u-${projectId}`; + const userMsgRes = await page.request.put( + `/api/projects/${projectId}/conversations/${conversationId}/messages/${userMsgId}`, + { + data: { + role: 'user', + content: 'please build something', + createdAt: Date.now() - 2000, + }, + }, + ); + expect(userMsgRes.ok(), `upsert user msg: ${await userMsgRes.text()}`).toBeTruthy(); + + // Seed an assistant message carrying a TodoWrite tool_use event. + const todos = Array.from({ length: opts.todoCount }, (_, i) => ({ + content: `Task ${i + 1}`, + status: 'pending', + })); + const assistantMsgId = `a-${projectId}`; + const assistantMsgRes = await page.request.put( + `/api/projects/${projectId}/conversations/${conversationId}/messages/${assistantMsgId}`, + { + data: { + role: 'assistant', + content: 'sure, here is the plan', + runStatus: 'succeeded', + events: [ + { + kind: 'tool_use', + id: `tw-${projectId}`, + name: 'TodoWrite', + input: { todos }, + }, + ], + createdAt: Date.now() - 1000, + }, + }, + ); + expect(assistantMsgRes.ok(), `upsert assistant msg: ${await assistantMsgRes.text()}`).toBeTruthy(); + + return { projectId, conversationId }; +} + +// Pause until .chat-log is mounted, messages have loaded, and the loading +// overlay is gone. The first non-empty message from the seed is "Filler +// question 1" so waiting for that text confirms the daemon responded with +// the stored message list. +async function waitForChatReady(page: Page) { + const loading = page.getByText('Loading Open Design…'); + await loading.waitFor({ state: 'detached', timeout: 10_000 }).catch(() => {}); + await expect(page.locator('.chat-log')).toBeVisible({ timeout: 10_000 }); + await expect(page.getByTestId('chat-composer')).toBeVisible({ timeout: 10_000 }); + // Wait until at least one filler message has rendered so the chat-log has + // real content (not the empty-state template card grid). 
+ await expect( + page.locator('.chat-log').getByText('Filler question 1: what is step 1?', { exact: true }), + ).toBeVisible({ timeout: 10_000 }); +} + +// Read the chat-log scroll distance from the bottom (scrollHeight - scrollTop - clientHeight). +async function chatLogBottomDistance(page: Page): Promise { + return page.evaluate(() => { + const el = document.querySelector('.chat-log'); + if (!el) return -1; + return el.scrollHeight - el.scrollTop - el.clientHeight; + }); +} + +// Read the total scrollable overflow (scrollHeight - clientHeight). +// A value > 0 means the chat-log has content that can be scrolled. +async function chatLogScrollableHeight(page: Page): Promise { + return page.evaluate(() => { + const el = document.querySelector('.chat-log'); + if (!el) return -1; + return el.scrollHeight - el.clientHeight; + }); +} + +// Simulate what happens when the .chat-pinned-todo element grows by manually +// setting the chat-log's scrollTop to mimic the drift that occurs in production +// environments where scroll-anchoring may not compensate fully for a flex-layout +// reflow caused by a sibling growing outside the scroll container. +// +// The mechanism: +// 1. Verify the chat-log is currently pinned to the bottom. +// 2. Grow .chat-pinned-todo (reducing .chat-log.clientHeight in the flex layout). +// 3. Manually set scrollTop to its pre-grow value (i.e. do NOT adjust for the +// reduced clientHeight). This leaves the user `extraPx` above the new bottom. +// 4. Wait one rAF cycle. On the fix branch, the ResizeObserver on +// .chat-pinned-todo fires followLatestIfPinned which snaps scrollTop back +// to scrollHeight. On main the observer does not fire, so the drift persists. 
+async function growPinnedTodo(page: Page, extraPx: number) { + await page.evaluate((px) => { + const logEl = document.querySelector('.chat-log'); + if (!logEl) throw new Error('No .chat-log element found'); + + const el = document.querySelector('.chat-pinned-todo'); + if (!el) throw new Error('No .chat-pinned-todo element found'); + + // Snapshot the current scrollTop (user is at the bottom, so this equals + // scrollHeight - clientHeight approximately). + const scrollTopBefore = logEl.scrollTop; + + // Grow the element. The flex reflow reduces logEl.clientHeight by ~px. + el.style.minHeight = `${el.offsetHeight + px}px`; + // Force layout so clientHeight is updated synchronously. + void logEl.clientHeight; + + // Re-apply the pre-grow scrollTop. This cancels any scroll-anchoring + // adjustment the browser made, leaving the user drifted above the bottom. + // The ChatPane's followLatestIfPinned (if its ResizeObserver fires) will + // correct this; on main it won't because the observer is not on this element. + logEl.scrollTop = scrollTopBefore; + }, extraPx); + // Give the browser two rAF cycles to flush ResizeObserver callbacks and the + // nested followLatestIfPinned rAF. + await page.waitForTimeout(100); +} + +test.describe('chat pane autoscroll on TodoCard growth', () => { + test.describe.configure({ timeout: 45_000 }); + + test.beforeEach(async ({ page }) => { + await seedAppConfig(page); + }); + + test('scenario A: pinned user stays at bottom after PinnedTodoCard grows', async ({ + page, + }) => { + const { projectId, conversationId } = await seedProjectWithTodos(page, { + projectSuffix: 'a', + todoCount: 4, + }); + + await page.goto(`/projects/${projectId}/conversations/${conversationId}`, { + waitUntil: 'domcontentloaded', + }); + await waitForChatReady(page); + + // After initial load the chat log should be pinned to the bottom. 
+ const distanceAfterLoad = await chatLogBottomDistance(page); + expect( + distanceAfterLoad, + `expected chat-log pinned to bottom on load (distance=${distanceAfterLoad})`, + ).toBeLessThan(20); + + // Guard: the filler messages must create actual scroll overflow. If the + // chat-log is not scrollable the grow step is a no-op and the assertion + // below passes vacuously — defeat the bug detector entirely. + const scrollableHeight = await chatLogScrollableHeight(page); + expect( + scrollableHeight, + `expected chat-log to have scrollable overflow (scrollableHeight=${scrollableHeight}); ` + + `seed more filler messages if this fires`, + ).toBeGreaterThan(50); + + // Verify the PinnedTodoSlot rendered. + await expect(page.locator('.chat-pinned-todo')).toBeVisible({ timeout: 5_000 }); + + // Capture clientHeight before growing so we can assert the grow step + // actually changed the layout this test is designed to protect against. + const clientHeightBeforeGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.clientHeight ?? -1, + ); + + // Grow the pinned-todo card by 80 px (simulates a new TodoWrite snapshot with + // more items) and verify the chat-log snaps back to the bottom. + await growPinnedTodo(page, 80); + + // Hard precondition: the grow step must have reduced clientHeight. If a + // layout change stops .chat-pinned-todo from shrinking .chat-log.clientHeight, + // distanceAfterGrow < 20 passes vacuously and the regression detector is + // defeated — fail fast instead. + const clientHeightAfterGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.clientHeight ?? 
-1, + ); + expect( + clientHeightAfterGrow, + `expected grow step to reduce chat-log clientHeight ` + + `(before=${clientHeightBeforeGrow} after=${clientHeightAfterGrow}); ` + + `increase extraPx in growPinnedTodo or check the layout if this fires`, + ).toBeLessThan(clientHeightBeforeGrow); + + const distanceAfterGrow = await chatLogBottomDistance(page); + expect( + distanceAfterGrow, + `expected chat-log re-pinned after todo card grew (distance=${distanceAfterGrow})`, + ).toBeLessThan(20); + }); + + test('scenario B: user scroll-up is preserved when PinnedTodoCard grows', async ({ + page, + }) => { + const { projectId, conversationId } = await seedProjectWithTodos(page, { + projectSuffix: 'b', + todoCount: 4, + }); + + await page.goto(`/projects/${projectId}/conversations/${conversationId}`, { + waitUntil: 'domcontentloaded', + }); + await waitForChatReady(page); + + // Verify PinnedTodoSlot is mounted. + await expect(page.locator('.chat-pinned-todo')).toBeVisible({ timeout: 5_000 }); + + // Scroll the chat-log up by 150 px to break the pinned-to-bottom invariant. + // We scroll by at least 80 px (the pinnedToBottomRef threshold) to ensure + // the ChatPane considers the user as having deliberately scrolled away. + await page.evaluate(() => { + const el = document.querySelector('.chat-log'); + if (!el) throw new Error('No .chat-log element found'); + // Scroll up by 150 px — beyond the 80 px pinned threshold. + el.scrollTop = Math.max(0, el.scrollTop - 150); + // Dispatch a synthetic scroll event so ChatPane's onScroll listener + // fires and updates pinnedToBottomRef to false. + el.dispatchEvent(new Event('scroll', { bubbles: true })); + }); + await page.waitForTimeout(50); + + const distanceAfterScroll = await chatLogBottomDistance(page); + + // Hard precondition: the scroll must have moved past the 80px suppression + // threshold. 
If this fails, the seed / layout changed and the test no + // longer exercises the "user scrolled away, do not yank them back" path — + // fail fast instead of silently skipping the regression check below. + expect( + distanceAfterScroll, + `expected scroll-up to move chat-log more than 80px from bottom (distance=${distanceAfterScroll}); ` + + `seed more filler messages or increase the scroll offset if this fires`, + ).toBeGreaterThan(80); + + // Capture scrollTop and clientHeight before growing — the invariant is that + // scrollTop (not distance-to-bottom) is preserved. Distance-to-bottom + // naturally increases by ~extraPx because growPinnedTodo reduces clientHeight + // while holding scrollTop fixed, so comparing distances before/after would + // fail on correct behavior. + const scrollTopBeforeGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.scrollTop ?? -1, + ); + const clientHeightBeforeGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.clientHeight ?? -1, + ); + + // Now grow the todo card — the non-pinned user should NOT be dragged back. + await growPinnedTodo(page, 80); + + // Hard precondition: the grow step must have actually changed clientHeight. + // If this fails, the layout changed and the test no longer exercises the + // "user scrolled away, do not yank them back" path — fail fast. + const clientHeightAfterGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.clientHeight ?? -1, + ); + expect( + clientHeightAfterGrow, + `expected grow step to reduce chat-log clientHeight ` + + `(before=${clientHeightBeforeGrow} after=${clientHeightAfterGrow}); ` + + `increase extraPx in growPinnedTodo or check the layout if this fires`, + ).toBeLessThan(clientHeightBeforeGrow); + + // Core invariant: scrollTop must be preserved. 
A regression where + // followLatestIfPinned fires and snaps the user back to the bottom would + // set scrollTop = scrollHeight - clientHeight, far from scrollTopBeforeGrow. + const scrollTopAfterGrow = await page.evaluate( + () => document.querySelector('.chat-log')?.scrollTop ?? -1, + ); + const SCROLL_PRESERVATION_TOLERANCE_PX = 20; + expect( + Math.abs(scrollTopAfterGrow - scrollTopBeforeGrow), + `expected scrollTop preserved within ${SCROLL_PRESERVATION_TOLERANCE_PX}px of pre-grow ` + + `(before=${scrollTopBeforeGrow} after=${scrollTopAfterGrow} ` + + `delta=${Math.abs(scrollTopAfterGrow - scrollTopBeforeGrow)})`, + ).toBeLessThan(SCROLL_PRESERVATION_TOLERANCE_PX); + }); +});