diff --git a/web/src/components/BlockTimeline.tsx b/web/src/components/BlockTimeline.tsx new file mode 100644 index 00000000..cd0a8a9b --- /dev/null +++ b/web/src/components/BlockTimeline.tsx @@ -0,0 +1,742 @@ +/** + * BlockTimeline — render an assistant turn's blocks inline, in stream order. + * + * First principles: + * + * 1. **Faithful timeline.** Each phase the LLM emits renders at the spot + * it streamed. No per-turn aggregation, no hoisting. + * + * 2. **One chip per phase of work.** A "phase" is a contiguous run of + * reasoning + tool blocks with no text between. Reasoning followed by + * tools (the natural think→act pattern) reads as one collapsible + * activity, not two stacked chips. Text always breaks the phase — + * "preamble text → tools → final text" stays three distinct elements. + * + * 3. **Blocks are self-stating.** A chip is muted when its work is + * settled and active when something inside it is in flight. There is + * no separate turn-level status surface. + * + * 4. **One live cursor for the gaps.** When the engine is mid-flight but + * no block is currently absorbing the state (initial warm-up, + * preparing the next tool, post-tool analyzing), a small `<LiveCursor>` + * at the bottom of the message body covers the transition. The + * moment the next block starts streaming, the block's own active + * state takes over and the cursor steps aside. + * + * Within a phase: consecutive tool blocks whose calls share a single tool + * name fold into one tool row with `×N`. Reasoning rows each keep their + * own row. The chip's body lists the rows in stream order. 
+ */ + +import { AlertCircle, Check, ChevronRight, Copy, Loader2 } from "lucide-react"; +import { type ReactNode, useCallback, useMemo, useState } from "react"; +import { Streamdown } from "streamdown"; +import type { + ContentBlock, + PreparingTool, + StreamingState, + ToolCallDisplay, +} from "../hooks/useChat"; +import { useMinDisplayTime, type VisualStatus } from "../hooks/useMinDisplayTime"; +import { formatDuration, stripServerPrefix } from "../lib/format"; +import { + aggregateGroup, + describeCall, + type DisplayDetail, + type GroupDescription, + type Tone, + type ToolDescription, +} from "../lib/tool-display"; +import { InlineAppView } from "./InlineAppView"; +import { ResourceLinkView } from "./ResourceLinkView"; + +// ───────────────────────────────────────────────────────────────────────────── +// Data model +// ───────────────────────────────────────────────────────────────────────────── + +/** One row inside an `activity` segment's chip body. */ +type ActivityRow = + | { kind: "reasoning"; text: string } + | { kind: "tool"; name: string; calls: ToolCallDisplay[] }; + +/** + * One renderable item after segmentation + within-segment tool folding. + * + * - `text` — single text block (whole-paragraph prose). + * - `activity` — a contiguous run of reasoning + tool blocks with no text + * between, presented as ONE chip whose body lists each `rows[]` entry. + */ +type TimelineItem = { kind: "text"; text: string } | { kind: "activity"; rows: ActivityRow[] }; + +/** + * Walk `blocks[]`, partition at text boundaries, and within each activity + * segment fold consecutive same-name tool blocks into one tool row. + * + * Empty reasoning blocks (zero-length text) and empty tool blocks are + * dropped so the timeline doesn't render placeholders for nothing. 
+ */ +function foldBlocks(blocks: ReadonlyArray): TimelineItem[] { + const items: TimelineItem[] = []; + let rows: ActivityRow[] = []; + + const flush = () => { + if (rows.length === 0) return; + items.push({ kind: "activity", rows }); + rows = []; + }; + + for (const block of blocks) { + if (block.type === "text") { + flush(); + if (block.text.length > 0) items.push({ kind: "text", text: block.text }); + continue; + } + if (block.type === "reasoning") { + if (block.text.length === 0) continue; + rows.push({ kind: "reasoning", text: block.text }); + continue; + } + // tool block + if (block.toolCalls.length === 0) continue; + const sharedName = sameNameAcross(block.toolCalls); + const prev = rows[rows.length - 1]; + if ( + sharedName !== null && + prev?.kind === "tool" && + prev.name === sharedName && + prev.name !== "" + ) { + prev.calls.push(...block.toolCalls); + } else { + rows.push({ + kind: "tool", + name: sharedName ?? "", + calls: [...block.toolCalls], + }); + } + } + flush(); + return items; +} + +function sameNameAcross(calls: ReadonlyArray): string | null { + if (calls.length === 0) return null; + const first = calls[0].name; + for (const c of calls) if (c.name !== first) return null; + return first; +} + +// ───────────────────────────────────────────────────────────────────────────── +// BlockTimeline — top-level iterator +// ───────────────────────────────────────────────────────────────────────────── + +interface BlockTimelineProps { + blocks: ReadonlyArray; + /** True for the currently-streaming assistant message. 
*/ + isCurrentMessage: boolean; + streamingState: StreamingState; + preparingTool: PreparingTool | null; + displayDetail: DisplayDetail; +} + +export function BlockTimeline({ + blocks, + isCurrentMessage, + streamingState, + preparingTool, + displayDetail, +}: BlockTimelineProps) { + const items = useMemo(() => foldBlocks(blocks), [blocks]); + + return ( + <> + {items.map((item, idx) => { + const isLast = idx === items.length - 1; + const isTailDelta = isCurrentMessage && isLast && streamingState === "streaming"; + if (item.kind === "text") { + return ( + // biome-ignore lint/suspicious/noArrayIndexKey: items derived from append-only blocks; identity by position is stable +
+ + {item.text} + +
+ ); + } + return ( + // biome-ignore lint/suspicious/noArrayIndexKey: same as above +
+ + +
+ ); + })} + {isCurrentMessage && ( + + )} + + ); +} + +function lastRow(rows: ReadonlyArray): ActivityRow | undefined { + return rows[rows.length - 1]; +} + +function collectToolCalls(rows: ReadonlyArray): ToolCallDisplay[] { + const out: ToolCallDisplay[] = []; + for (const r of rows) if (r.kind === "tool") out.push(...r.calls); + return out; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Live cursor — covers the gaps between blocks +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Render only when the engine is mid-flight but no block is absorbing the + * state. `streamingState` tells us which case we're in: + * + * - `streaming` → text/reasoning block receiving deltas; that block's own + * active state covers it → cursor hides. + * - `working` → a tool call has status="running"; the chip spins → + * cursor hides. + * - `thinking` → pre-first-block warm-up; cursor shows "Thinking…". + * - `preparing` → tool being built server-side, no tool block pushed yet; + * cursor shows "Calling X…". + * - `analyzing` → post-tool-result digest before next reasoning/text; + * cursor shows "Analyzing…". + * - `null` → turn done; cursor hides. + */ +function LiveCursor({ + streamingState, + preparingTool, +}: { + streamingState: StreamingState; + preparingTool: PreparingTool | null; +}) { + const label = liveCursorLabel(streamingState, preparingTool); + if (label === null) return null; + return ( +
+ + {label} +
+ ); +} + +function liveCursorLabel( + streamingState: StreamingState, + preparingTool: PreparingTool | null, +): string | null { + switch (streamingState) { + case "thinking": + return "Thinking…"; + case "preparing": + return preparingTool ? `Calling ${stripServerPrefix(preparingTool.name)}…` : "Calling…"; + case "analyzing": + return "Analyzing…"; + default: + return null; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Activity chip — one collapsible widget per phase of work +// ───────────────────────────────────────────────────────────────────────────── + +interface ActivityChipProps { + rows: ReadonlyArray; + /** True when the trailing row is reasoning still receiving deltas. */ + isReasoningTailStreaming: boolean; + displayDetail: DisplayDetail; +} + +function ActivityChip({ rows, isReasoningTailStreaming, displayDetail }: ActivityChipProps) { + const [open, setOpen] = useState(false); + const toggle = useCallback(() => setOpen((v) => !v), []); + + // Flatten every tool call across the segment so useMinDisplayTime gets a + // stable list, then weave smoothed statuses back into descriptions for + // tone / label derivation. Without smoothing, a 2ms tool flashes + // running→done too briefly to register. 
+ const allCalls = useMemo(() => collectToolCalls(rows), [rows]); + const visualStatuses = useMinDisplayTime(allCalls); + const smoothedCalls = useMemo( + () => applyVisualStatuses(allCalls, visualStatuses), + [allCalls, visualStatuses], + ); + const smoothedRows = useMemo( + () => weaveSmoothed(rows, allCalls, smoothedCalls), + [rows, allCalls, smoothedCalls], + ); + const descriptions = useMemo(() => smoothedCalls.map(describeCall), [smoothedCalls]); + const group = useMemo(() => aggregateGroup(descriptions), [descriptions]); + + if (displayDetail === "quiet") return null; + + // Reasoning-tail streaming counts as "running" for the chip even though + // no tool is in flight — the only piece of state the aggregator can't + // see on its own (it knows tools, not reasoning). + const tone: Tone = group.tone === "running" || isReasoningTailStreaming ? "running" : group.tone; + const head = chipHead(rows, group, isReasoningTailStreaming); + const hasBody = rows.some((r) => r.kind === "reasoning" || r.kind === "tool"); + const isSingleRow = rows.length === 1; + + return ( +
+ + {open && hasBody && ( +
+ {isSingleRow ? ( + + ) : ( + smoothedRows.map((row, idx) => ( + // biome-ignore lint/suspicious/noArrayIndexKey: rows within a segment are append-only and don't reorder + + )) + )} +
+ )} +
+ ); +} + +interface ChipHead { + label: string; + subject: string | null; + /** Count to render as a `×N` suffix. 0 means "don't render the suffix". */ + countSuffix: number; + totalMs: number | null; + tokenLabel: string | null; +} + +/** + * Compose the chip's collapsed-state label from a `GroupDescription` plus + * the reasoning context the aggregator doesn't see (token count, the + * streaming-tail flag). This function does no aggregation of its own — + * it picks tense and assembles strings. + * + * Fallback verb → count-led label ("3 actions"). The verb word would be + * verb-shaped scaffolding with no real signal; the count is the strongest + * truth we have when actions can't be characterized as one thing. The + * `×N` suffix is suppressed in that case — the count is already in the + * label. + */ +function chipHead( + rows: ReadonlyArray, + group: GroupDescription, + isReasoningTailStreaming: boolean, +): ChipHead { + const totalReasoningChars = rows.reduce( + (acc, r) => (r.kind === "reasoning" ? acc + r.text.length : acc), + 0, + ); + + // Pure reasoning — no tool calls in this segment. + if (group.count === 0) { + return { + label: isReasoningTailStreaming ? "Thinking…" : "Thought", + subject: null, + countSuffix: 0, + totalMs: null, + tokenLabel: isReasoningTailStreaming ? null : approximateTokenLabel(totalReasoningChars), + }; + } + + const running = group.tone === "running" || isReasoningTailStreaming; + const { label, showCountSuffix } = formatGroupLabel(group, { running }); + return { + label, + subject: group.subject, + countSuffix: showCountSuffix ? group.count : 0, + totalMs: group.totalMs, + tokenLabel: approximateTokenLabel(totalReasoningChars), + }; +} + +/** + * Single source of truth for "what does a group of tool calls look like?". 
+ * Both the phase-chip head (ActivityChip) and the per-row head (ToolRow) + * consume this — the rule must match in both places or recovered errors, + * fallback verbs, and count suffixes will diverge silently between + * surfaces. Keep all label / count-suffix decisions in this one function. + * + * - Fallback verb → count-led label ("3 actions"), no `×N` suffix + * (the count is in the label itself). + * - Real verb → "Verb object" (or just "Verb" when object is absent), + * with `×N` suffix when count > 1. + * + * `running` picks past- vs. present-progressive tense. Callers compute it + * themselves — the phase chip combines `group.tone === "running"` with the + * reasoning-tail-streaming flag (a piece of state the aggregator can't + * see); rows just pass `group.tone === "running"`. + */ +function formatGroupLabel( + group: GroupDescription, + opts: { running: boolean }, +): { label: string; showCountSuffix: boolean } { + if (group.verbIsFallback) { + return { label: `${group.count} actions`, showCountSuffix: false }; + } + const verb = opts.running ? group.verbPresent : group.verb; + return { + label: group.object ? `${verb} ${group.object}` : verb, + showCountSuffix: group.count > 1, + }; +} + +/** Render one segment-body row when there's only ONE row — skip the + * nested row chrome and render the content directly. Click-to-expand is + * already provided by the chip head. */ +function SingleRowBody({ row }: { row: ActivityRow }) { + if (row.kind === "reasoning") { + return ( +
+
{row.text}
+
+ ); + } + // tool + return ( +
+ {row.calls.length === 1 ? ( + + ) : ( + row.calls.map((c) => ) + )} +
+ ); +} + +/** Render one row in a multi-row segment body — each row is its own + * expandable mini-section so the user can drill into reasoning text or + * per-call detail without losing the surrounding context. */ +function ActivityRowView({ row }: { row: ActivityRow }) { + if (row.kind === "reasoning") return ; + return ; +} + +function ReasoningRow({ text }: { text: string }) { + const [open, setOpen] = useState(false); + const toggle = useCallback(() => setOpen((v) => !v), []); + const tokenLabel = approximateTokenLabel(text.length); + return ( +
+ + {open && ( +
+
{text}
+
+ )} +
+ ); +} + +function ToolRow({ calls }: { calls: ReadonlyArray }) { + const [open, setOpen] = useState(false); + const toggle = useCallback(() => setOpen((v) => !v), []); + const descriptions = useMemo(() => calls.map(describeCall), [calls]); + const group = useMemo(() => aggregateGroup(descriptions), [descriptions]); + const { label, showCountSuffix } = formatGroupLabel(group, { + running: group.tone === "running", + }); + return ( +
+ + {open && ( +
+ {descriptions.length === 1 ? ( + + ) : ( + descriptions.map((d) => ) + )} +
+ )} +
+ ); +} + +function ToolCallRow({ item }: { item: ToolDescription }) { + const [open, setOpen] = useState(false); + const toggle = useCallback(() => setOpen((v) => !v), []); + const hasDetail = item.input.length > 0 || item.resultText != null || item.errorText != null; + return ( +
+ + {open && hasDetail && ( +
+ +
+ )} +
+ ); +} + +function ToolCallDetail({ item }: { item: ToolDescription }) { + return ( + <> + {item.input.length > 0 && ( +
+
+ {item.input.map((field) => ( +
+
{field.key}
+
+ {field.kind === "long" ? ( +
{field.display}
+ ) : ( + field.display + )} +
+
+ ))} +
+
+ )} + + {item.errorText && ( +
+
{item.errorText}
+
+ )} + + {item.resultText && !item.errorText && ( +
+
{item.resultText}
+
+ )} + + ); +} + +function Section({ + label, + copyable, + children, +}: { + label: string; + copyable?: string; + children: ReactNode; +}) { + return ( +
+
+ {label} + {copyable != null && } +
+ {children} +
+ ); +} + +type CopyState = "idle" | "copied" | "failed"; + +function CopyButton({ content }: { content: string }) { + const [state, setState] = useState("idle"); + const onClick = useCallback( + async (e: React.MouseEvent) => { + e.stopPropagation(); + try { + if (!navigator.clipboard?.writeText) throw new Error("Clipboard API not available"); + await navigator.clipboard.writeText(content); + setState("copied"); + } catch { + setState("failed"); + } + window.setTimeout(() => setState("idle"), 1500); + }, + [content], + ); + return ( + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Tool widgets (inline app views + resource-link cards) +// ───────────────────────────────────────────────────────────────────────────── + +function ToolWidgets({ calls }: { calls: ReadonlyArray }) { + const widgets = calls.filter((tc) => tc.resourceUri && tc.status === "done" && tc.appName); + const resourceLinkCalls = calls.filter( + (tc) => tc.status === "done" && tc.appName && tc.resourceLinks && tc.resourceLinks.length > 0, + ); + if (widgets.length === 0 && resourceLinkCalls.length === 0) return null; + return ( + <> + {widgets.map((tc) => ( + // Pass the full ui:// URI through — InlineAppView strips the scheme + // and forwards everything after as the resource path. The legacy + // regex `/^ui:\/\/[^/]+\/(.+)$/` dropped the first segment on the + // assumption it was a namespace prefix, which breaks two-segment + // URIs like `ui:///` where the first segment is + // load-bearing (Reboot's convention for state-scoped UI methods). 
+ + ))} + {resourceLinkCalls.flatMap((tc) => + tc.resourceLinks!.map((link) => ( + + )), + )} + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Shared chrome icons +// ───────────────────────────────────────────────────────────────────────────── + +function HeadIcon({ tone }: { tone: Tone }) { + if (tone === "running") { + return ( + + ); + } + if (tone === "error") { + return ( + + ); + } + return ; +} + +function RowIcon({ tone }: { tone: Tone }) { + return ; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function applyVisualStatuses( + calls: ReadonlyArray, + visualStatuses: ReadonlyArray, +): ToolCallDisplay[] { + if (visualStatuses.length !== calls.length) return [...calls]; + return calls.map((c, i) => { + const vs = visualStatuses[i]; + if (!vs || vs.status === c.status) return c; + return { ...c, status: vs.status }; + }); +} + +/** + * Rebuild the rows array using smoothed call objects, preserving structure + * (reasoning rows pass through; tool rows get smoothed calls in the same + * order they appeared in the flattened list). + */ +function weaveSmoothed( + rows: ReadonlyArray, + origCalls: ReadonlyArray, + smoothed: ReadonlyArray, +): ActivityRow[] { + if (smoothed.length !== origCalls.length) return [...rows]; + const byId = new Map(); + for (let i = 0; i < origCalls.length; i++) byId.set(origCalls[i].id, smoothed[i]); + return rows.map((r) => + r.kind === "tool" + ? { kind: "tool", name: r.name, calls: r.calls.map((c) => byId.get(c.id) ?? c) } + : r, + ); +} + +/** Same heuristic as the old reasoning row — 4 chars/token, k-form ≥2500. 
*/ +function approximateTokenLabel(charCount: number): string { + if (charCount === 0) return ""; + const tokens = Math.round(charCount / 4); + if (tokens >= 2500) return `${(tokens / 1000).toFixed(1)}k tokens`; + return `${tokens} tokens`; +} diff --git a/web/src/components/MessageInput.tsx b/web/src/components/MessageInput.tsx index 2aa5e92d..34a73449 100644 --- a/web/src/components/MessageInput.tsx +++ b/web/src/components/MessageInput.tsx @@ -234,7 +234,7 @@ export function MessageInput({ - {/* Shortcut hints — status copy lives on the TurnActivityPill, not here. */} + {/* Shortcut hints — status copy lives on the BlockTimeline / LiveCursor, not here. */}
{onNewConversation && (
) : (
- {/* Turn-level activity surface — anchors at the top of the - assistant message. Single source of truth for status, - tool grouping, and reasoning timeline. */} - - {/* Render content blocks in temporal order. Reasoning is - surfaced inside the pill; tool blocks emit only their - widget/resource attachments here. */} {msg.blocks ? ( - msg.blocks.map((block, blockIdx) => { - if (block.type === "reasoning") { - // Reasoning lives in the pill timeline now. - return null; - } - if (block.type === "text" && block.text) { - return ( - // biome-ignore lint/suspicious/noArrayIndexKey: blocks are append-only and don't reorder -
- - {block.text} - -
- ); - } - if (block.type === "tool" && block.toolCalls.length > 0) { - const blockWidgets = block.toolCalls.filter( - (tc) => tc.resourceUri && tc.status === "done" && tc.appName, - ); - const resourceLinkCalls = block.toolCalls.filter( - (tc) => - tc.status === "done" && - tc.appName && - tc.resourceLinks && - tc.resourceLinks.length > 0, - ); - if (blockWidgets.length === 0 && resourceLinkCalls.length === 0) { - return null; - } - return ( - // biome-ignore lint/suspicious/noArrayIndexKey: blocks are append-only and don't reorder -
- {blockWidgets.map((tc) => { - // Pass the full ui:// URI through — InlineAppView strips the - // scheme and forwards everything after as the resource path. - // The legacy regex `/^ui:\/\/[^/]+\/(.+)$/` dropped the first - // segment on the assumption it was a namespace prefix, which - // breaks two-segment URIs like `ui:///` where - // the first segment is load-bearing (Reboot's convention for - // state-scoped UI methods). - return ( - - ); - })} - {resourceLinkCalls.flatMap((tc) => - tc.resourceLinks!.map((link) => ( - - )), - )} -
- ); - } - return null; - }) + ) : ( + // Legacy / pre-block-model conversations: render the + // serialized message content as one Streamdown block. + // The block model has been the engine's emission shape + // for some time, so this branch is essentially + // history-only; kept for archived JSONLs that don't + // have `blocks` populated.
groupTurn(blocks ?? []), [blocks]); - - // Flatten every call across the turn so useMinDisplayTime gets a stable list, - // then thread the smoothed statuses back into per-entry copies before - // describing. Without this, a 2ms tool flashes "running → done" too fast to - // register. - const allCalls = useMemo(() => flattenCalls(entries), [entries]); - const visualStatuses = useMinDisplayTime(allCalls); - const adjustedEntries = useMemo( - () => applyVisualStatuses(entries, allCalls, visualStatuses), - [entries, allCalls, visualStatuses], - ); - const summary = useMemo(() => describeTurn(adjustedEntries), [adjustedEntries]); - - const [expanded, setExpanded] = useState(false); - const toggle = useCallback(() => setExpanded((v) => !v), []); - - if (displayDetail === "quiet") return null; - - // Visibility: nothing to show when the turn produced no tool activity AND - // we aren't actively narrating a pre-tool state (thinking / preparing). - const liveLeadingState = - isCurrentTurn && (streamingState === "thinking" || streamingState === "preparing"); - if (summary.totalCalls === 0 && !liveLeadingState) return null; - - const head = headDescription({ - summary, - streamingState, - preparingTool, - isCurrentTurn, - }); - const showChevron = entries.length > 0; - - return ( -
- - - {expanded && entries.length > 0 && ( -
- {adjustedEntries.map((entry, idx) => - entry.kind === "tool" ? ( - - ) : ( - // biome-ignore lint/suspicious/noArrayIndexKey: reasoning entries don't reorder; index is stable - - ), - )} -
- )} -
- ); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// Head label derivation -// ───────────────────────────────────────────────────────────────────────────── - -interface HeadDescription { - tone: "running" | "neutral"; - spinning: boolean; - text: string; -} - -interface HeadInputs { - summary: TurnSummary; - streamingState: StreamingState; - preparingTool: PreparingTool | null; - isCurrentTurn: boolean; -} - -function headDescription({ - summary, - streamingState, - preparingTool, - isCurrentTurn, -}: HeadInputs): HeadDescription { - const isLive = - isCurrentTurn && - (streamingState === "thinking" || - streamingState === "preparing" || - streamingState === "working" || - streamingState === "analyzing"); - const spinning = isLive || summary.running; - - // Live pre-tool states — these fire before any tool call has landed in the - // turn (or between calls when the next one is being planned). - if (isCurrentTurn && streamingState === "preparing" && preparingTool) { - return { - tone: "running", - spinning: true, - text: `Calling ${stripServerPrefix(preparingTool.name)}…`, - }; - } - if (isCurrentTurn && streamingState === "analyzing") { - return { tone: "running", spinning: true, text: "Analyzing…" }; - } - if (summary.totalCalls === 0) { - // Only the bare "Thinking…" state — covered by the early visibility gate - // unless we're a live turn. - return { tone: "running", spinning: true, text: "Thinking…" }; - } - - // Tool activity exists — pick tense from running-ness. - const subject = summary.topSubject ? 
` ${summary.topSubject}` : ""; - if (spinning) { - return { - tone: "running", - spinning: true, - text: `${summary.dominantVerbPresent}${subject}`, - }; - } - return { - tone: "neutral", - spinning: false, - text: `${summary.dominantVerb}${subject}`, - }; -} - -function HeadIcon({ tone }: { tone: "running" | "neutral" }) { - if (tone === "running") { - return ( - - ); - } - return ; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Timeline rows -// ───────────────────────────────────────────────────────────────────────────── - -function ToolGroupRow({ entry }: { entry: Extract }) { - const [open, setOpen] = useState(false); - const toggle = useCallback(() => setOpen((v) => !v), []); - - const descriptions = useMemo(() => entry.calls.map(describeCall), [entry.calls]); - const tone: Tone = descriptions.some((d) => d.tone === "running") - ? "running" - : descriptions.some((d) => d.tone === "error") - ? "error" - : "ok"; - const totalMs = useMemo(() => sumDurations(descriptions), [descriptions]); - const verbPhrase = useMemo(() => groupVerbPhrase(descriptions, tone), [descriptions, tone]); - const headSubject = useMemo(() => firstSubject(descriptions), [descriptions]); - const count = descriptions.length; - - return ( -
- - {open && ( -
- {count === 1 ? ( - - ) : ( - descriptions.map((d) => ) - )} -
- )} -
- ); -} - -function ToolCallRow({ item }: { item: ToolDescription }) { - const [open, setOpen] = useState(false); - const toggle = useCallback(() => setOpen((v) => !v), []); - const hasDetail = item.input.length > 0 || item.resultText != null || item.errorText != null; - return ( -
- - {open && hasDetail && ( -
- -
- )} -
- ); -} - -function ReasoningRow({ text }: { text: string }) { - const [open, setOpen] = useState(false); - const toggle = useCallback(() => setOpen((v) => !v), []); - const tokenLabel = approximateTokenLabel(text.length); - return ( -
- - {open && ( -
-
{text}
-
- )} -
- ); -} - -function RowIcon({ tone }: { tone: Tone }) { - if (tone === "running") { - return ( - - ); - } - if (tone === "error") { - return ( - - ); - } - return ; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Per-call detail (Input / Result / Error) — kept as the existing pattern. -// ───────────────────────────────────────────────────────────────────────────── - -function ToolCallDetail({ item }: { item: ToolDescription }) { - return ( - <> - {item.input.length > 0 && ( -
-
- {item.input.map((field) => ( -
-
{field.key}
-
- {field.kind === "long" ? ( -
{field.display}
- ) : ( - field.display - )} -
-
- ))} -
-
- )} - - {item.errorText && ( -
-
{item.errorText}
-
- )} - - {item.resultText && !item.errorText && ( -
-
{item.resultText}
-
- )} - - ); -} - -function Section({ - label, - copyable, - children, -}: { - label: string; - copyable?: string; - children: React.ReactNode; -}) { - return ( -
-
- {label} - {copyable != null && } -
- {children} -
- ); -} - -type CopyState = "idle" | "copied" | "failed"; - -function CopyButton({ content }: { content: string }) { - const [state, setState] = useState("idle"); - const onClick = useCallback( - async (e: React.MouseEvent) => { - e.stopPropagation(); - try { - if (!navigator.clipboard?.writeText) { - throw new Error("Clipboard API not available"); - } - await navigator.clipboard.writeText(content); - setState("copied"); - } catch { - setState("failed"); - } - window.setTimeout(() => setState("idle"), 1500); - }, - [content], - ); - return ( - - ); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Helpers -// ───────────────────────────────────────────────────────────────────────────── - -function flattenCalls(entries: ReadonlyArray): ToolCallDisplay[] { - const out: ToolCallDisplay[] = []; - for (const e of entries) { - if (e.kind === "tool") { - for (const c of e.calls) out.push(c); - } - } - return out; -} - -/** - * Overlay smoothed visual statuses back onto the entries' calls. Tools that - * just completed stay visually `running` for the min-display grace so they - * don't flash. - */ -function applyVisualStatuses( - entries: ReadonlyArray, - allCalls: ReadonlyArray, - visualStatuses: ReadonlyArray, -): TimelineEntry[] { - if (visualStatuses.length !== allCalls.length) return [...entries]; - const byId = new Map(); - for (let i = 0; i < allCalls.length; i++) { - byId.set(allCalls[i].id, visualStatuses[i]); - } - return entries.map((e) => { - if (e.kind !== "tool") return e; - const calls = e.calls.map((c) => { - const vs = byId.get(c.id); - if (!vs || vs.status === c.status) return c; - return { ...c, status: vs.status }; - }); - return { kind: "tool", name: e.name, calls }; - }); -} - -function sumDurations(items: ReadonlyArray): number | null { - let any = false; - let total = 0; - for (const it of items) { - if (typeof it.durationMs === "number") { - any = true; - total += it.durationMs; - } - } - return any ? 
total : null; -} - -/** - * Verb phrase for a tool group row. All calls share a name (and thus a verb); - * tense comes from the group's tone. Error tone never reaches this row (the - * group row never goes red — error tone here would imply *every* call in the - * group failed, in which case we still show the per-call red dot inside). - */ -/** - * Verb phrase for a tool-group row: present tense while running, past tense at - * rest, paired with the tool name's object. No article — "Ran listtransactions" - * not "Ran the listtransactions" — because the ×N count and duration suffix - * already read awkwardly with one. - */ -function groupVerbPhrase(items: ReadonlyArray, tone: Tone): string { - if (items.length === 0) return ""; - const sample = items[0]; - const verb = tone === "running" ? (PRESENT_TENSE[sample.verb] ?? sample.verb) : sample.verb; - return sample.object ? `${verb} ${sample.object}` : verb; -} - -function firstSubject(items: ReadonlyArray): string | null { - for (const it of items) { - if (it.headSubject) return it.headSubject; - } - return null; -} - -/** Same heuristic as the old ReasoningBlock — 4 chars/token, k-form ≥2500. */ -function approximateTokenLabel(charCount: number): string { - if (charCount === 0) return ""; - const tokens = Math.round(charCount / 4); - if (tokens >= 2500) return `${(tokens / 1000).toFixed(1)}k tokens`; - return `${tokens} tokens`; -} diff --git a/web/src/index.css b/web/src/index.css index 84ef4d35..e064e72e 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -460,15 +460,18 @@ .turn-pill__head:disabled { cursor: default; } .turn-pill__head:disabled:hover { opacity: 0.7; color: var(--muted-foreground); } -/* Active OR expanded → box treatment so the head reads as a real surface. */ -.turn-pill[data-tone="running"] > .turn-pill__head, +/* Expanded → box treatment so the head reads as the top of a real + surface. 
Active (running) state is NOT boxed — it uses the spinner + + brand color treatment below, the same as <LiveCursor>. Keeping + in-flight chrome consistent across both surfaces means the user sees + one "loading" shape regardless of whether work is owned by a chip or + by the gap between chips. */ .turn-pill[data-expanded="true"] > .turn-pill__head { padding: 6px 12px; background: var(--card); border: 1px solid var(--border); opacity: 1; } -.turn-pill[data-tone="running"] > .turn-pill__head:hover, .turn-pill[data-expanded="true"] > .turn-pill__head:hover { border-color: color-mix(in oklch, var(--border), var(--foreground) 20%); } @@ -642,15 +645,16 @@ padding: 4px 0 8px 18px; } -/* Reasoning row variant — muted, monospace-free */ +/* Reasoning text — sits inside the already-bordered chip body, so no + second border / background. Just slightly muted prose with a scrollable + max height for long traces. */ .turn-pill__reasoning { margin: 0; - padding: 8px 12px; - background: var(--muted); - border: 1px solid var(--border); - border-radius: 6px; + padding: 0; + background: transparent; + border: 0; color: var(--muted-foreground); - font-size: 12px; + font-size: 12.5px; line-height: 1.55; white-space: pre-wrap; word-break: break-word; @@ -658,6 +662,15 @@ overflow-y: auto; } +/* Single-row chip bodies (just reasoning, or just one tool group): the + chip head already provides the affordance, so render the content + directly inside the body with consistent padding instead of wrapping + it in another row chrome. 
*/ +.turn-pill__reasoning-wrap, +.turn-pill__tool-wrap { + padding: 10px 12px; +} + /* Sections (Input / Result / Error) */ .turn-pill__section { margin-top: 8px; } .turn-pill__section:first-child { margin-top: 0; } @@ -752,6 +765,36 @@ color: var(--warm-hover); } +/* ─────────────────────────────────────────────────────────────────────── + * Live cursor — covers the gaps between blocks during a streaming turn + * + * Appears at the bottom of an assistant message body only while the engine + * is in a "between blocks" state (thinking pre-first-block, preparing the + * next tool call, or analyzing a result). When a block is actively + * absorbing the state (text/reasoning streaming, tool running), the block's + * own active treatment covers it and the cursor stays hidden. + * + * Visual: small spinner + muted label. No box; deliberately quiet — the + * blocks themselves are the loud part. + * ─────────────────────────────────────────────────────────────────────── */ +.live-cursor { + display: inline-flex; + align-items: center; + gap: 8px; + padding: 2px 0; + color: var(--muted-foreground); + font-size: 12px; +} +.live-cursor__spinner { + color: var(--processing); + animation: spin 1s linear infinite; + flex-shrink: 0; +} +.live-cursor__label { + color: var(--processing); + font-weight: 500; +} + /* Workspace selector dropdown entrance */ .ws-dropdown-enter { animation: ws-dropdown-in 150ms ease-out; diff --git a/web/src/lib/tool-display/aggregate.ts b/web/src/lib/tool-display/aggregate.ts new file mode 100644 index 00000000..8c5a876f --- /dev/null +++ b/web/src/lib/tool-display/aggregate.ts @@ -0,0 +1,137 @@ +/** + * Aggregator — collapse N `ToolDescription`s into one `GroupDescription` + * that a chip head can render directly. + * + * Pure function. Deterministic. Never fails — every input produces some + * description so the UI can always render *something* sensible. + * + * Field rules: + * + * - `verb`: majority verb across the group. 
If a single verb covers + * more than half the calls it wins; otherwise we surface a neutral + * fallback rather than misclaim the action. + * - `object`: only when every non-null value agrees AND we picked a + * real verb. A fallback verb pinned to a real object reads as + * nonsense ("Worked manage tools") — the verb already admits we + * don't know what happened; pairing it with an object pretends we + * do. When the verb is the fallback, object is null. + * - `subject`: only when every non-null value agrees. Always allowed, + * even with the fallback verb — the subject comes from the user's + * input and is true regardless of which tools ran. + * - `totalMs`: sum of known durations; `null` when none are known. + * - `tone`: any running → running; else the LAST call's tone. Earlier + * errors followed by a later success are the natural shape of agentic + * recovery — the model tried something, it failed, it adjusted, it + * succeeded. Escalating the chip head to "error" in that case trains + * the user to ignore the red icon when it does appear. The per-call + * rows in the chip body still show their own tones, so the user can + * still see what failed by expanding. + * + * Scope: this is Layer 1 of the tool-display aggregation stack. It does + * NOT understand verb synonymy (that's Layer 2, a future taxonomy) and + * has no plugin/registry hook (that's Layer 3, deferred until a bundle + * actually needs it). Resist adding either here. + */ + +import type { Tone, ToolDescription } from "./types.ts"; +import { PRESENT_TENSE } from "./verbs.ts"; + +/** Verb shown when no single verb covers a majority of the group. */ +const MAJORITY_FALLBACK = "Worked"; + +/** Summary of a group of tool calls, ready for a chip head to render. */ +export interface GroupDescription { + /** Past-tense verb — majority verb if >50%, else the neutral fallback. */ + verb: string; + /** Present-progressive form (e.g. "Searching") for the running state. 
*/ + verbPresent: string; + /** + * True when `verb` is the neutral fallback because no verb covered a + * majority. Renderers should treat this as a signal that the verb is + * NOT a real characterization — typically by leading with the count + * ("3 actions") instead of the verb word, and by suppressing the + * `object` (already null in this case). + */ + verbIsFallback: boolean; + /** Inferred object when every non-null `object` agrees; null otherwise. */ + object: string | null; + /** Headline subject when every non-null `headSubject` agrees; null otherwise. */ + subject: string | null; + /** Total number of calls in the group. */ + count: number; + /** Sum of per-call durations in ms, when any are known. */ + totalMs: number | null; + /** Aggregate tone: running if any call is running; otherwise the last call's tone. */ + tone: Tone; +} + +export function aggregateGroup(descriptions: ReadonlyArray<ToolDescription>): GroupDescription { + const verb = majorityVerb(descriptions); + const verbIsFallback = verb === MAJORITY_FALLBACK && descriptions.length > 1; + return { + verb, + verbPresent: PRESENT_TENSE[verb] ?? verb, + verbIsFallback, + object: verbIsFallback ? null : agreedField(descriptions, (d) => d.object), + subject: agreedField(descriptions, (d) => d.headSubject), + count: descriptions.length, + totalMs: sumDurations(descriptions), + tone: aggregateTone(descriptions), + }; +} + +/** + * Pick the verb shared by more than half the calls; fall back to a + * neutral word when no verb dominates. The strict majority threshold + * keeps us from labeling a 3-way split with whichever verb happens to + * sort last. + */ +function majorityVerb(descriptions: ReadonlyArray<ToolDescription>): string { + if (descriptions.length === 0) return MAJORITY_FALLBACK; + if (descriptions.length === 1) return descriptions[0].verb; + const counts = new Map<string, number>(); + for (const d of descriptions) counts.set(d.verb, (counts.get(d.verb) ??
0) + 1); + const threshold = descriptions.length / 2; + for (const [verb, n] of counts) { + if (n > threshold) return verb; + } + return MAJORITY_FALLBACK; +} + +/** + * Return the field's value only when every non-null value across the + * group agrees on it. Mixed values resolve to `null`. Tolerates partial + * coverage: 2 of 3 calls with subject "news" + 1 with null still shows + * "news"; "news" + "weather" + null shows null. + */ +function agreedField( + descriptions: ReadonlyArray<ToolDescription>, + pick: (d: ToolDescription) => string | null, +): string | null { + const seen = new Set<string>(); + for (const d of descriptions) { + const v = pick(d); + if (v) seen.add(v); + } + return seen.size === 1 ? [...seen][0] : null; +} + +function sumDurations(descriptions: ReadonlyArray<ToolDescription>): number | null { + let total: number | null = null; + for (const d of descriptions) { + if (typeof d.durationMs === "number") total = (total ?? 0) + d.durationMs; + } + return total; +} + +function aggregateTone(descriptions: ReadonlyArray<ToolDescription>): Tone { + // Running takes precedence — anything in flight makes the group in flight. + for (const d of descriptions) { + if (d.tone === "running") return "running"; + } + // Otherwise the group's terminal outcome is the LAST call's tone. + // Recovery (error → … → success) reads as success; failure-without- + // recovery (… → error) reads as error. + if (descriptions.length === 0) return "ok"; + return descriptions[descriptions.length - 1].tone; +} diff --git a/web/src/lib/tool-display/index.ts b/web/src/lib/tool-display/index.ts index ebfe944c..c4ee1a9f 100644 --- a/web/src/lib/tool-display/index.ts +++ b/web/src/lib/tool-display/index.ts @@ -6,15 +6,8 @@ * let the generic (Tier 0) describer do its job.
*/ +export { aggregateGroup, type GroupDescription } from "./aggregate.ts"; export { describeCall } from "./describe.ts"; export type { ToolRenderer } from "./registry.ts"; export { registerToolRenderer } from "./registry.ts"; -export { describeTurn, groupTurn } from "./turn.ts"; -export type { - DisplayDetail, - InputField, - TimelineEntry, - Tone, - ToolDescription, - TurnSummary, -} from "./types.ts"; +export type { DisplayDetail, InputField, Tone, ToolDescription } from "./types.ts"; diff --git a/web/src/lib/tool-display/turn.ts b/web/src/lib/tool-display/turn.ts deleted file mode 100644 index be93e43d..00000000 --- a/web/src/lib/tool-display/turn.ts +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Turn selector — collapses one assistant turn's `blocks[]` into a single - * activity timeline for the TurnActivityPill. - * - * Two invariants: - * - * 1. **Cross-block tool grouping.** Every call of the same (stripped) tool - * name within the turn merges into one `tool` entry, regardless of how - * reasoning interleaves between calls. The block model only coalesces - * *consecutive* tool calls; this selector does the rest. Without it, - * extended-thinking turns produce a stack of single-call entries (see - * Mercury repro in the redesign notes). - * - * 2. **First-occurrence ordering.** A tool group sits at the index of its - * first call; later calls of the same tool fold in without moving the - * group. Reasoning entries are appended at their own position, so the - * timeline still reads "reasoning then activity then more reasoning" - * truthfully. - */ - -import type { ContentBlock, ToolCallDisplay } from "../../hooks/useChat.ts"; -import { stripServerPrefix } from "../format.ts"; -import { describeCall } from "./describe.ts"; -import type { TimelineEntry, TurnSummary } from "./types.ts"; -import { dominantVerb, PRESENT_TENSE } from "./verbs.ts"; - -/** - * Walk `blocks[]` and produce the turn's timeline. 
Text blocks render in the - * message body (not here); only `reasoning` and `tool` blocks contribute. - * - * Buckets by full (prefixed) tool name, not the stripped form — two servers - * that each expose a `search` tool produce two distinct rows. The display - * uses the stripped form, so the user still sees "Searched ×N" per server - * group without the wire-name clutter. - */ -export function groupTurn(blocks: ReadonlyArray): TimelineEntry[] { - const entries: TimelineEntry[] = []; - // (mutable) tool-group buckets keyed by *full* tool name. We push placeholder - // entries into `entries` and accumulate calls into these arrays by reference. - const buckets = new Map(); - - for (const block of blocks) { - if (block.type === "reasoning") { - if (block.text.length === 0) continue; - entries.push({ kind: "reasoning", text: block.text }); - } else if (block.type === "tool") { - for (const call of block.toolCalls) { - const bucketKey = call.name; - const bucket = buckets.get(bucketKey); - if (bucket) { - bucket.push(call); - } else { - const fresh: ToolCallDisplay[] = [call]; - buckets.set(bucketKey, fresh); - entries.push({ kind: "tool", name: stripServerPrefix(call.name), calls: fresh }); - } - } - } - // type === "text" — message body, not timeline. - } - - return entries; -} - -/** - * Summarize the turn for the pill's L1 head. Pure derivation from the - * timeline; the pill component combines this with `streamingState` to pick - * the running-vs-done label. - */ -export function describeTurn(entries: ReadonlyArray): TurnSummary { - const allCalls = entries.flatMap((e) => (e.kind === "tool" ? [...e.calls] : [])); - const descriptions = allCalls.map(describeCall); - - const verbPast = descriptions.length > 0 ? dominantVerb(descriptions.map((d) => d.verb)) : "Ran"; - const verbPresent = PRESENT_TENSE[verbPast] ?? verbPast; - - // Top subject: first non-null headSubject from a call whose verb matches the - // dominant verb. Falls back to any non-null headSubject. 
Null when calls - // span multiple subjects or have none — better to omit than mislead. - let topSubject: string | null = null; - for (const d of descriptions) { - if (d.verb === verbPast && d.headSubject) { - topSubject = d.headSubject; - break; - } - } - if (!topSubject) { - for (const d of descriptions) { - if (d.headSubject) { - topSubject = d.headSubject; - break; - } - } - } - - let totalMs: number | null = null; - for (const d of descriptions) { - if (typeof d.durationMs === "number") { - totalMs = (totalMs ?? 0) + d.durationMs; - } - } - - const running = descriptions.some((d) => d.tone === "running"); - - return { - dominantVerb: verbPast, - dominantVerbPresent: verbPresent, - topSubject, - totalCalls: descriptions.length, - totalMs, - running, - }; -} diff --git a/web/src/lib/tool-display/types.ts b/web/src/lib/tool-display/types.ts index 505e4f16..9096014f 100644 --- a/web/src/lib/tool-display/types.ts +++ b/web/src/lib/tool-display/types.ts @@ -6,8 +6,6 @@ * raw tool-call data directly; it only consumes these shapes. */ -import type { ToolCallDisplay } from "../../hooks/useChat.ts"; - /** * Display tone for a single tool call. `running` drives the present-tense * verb and the spinner icon; `ok` / `error` are the terminal states. @@ -56,35 +54,3 @@ export interface ToolDescription { errorText: string | null; durationMs: number | null; } - -/** - * One entry in a turn's activity timeline. - * - * - `reasoning` rows surface model thinking inline with the tool activity. - * - `tool` rows collapse every call of the same (stripped) tool name within - * the turn into a single group, regardless of how reasoning interleaves - * between them. The group sits at the position of its first call. - */ -export type TimelineEntry = - | { kind: "reasoning"; text: string } - | { kind: "tool"; name: string; calls: ReadonlyArray }; - -/** - * Turn-level summary used by the pill's L1 (collapsed) head. 
Derived from the - * full set of tool calls in a turn; not coupled to streamingState — the pill - * combines this with streamingState to choose its running-vs-done label. - */ -export interface TurnSummary { - /** Past-tense dominant verb across all calls ("Researched"). */ - dominantVerb: string; - /** Present-progressive form for use during streaming ("Researching"). */ - dominantVerbPresent: string; - /** Headline subject when calls share one, otherwise null. */ - topSubject: string | null; - /** Total number of tool calls in the turn (sum across groups). */ - totalCalls: number; - /** Sum of per-call durations in ms, when any are known. */ - totalMs: number | null; - /** True while any call is still running. */ - running: boolean; -} diff --git a/web/test/BlockTimeline.test.tsx b/web/test/BlockTimeline.test.tsx new file mode 100644 index 00000000..26c5bbb7 --- /dev/null +++ b/web/test/BlockTimeline.test.tsx @@ -0,0 +1,438 @@ +import { describe, expect, it } from "bun:test"; +import { act, fireEvent, render } from "@testing-library/react"; +import { BlockTimeline } from "../src/components/BlockTimeline.tsx"; +import type { + ContentBlock, + PreparingTool, + StreamingState, + ToolCallDisplay, +} from "../src/hooks/useChat.ts"; + +/** + * Locks in the first-principles UX: + * + * - Every block renders inline at the spot it streamed. + * - Per-block chips (no per-turn aggregation). + * - Consecutive same-name tool blocks fold into one chip with ×N. + * - LiveCursor covers thinking / preparing / analyzing gaps; hides + * when a block is actively absorbing the state. + * - Tool chips spin while any call is running; mute when all done. 
+ */ + +// ───────────────────────────────────────────────────────────────────────────── +// Fixtures +// ───────────────────────────────────────────────────────────────────────────── + +function done(id: string, name = "search"): ToolCallDisplay { + return { id, name, status: "done", ok: true, ms: 25 }; +} +function running(id: string, name = "search"): ToolCallDisplay { + return { id, name, status: "running" }; +} +const text = (t: string): ContentBlock => ({ type: "text", text: t }); +const reasoning = (t: string): ContentBlock => ({ type: "reasoning", text: t }); +const tool = (...calls: ToolCallDisplay[]): ContentBlock => ({ type: "tool", toolCalls: calls }); + +function renderTimeline(opts: { + blocks: ContentBlock[]; + isCurrentMessage?: boolean; + streamingState?: StreamingState; + preparingTool?: PreparingTool | null; +}) { + const { + blocks, + isCurrentMessage = false, + streamingState = null, + preparingTool = null, + } = opts; + return render( + , + ); +} + +/** Find all pill heads in DOM order — avoids `.turn-pill__head` selector + * because happy-dom rejects BEM `__` in querySelectorAll. */ +function pillHeads(container: HTMLElement): HTMLButtonElement[] { + const out: HTMLButtonElement[] = []; + for (const b of Array.from(container.getElementsByTagName("button"))) { + if ((b.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__head")) { + out.push(b as HTMLButtonElement); + } + } + return out; +} + +function liveCursorLabel(container: HTMLElement): string | null { + for (const el of Array.from(container.getElementsByTagName("span"))) { + if ((el.getAttribute("class") ?? "").split(/\s+/).includes("live-cursor__label")) { + return (el.textContent ?? 
"").trim(); + } + } + return null; +} + +function timeline(container: HTMLElement): string[] { + const out: string[] = []; + const walker = container.ownerDocument!.createTreeWalker( + container, + 1 /* SHOW_ELEMENT */, + null, + ); + let node = walker.currentNode as HTMLElement | null; + while (node) { + const cls = node.getAttribute?.("class") ?? ""; + const classes = cls.split(/\s+/); + if (classes.includes("streamdown-container")) { + const t = (node.textContent ?? "").trim(); + if (t) out.push(`text:${t}`); + } else if (classes.includes("turn-pill__head")) { + const t = (node.textContent ?? "").trim(); + if (t) out.push(`chip:${t}`); + } else if (classes.includes("live-cursor")) { + const t = (node.textContent ?? "").trim(); + if (t) out.push(`cursor:${t}`); + } + node = walker.nextNode() as HTMLElement | null; + } + return out; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Chronological rendering +// ───────────────────────────────────────────────────────────────────────────── + +describe("BlockTimeline order", () => { + it("renders blocks in stream order — text, tool, text", () => { + // The user's reported bug: tool calls between two text spans were + // hoisted above both. Must render inline at the spot they streamed. 
+ const { container } = renderTimeline({ + blocks: [ + text("Let me find the file."), + tool(done("a", "list")), + text("Got both."), + ], + }); + const order = timeline(container); + const firstText = order.findIndex((s) => s.startsWith("text:Let me find")); + const chip = order.findIndex((s) => s.startsWith("chip:")); + const secondText = order.findIndex((s) => s.startsWith("text:Got both")); + expect(firstText).toBeGreaterThanOrEqual(0); + expect(chip).toBeGreaterThan(firstText); + expect(secondText).toBeGreaterThan(chip); + }); + + it("emits one chip per reasoning block", () => { + // Two reasoning blocks with a text between them must surface as two + // separate Thought chips, not collapse into one. + const { container } = renderTimeline({ + blocks: [reasoning("first"), text("midway"), reasoning("second")], + }); + expect(pillHeads(container).length).toBe(2); + }); + + it("skips empty (zero-token) reasoning blocks", () => { + const { container } = renderTimeline({ + blocks: [reasoning(""), text("hi")], + }); + expect(pillHeads(container).length).toBe(0); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Tool-chip folding +// ───────────────────────────────────────────────────────────────────────────── + +describe("BlockTimeline phase folding", () => { + it("folds contiguous reasoning + tool blocks into ONE chip per phase", () => { + // The user's reported pain: [Thought · 25 tokens] and [Used tools ×4] + // as two separate chips. They are the same phase of work — one chip + // with the reasoning + tool rows inside. + const { container } = renderTimeline({ + blocks: [reasoning("plan"), tool(done("a", "search"), done("b", "search"))], + }); + expect(pillHeads(container).length).toBe(1); + }); + + it("breaks the phase at a text boundary", () => { + // Text between the two tool calls makes them distinct phases. 
+ const { container } = renderTimeline({ + blocks: [tool(done("a", "search")), text("midway"), tool(done("b", "search"))], + }); + expect(pillHeads(container).length).toBe(2); + }); + + it("does NOT break the phase at a reasoning boundary", () => { + // Reasoning is part of the phase, not a separator. [tool, reasoning, + // tool] is ONE chip whose body lists the rows in order. + const { container } = renderTimeline({ + blocks: [tool(done("a", "search")), reasoning("more"), tool(done("b", "search"))], + }); + expect(pillHeads(container).length).toBe(1); + }); + + it("folds same-name consecutive tool blocks into one row with ×N", () => { + // Within a phase, three consecutive same-name tool blocks collapse to + // one row showing ×3. Visible in the chip's collapsed head label. + const { container } = renderTimeline({ + blocks: [tool(done("a", "search")), tool(done("b", "search")), tool(done("c", "search"))], + }); + const heads = pillHeads(container); + expect(heads.length).toBe(1); + expect(heads[0].textContent ?? "").toContain("×3"); + }); + + it("keeps distinct tool names as separate rows within the same chip", () => { + // Two consecutive tool blocks with different names share one phase + // chip but render as two rows inside (revealed on expand). + const { container } = renderTimeline({ + blocks: [tool(done("a", "search")), tool(done("b", "read"))], + }); + expect(pillHeads(container).length).toBe(1); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Active vs settled tone +// ───────────────────────────────────────────────────────────────────────────── + +describe("BlockTimeline tool tone", () => { + function pillTones(container: HTMLElement): string[] { + const out: string[] = []; + for (const el of Array.from(container.getElementsByTagName("div"))) { + if ((el.getAttribute("class") ?? 
"").split(/\s+/).includes("turn-pill")) { + const tone = el.getAttribute("data-tone"); + if (tone) out.push(tone); + } + } + return out; + } + + it("spins (running tone) while any call is in flight", () => { + const { container } = renderTimeline({ + blocks: [tool(running("a", "search"))], + isCurrentMessage: true, + streamingState: "working", + }); + expect(pillTones(container)).toContain("running"); + }); + + it("settles to muted (ok) tone when all calls complete", () => { + const { container } = renderTimeline({ + blocks: [tool(done("a", "search"))], + isCurrentMessage: false, + streamingState: null, + }); + const tones = pillTones(container); + expect(tones).toContain("ok"); + expect(tones).not.toContain("running"); + }); + + it("shows an error chip when a call failed", () => { + const errorCall: ToolCallDisplay = { + id: "err", + name: "search", + status: "error", + ok: false, + ms: 5, + }; + const { container } = renderTimeline({ + blocks: [tool(errorCall)], + }); + expect(pillTones(container)).toContain("error"); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// LiveCursor — the gap indicator +// ───────────────────────────────────────────────────────────────────────────── + +describe("LiveCursor", () => { + it("shows 'Thinking…' for the pre-first-block warm-up", () => { + const { container } = renderTimeline({ + blocks: [], + isCurrentMessage: true, + streamingState: "thinking", + }); + expect(liveCursorLabel(container)).toBe("Thinking…"); + }); + + it("shows 'Calling X…' during preparing with a known tool", () => { + const { container } = renderTimeline({ + blocks: [], + isCurrentMessage: true, + streamingState: "preparing", + preparingTool: { id: "p1", name: "synapse-research__start_research" }, + }); + // stripServerPrefix collapses `synapse-research__start_research` → + // `start_research`. 
+ expect(liveCursorLabel(container)).toBe("Calling start_research…"); + }); + + it("shows 'Analyzing…' between a tool result and the next block", () => { + const { container } = renderTimeline({ + blocks: [tool(done("a", "search"))], + isCurrentMessage: true, + streamingState: "analyzing", + }); + expect(liveCursorLabel(container)).toBe("Analyzing…"); + }); + + it("hides during 'streaming' (text/reasoning block is absorbing the state)", () => { + const { container } = renderTimeline({ + blocks: [text("hello")], + isCurrentMessage: true, + streamingState: "streaming", + }); + expect(liveCursorLabel(container)).toBeNull(); + }); + + it("hides during 'working' (tool chip is spinning)", () => { + const { container } = renderTimeline({ + blocks: [tool(running("r", "search"))], + isCurrentMessage: true, + streamingState: "working", + }); + expect(liveCursorLabel(container)).toBeNull(); + }); + + it("hides when the message is not the current streaming one", () => { + // Historical message — no live cursor regardless of state. + const { container } = renderTimeline({ + blocks: [reasoning("done thinking"), text("done")], + isCurrentMessage: false, + streamingState: null, + }); + expect(liveCursorLabel(container)).toBeNull(); + }); + + it("hides when the turn is fully done (null state)", () => { + const { container } = renderTimeline({ + blocks: [text("complete")], + isCurrentMessage: true, + streamingState: null, + }); + expect(liveCursorLabel(container)).toBeNull(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Reasoning chip — settled persistence +// ───────────────────────────────────────────────────────────────────────────── + +describe("ToolCallRow fallback label", () => { + it("shows the tool name when the call has no input summary", () => { + // No-input tools (e.g. current_user()) produced rows that read as + // just "● 7ms" — no label, no signal about what ran. 
Fall back to + // the tool's stripped name so the reader can identify the call. + const noInputCall: ToolCallDisplay = { + id: "n1", + name: "list_active_apps", + status: "done", + ok: true, + ms: 7, + }; + // Multi-call ToolRow renders a ToolCallRow per call. Pair the + // no-input call with one that has input so the row enters its + // per-call list mode. + const withInputCall: ToolCallDisplay = { + id: "w1", + name: "list_active_apps", + status: "done", + ok: true, + ms: 5, + input: { query: "anything" }, + }; + const { container } = renderTimeline({ + blocks: [tool(withInputCall, noInputCall)], + }); + // Per-call rows only render when the chip head is expanded. + const heads = pillHeads(container); + expect(heads.length).toBe(1); + act(() => { + fireEvent.click(heads[0]); + }); + const summaries: string[] = []; + for (const span of Array.from(container.getElementsByTagName("span"))) { + if ( + (span.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__call-summary") + ) { + summaries.push((span.textContent ?? "").trim()); + } + } + // One row is the input preview ("query: anything"); the other must + // fall back to the tool name (the call had no input to preview). + expect(summaries).toContain("list_active_apps"); + }); +}); + +describe("ActivityChip fallback label", () => { + it("leads with 'N actions' when tools mix verbs (no majority)", () => { + // Three different verbs → no majority → fallback. Head should NOT + // say "Worked ×3" (verb-shaped scaffolding plus a redundant count); + // it should say "3 actions" (the count IS the truthful summary). 
+ const call1: ToolCallDisplay = { + id: "a", + name: "add_workspace_member", + status: "done", + ok: true, + ms: 10, + }; + const call2: ToolCallDisplay = { + id: "b", + name: "list_workspace_tools", + status: "done", + ok: true, + ms: 10, + }; + const call3: ToolCallDisplay = { + id: "c", + name: "remove_workspace_member", + status: "done", + ok: true, + ms: 10, + }; + const { container } = renderTimeline({ + blocks: [tool(call1, call2, call3)], + }); + const heads = pillHeads(container); + expect(heads.length).toBe(1); + const headText = (heads[0].textContent ?? "").trim(); + expect(headText).toContain("3 actions"); + // The redundant `×3` suffix MUST be suppressed — the count is in + // the label itself. + expect(headText).not.toContain("×3"); + }); +}); + +describe("ReasoningChip", () => { + it("persists as a clickable 'Thought' chip on settled turns", () => { + // Previously the only-reasoning case (no tools) hid entirely after + // streaming. Now it stays as a clickable Thought chip so the user + // can investigate. + const { container } = renderTimeline({ + blocks: [reasoning("plan"), text("answer")], + isCurrentMessage: false, + streamingState: null, + }); + const heads = pillHeads(container); + expect(heads.length).toBe(1); + expect((heads[0].textContent ?? "")).toContain("Thought"); + }); + + it("shows 'Thinking…' while still receiving deltas", () => { + const { container } = renderTimeline({ + blocks: [reasoning("partial")], + isCurrentMessage: true, + streamingState: "streaming", + }); + const heads = pillHeads(container); + expect(heads.length).toBe(1); + expect((heads[0].textContent ?? 
"")).toContain("Thinking…"); + }); +}); diff --git a/web/test/TurnActivityPill.test.tsx b/web/test/TurnActivityPill.test.tsx deleted file mode 100644 index dd3a7bf4..00000000 --- a/web/test/TurnActivityPill.test.tsx +++ /dev/null @@ -1,471 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; -import { act, fireEvent, render, waitFor } from "@testing-library/react"; -import { TurnActivityPill } from "../src/components/TurnActivityPill.tsx"; -import type { ContentBlock, PreparingTool, ToolCallDisplay } from "../src/hooks/useChat.ts"; - -// ───────────────────────────────────────────────────────────────────────────── -// Fixtures -// ───────────────────────────────────────────────────────────────────────────── - -function doneCall(id: string, name = "search", ms = 50): ToolCallDisplay { - return { id, name, status: "done", ok: true, ms }; -} - -function failedCall(id: string, name = "search", ms = 5): ToolCallDisplay { - return { id, name, status: "error", ok: false, ms }; -} - -function runningCall(id: string, name = "search"): ToolCallDisplay { - return { id, name, status: "running" }; -} - -function toolBlock(...calls: ToolCallDisplay[]): ContentBlock { - return { type: "tool", toolCalls: calls }; -} - -function reasoningBlock(text: string): ContentBlock { - return { type: "reasoning", text }; -} - -const PILL_TONE_RE = /class="turn-pill"[^>]*data-tone="([^"]+)"/; -function pillTone(html: string): string | null { - const m = html.match(PILL_TONE_RE); - return m ? (m[1] ?? null) : null; -} -function countSpinners(html: string): number { - return (html.match(/turn-pill__icon--running/g) ?? []).length; -} -function findHead(container: HTMLElement): HTMLButtonElement { - const btns = container.getElementsByTagName("button"); - for (const b of Array.from(btns)) { - if ((b.getAttribute("class") ?? 
"").split(/\s+/).includes("turn-pill__head")) { - return b as HTMLButtonElement; - } - } - throw new Error("Pill head not found"); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Cross-block grouping — the Mercury repro -// ───────────────────────────────────────────────────────────────────────────── - -describe("TurnActivityPill cross-block grouping", () => { - // The whole point of the redesign: 30 single-call blocks broken up by - // reasoning would previously render as 30 mini-accordions. The new pill - // folds them into one entry per tool name, regardless of interleaving. - it("collapses N consecutive single-call blocks of the same tool into one row", () => { - const blocks: ContentBlock[] = [ - reasoningBlock("plan"), - toolBlock(doneCall("a", "list_transactions", 100)), - toolBlock(doneCall("b", "list_transactions", 200)), - toolBlock(doneCall("c", "list_transactions", 150)), - ]; - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - // One tool-group row containing all three calls; row-count multiplier reads ×3. - const rowHeads = Array.from(container.getElementsByTagName("button")).filter((b) => - (b.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__row-head"), - ); - // rows: 1 reasoning + 1 tool group = 2 row heads - expect(rowHeads.length).toBe(2); - expect(container.innerHTML).toContain("×3"); - }); - - it("merges across reasoning interleaving (Mercury pattern)", () => { - const blocks: ContentBlock[] = [ - reasoningBlock("first thought"), - toolBlock(doneCall("a", "list_transactions", 100)), - reasoningBlock("interrupting thought"), - toolBlock(doneCall("b", "list_transactions", 100)), - reasoningBlock("another"), - toolBlock(doneCall("c", "list_transactions", 100)), - ]; - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - // One ×3 group despite reasoning splitting the blocks apart. 
- expect(container.innerHTML).toContain("×3"); - }); - - it("keeps distinct tool names in separate groups", () => { - const blocks: ContentBlock[] = [ - toolBlock(doneCall("a", "list_transactions")), - toolBlock(doneCall("b", "get_recipients")), - toolBlock(doneCall("c", "list_transactions")), - ]; - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - const html = container.innerHTML; - // Two groups: list_transactions ×2 and get_recipients ×1 (no multiplier shown for 1). - expect(html).toContain("×2"); - expect(html).toContain("recipients"); - expect(html).toContain("transactions"); - }); - - it("does not collide same-named tools from different servers", () => { - // Two servers each expose a `search` tool. They must render as separate - // rows; folding them into one "Searched ×2" would misrepresent which - // server did what work. - const blocks: ContentBlock[] = [ - toolBlock(doneCall("a", "notion__search")), - toolBlock(doneCall("b", "mercury__search")), - toolBlock(doneCall("c", "notion__search")), - ]; - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - const rowHeads = Array.from(container.getElementsByTagName("button")).filter((b) => - (b.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__row-head"), - ); - // Exactly two tool-group rows — notion ×2 and mercury ×1. 
- expect(rowHeads.length).toBe(2); - expect(container.innerHTML).toContain("×2"); - }); - - it("counts total steps at the head across all groups", () => { - const blocks: ContentBlock[] = [ - toolBlock(doneCall("a", "list_transactions")), - toolBlock(doneCall("b", "list_transactions")), - toolBlock(doneCall("c", "get_recipients")), - ]; - const { container } = render( - , - ); - expect(container.innerHTML).toContain("3 steps"); - }); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// Head morphing across streamingState -// ───────────────────────────────────────────────────────────────────────────── - -describe("TurnActivityPill head label", () => { - it("shows 'Calling X…' during preparing on the current turn", () => { - const preparing: PreparingTool = { id: "p1", name: "server__search_thing" }; - const { container } = render( - , - ); - expect(container.innerHTML).toContain("Calling search_thing"); - expect(pillTone(container.innerHTML)).toBe("running"); - }); - - it("shows 'Analyzing…' during analyzing on the current turn", () => { - const { container } = render( - , - ); - expect(container.innerHTML).toContain("Analyzing"); - expect(pillTone(container.innerHTML)).toBe("running"); - }); - - it("settles to past tense + duration when streaming completes", () => { - const { container } = render( - , - ); - const html = container.innerHTML; - // Past tense ("Listed"), step count, and total duration all visible at rest. 
- expect(html).toMatch(/Listed/); - expect(html).toContain("1 step"); - expect(html).toContain("1.5s"); - expect(pillTone(html)).toBe("neutral"); - }); - - it("hides entirely when no tools were called and no leading state is live", () => { - const { container } = render( - , - ); - expect(container.innerHTML).toBe(""); - }); - - it("hides entirely in quiet mode regardless of streamingState", () => { - const { container } = render( - , - ); - expect(container.innerHTML).toBe(""); - }); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// Single-spinner contract — never two indicators on screen -// ───────────────────────────────────────────────────────────────────────────── - -describe("TurnActivityPill single-spinner contract", () => { - it("renders exactly one spinner when tools are in flight (no body open)", () => { - const { container } = render( - , - ); - expect(countSpinners(container.innerHTML)).toBe(1); - }); - - it("does not emit a separate 'Analyzing' indicator alongside the head", () => { - // Pre-redesign, the accordion head + a pending footer + the composer - // label could all say 'Analyzing' at once. The new pill must own the - // state entirely on a single element. - const { container } = render( - , - ); - // Exactly one occurrence of "Analyzing" — on the head only. - expect((container.innerHTML.match(/Analyzing/g) ?? 
[]).length).toBe(1); - }); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// Tone policy — head stays neutral when a child fails -// ───────────────────────────────────────────────────────────────────────────── - -describe("TurnActivityPill tone policy", () => { - it("stays neutral when a child call failed", () => { - const { container } = render( - , - ); - expect(pillTone(container.innerHTML)).toBe("neutral"); - expect(container.innerHTML).not.toContain("Couldn't"); - }); - - it("shows red per-call icon for the failed child when expanded", () => { - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - expect(container.innerHTML).toContain("turn-pill__icon--error"); - }); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// Reasoning row -// ───────────────────────────────────────────────────────────────────────────── - -describe("TurnActivityPill reasoning row", () => { - it("shows 'Thought · N tokens' as a row in L2 timeline", () => { - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - const html = container.innerHTML; - expect(html).toContain("Thought"); - expect(html).toMatch(/\d+\s+tokens/); - }); - - it("skips empty reasoning blocks (no row for a 0-token entry)", () => { - const { container } = render( - , - ); - fireEvent.click(findHead(container)); - expect(container.innerHTML).not.toContain("Thought"); - }); -}); - -// ───────────────────────────────────────────────────────────────────────────── -// CopyButton feedback (preserves the behavior from the deleted tests) -// ───────────────────────────────────────────────────────────────────────────── - -let originalClipboard: PropertyDescriptor | undefined; -beforeEach(() => { - originalClipboard = Object.getOwnPropertyDescriptor(globalThis.navigator, "clipboard"); -}); -afterEach(() => { - if (originalClipboard) { - Object.defineProperty(globalThis.navigator, 
"clipboard", originalClipboard); - } else { - // biome-ignore lint/performance/noDelete: cleanup of test mock - delete (globalThis.navigator as { clipboard?: unknown }).clipboard; - } -}); -function setClipboard(impl: { writeText: (text: string) => Promise } | null) { - Object.defineProperty(globalThis.navigator, "clipboard", { - value: impl, - configurable: true, - writable: true, - }); -} - -function callWithResult(text: string): ToolCallDisplay { - return { - id: "t1", - name: "search", - status: "done", - ok: true, - ms: 50, - result: { content: [{ type: "text", text }], isError: false }, - }; -} -function findCopyButton(container: HTMLElement): HTMLButtonElement { - const btns = container.getElementsByTagName("button"); - for (const b of Array.from(btns)) { - if ((b.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__copy")) { - return b as HTMLButtonElement; - } - } - throw new Error("CopyButton not found"); -} -function findRowHead(container: HTMLElement): HTMLButtonElement { - const btns = container.getElementsByTagName("button"); - for (const b of Array.from(btns)) { - if ((b.getAttribute("class") ?? "").split(/\s+/).includes("turn-pill__row-head")) { - return b as HTMLButtonElement; - } - } - throw new Error("Row head not found"); -} - -function renderExpanded(text: string) { - const result = render( - , - ); - // Expand head → row. - fireEvent.click(findHead(result.container)); - fireEvent.click(findRowHead(result.container)); - return result; -} - -describe("TurnActivityPill CopyButton feedback", () => { - it("shows success after a successful copy", async () => { - let captured = ""; - setClipboard({ writeText: async (t) => void (captured = t) }); - const { container } = renderExpanded("the result"); - const btn = findCopyButton(container); - await act(async () => { - fireEvent.click(btn); - }); - expect(captured).toBe("the result"); - await waitFor(() => { - expect((btn.textContent ?? 
"").toLowerCase()).toContain("copied"); - }); - }); - - it("shows failure when writeText rejects", async () => { - setClipboard({ writeText: () => Promise.reject(new Error("denied")) }); - const { container } = renderExpanded("the result"); - const btn = findCopyButton(container); - await act(async () => { - fireEvent.click(btn); - }); - await waitFor(() => { - expect((btn.textContent ?? "").toLowerCase()).toContain("failed"); - expect(btn.getAttribute("aria-label")).toBe("Copy failed"); - }); - }); - - it("shows failure when the Clipboard API is unavailable", async () => { - setClipboard(null); - const { container } = renderExpanded("the result"); - const btn = findCopyButton(container); - await act(async () => { - fireEvent.click(btn); - }); - await waitFor(() => { - expect((btn.textContent ?? "").toLowerCase()).toContain("failed"); - expect(btn.getAttribute("aria-label")).toBe("Copy failed"); - }); - }); -}); diff --git a/web/test/aggregate.test.ts b/web/test/aggregate.test.ts new file mode 100644 index 00000000..9c6e9c99 --- /dev/null +++ b/web/test/aggregate.test.ts @@ -0,0 +1,303 @@ +import { describe, expect, it } from "bun:test"; +import { aggregateGroup } from "../src/lib/tool-display/aggregate"; +import type { Tone, ToolDescription } from "../src/lib/tool-display/types"; + +function desc(overrides: Partial & { verb: string }): ToolDescription { + return { + id: `id_${Math.random().toString(36).slice(2, 8)}`, + name: overrides.name ?? 
"tool", + verb: overrides.verb, + object: "", + tone: "ok" as Tone, + summary: null, + headSubject: null, + input: [], + resultText: null, + resultJson: null, + errorText: null, + durationMs: null, + ...overrides, + }; +} + +describe("aggregateGroup — verb selection", () => { + it("uses the call's own verb when there is exactly one call", () => { + const g = aggregateGroup([desc({ verb: "Searched" })]); + expect(g.verb).toBe("Searched"); + }); + + it("uses the verb shared by every call", () => { + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Searched" }), + desc({ verb: "Searched" }), + ]); + expect(g.verb).toBe("Searched"); + }); + + it("uses the majority verb when one covers more than half", () => { + // 2 of 3 "Searched" — majority wins. + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + ]); + expect(g.verb).toBe("Searched"); + }); + + it("falls back to a neutral verb when no verb has a strict majority", () => { + // 3-way split — nobody has >50%. Neutral fallback rather than picking + // the verb that happens to sort last. 
+ const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + desc({ verb: "Listed" }), + ]); + expect(g.verb).toBe("Worked"); + }); + + it("treats a 2-2 tie as no majority (strictly >50%)", () => { + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + desc({ verb: "Read" }), + ]); + expect(g.verb).toBe("Worked"); + }); + + it("emits a fallback for an empty group instead of throwing", () => { + const g = aggregateGroup([]); + expect(g.verb).toBe("Worked"); + expect(g.count).toBe(0); + expect(g.tone).toBe("ok"); + }); + + it("produces a present-progressive form alongside the past-tense verb", () => { + const g = aggregateGroup([desc({ verb: "Searched" })]); + expect(g.verb).toBe("Searched"); + expect(g.verbPresent).toBe("Searching"); + }); +}); + +describe("aggregateGroup — agreed fields", () => { + it("surfaces an object when every non-null value agrees", () => { + const g = aggregateGroup([ + desc({ verb: "Read", object: "files" }), + desc({ verb: "Read", object: "files" }), + ]); + expect(g.object).toBe("files"); + }); + + it("returns null when objects disagree", () => { + const g = aggregateGroup([ + desc({ verb: "Read", object: "files" }), + desc({ verb: "Read", object: "issues" }), + ]); + expect(g.object).toBeNull(); + }); + + it("tolerates partial coverage when the non-null values agree", () => { + // Two calls share "news"; the third has no object inference. We still + // surface "news" — it's the only signal we have and it doesn't conflict. 
+ const g = aggregateGroup([ + desc({ verb: "Searched", object: "news" }), + desc({ verb: "Searched", object: "news" }), + desc({ verb: "Searched", object: "" }), + ]); + expect(g.object).toBe("news"); + }); + + it("agrees on subject under the same rules", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", headSubject: "top news" }), + desc({ verb: "Searched", headSubject: "top news" }), + ]); + expect(g.subject).toBe("top news"); + }); + + it("returns null when subjects disagree", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", headSubject: "Mercury" }), + desc({ verb: "Searched", headSubject: "Venus" }), + ]); + expect(g.subject).toBeNull(); + }); +}); + +describe("aggregateGroup — tone", () => { + it("returns running when any call is running, regardless of other tones", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", tone: "ok" }), + desc({ verb: "Searched", tone: "running" }), + desc({ verb: "Searched", tone: "error" }), + ]); + expect(g.tone).toBe("running"); + }); + + it("returns ok when every call settled cleanly", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", tone: "ok" }), + desc({ verb: "Searched", tone: "ok" }), + ]); + expect(g.tone).toBe("ok"); + }); + + it("treats error → success as recovery (terminal outcome wins)", () => { + // Agentic self-correction: the model tried, it failed, it adjusted, + // it succeeded. The chip head shouldn't shout "error" when the + // model actually got there. + const g = aggregateGroup([ + desc({ verb: "Searched", tone: "error" }), + desc({ verb: "Searched", tone: "ok" }), + ]); + expect(g.tone).toBe("ok"); + }); + + it("treats success → error as a terminal failure", () => { + // The model had something working, then broke it (or moved on to a + // call that failed). The latest state is what the user needs to know + // about. 
+ const g = aggregateGroup([ + desc({ verb: "Searched", tone: "ok" }), + desc({ verb: "Searched", tone: "error" }), + ]); + expect(g.tone).toBe("error"); + }); + + it("returns error when every call failed (no recovery)", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", tone: "error" }), + desc({ verb: "Searched", tone: "error" }), + desc({ verb: "Searched", tone: "error" }), + ]); + expect(g.tone).toBe("error"); + }); +}); + +describe("aggregateGroup — totalMs", () => { + it("sums known durations", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", durationMs: 100 }), + desc({ verb: "Searched", durationMs: 250 }), + ]); + expect(g.totalMs).toBe(350); + }); + + it("skips calls without a known duration but still sums the rest", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", durationMs: 100 }), + desc({ verb: "Searched", durationMs: null }), + desc({ verb: "Searched", durationMs: 50 }), + ]); + expect(g.totalMs).toBe(150); + }); + + it("returns null when no call has a known duration", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", durationMs: null }), + desc({ verb: "Searched", durationMs: null }), + ]); + expect(g.totalMs).toBeNull(); + }); +}); + +describe("aggregateGroup — count", () => { + it("counts every description in the group", () => { + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + desc({ verb: "Listed" }), + ]); + expect(g.count).toBe(3); + }); +}); + +describe("aggregateGroup — verbIsFallback flag", () => { + // Renderers need to know when the verb is the neutral fallback (so they + // can present count-led labels instead of pretending we have a real verb). 
+ it("sets verbIsFallback=true when no verb has a majority", () => { + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + desc({ verb: "Listed" }), + ]); + expect(g.verbIsFallback).toBe(true); + }); + + it("sets verbIsFallback=false when a verb covers the majority", () => { + const g = aggregateGroup([ + desc({ verb: "Searched" }), + desc({ verb: "Searched" }), + desc({ verb: "Read" }), + ]); + expect(g.verbIsFallback).toBe(false); + }); + + it("sets verbIsFallback=false for a single call (its verb is canonical)", () => { + const g = aggregateGroup([desc({ verb: "Worked" })]); + expect(g.verb).toBe("Worked"); + expect(g.verbIsFallback).toBe(false); + }); + + it("sets verbIsFallback=false for an empty group (nothing to characterize)", () => { + const g = aggregateGroup([]); + expect(g.verbIsFallback).toBe(false); + }); +}); + +describe("aggregateGroup — fallback verb suppresses object", () => { + // "Worked manage tools" is nonsense — the verb already admits we don't + // know what happened, so pinning it to a shared object pretends we do. + // When verb falls back, object collapses to null and the chip head + // reads as just the fallback verb (plus subject / count, which remain + // meaningful). + it("clears object when no verb has a majority but objects agree", () => { + const g = aggregateGroup([ + desc({ verb: "Added", object: "tools" }), + desc({ verb: "Listed", object: "tools" }), + desc({ verb: "Removed", object: "tools" }), + ]); + expect(g.verb).toBe("Worked"); + expect(g.object).toBeNull(); + }); + + it("preserves subject even when the verb is the fallback", () => { + // Subject comes from the user's input, not from tool semantics — it + // remains true regardless of what verb we settle on. 
+ const g = aggregateGroup([ + desc({ verb: "Added", object: "tools", headSubject: "alpha" }), + desc({ verb: "Listed", object: "tools", headSubject: "alpha" }), + desc({ verb: "Removed", object: "tools", headSubject: "alpha" }), + ]); + expect(g.verb).toBe("Worked"); + expect(g.object).toBeNull(); + expect(g.subject).toBe("alpha"); + }); + + it("still includes object for a single call (no fallback path triggered)", () => { + const g = aggregateGroup([desc({ verb: "Worked", object: "tools" })]); + // Single-call short-circuit returns the call's own verb; "Worked" here + // is the literal verb, not the aggregation fallback, so object stays. + expect(g.verb).toBe("Worked"); + expect(g.object).toBe("tools"); + }); +}); + +describe("aggregateGroup — user's mixed-tool scenario", () => { + // The case from the screenshot: three search-shaped tools with different + // names but the same inferred verb ("Searched") and a shared subject + // pulled from the user's prompt ("news headlines"). The old hard-coded + // rule said "Used tools" because tool *names* differed. The aggregator + // now uses the verb because all three describe Searches. + it("uses the shared verb when tool NAMES differ but VERBS agree", () => { + const g = aggregateGroup([ + desc({ verb: "Searched", name: "news_search", headSubject: "news headlines" }), + desc({ verb: "Searched", name: "web_search", headSubject: "news headlines" }), + desc({ verb: "Searched", name: "headlines_lookup", headSubject: "news headlines" }), + ]); + expect(g.verb).toBe("Searched"); + expect(g.subject).toBe("news headlines"); + expect(g.count).toBe(3); + }); +});