From 5440d7e1c191e9fb9472728ccf8037c645e1951a Mon Sep 17 00:00:00 2001 From: "sentry-junior[bot]" <264270552+sentry-junior[bot]@users.noreply.github.com> Date: Wed, 20 May 2026 11:56:06 -0700 Subject: [PATCH 1/2] fix(tools): introduce ToolInputError to classify expected tool failures (#383) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary The tool error handler (`handleToolExecutionError`) sent all non-MCP tool errors to Sentry via `logException`, even when the error was caused by invalid model input — missing files, bad regex, ambiguous edits, path traversal. This created noisy alerts for expected conditions like [JUNIOR-2Q](https://sentry.sentry.io/issues/7495355493/). This introduces `ToolInputError` as the single classification mechanism for expected model/user-caused tool failures. Domain code throws `ToolInputError` at the source where it knows the failure is input-caused, not a system error. The global error handler respects this: no `logException`, no Sentry noise. ## Architecture ``` Domain code throws ToolInputError → handleToolExecutionError → logWarn only → rethrow → Pi tool_error Domain code throws Error (system) → handleToolExecutionError → logException (Sentry) → rethrow MCP tool returns isError → McpToolError → logWarn only → rethrow → Pi tool_error ``` No ad-hoc error-type catches at the executor level. The domain function that creates the error owns the classification. ## Changes **New:** - `tool-input-error.ts` — `ToolInputError` class **Error handler:** - `tool-error-handler.ts` — skip `logException` for `ToolInputError`; replace `getMcpAwareErrorType` with `getToolErrorType()` covering both MCP and input error classification **Domain sources that now throw `ToolInputError`:** - `edit-file.ts` — ENOENT on `fs.readFile` → `ToolInputError("File not found: ...")` - `text-edits.ts` — all validation (not found, ambiguous, overlap, no-op, empty oldText, bad newText) - `file-utils.ts` — workspace path traversal - `list-dir.ts` — not-a-directory - `grep.ts` — invalid regex pattern - `sandbox.ts` — missing required arguments (path, edits, pattern, command) **What stays as system errors (still reports to Sentry):** - Sandbox API failures (`throwSandboxOperationError`) - Slack API errors - Network/connection failures - Non-ENOENT filesystem errors - `unsupported sandbox tool` (config error) ## Tests - **`tool-error-handler.test.ts`** (new) — system errors call `logException`; `ToolInputError` and `McpToolError` do not - **`file-tools.test.ts`** — `ToolInputError` for ambiguous edits, text not found, path traversal, invalid regex, file-as-directory - **`sandbox-executor.test.ts`** — editFile on missing path throws `ToolInputError` ## Verification - `pnpm typecheck` ✅ - 51/51 tests passing ✅ - Lint + prettier ✅ Fixes [JUNIOR-2Q](https://sentry.sentry.io/issues/7495355493/) Requested-By: David Cramer Co-authored-by: Junior Co-authored-by: Claude (anthropic/claude-opus-4.6) --- packages/junior/src/chat/sandbox/sandbox.ts | 15 +++-- .../tools/execution/tool-error-handler.ts | 22 ++++--- .../chat/tools/execution/tool-input-error.ts | 7 ++ .../src/chat/tools/sandbox/edit-file.ts | 14 +++- .../src/chat/tools/sandbox/file-utils.ts | 3 +- .../junior/src/chat/tools/sandbox/grep.ts | 14 +++- .../junior/src/chat/tools/sandbox/list-dir.ts | 3 +- .../src/chat/tools/sandbox/text-edits.ts | 15 +++-- .../tests/unit/misc/sandbox-executor.test.ts | 29 +++++++++ .../execution/tool-error-handler.test.ts | 64 +++++++++++++++++++ .../unit/tools/sandbox/file-tools.test.ts | 61 ++++++++++++++++++ 11 files changed, 219 insertions(+), 28 deletions(-) create mode 100644 packages/junior/src/chat/tools/execution/tool-input-error.ts create mode 100644 packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts diff --git a/packages/junior/src/chat/sandbox/sandbox.ts b/packages/junior/src/chat/sandbox/sandbox.ts index 5231188a..c9f518a1 100644 --- a/packages/junior/src/chat/sandbox/sandbox.ts +++ b/packages/junior/src/chat/sandbox/sandbox.ts @@ -36,6 +36,7 @@ import { missingFileResult, sliceFileContent, } from "@/chat/tools/sandbox/read-file"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; // Spec: specs/security-policy.md (sandbox isolation, network policy, credential lifecycle) // Spec: specs/logging/tracing-spec.md (required sandbox span semantics) @@ -248,7 +249,7 @@ export function createSandboxExecutor(options?: { ): Promise> => { const filePath = String(rawInput.path ?? "").trim(); if (!filePath) { - throw new Error("path is required"); + throw new ToolInputError("path is required"); } const offset = positiveInteger(rawInput.offset); const limit = positiveInteger(rawInput.limit); @@ -330,7 +331,7 @@ export function createSandboxExecutor(options?: { ): Promise> => { const filePath = String(rawInput.path ?? "").trim(); if (!filePath) { - throw new Error("path is required"); + throw new ToolInputError("path is required"); } const content = String(rawInput.content ?? ""); @@ -370,10 +371,10 @@ export function createSandboxExecutor(options?: { ): Promise> => { const filePath = String(rawInput.path ?? "").trim(); if (!filePath) { - throw new Error("path is required"); + throw new ToolInputError("path is required"); } if (!Array.isArray(rawInput.edits)) { - throw new Error("edits is required"); + throw new ToolInputError("edits is required"); } logSandboxBootRequest("tool.editFile", { @@ -406,7 +407,7 @@ export function createSandboxExecutor(options?: { ): Promise> => { const pattern = String(rawInput.pattern ?? ""); if (!pattern) { - throw new Error("pattern is required"); + throw new ToolInputError("pattern is required"); } logSandboxBootRequest("tool.grep"); @@ -447,7 +448,7 @@ export function createSandboxExecutor(options?: { ): Promise> => { const pattern = String(rawInput.pattern ?? ""); if (!pattern) { - throw new Error("pattern is required"); + throw new ToolInputError("pattern is required"); } logSandboxBootRequest("tool.findFiles"); @@ -509,7 +510,7 @@ export function createSandboxExecutor(options?: { if (params.toolName === "bash") { if (!bashCommand) { - throw new Error("command is required"); + throw new ToolInputError("command is required"); } if (options?.runBashCustomCommand) { const custom = await options.runBashCustomCommand(bashCommand); diff --git a/packages/junior/src/chat/tools/execution/tool-error-handler.ts b/packages/junior/src/chat/tools/execution/tool-error-handler.ts index 96a1af45..4ce1d2f2 100644 --- a/packages/junior/src/chat/tools/execution/tool-error-handler.ts +++ b/packages/junior/src/chat/tools/execution/tool-error-handler.ts @@ -6,13 +6,17 @@ import { type LogContext, } from "@/chat/logging"; import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client"; -import { - getMcpAwareErrorMessage, - getMcpAwareErrorType, - McpToolError, -} from "@/chat/mcp/errors"; +import { getMcpAwareErrorMessage, McpToolError } from "@/chat/mcp/errors"; import { PluginCredentialFailureError } from "@/chat/services/plugin-auth-orchestration"; import { SlackActionError } from "@/chat/slack/client"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; + +/** Classify tool errors into stable observability types. */ +function getToolErrorType(error: unknown): string { + if (error instanceof McpToolError) return "tool_error"; + if (error instanceof ToolInputError) return "tool_input_error"; + return error instanceof Error ? error.name : "tool_execution_error"; +} function getToolErrorAttributes( error: unknown, @@ -40,7 +44,7 @@ export function handleToolExecutionError( shouldTrace: boolean, traceContext: LogContext, ): never { - const errorType = getMcpAwareErrorType(error, "tool_execution_error"); + const errorType = getToolErrorType(error); const errorMessage = getMcpAwareErrorMessage(error); setSpanAttributes({ "error.type": errorType, @@ -84,8 +88,10 @@ export function handleToolExecutionError( ); } - // MCP tool errors are expected outcomes — log as warnings, not Sentry exceptions. - if (!(error instanceof McpToolError)) { + // Expected tool failures (MCP errors, model input errors) are not Sentry exceptions. + const isExpectedToolFailure = + error instanceof McpToolError || error instanceof ToolInputError; + if (!isExpectedToolFailure) { logException( error, "agent_tool_call_failed", diff --git a/packages/junior/src/chat/tools/execution/tool-input-error.ts b/packages/junior/src/chat/tools/execution/tool-input-error.ts new file mode 100644 index 00000000..511b760e --- /dev/null +++ b/packages/junior/src/chat/tools/execution/tool-input-error.ts @@ -0,0 +1,7 @@ +/** Thrown when a tool fails due to invalid model/user input, not a system error. */ +export class ToolInputError extends Error { + constructor(message: string, options?: { cause?: unknown }) { + super(message, options); + this.name = "ToolInputError"; + } +} diff --git a/packages/junior/src/chat/tools/sandbox/edit-file.ts b/packages/junior/src/chat/tools/sandbox/edit-file.ts index 8df9d936..fdd2b2fd 100644 --- a/packages/junior/src/chat/tools/sandbox/edit-file.ts +++ b/packages/junior/src/chat/tools/sandbox/edit-file.ts @@ -1,8 +1,10 @@ import { + isMissingPathError, normalizeToLf, resolveWorkspacePath, type SandboxFileSystem, } from "@/chat/tools/sandbox/file-utils"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; import { buildCompactDiff, detectLineEnding, @@ -45,7 +47,17 @@ export async function editFile(params: { path: string; }): Promise { const filePath = resolveWorkspacePath(params.path); - const rawContent = await params.fs.readFile(filePath, { encoding: "utf8" }); + let rawContent: string; + try { + rawContent = await params.fs.readFile(filePath, { encoding: "utf8" }); + } catch (error) { + if (isMissingPathError(error)) { + throw new ToolInputError(`File not found: ${params.path}`, { + cause: error, + }); + } + throw error; + } const { bom, text } = stripBom(rawContent); const lineEnding = detectLineEnding(text); const normalizedContent = normalizeToLf(text); diff --git a/packages/junior/src/chat/tools/sandbox/file-utils.ts b/packages/junior/src/chat/tools/sandbox/file-utils.ts index dd2c51a3..a41561d3 100644 --- a/packages/junior/src/chat/tools/sandbox/file-utils.ts +++ b/packages/junior/src/chat/tools/sandbox/file-utils.ts @@ -1,6 +1,7 @@ import path from "node:path"; import { SANDBOX_WORKSPACE_ROOT } from "@/chat/sandbox/paths"; import type { SandboxFileSystem } from "@/chat/sandbox/workspace"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; export type { SandboxFileSystem }; @@ -152,7 +153,7 @@ export function resolveWorkspacePath( normalized !== SANDBOX_WORKSPACE_ROOT && !normalized.startsWith(`${SANDBOX_WORKSPACE_ROOT}/`) ) { - throw new Error( + throw new ToolInputError( `Path must stay within ${SANDBOX_WORKSPACE_ROOT}: ${requested}`, ); } diff --git a/packages/junior/src/chat/tools/sandbox/grep.ts b/packages/junior/src/chat/tools/sandbox/grep.ts index 0221f9fe..de4b8ba6 100644 --- a/packages/junior/src/chat/tools/sandbox/grep.ts +++ b/packages/junior/src/chat/tools/sandbox/grep.ts @@ -13,6 +13,7 @@ import { type TextSearchResultDetails, } from "@/chat/tools/sandbox/file-utils"; import { tool } from "@/chat/tools/definition"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; const DEFAULT_GREP_LIMIT = 100; const MAX_GREP_LINE_CHARS = 500; @@ -70,9 +71,16 @@ export async function grepFiles(params: { const root = resolveWorkspacePath(params.path); const limit = positiveInteger(params.limit) ?? DEFAULT_GREP_LIMIT; const context = positiveInteger(params.context) ?? 0; - const regex = params.literal - ? undefined - : new RegExp(params.pattern, params.ignoreCase ? "i" : ""); + let regex: RegExp | undefined; + if (!params.literal) { + try { + regex = new RegExp(params.pattern, params.ignoreCase ? "i" : ""); + } catch (error) { + throw new ToolInputError(`Invalid regex pattern: ${params.pattern}`, { + cause: error, + }); + } + } const { files, missingPath, missingRoot } = await collectFiles({ fs: params.fs, root, diff --git a/packages/junior/src/chat/tools/sandbox/list-dir.ts b/packages/junior/src/chat/tools/sandbox/list-dir.ts index 14d89dba..7383cb31 100644 --- a/packages/junior/src/chat/tools/sandbox/list-dir.ts +++ b/packages/junior/src/chat/tools/sandbox/list-dir.ts @@ -11,6 +11,7 @@ import { type TextSearchResultDetails, } from "@/chat/tools/sandbox/file-utils"; import { tool } from "@/chat/tools/definition"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; const DEFAULT_LIST_LIMIT = 500; @@ -39,7 +40,7 @@ export async function listDir(params: { throw error; } if (!stat.isDirectory()) { - throw new Error(`Not a directory: ${params.path ?? "."}`); + throw new ToolInputError(`Not a directory: ${params.path ?? "."}`); } let entries: string[]; diff --git a/packages/junior/src/chat/tools/sandbox/text-edits.ts b/packages/junior/src/chat/tools/sandbox/text-edits.ts index 09589b99..64ec1a6e 100644 --- a/packages/junior/src/chat/tools/sandbox/text-edits.ts +++ b/packages/junior/src/chat/tools/sandbox/text-edits.ts @@ -1,4 +1,5 @@ import { normalizeToLf } from "@/chat/tools/sandbox/file-utils"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; export interface TextReplacement { oldText: string; @@ -180,17 +181,17 @@ export function validateAndApplyTextEdits( targetName: string, ): { baseContent: string; newContent: string } { if (!Array.isArray(edits) || edits.length === 0) { - throw new Error(`${targetName} requires at least one edit.`); + throw new ToolInputError(`${targetName} requires at least one edit.`); } const normalizedEdits = edits.map((edit, index) => { if (typeof edit.oldText !== "string" || edit.oldText.length === 0) { - throw new Error( + throw new ToolInputError( `edits[${index}].oldText must not be empty in ${targetName}.`, ); } if (typeof edit.newText !== "string") { - throw new Error( + throw new ToolInputError( `edits[${index}].newText must be a string in ${targetName}.`, ); } @@ -205,13 +206,13 @@ export function validateAndApplyTextEdits( const edit = normalizedEdits[index]; const matchIndex = content.indexOf(edit.oldText); if (matchIndex === -1) { - throw new Error( + throw new ToolInputError( `Could not find edits[${index}] in ${targetName}. oldText must match exactly including whitespace and newlines.`, ); } const occurrences = countOccurrences(content, edit.oldText); if (occurrences > 1) { - throw new Error( + throw new ToolInputError( `Found ${occurrences} occurrences of edits[${index}] in ${targetName}. Each oldText must be unique.`, ); } @@ -228,7 +229,7 @@ export function validateAndApplyTextEdits( const previous = matchedEdits[index - 1]; const current = matchedEdits[index]; if (previous.matchIndex + previous.matchLength > current.matchIndex) { - throw new Error( + throw new ToolInputError( `edits[${previous.editIndex}] and edits[${current.editIndex}] overlap in ${targetName}. Merge overlapping replacements into one edit.`, ); } @@ -244,7 +245,7 @@ export function validateAndApplyTextEdits( } if (newContent === content) { - throw new Error(`No changes made to ${targetName}.`); + throw new ToolInputError(`No changes made to ${targetName}.`); } return { baseContent: content, newContent }; diff --git a/packages/junior/tests/unit/misc/sandbox-executor.test.ts b/packages/junior/tests/unit/misc/sandbox-executor.test.ts index 6544d3c4..0ea0d5cd 100644 --- a/packages/junior/tests/unit/misc/sandbox-executor.test.ts +++ b/packages/junior/tests/unit/misc/sandbox-executor.test.ts @@ -1146,6 +1146,35 @@ describe("createSandboxExecutor", () => { }); }); + it("throws ToolInputError when editFile targets a missing path", async () => { + const sandbox = makeSandbox("sbx_missing_edit_file"); + sandbox.fs.readFile.mockRejectedValue( + Object.assign(new Error("ENOENT: no such file or directory"), { + code: "ENOENT", + }), + ); + sandboxCreateMock.mockResolvedValue(sandbox); + vi.mocked(createBashTool).mockResolvedValue({ + tools: { + readFile: { execute: vi.fn(async () => ({ content: "" })) }, + writeFile: { execute: vi.fn(async () => ({ success: true })) }, + }, + } as never); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await expect( + executor.execute({ + toolName: "editFile", + input: { + path: "missing.ts", + edits: [{ oldText: "a", newText: "b" }], + }, + }), + ).rejects.toThrow("File not found: missing.ts"); + }); + it("keeps sandbox API failures as readFile errors", async () => { const sandbox = makeSandbox("sbx_read_file_api_error"); sandboxCreateMock.mockResolvedValue(sandbox); diff --git a/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts b/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts new file mode 100644 index 00000000..d43c7b59 --- /dev/null +++ b/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts @@ -0,0 +1,64 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; + +const logExceptionMock = vi.fn(); +const logInfoMock = vi.fn(); +const logWarnMock = vi.fn(); +const setSpanAttributesMock = vi.fn(); + +vi.mock("@/chat/logging", () => ({ + logException: (...args: unknown[]) => logExceptionMock(...args), + logInfo: (...args: unknown[]) => logInfoMock(...args), + logWarn: (...args: unknown[]) => logWarnMock(...args), + setSpanAttributes: (...args: unknown[]) => setSpanAttributesMock(...args), +})); + +vi.mock("@/chat/pi/client", () => ({ + GEN_AI_PROVIDER_NAME: "test-provider", +})); + +import { handleToolExecutionError } from "@/chat/tools/execution/tool-error-handler"; +import { McpToolError } from "@/chat/mcp/errors"; + +describe("handleToolExecutionError", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("reports system errors to Sentry via logException", () => { + const error = new Error("sandbox API failed"); + expect(() => + handleToolExecutionError(error, "editFile", "call_1", true, {}), + ).toThrow(error); + + expect(logExceptionMock).toHaveBeenCalledTimes(1); + expect(setSpanAttributesMock).toHaveBeenCalledWith( + expect.objectContaining({ "error.type": "Error" }), + ); + }); + + it("does not report ToolInputError to Sentry", () => { + const error = new ToolInputError("Could not find edits[0] in file.ts"); + expect(() => + handleToolExecutionError(error, "editFile", "call_1", true, {}), + ).toThrow(error); + + expect(logExceptionMock).not.toHaveBeenCalled(); + expect(logWarnMock).toHaveBeenCalledTimes(1); + expect(setSpanAttributesMock).toHaveBeenCalledWith( + expect.objectContaining({ "error.type": "tool_input_error" }), + ); + }); + + it("does not report McpToolError to Sentry", () => { + const error = new McpToolError("mcp tool failed"); + expect(() => + handleToolExecutionError(error, "mcpTool", "call_1", true, {}), + ).toThrow(error); + + expect(logExceptionMock).not.toHaveBeenCalled(); + expect(setSpanAttributesMock).toHaveBeenCalledWith( + expect.objectContaining({ "error.type": "tool_error" }), + ); + }); +}); diff --git a/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts b/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts index 734b77d6..0123a2e0 100644 --- a/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts +++ b/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts @@ -10,6 +10,7 @@ import { grepFiles } from "@/chat/tools/sandbox/grep"; import { listDir } from "@/chat/tools/sandbox/list-dir"; import { sliceFileContent } from "@/chat/tools/sandbox/read-file"; import type { SandboxFileSystem } from "@/chat/tools/sandbox/file-utils"; +import { ToolInputError } from "@/chat/tools/execution/tool-input-error"; function workspacePath(filePath: string): string { return path.posix.join(SANDBOX_WORKSPACE_ROOT, filePath); @@ -299,4 +300,64 @@ describe("sandbox file tools", () => { details: { ok: true, path: "src", truncated: false }, }); }); + + it("throws ToolInputError for ambiguous edits", async () => { + const memory = createMemoryFs({ + "src/app.ts": "same\nsame\n", + }); + + await expect( + editFile({ + fs: memory.fs, + path: "src/app.ts", + edits: [{ oldText: "same", newText: "changed" }], + }), + ).rejects.toThrow(ToolInputError); + }); + + it("throws ToolInputError for old text not found", async () => { + const memory = createMemoryFs({ + "src/app.ts": "hello world\n", + }); + + await expect( + editFile({ + fs: memory.fs, + path: "src/app.ts", + edits: [{ oldText: "missing text", newText: "new" }], + }), + ).rejects.toThrow(ToolInputError); + }); + + it("throws ToolInputError for workspace path traversal", async () => { + const memory = createMemoryFs({}); + + await expect( + listDir({ fs: memory.fs, path: "../../../etc" }), + ).rejects.toThrow(ToolInputError); + }); + + it("throws ToolInputError for invalid grep regex", async () => { + const memory = createMemoryFs({ + "src/app.ts": "content\n", + }); + + await expect( + grepFiles({ + fs: memory.fs, + path: "src", + pattern: "[invalid", + }), + ).rejects.toThrow(ToolInputError); + }); + + it("throws ToolInputError when listDir targets a file", async () => { + const memory = createMemoryFs({ + "src/app.ts": "content\n", + }); + + await expect( + listDir({ fs: memory.fs, path: "src/app.ts" }), + ).rejects.toThrow(ToolInputError); + }); }); From af372d1acf8d052e51392846e4938d9a5adacd0d Mon Sep 17 00:00:00 2001 From: Omri SirComp Date: Wed, 20 May 2026 22:10:56 +0300 Subject: [PATCH 2/2] fix(junior): keep resume diagnostics cumulative Track cumulative duration and token usage across paused turn checkpoints, then use those totals when rendering timeout-resume Slack footers. Timeout continuation notices also now use the normal Slack footer shape so the conversation ID stays traceable. Co-Authored-By: GPT-5 Codex --- packages/junior/src/chat/respond.ts | 41 ++++---- .../junior/src/chat/runtime/reply-executor.ts | 29 +++++- .../junior/src/chat/runtime/slack-resume.ts | 46 ++++++++- .../src/chat/services/turn-checkpoint.ts | 48 ++++++++++ .../src/chat/state/turn-session-store.ts | 51 ++++++++++ packages/junior/src/chat/usage.ts | 28 ++++++ .../tests/unit/handlers/oauth-resume.test.ts | 96 +++++++++++++++++++ .../unit/services/turn-checkpoint.test.ts | 51 ++++++++++ 8 files changed, 364 insertions(+), 26 deletions(-) diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index 77603a76..3b7e0d95 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -77,7 +77,7 @@ import { toAgentThinkingLevel, type TurnThinkingSelection, } from "@/chat/services/turn-thinking-level"; -import type { AgentTurnUsage } from "@/chat/usage"; +import { hasAgentTurnUsage, type AgentTurnUsage } from "@/chat/usage"; import { loadTurnCheckpoint, persistCompletedCheckpoint, @@ -179,6 +179,16 @@ function trimRouterAttachmentText(text: string): string { : `${normalized.slice(0, MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS)}...`; } +function extractSliceUsage( + messages: PiMessage[], + beforeMessageCount: number, +): AgentTurnUsage | undefined { + const usage = extractGenAiUsageSummary( + ...messages.slice(beforeMessageCount).filter(isAssistantMessage), + ); + return hasAgentTurnUsage(usage) ? usage : undefined; +} + function supportsRouterTextPreview(mediaType: string): boolean { const baseMediaType = mediaType.split(";", 1)[0]?.trim().toLowerCase(); if (!baseMediaType) { @@ -1033,9 +1043,7 @@ export async function generateAssistantReply( agent.state, ...outputMessages, ); - turnUsage = Object.values(usageSummary).some( - (value) => value !== undefined, - ) + turnUsage = hasAgentTurnUsage(usageSummary) ? usageSummary : undefined; setSpanAttributes({ @@ -1071,6 +1079,8 @@ export async function generateAssistantReply( ) { await persistCompletedCheckpoint({ conversationId: sessionConversationId, + currentDurationMs: Date.now() - replyStartedAtMs, + currentUsage: turnUsage, sessionId, sliceId: currentSliceId, allMessages: agent.state.messages, @@ -1103,10 +1113,15 @@ export async function generateAssistantReply( }); } catch (error) { if (timedOut && timeoutResumeConversationId && timeoutResumeSessionId) { + turnUsage = + turnUsage ?? + extractSliceUsage(timeoutResumeMessages, beforeMessageCount); const checkpoint = await persistTimeoutCheckpoint({ conversationId: timeoutResumeConversationId, sessionId: timeoutResumeSessionId, currentSliceId: timeoutResumeSliceId, + currentDurationMs: Date.now() - replyStartedAtMs, + currentUsage: turnUsage, messages: timeoutResumeMessages, loadedSkillNames: loadedSkillNamesForResume, errorMessage: error instanceof Error ? error.message : String(error), @@ -1140,25 +1155,17 @@ export async function generateAssistantReply( timeoutResumeSessionId ) { if (!turnUsage && timeoutResumeMessages.length > 0) { - // Match the canonical slice-scoped extraction: sum usage from new - // assistant messages produced during this slice, not the full - // message history (which may include prior slices whose usage was - // already reported in earlier footers). - const fallbackUsage = extractGenAiUsageSummary( - ...timeoutResumeMessages - .slice(beforeMessageCount) - .filter(isAssistantMessage), + turnUsage = extractSliceUsage( + timeoutResumeMessages, + beforeMessageCount, ); - turnUsage = Object.values(fallbackUsage).some( - (value) => value !== undefined, - ) - ? fallbackUsage - : undefined; } const nextSliceId = await persistAuthPauseCheckpoint({ conversationId: timeoutResumeConversationId, sessionId: timeoutResumeSessionId, currentSliceId: timeoutResumeSliceId, + currentDurationMs: Date.now() - replyStartedAtMs, + currentUsage: turnUsage, messages: timeoutResumeMessages, loadedSkillNames: loadedSkillNamesForResume, errorMessage: error.message, diff --git a/packages/junior/src/chat/runtime/reply-executor.ts b/packages/junior/src/chat/runtime/reply-executor.ts index d60275d7..b5e85f0e 100644 --- a/packages/junior/src/chat/runtime/reply-executor.ts +++ b/packages/junior/src/chat/runtime/reply-executor.ts @@ -49,7 +49,10 @@ import { isVisionEnabled, } from "@/chat/services/vision-context"; import { createSlackAdapterAssistantStatusSession } from "@/chat/slack/assistant-thread/status"; -import { buildSlackReplyFooter } from "@/chat/slack/footer"; +import { + buildSlackReplyBlocks, + buildSlackReplyFooter, +} from "@/chat/slack/footer"; import { maybeUpdateAssistantTitle } from "@/chat/slack/assistant-thread/title"; import { appendSlackLegacyAttachmentText } from "@/chat/slack/legacy-attachments"; import { type ThreadArtifactsState } from "@/chat/state/artifacts"; @@ -61,7 +64,7 @@ import { buildDeterministicTurnId } from "@/chat/runtime/turn"; import { markTurnCompleted, markTurnFailed } from "@/chat/runtime/turn"; import { startActiveTurn } from "@/chat/runtime/turn"; import { isRedundantReactionAckText } from "@/chat/services/reply-delivery-plan"; -import { deleteSlackMessage } from "@/chat/slack/outbound"; +import { deleteSlackMessage, postSlackMessage } from "@/chat/slack/outbound"; import { finalizeFailedTurnReply, getAgentTurnDiagnosticsAttributes, @@ -204,9 +207,25 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { const postTurnContinuationNotice = async (): Promise => { try { await beforeFirstResponsePost(); - await thread.post( - buildSlackOutputMessage(buildTurnContinuationResponse()), - ); + const text = buildTurnContinuationResponse(); + const footer = buildSlackReplyFooter({ conversationId }); + const shouldUseSlackFooter = + Boolean(footer) && + Boolean(channelId && threadTs) && + (thread.adapter as { name?: string } | undefined)?.name === + "slack"; + if (shouldUseSlackFooter && channelId && threadTs) { + const blocks = buildSlackReplyBlocks(text, footer); + await postSlackMessage({ + channelId, + threadTs, + text, + ...(blocks ? { blocks } : {}), + }); + return; + } + + await thread.post(buildSlackOutputMessage(text)); } catch (error) { logException( error, diff --git a/packages/junior/src/chat/runtime/slack-resume.ts b/packages/junior/src/chat/runtime/slack-resume.ts index 27c29731..b7d5ba28 100644 --- a/packages/junior/src/chat/runtime/slack-resume.ts +++ b/packages/junior/src/chat/runtime/slack-resume.ts @@ -21,13 +21,18 @@ import { createSlackWebApiAssistantStatusSession, type AssistantStatusSession, } from "@/chat/slack/assistant-thread/status"; -import { buildSlackReplyFooter } from "@/chat/slack/footer"; +import { + buildSlackReplyBlocks, + buildSlackReplyFooter, +} from "@/chat/slack/footer"; import { planSlackReplyPosts, postSlackApiReplyPosts, } from "@/chat/slack/reply"; import { postSlackMessage as postSlackApiMessage } from "@/chat/slack/outbound"; import { getStateAdapter } from "@/chat/state/adapter"; +import { getAgentTurnSessionCheckpoint } from "@/chat/state/turn-session-store"; +import { addAgentTurnUsage } from "@/chat/usage"; import { startSlackProcessingReactionForMessage, type ProcessingReactionSession, @@ -172,11 +177,18 @@ async function postTurnContinuationNoticeBestEffort(args: { lockKey: string; resumeArgs: ResumeSlackTurnArgs; }): Promise { + const text = buildTurnContinuationResponse(); + const footer = buildSlackReplyFooter({ + conversationId: + args.resumeArgs.replyContext?.correlation?.conversationId ?? args.lockKey, + }); + const blocks = buildSlackReplyBlocks(text, footer); try { await postSlackApiMessage({ channelId: args.resumeArgs.channelId, threadTs: args.resumeArgs.threadTs, - text: buildTurnContinuationResponse(), + text, + ...(blocks ? { blocks } : {}), }); } catch (error) { logException( @@ -258,6 +270,19 @@ function createResumeReplyContext( }; } +async function getResumeCheckpoint(args: { + conversationId?: string; + sessionId?: string; +}) { + if (!args.conversationId || !args.sessionId) { + return undefined; + } + return await getAgentTurnSessionCheckpoint( + args.conversationId, + args.sessionId, + ); +} + /** * Resume a paused Slack turn under the normal thread lock. * @@ -311,6 +336,10 @@ export async function resumeSlackTurn(args: ResumeSlackTurnArgs) { const generateReply = args.generateReply ?? generateAssistantReply; const replyContext = createResumeReplyContext(args, status); + const priorCheckpoint = await getResumeCheckpoint({ + conversationId: replyContext.correlation?.conversationId, + sessionId: replyContext.correlation?.turnId, + }); const replyPromise = generateReply(args.messageText, replyContext); const replyTimeoutMs = resolveReplyTimeoutMs(args.replyTimeoutMs); let reply = @@ -339,9 +368,18 @@ export async function resumeSlackTurn(args: ResumeSlackTurnArgs) { await status.stop(); const footer = buildSlackReplyFooter({ conversationId: args.replyContext?.correlation?.conversationId ?? lockKey, - durationMs: reply.diagnostics.durationMs, + durationMs: + typeof priorCheckpoint?.cumulativeDurationMs === "number" || + typeof reply.diagnostics.durationMs === "number" + ? (priorCheckpoint?.cumulativeDurationMs ?? 0) + + (reply.diagnostics.durationMs ?? 0) + : undefined, thinkingLevel: reply.diagnostics.thinkingLevel, - usage: reply.diagnostics.usage, + usage: + addAgentTurnUsage( + priorCheckpoint?.cumulativeUsage, + reply.diagnostics.usage, + ) ?? reply.diagnostics.usage, }); await postSlackApiReplyPosts({ channelId: args.channelId, diff --git a/packages/junior/src/chat/services/turn-checkpoint.ts b/packages/junior/src/chat/services/turn-checkpoint.ts index 6b3f42d2..21e02581 100644 --- a/packages/junior/src/chat/services/turn-checkpoint.ts +++ b/packages/junior/src/chat/services/turn-checkpoint.ts @@ -6,6 +6,7 @@ import { import { logException } from "@/chat/logging"; import type { PiMessage } from "@/chat/pi/messages"; import { trimTrailingAssistantMessages } from "@/chat/respond-helpers"; +import { addAgentTurnUsage, type AgentTurnUsage } from "@/chat/usage"; export interface TurnCheckpointContext { conversationId?: string; @@ -19,6 +20,19 @@ export interface TurnCheckpointState { existingCheckpoint?: AgentTurnSessionCheckpoint; } +function addDurationMs( + prior: number | undefined, + current: number | undefined, +): number | undefined { + const total = [prior, current].reduce((sum, value) => { + if (typeof value !== "number" || !Number.isFinite(value)) { + return sum; + } + return (sum ?? 0) + Math.max(0, Math.floor(value)); + }, undefined); + return total; +} + /** Load turn checkpoint state for a conversation/session pair. */ export async function loadTurnCheckpoint( ctx: TurnCheckpointContext, @@ -46,13 +60,27 @@ export async function loadTurnCheckpoint( /** Persist a completed turn checkpoint. */ export async function persistCompletedCheckpoint(args: { conversationId: string; + currentDurationMs?: number; + currentUsage?: AgentTurnUsage; sessionId: string; sliceId: number; allMessages: PiMessage[]; loadedSkillNames: string[]; }): Promise { + const latestCheckpoint = await getAgentTurnSessionCheckpoint( + args.conversationId, + args.sessionId, + ); await upsertAgentTurnSessionCheckpoint({ conversationId: args.conversationId, + cumulativeDurationMs: addDurationMs( + latestCheckpoint?.cumulativeDurationMs, + args.currentDurationMs, + ), + cumulativeUsage: addAgentTurnUsage( + latestCheckpoint?.cumulativeUsage, + args.currentUsage, + ), sessionId: args.sessionId, sliceId: args.sliceId, state: "completed", @@ -69,6 +97,8 @@ export async function persistAuthPauseCheckpoint(args: { conversationId: string; sessionId: string; currentSliceId: number; + currentDurationMs?: number; + currentUsage?: AgentTurnUsage; messages: PiMessage[]; loadedSkillNames: string[]; errorMessage: string; @@ -94,6 +124,14 @@ export async function persistAuthPauseCheckpoint(args: { ); await upsertAgentTurnSessionCheckpoint({ conversationId: args.conversationId, + cumulativeDurationMs: addDurationMs( + latestCheckpoint?.cumulativeDurationMs, + args.currentDurationMs, + ), + cumulativeUsage: addAgentTurnUsage( + latestCheckpoint?.cumulativeUsage, + args.currentUsage, + ), sessionId: args.sessionId, sliceId: nextSliceId, state: "awaiting_resume", @@ -135,6 +173,8 @@ export async function persistTimeoutCheckpoint(args: { conversationId: string; sessionId: string; currentSliceId: number; + currentDurationMs?: number; + currentUsage?: AgentTurnUsage; messages: PiMessage[]; loadedSkillNames: string[]; errorMessage: string; @@ -161,6 +201,14 @@ export async function persistTimeoutCheckpoint(args: { ); return await upsertAgentTurnSessionCheckpoint({ conversationId: args.conversationId, + cumulativeDurationMs: addDurationMs( + latestCheckpoint?.cumulativeDurationMs, + args.currentDurationMs, + ), + cumulativeUsage: addAgentTurnUsage( + latestCheckpoint?.cumulativeUsage, + args.currentUsage, + ), sessionId: args.sessionId, sliceId: nextSliceId, state: "awaiting_resume", diff --git a/packages/junior/src/chat/state/turn-session-store.ts b/packages/junior/src/chat/state/turn-session-store.ts index 11f87eec..e936fd8d 100644 --- a/packages/junior/src/chat/state/turn-session-store.ts +++ b/packages/junior/src/chat/state/turn-session-store.ts @@ -1,5 +1,6 @@ import { isRecord } from "@/chat/coerce"; import type { PiMessage } from "@/chat/pi/messages"; +import type { AgentTurnUsage } from "@/chat/usage"; import { getStateAdapter } from "./adapter"; const AGENT_TURN_SESSION_PREFIX = "junior:agent_turn_session"; @@ -17,6 +18,8 @@ export type AgentTurnResumeReason = "timeout" | "auth"; export interface AgentTurnSessionCheckpoint { checkpointVersion: number; conversationId: string; + cumulativeDurationMs?: number; + cumulativeUsage?: AgentTurnUsage; errorMessage?: string; loadedSkillNames?: string[]; piMessages: PiMessage[]; @@ -35,6 +38,34 @@ function agentTurnSessionKey( return `${AGENT_TURN_SESSION_PREFIX}:${conversationId}:${sessionId}`; } +function toFiniteNonNegativeNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) + ? Math.max(0, Math.floor(value)) + : undefined; +} + +function parseAgentTurnUsage(value: unknown): AgentTurnUsage | undefined { + if (!isRecord(value)) { + return undefined; + } + + const usage: AgentTurnUsage = {}; + for (const field of [ + "inputTokens", + "outputTokens", + "cachedInputTokens", + "cacheCreationTokens", + "totalTokens", + ] as const) { + const count = toFiniteNonNegativeNumber(value[field]); + if (count !== undefined) { + usage[field] = count; + } + } + + return Object.keys(usage).length > 0 ? usage : undefined; +} + function parseAgentTurnSessionCheckpoint( value: unknown, ): AgentTurnSessionCheckpoint | undefined { @@ -64,6 +95,10 @@ function parseAgentTurnSessionCheckpoint( const sliceId = parsed.sliceId; const checkpointVersion = parsed.checkpointVersion; const updatedAtMs = parsed.updatedAtMs; + const cumulativeDurationMs = toFiniteNonNegativeNumber( + parsed.cumulativeDurationMs, + ); + const cumulativeUsage = parseAgentTurnUsage(parsed.cumulativeUsage); if ( typeof conversationId !== "string" || typeof sessionId !== "string" || @@ -81,6 +116,8 @@ function parseAgentTurnSessionCheckpoint( sliceId, state: status, updatedAtMs, + ...(cumulativeDurationMs !== undefined ? { cumulativeDurationMs } : {}), + ...(cumulativeUsage ? { cumulativeUsage } : {}), piMessages: Array.isArray(parsed.piMessages) ? (parsed.piMessages as PiMessage[]) : [], @@ -120,6 +157,8 @@ export async function getAgentTurnSessionCheckpoint( export async function upsertAgentTurnSessionCheckpoint(args: { conversationId: string; + cumulativeDurationMs?: number; + cumulativeUsage?: AgentTurnUsage; sessionId: string; sliceId: number; state: AgentTurnSessionStatus; @@ -145,6 +184,16 @@ export async function upsertAgentTurnSessionCheckpoint(args: { state: args.state, updatedAtMs: Date.now(), piMessages: Array.isArray(args.piMessages) ? args.piMessages : [], + ...(typeof args.cumulativeDurationMs === "number" && + Number.isFinite(args.cumulativeDurationMs) + ? { + cumulativeDurationMs: Math.max( + 0, + Math.floor(args.cumulativeDurationMs), + ), + } + : {}), + ...(args.cumulativeUsage ? { cumulativeUsage: args.cumulativeUsage } : {}), ...(Array.isArray(args.loadedSkillNames) ? { loadedSkillNames: args.loadedSkillNames.filter( @@ -192,6 +241,8 @@ export async function supersedeAgentTurnSessionCheckpoint(args: { sliceId: existing.sliceId, state: "superseded", piMessages: existing.piMessages, + cumulativeDurationMs: existing.cumulativeDurationMs, + cumulativeUsage: existing.cumulativeUsage, loadedSkillNames: existing.loadedSkillNames, resumeReason: existing.resumeReason, resumedFromSliceId: existing.resumedFromSliceId, diff --git a/packages/junior/src/chat/usage.ts b/packages/junior/src/chat/usage.ts index c707dacb..53448d2a 100644 --- a/packages/junior/src/chat/usage.ts +++ b/packages/junior/src/chat/usage.ts @@ -18,3 +18,31 @@ export interface AgentTurnUsage { /** Provider-reported total. May not equal the sum of individual counters across providers. */ totalTokens?: number; } + +/** Return whether any token counter is present on a usage record. */ +export function hasAgentTurnUsage( + usage: AgentTurnUsage | undefined, +): usage is AgentTurnUsage { + return Boolean( + usage && + Object.values(usage).some( + (value) => typeof value === "number" && Number.isFinite(value), + ), + ); +} + +/** Sum token counters across turn slices while preserving absent fields. */ +export function addAgentTurnUsage( + ...usages: Array +): AgentTurnUsage | undefined { + const total: AgentTurnUsage = {}; + for (const usage of usages) { + if (!usage) continue; + for (const field of Object.keys(usage) as (keyof AgentTurnUsage)[]) { + const value = usage[field]; + if (typeof value !== "number" || !Number.isFinite(value)) continue; + total[field] = (total[field] ?? 0) + value; + } + } + return hasAgentTurnUsage(total) ? total : undefined; +} diff --git a/packages/junior/tests/unit/handlers/oauth-resume.test.ts b/packages/junior/tests/unit/handlers/oauth-resume.test.ts index 39350f08..bf0a3223 100644 --- a/packages/junior/tests/unit/handlers/oauth-resume.test.ts +++ b/packages/junior/tests/unit/handlers/oauth-resume.test.ts @@ -182,6 +182,21 @@ describe("resumeAuthorizedRequest", () => { expect(onTimeoutPause).toHaveBeenCalledTimes(1); expect(postMessageMock).toHaveBeenCalledWith( expect.objectContaining({ + blocks: [ + { + type: "markdown", + text: buildTurnContinuationResponse(), + }, + { + type: "context", + elements: [ + { + type: "mrkdwn", + text: "*ID:* slack:C-test:1700000000.0002", + }, + ], + }, + ], channel: "C-test", thread_ts: "1700000000.0002", text: buildTurnContinuationResponse(), @@ -189,6 +204,87 @@ describe("resumeAuthorizedRequest", () => { ); }); + it("uses cumulative checkpoint diagnostics for timeout resume footers", async () => { + const { upsertAgentTurnSessionCheckpoint } = + await import("@/chat/state/turn-session-store"); + + await upsertAgentTurnSessionCheckpoint({ + conversationId: "conversation-1", + sessionId: "turn-1", + sliceId: 2, + state: "awaiting_resume", + piMessages: [], + resumeReason: "timeout", + cumulativeDurationMs: 1_000, + cumulativeUsage: { + inputTokens: 2, + outputTokens: 3, + }, + }); + + await resumeSlackTurn({ + messageText: "continue this turn", + channelId: "C-test", + threadTs: "1700000000.0005", + lockKey: "slack:C-test:1700000000.0005", + replyContext: { + requester: { userId: "U-test" }, + correlation: { + conversationId: "conversation-1", + turnId: "turn-1", + }, + }, + generateReply: async () => + ({ + text: "done", + diagnostics: { + assistantMessageCount: 1, + durationMs: 500, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usage: { + outputTokens: 7, + }, + usedPrimaryText: true, + }, + }) as any, + }); + + expect(postMessageMock).toHaveBeenCalledWith( + expect.objectContaining({ + channel: "C-test", + thread_ts: "1700000000.0005", + text: "done", + blocks: [ + { + type: "markdown", + text: "done", + }, + { + type: "context", + elements: expect.arrayContaining([ + { + type: "mrkdwn", + text: "*ID:* conversation-1", + }, + { + type: "mrkdwn", + text: "*Tokens:* 12", + }, + { + type: "mrkdwn", + text: "*Time:* 1.5s", + }, + ]), + }, + ], + }), + ); + }); + it("posts the canonical failure response when timeout pause handling throws", async () => { const onFailure = vi.fn(async () => undefined); diff --git a/packages/junior/tests/unit/services/turn-checkpoint.test.ts b/packages/junior/tests/unit/services/turn-checkpoint.test.ts index 268fc253..238da5cd 100644 --- a/packages/junior/tests/unit/services/turn-checkpoint.test.ts +++ b/packages/junior/tests/unit/services/turn-checkpoint.test.ts @@ -97,4 +97,55 @@ describe("persistAuthPauseCheckpoint", () => { piMessages: [priorMessages[0]], }); }); + + it("carries cumulative diagnostics across pause checkpoints", async () => { + const { persistTimeoutCheckpoint } = + await import("@/chat/services/turn-checkpoint"); + const { getAgentTurnSessionCheckpoint, upsertAgentTurnSessionCheckpoint } = + await import("@/chat/state/turn-session-store"); + + await upsertAgentTurnSessionCheckpoint({ + conversationId: "conversation-1", + sessionId: "turn-1", + sliceId: 1, + state: "awaiting_resume", + piMessages: [], + resumeReason: "timeout", + cumulativeDurationMs: 1_500, + cumulativeUsage: { + inputTokens: 10, + outputTokens: 3, + }, + }); + + await persistTimeoutCheckpoint({ + conversationId: "conversation-1", + sessionId: "turn-1", + currentSliceId: 1, + currentDurationMs: 2_250, + currentUsage: { + outputTokens: 7, + cachedInputTokens: 2, + }, + messages: [], + loadedSkillNames: [], + errorMessage: "timed out again", + logContext: { + modelId: "test-model", + }, + }); + + const checkpoint = await getAgentTurnSessionCheckpoint( + "conversation-1", + "turn-1", + ); + expect(checkpoint).toMatchObject({ + cumulativeDurationMs: 3_750, + cumulativeUsage: { + inputTokens: 10, + outputTokens: 10, + cachedInputTokens: 2, + }, + }); + }); });