From 5440d7e1c191e9fb9472728ccf8037c645e1951a Mon Sep 17 00:00:00 2001
From: "sentry-junior[bot]"
 <264270552+sentry-junior[bot]@users.noreply.github.com>
Date: Wed, 20 May 2026 11:56:06 -0700
Subject: [PATCH 1/2] fix(tools): introduce ToolInputError to classify expected
 tool failures (#383)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

The tool error handler (`handleToolExecutionError`) sent all non-MCP
tool errors to Sentry via `logException`, even when the error was caused
by invalid model input — missing files, bad regex, ambiguous edits, path
traversal. This created noisy alerts for expected conditions like
[JUNIOR-2Q](https://sentry.sentry.io/issues/7495355493/).

This introduces `ToolInputError` as the single classification mechanism
for expected model/user-caused tool failures. Domain code throws
`ToolInputError` at the source where it knows the failure is
input-caused, not a system error. The global error handler respects
this: no `logException`, no Sentry noise.

## Architecture

```
Domain code throws ToolInputError  → handleToolExecutionError → logWarn only → rethrow → Pi tool_error
Domain code throws Error (system)  → handleToolExecutionError → logException (Sentry) → rethrow
MCP tool returns isError           → McpToolError             → logWarn only → rethrow → Pi tool_error
```

No ad-hoc error-type catches at the executor level. The domain function
that creates the error owns the classification.

## Changes

**New:**
- `tool-input-error.ts` — `ToolInputError` class

**Error handler:**
- `tool-error-handler.ts` — skip `logException` for `ToolInputError`;
replace `getMcpAwareErrorType` with `getToolErrorType()` covering both
MCP and input error classification

**Domain sources that now throw `ToolInputError`:**
- `edit-file.ts` — ENOENT on `fs.readFile` → `ToolInputError("File not
found: ...")`
- `text-edits.ts` — all validation (not found, ambiguous, overlap,
no-op, empty oldText, bad newText)
- `file-utils.ts` — workspace path traversal
- `list-dir.ts` — not-a-directory
- `grep.ts` — invalid regex pattern
- `sandbox.ts` — missing required arguments (path, edits, pattern,
command)

**What stays as system errors (still reports to Sentry):**
- Sandbox API failures (`throwSandboxOperationError`)
- Slack API errors
- Network/connection failures
- Non-ENOENT filesystem errors
- `unsupported sandbox tool` (config error)

## Tests

- **`tool-error-handler.test.ts`** (new) — system errors call
`logException`; `ToolInputError` and `McpToolError` do not
- **`file-tools.test.ts`** — `ToolInputError` for ambiguous edits, text
not found, path traversal, invalid regex, file-as-directory
- **`sandbox-executor.test.ts`** — editFile on missing path throws
`ToolInputError`

## Verification

- `pnpm typecheck` ✅
- 51/51 tests passing ✅
- Lint + prettier ✅

Fixes [JUNIOR-2Q](https://sentry.sentry.io/issues/7495355493/)

Requested-By: David Cramer <dcramer>

Co-authored-by: Junior <junior[bot]@sentry.io>
Co-authored-by: Claude (anthropic/claude-opus-4.6) <noreply@anthropic.com>
---
 packages/junior/src/chat/sandbox/sandbox.ts   | 15 +++--
 .../tools/execution/tool-error-handler.ts     | 22 ++++---
 .../chat/tools/execution/tool-input-error.ts  |  7 ++
 .../src/chat/tools/sandbox/edit-file.ts       | 14 +++-
 .../src/chat/tools/sandbox/file-utils.ts      |  3 +-
 .../junior/src/chat/tools/sandbox/grep.ts     | 14 +++-
 .../junior/src/chat/tools/sandbox/list-dir.ts |  3 +-
 .../src/chat/tools/sandbox/text-edits.ts      | 15 +++--
 .../tests/unit/misc/sandbox-executor.test.ts  | 29 +++++++++
 .../execution/tool-error-handler.test.ts      | 64 +++++++++++++++++++
 .../unit/tools/sandbox/file-tools.test.ts     | 61 ++++++++++++++++++
 11 files changed, 219 insertions(+), 28 deletions(-)
 create mode 100644 packages/junior/src/chat/tools/execution/tool-input-error.ts
 create mode 100644 packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts
diff --git a/packages/junior/src/chat/sandbox/sandbox.ts b/packages/junior/src/chat/sandbox/sandbox.ts
index 5231188a..c9f518a1 100644
--- a/packages/junior/src/chat/sandbox/sandbox.ts
+++ b/packages/junior/src/chat/sandbox/sandbox.ts
@@ -36,6 +36,7 @@ import {
   missingFileResult,
   sliceFileContent,
 } from "@/chat/tools/sandbox/read-file";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 // Spec: specs/security-policy.md (sandbox isolation, network policy, credential lifecycle)
 // Spec: specs/logging/tracing-spec.md (required sandbox span semantics)
@@ -248,7 +249,7 @@ export function createSandboxExecutor(options?: {
   ): Promise<SandboxExecutionEnvelope<T>> => {
     const filePath = String(rawInput.path ?? "").trim();
     if (!filePath) {
-      throw new Error("path is required");
+      throw new ToolInputError("path is required");
     }
     const offset = positiveInteger(rawInput.offset);
     const limit = positiveInteger(rawInput.limit);
@@ -330,7 +331,7 @@ export function createSandboxExecutor(options?: {
   ): Promise<SandboxExecutionEnvelope<T>> => {
     const filePath = String(rawInput.path ?? "").trim();
     if (!filePath) {
-      throw new Error("path is required");
+      throw new ToolInputError("path is required");
     }
 
     const content = String(rawInput.content ?? "");
@@ -370,10 +371,10 @@ export function createSandboxExecutor(options?: {
   ): Promise<SandboxExecutionEnvelope<T>> => {
     const filePath = String(rawInput.path ?? "").trim();
     if (!filePath) {
-      throw new Error("path is required");
+      throw new ToolInputError("path is required");
     }
     if (!Array.isArray(rawInput.edits)) {
-      throw new Error("edits is required");
+      throw new ToolInputError("edits is required");
     }
 
     logSandboxBootRequest("tool.editFile", {
@@ -406,7 +407,7 @@ export function createSandboxExecutor(options?: {
   ): Promise<SandboxExecutionEnvelope<T>> => {
     const pattern = String(rawInput.pattern ?? "");
     if (!pattern) {
-      throw new Error("pattern is required");
+      throw new ToolInputError("pattern is required");
     }
 
     logSandboxBootRequest("tool.grep");
@@ -447,7 +448,7 @@ export function createSandboxExecutor(options?: {
   ): Promise<SandboxExecutionEnvelope<T>> => {
     const pattern = String(rawInput.pattern ?? "");
     if (!pattern) {
-      throw new Error("pattern is required");
+      throw new ToolInputError("pattern is required");
     }
 
     logSandboxBootRequest("tool.findFiles");
@@ -509,7 +510,7 @@ export function createSandboxExecutor(options?: {
 
     if (params.toolName === "bash") {
       if (!bashCommand) {
-        throw new Error("command is required");
+        throw new ToolInputError("command is required");
       }
       if (options?.runBashCustomCommand) {
         const custom = await options.runBashCustomCommand(bashCommand);
diff --git a/packages/junior/src/chat/tools/execution/tool-error-handler.ts b/packages/junior/src/chat/tools/execution/tool-error-handler.ts
index 96a1af45..4ce1d2f2 100644
--- a/packages/junior/src/chat/tools/execution/tool-error-handler.ts
+++ b/packages/junior/src/chat/tools/execution/tool-error-handler.ts
@@ -6,13 +6,17 @@ import {
   type LogContext,
 } from "@/chat/logging";
 import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client";
-import {
-  getMcpAwareErrorMessage,
-  getMcpAwareErrorType,
-  McpToolError,
-} from "@/chat/mcp/errors";
+import { getMcpAwareErrorMessage, McpToolError } from "@/chat/mcp/errors";
 import { PluginCredentialFailureError } from "@/chat/services/plugin-auth-orchestration";
 import { SlackActionError } from "@/chat/slack/client";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
+
+/** Classify tool errors into stable observability types. */
+function getToolErrorType(error: unknown): string {
+  if (error instanceof McpToolError) return "tool_error";
+  if (error instanceof ToolInputError) return "tool_input_error";
+  return error instanceof Error ? error.name : "tool_execution_error";
+}
 
 function getToolErrorAttributes(
   error: unknown,
@@ -40,7 +44,7 @@ export function handleToolExecutionError(
   shouldTrace: boolean,
   traceContext: LogContext,
 ): never {
-  const errorType = getMcpAwareErrorType(error, "tool_execution_error");
+  const errorType = getToolErrorType(error);
   const errorMessage = getMcpAwareErrorMessage(error);
   setSpanAttributes({
     "error.type": errorType,
@@ -84,8 +88,10 @@ export function handleToolExecutionError(
     );
   }
 
-  // MCP tool errors are expected outcomes — log as warnings, not Sentry exceptions.
-  if (!(error instanceof McpToolError)) {
+  // Expected tool failures (MCP errors, model input errors) are not Sentry exceptions.
+  const isExpectedToolFailure =
+    error instanceof McpToolError || error instanceof ToolInputError;
+  if (!isExpectedToolFailure) {
     logException(
       error,
       "agent_tool_call_failed",
diff --git a/packages/junior/src/chat/tools/execution/tool-input-error.ts b/packages/junior/src/chat/tools/execution/tool-input-error.ts
new file mode 100644
index 00000000..511b760e
--- /dev/null
+++ b/packages/junior/src/chat/tools/execution/tool-input-error.ts
@@ -0,0 +1,7 @@
+/** Thrown when a tool fails due to invalid model/user input, not a system error. */
+export class ToolInputError extends Error {
+  constructor(message: string, options?: { cause?: unknown }) {
+    super(message, options);
+    this.name = "ToolInputError";
+  }
+}
diff --git a/packages/junior/src/chat/tools/sandbox/edit-file.ts b/packages/junior/src/chat/tools/sandbox/edit-file.ts
index 8df9d936..fdd2b2fd 100644
--- a/packages/junior/src/chat/tools/sandbox/edit-file.ts
+++ b/packages/junior/src/chat/tools/sandbox/edit-file.ts
@@ -1,8 +1,10 @@
 import {
+  isMissingPathError,
   normalizeToLf,
   resolveWorkspacePath,
   type SandboxFileSystem,
 } from "@/chat/tools/sandbox/file-utils";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 import {
   buildCompactDiff,
   detectLineEnding,
@@ -45,7 +47,17 @@ export async function editFile(params: {
   path: string;
 }): Promise<EditFileResult> {
   const filePath = resolveWorkspacePath(params.path);
-  const rawContent = await params.fs.readFile(filePath, { encoding: "utf8" });
+  let rawContent: string;
+  try {
+    rawContent = await params.fs.readFile(filePath, { encoding: "utf8" });
+  } catch (error) {
+    if (isMissingPathError(error)) {
+      throw new ToolInputError(`File not found: ${params.path}`, {
+        cause: error,
+      });
+    }
+    throw error;
+  }
   const { bom, text } = stripBom(rawContent);
   const lineEnding = detectLineEnding(text);
   const normalizedContent = normalizeToLf(text);
diff --git a/packages/junior/src/chat/tools/sandbox/file-utils.ts b/packages/junior/src/chat/tools/sandbox/file-utils.ts
index dd2c51a3..a41561d3 100644
--- a/packages/junior/src/chat/tools/sandbox/file-utils.ts
+++ b/packages/junior/src/chat/tools/sandbox/file-utils.ts
@@ -1,6 +1,7 @@
 import path from "node:path";
 import { SANDBOX_WORKSPACE_ROOT } from "@/chat/sandbox/paths";
 import type { SandboxFileSystem } from "@/chat/sandbox/workspace";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 export type { SandboxFileSystem };
 
@@ -152,7 +153,7 @@ export function resolveWorkspacePath(
     normalized !== SANDBOX_WORKSPACE_ROOT &&
     !normalized.startsWith(`${SANDBOX_WORKSPACE_ROOT}/`)
   ) {
-    throw new Error(
+    throw new ToolInputError(
       `Path must stay within ${SANDBOX_WORKSPACE_ROOT}: ${requested}`,
     );
   }
diff --git a/packages/junior/src/chat/tools/sandbox/grep.ts b/packages/junior/src/chat/tools/sandbox/grep.ts
index 0221f9fe..de4b8ba6 100644
--- a/packages/junior/src/chat/tools/sandbox/grep.ts
+++ b/packages/junior/src/chat/tools/sandbox/grep.ts
@@ -13,6 +13,7 @@ import {
   type TextSearchResultDetails,
 } from "@/chat/tools/sandbox/file-utils";
 import { tool } from "@/chat/tools/definition";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 const DEFAULT_GREP_LIMIT = 100;
 const MAX_GREP_LINE_CHARS = 500;
@@ -70,9 +71,16 @@ export async function grepFiles(params: {
   const root = resolveWorkspacePath(params.path);
   const limit = positiveInteger(params.limit) ?? DEFAULT_GREP_LIMIT;
   const context = positiveInteger(params.context) ?? 0;
-  const regex = params.literal
-    ? undefined
-    : new RegExp(params.pattern, params.ignoreCase ? "i" : "");
+  let regex: RegExp | undefined;
+  if (!params.literal) {
+    try {
+      regex = new RegExp(params.pattern, params.ignoreCase ? "i" : "");
+    } catch (error) {
+      throw new ToolInputError(`Invalid regex pattern: ${params.pattern}`, {
+        cause: error,
+      });
+    }
+  }
   const { files, missingPath, missingRoot } = await collectFiles({
     fs: params.fs,
     root,
diff --git a/packages/junior/src/chat/tools/sandbox/list-dir.ts b/packages/junior/src/chat/tools/sandbox/list-dir.ts
index 14d89dba..7383cb31 100644
--- a/packages/junior/src/chat/tools/sandbox/list-dir.ts
+++ b/packages/junior/src/chat/tools/sandbox/list-dir.ts
@@ -11,6 +11,7 @@ import {
   type TextSearchResultDetails,
 } from "@/chat/tools/sandbox/file-utils";
 import { tool } from "@/chat/tools/definition";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 const DEFAULT_LIST_LIMIT = 500;
 
@@ -39,7 +40,7 @@ export async function listDir(params: {
     throw error;
   }
   if (!stat.isDirectory()) {
-    throw new Error(`Not a directory: ${params.path ?? "."}`);
+    throw new ToolInputError(`Not a directory: ${params.path ?? "."}`);
   }
 
   let entries: string[];
diff --git a/packages/junior/src/chat/tools/sandbox/text-edits.ts b/packages/junior/src/chat/tools/sandbox/text-edits.ts
index 09589b99..64ec1a6e 100644
--- a/packages/junior/src/chat/tools/sandbox/text-edits.ts
+++ b/packages/junior/src/chat/tools/sandbox/text-edits.ts
@@ -1,4 +1,5 @@
 import { normalizeToLf } from "@/chat/tools/sandbox/file-utils";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 export interface TextReplacement {
   oldText: string;
@@ -180,17 +181,17 @@ export function validateAndApplyTextEdits(
   targetName: string,
 ): { baseContent: string; newContent: string } {
   if (!Array.isArray(edits) || edits.length === 0) {
-    throw new Error(`${targetName} requires at least one edit.`);
+    throw new ToolInputError(`${targetName} requires at least one edit.`);
   }
 
   const normalizedEdits = edits.map((edit, index) => {
     if (typeof edit.oldText !== "string" || edit.oldText.length === 0) {
-      throw new Error(
+      throw new ToolInputError(
         `edits[${index}].oldText must not be empty in ${targetName}.`,
       );
     }
     if (typeof edit.newText !== "string") {
-      throw new Error(
+      throw new ToolInputError(
         `edits[${index}].newText must be a string in ${targetName}.`,
       );
     }
@@ -205,13 +206,13 @@ export function validateAndApplyTextEdits(
     const edit = normalizedEdits[index];
     const matchIndex = content.indexOf(edit.oldText);
     if (matchIndex === -1) {
-      throw new Error(
+      throw new ToolInputError(
         `Could not find edits[${index}] in ${targetName}. oldText must match exactly including whitespace and newlines.`,
       );
     }
     const occurrences = countOccurrences(content, edit.oldText);
     if (occurrences > 1) {
-      throw new Error(
+      throw new ToolInputError(
         `Found ${occurrences} occurrences of edits[${index}] in ${targetName}. Each oldText must be unique.`,
       );
     }
@@ -228,7 +229,7 @@ export function validateAndApplyTextEdits(
     const previous = matchedEdits[index - 1];
     const current = matchedEdits[index];
     if (previous.matchIndex + previous.matchLength > current.matchIndex) {
-      throw new Error(
+      throw new ToolInputError(
         `edits[${previous.editIndex}] and edits[${current.editIndex}] overlap in ${targetName}. Merge overlapping replacements into one edit.`,
       );
     }
@@ -244,7 +245,7 @@ export function validateAndApplyTextEdits(
   }
 
   if (newContent === content) {
-    throw new Error(`No changes made to ${targetName}.`);
+    throw new ToolInputError(`No changes made to ${targetName}.`);
   }
 
   return { baseContent: content, newContent };
diff --git a/packages/junior/tests/unit/misc/sandbox-executor.test.ts b/packages/junior/tests/unit/misc/sandbox-executor.test.ts
index 6544d3c4..0ea0d5cd 100644
--- a/packages/junior/tests/unit/misc/sandbox-executor.test.ts
+++ b/packages/junior/tests/unit/misc/sandbox-executor.test.ts
@@ -1146,6 +1146,35 @@ describe("createSandboxExecutor", () => {
     });
   });
 
+  it("throws ToolInputError when editFile targets a missing path", async () => {
+    const sandbox = makeSandbox("sbx_missing_edit_file");
+    sandbox.fs.readFile.mockRejectedValue(
+      Object.assign(new Error("ENOENT: no such file or directory"), {
+        code: "ENOENT",
+      }),
+    );
+    sandboxCreateMock.mockResolvedValue(sandbox);
+    vi.mocked(createBashTool).mockResolvedValue({
+      tools: {
+        readFile: { execute: vi.fn(async () => ({ content: "" })) },
+        writeFile: { execute: vi.fn(async () => ({ success: true })) },
+      },
+    } as never);
+
+    const executor = createSandboxExecutor();
+    executor.configureSkills([]);
+
+    await expect(
+      executor.execute({
+        toolName: "editFile",
+        input: {
+          path: "missing.ts",
+          edits: [{ oldText: "a", newText: "b" }],
+        },
+      }),
+    ).rejects.toThrow("File not found: missing.ts");
+  });
+
   it("keeps sandbox API failures as readFile errors", async () => {
     const sandbox = makeSandbox("sbx_read_file_api_error");
     sandboxCreateMock.mockResolvedValue(sandbox);
diff --git a/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts b/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts
new file mode 100644
index 00000000..d43c7b59
--- /dev/null
+++ b/packages/junior/tests/unit/tools/execution/tool-error-handler.test.ts
@@ -0,0 +1,64 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
+
+const logExceptionMock = vi.fn();
+const logInfoMock = vi.fn();
+const logWarnMock = vi.fn();
+const setSpanAttributesMock = vi.fn();
+
+vi.mock("@/chat/logging", () => ({
+  logException: (...args: unknown[]) => logExceptionMock(...args),
+  logInfo: (...args: unknown[]) => logInfoMock(...args),
+  logWarn: (...args: unknown[]) => logWarnMock(...args),
+  setSpanAttributes: (...args: unknown[]) => setSpanAttributesMock(...args),
+}));
+
+vi.mock("@/chat/pi/client", () => ({
+  GEN_AI_PROVIDER_NAME: "test-provider",
+}));
+
+import { handleToolExecutionError } from "@/chat/tools/execution/tool-error-handler";
+import { McpToolError } from "@/chat/mcp/errors";
+
+describe("handleToolExecutionError", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("reports system errors to Sentry via logException", () => {
+    const error = new Error("sandbox API failed");
+    expect(() =>
+      handleToolExecutionError(error, "editFile", "call_1", true, {}),
+    ).toThrow(error);
+
+    expect(logExceptionMock).toHaveBeenCalledTimes(1);
+    expect(setSpanAttributesMock).toHaveBeenCalledWith(
+      expect.objectContaining({ "error.type": "Error" }),
+    );
+  });
+
+  it("does not report ToolInputError to Sentry", () => {
+    const error = new ToolInputError("Could not find edits[0] in file.ts");
+    expect(() =>
+      handleToolExecutionError(error, "editFile", "call_1", true, {}),
+    ).toThrow(error);
+
+    expect(logExceptionMock).not.toHaveBeenCalled();
+    expect(logWarnMock).toHaveBeenCalledTimes(1);
+    expect(setSpanAttributesMock).toHaveBeenCalledWith(
+      expect.objectContaining({ "error.type": "tool_input_error" }),
+    );
+  });
+
+  it("does not report McpToolError to Sentry", () => {
+    const error = new McpToolError("mcp tool failed");
+    expect(() =>
+      handleToolExecutionError(error, "mcpTool", "call_1", true, {}),
+    ).toThrow(error);
+
+    expect(logExceptionMock).not.toHaveBeenCalled();
+    expect(setSpanAttributesMock).toHaveBeenCalledWith(
+      expect.objectContaining({ "error.type": "tool_error" }),
+    );
+  });
+});
diff --git a/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts b/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts
index 734b77d6..0123a2e0 100644
--- a/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts
+++ b/packages/junior/tests/unit/tools/sandbox/file-tools.test.ts
@@ -10,6 +10,7 @@ import { grepFiles } from "@/chat/tools/sandbox/grep";
 import { listDir } from "@/chat/tools/sandbox/list-dir";
 import { sliceFileContent } from "@/chat/tools/sandbox/read-file";
 import type { SandboxFileSystem } from "@/chat/tools/sandbox/file-utils";
+import { ToolInputError } from "@/chat/tools/execution/tool-input-error";
 
 function workspacePath(filePath: string): string {
   return path.posix.join(SANDBOX_WORKSPACE_ROOT, filePath);
@@ -299,4 +300,64 @@ describe("sandbox file tools", () => {
       details: { ok: true, path: "src", truncated: false },
     });
   });
+
+  it("throws ToolInputError for ambiguous edits", async () => {
+    const memory = createMemoryFs({
+      "src/app.ts": "same\nsame\n",
+    });
+
+    await expect(
+      editFile({
+        fs: memory.fs,
+        path: "src/app.ts",
+        edits: [{ oldText: "same", newText: "changed" }],
+      }),
+    ).rejects.toThrow(ToolInputError);
+  });
+
+  it("throws ToolInputError for old text not found", async () => {
+    const memory = createMemoryFs({
+      "src/app.ts": "hello world\n",
+    });
+
+    await expect(
+      editFile({
+        fs: memory.fs,
+        path: "src/app.ts",
+        edits: [{ oldText: "missing text", newText: "new" }],
+      }),
+    ).rejects.toThrow(ToolInputError);
+  });
+
+  it("throws ToolInputError for workspace path traversal", async () => {
+    const memory = createMemoryFs({});
+
+    await expect(
+      listDir({ fs: memory.fs, path: "../../../etc" }),
+    ).rejects.toThrow(ToolInputError);
+  });
+
+  it("throws ToolInputError for invalid grep regex", async () => {
+    const memory = createMemoryFs({
+      "src/app.ts": "content\n",
+    });
+
+    await expect(
+      grepFiles({
+        fs: memory.fs,
+        path: "src",
+        pattern: "[invalid",
+      }),
+    ).rejects.toThrow(ToolInputError);
+  });
+
+  it("throws ToolInputError when listDir targets a file", async () => {
+    const memory = createMemoryFs({
+      "src/app.ts": "content\n",
+    });
+
+    await expect(
+      listDir({ fs: memory.fs, path: "src/app.ts" }),
+    ).rejects.toThrow(ToolInputError);
+  });
 });

From af372d1acf8d052e51392846e4938d9a5adacd0d Mon Sep 17 00:00:00 2001
From: Omri SirComp <omribz156@gmail.com>
Date: Wed, 20 May 2026 22:10:56 +0300
Subject: [PATCH 2/2] fix(junior): keep resume diagnostics cumulative

Track cumulative duration and token usage across paused turn checkpoints, then use those totals when rendering timeout-resume Slack footers. Timeout continuation notices also now use the normal Slack footer shape so the conversation ID stays traceable.

Co-Authored-By: GPT-5 Codex <codex@openai.com>
---
 packages/junior/src/chat/respond.ts           | 41 ++++----
 .../junior/src/chat/runtime/reply-executor.ts | 29 +++++-
 .../junior/src/chat/runtime/slack-resume.ts   | 46 ++++++++-
 .../src/chat/services/turn-checkpoint.ts      | 48 ++++++++++
 .../src/chat/state/turn-session-store.ts      | 51 ++++++++++
 packages/junior/src/chat/usage.ts             | 28 ++++++
 .../tests/unit/handlers/oauth-resume.test.ts  | 96 +++++++++++++++++++
 .../unit/services/turn-checkpoint.test.ts     | 51 ++++++++++
 8 files changed, 364 insertions(+), 26 deletions(-)

diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts
index 77603a76..3b7e0d95 100644
--- a/packages/junior/src/chat/respond.ts
+++ b/packages/junior/src/chat/respond.ts
@@ -77,7 +77,7 @@ import {
   toAgentThinkingLevel,
   type TurnThinkingSelection,
 } from "@/chat/services/turn-thinking-level";
-import type { AgentTurnUsage } from "@/chat/usage";
+import { hasAgentTurnUsage, type AgentTurnUsage } from "@/chat/usage";
 import {
   loadTurnCheckpoint,
   persistCompletedCheckpoint,
@@ -179,6 +179,16 @@ function trimRouterAttachmentText(text: string): string {
     : `${normalized.slice(0, MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS)}...`;
 }
 
+function extractSliceUsage(
+  messages: PiMessage[],
+  beforeMessageCount: number,
+): AgentTurnUsage | undefined {
+  const usage = extractGenAiUsageSummary(
+    ...messages.slice(beforeMessageCount).filter(isAssistantMessage),
+  );
+  return hasAgentTurnUsage(usage) ? usage : undefined;
+}
+
 function supportsRouterTextPreview(mediaType: string): boolean {
   const baseMediaType = mediaType.split(";", 1)[0]?.trim().toLowerCase();
   if (!baseMediaType) {
@@ -1033,9 +1043,7 @@ export async function generateAssistantReply(
             agent.state,
             ...outputMessages,
           );
-          turnUsage = Object.values(usageSummary).some(
-            (value) => value !== undefined,
-          )
+          turnUsage = hasAgentTurnUsage(usageSummary)
             ? usageSummary
             : undefined;
           setSpanAttributes({
@@ -1071,6 +1079,8 @@ export async function generateAssistantReply(
     ) {
       await persistCompletedCheckpoint({
         conversationId: sessionConversationId,
+        currentDurationMs: Date.now() - replyStartedAtMs,
+        currentUsage: turnUsage,
         sessionId,
         sliceId: currentSliceId,
         allMessages: agent.state.messages,
@@ -1103,10 +1113,15 @@ export async function generateAssistantReply(
     });
   } catch (error) {
     if (timedOut && timeoutResumeConversationId && timeoutResumeSessionId) {
+      turnUsage =
+        turnUsage ??
+        extractSliceUsage(timeoutResumeMessages, beforeMessageCount);
       const checkpoint = await persistTimeoutCheckpoint({
         conversationId: timeoutResumeConversationId,
         sessionId: timeoutResumeSessionId,
         currentSliceId: timeoutResumeSliceId,
+        currentDurationMs: Date.now() - replyStartedAtMs,
+        currentUsage: turnUsage,
         messages: timeoutResumeMessages,
         loadedSkillNames: loadedSkillNamesForResume,
         errorMessage: error instanceof Error ? error.message : String(error),
@@ -1140,25 +1155,17 @@ export async function generateAssistantReply(
       timeoutResumeSessionId
     ) {
       if (!turnUsage && timeoutResumeMessages.length > 0) {
-        // Match the canonical slice-scoped extraction: sum usage from new
-        // assistant messages produced during this slice, not the full
-        // message history (which may include prior slices whose usage was
-        // already reported in earlier footers).
-        const fallbackUsage = extractGenAiUsageSummary(
-          ...timeoutResumeMessages
-            .slice(beforeMessageCount)
-            .filter(isAssistantMessage),
+        turnUsage = extractSliceUsage(
+          timeoutResumeMessages,
+          beforeMessageCount,
         );
-        turnUsage = Object.values(fallbackUsage).some(
-          (value) => value !== undefined,
-        )
-          ? fallbackUsage
-          : undefined;
       }
       const nextSliceId = await persistAuthPauseCheckpoint({
         conversationId: timeoutResumeConversationId,
         sessionId: timeoutResumeSessionId,
         currentSliceId: timeoutResumeSliceId,
+        currentDurationMs: Date.now() - replyStartedAtMs,
+        currentUsage: turnUsage,
         messages: timeoutResumeMessages,
         loadedSkillNames: loadedSkillNamesForResume,
         errorMessage: error.message,
diff --git a/packages/junior/src/chat/runtime/reply-executor.ts b/packages/junior/src/chat/runtime/reply-executor.ts
index d60275d7..b5e85f0e 100644
--- a/packages/junior/src/chat/runtime/reply-executor.ts
+++ b/packages/junior/src/chat/runtime/reply-executor.ts
@@ -49,7 +49,10 @@ import {
   isVisionEnabled,
 } from "@/chat/services/vision-context";
 import { createSlackAdapterAssistantStatusSession } from "@/chat/slack/assistant-thread/status";
-import { buildSlackReplyFooter } from "@/chat/slack/footer";
+import {
+  buildSlackReplyBlocks,
+  buildSlackReplyFooter,
+} from "@/chat/slack/footer";
 import { maybeUpdateAssistantTitle } from "@/chat/slack/assistant-thread/title";
 import { appendSlackLegacyAttachmentText } from "@/chat/slack/legacy-attachments";
 import { type ThreadArtifactsState } from "@/chat/state/artifacts";
@@ -61,7 +64,7 @@ import { buildDeterministicTurnId } from "@/chat/runtime/turn";
 import { markTurnCompleted, markTurnFailed } from "@/chat/runtime/turn";
 import { startActiveTurn } from "@/chat/runtime/turn";
 import { isRedundantReactionAckText } from "@/chat/services/reply-delivery-plan";
-import { deleteSlackMessage } from "@/chat/slack/outbound";
+import { deleteSlackMessage, postSlackMessage } from "@/chat/slack/outbound";
 import {
   finalizeFailedTurnReply,
   getAgentTurnDiagnosticsAttributes,
@@ -204,9 +207,25 @@ export function createReplyToThread(deps: ReplyExecutorDeps) {
         const postTurnContinuationNotice = async (): Promise<void> => {
           try {
             await beforeFirstResponsePost();
-            await thread.post(
-              buildSlackOutputMessage(buildTurnContinuationResponse()),
-            );
+            const text = buildTurnContinuationResponse();
+            const footer = buildSlackReplyFooter({ conversationId });
+            const shouldUseSlackFooter =
+              Boolean(footer) &&
+              Boolean(channelId && threadTs) &&
+              (thread.adapter as { name?: string } | undefined)?.name ===
+                "slack";
+            if (shouldUseSlackFooter && channelId && threadTs) {
+              const blocks = buildSlackReplyBlocks(text, footer);
+              await postSlackMessage({
+                channelId,
+                threadTs,
+                text,
+                ...(blocks ? { blocks } : {}),
+              });
+              return;
+            }
+
+            await thread.post(buildSlackOutputMessage(text));
           } catch (error) {
             logException(
               error,
diff --git a/packages/junior/src/chat/runtime/slack-resume.ts b/packages/junior/src/chat/runtime/slack-resume.ts
index 27c29731..b7d5ba28 100644
--- a/packages/junior/src/chat/runtime/slack-resume.ts
+++ b/packages/junior/src/chat/runtime/slack-resume.ts
@@ -21,13 +21,18 @@ import {
   createSlackWebApiAssistantStatusSession,
   type AssistantStatusSession,
 } from "@/chat/slack/assistant-thread/status";
-import { buildSlackReplyFooter } from "@/chat/slack/footer";
+import {
+  buildSlackReplyBlocks,
+  buildSlackReplyFooter,
+} from "@/chat/slack/footer";
 import {
   planSlackReplyPosts,
   postSlackApiReplyPosts,
 } from "@/chat/slack/reply";
 import { postSlackMessage as postSlackApiMessage } from "@/chat/slack/outbound";
 import { getStateAdapter } from "@/chat/state/adapter";
+import { getAgentTurnSessionCheckpoint } from "@/chat/state/turn-session-store";
+import { addAgentTurnUsage } from "@/chat/usage";
 import {
   startSlackProcessingReactionForMessage,
   type ProcessingReactionSession,
@@ -172,11 +177,18 @@ async function postTurnContinuationNoticeBestEffort(args: {
   lockKey: string;
   resumeArgs: ResumeSlackTurnArgs;
 }): Promise<void> {
+  const text = buildTurnContinuationResponse();
+  const footer = buildSlackReplyFooter({
+    conversationId:
+      args.resumeArgs.replyContext?.correlation?.conversationId ?? args.lockKey,
+  });
+  const blocks = buildSlackReplyBlocks(text, footer);
   try {
     await postSlackApiMessage({
       channelId: args.resumeArgs.channelId,
       threadTs: args.resumeArgs.threadTs,
-      text: buildTurnContinuationResponse(),
+      text,
+      ...(blocks ? { blocks } : {}),
     });
   } catch (error) {
     logException(
@@ -258,6 +270,19 @@ function createResumeReplyContext(
   };
 }
 
+async function getResumeCheckpoint(args: {
+  conversationId?: string;
+  sessionId?: string;
+}) {
+  if (!args.conversationId || !args.sessionId) {
+    return undefined;
+  }
+  return await getAgentTurnSessionCheckpoint(
+    args.conversationId,
+    args.sessionId,
+  );
+}
+
 /**
  * Resume a paused Slack turn under the normal thread lock.
  *
@@ -311,6 +336,10 @@ export async function resumeSlackTurn(args: ResumeSlackTurnArgs) {
 
     const generateReply = args.generateReply ?? generateAssistantReply;
     const replyContext = createResumeReplyContext(args, status);
+    const priorCheckpoint = await getResumeCheckpoint({
+      conversationId: replyContext.correlation?.conversationId,
+      sessionId: replyContext.correlation?.turnId,
+    });
     const replyPromise = generateReply(args.messageText, replyContext);
     const replyTimeoutMs = resolveReplyTimeoutMs(args.replyTimeoutMs);
     let reply =
@@ -339,9 +368,18 @@ export async function resumeSlackTurn(args: ResumeSlackTurnArgs) {
     await status.stop();
     const footer = buildSlackReplyFooter({
       conversationId: args.replyContext?.correlation?.conversationId ?? lockKey,
-      durationMs: reply.diagnostics.durationMs,
+      durationMs:
+        typeof priorCheckpoint?.cumulativeDurationMs === "number" ||
+        typeof reply.diagnostics.durationMs === "number"
+          ? (priorCheckpoint?.cumulativeDurationMs ?? 0) +
+            (reply.diagnostics.durationMs ?? 0)
+          : undefined,
       thinkingLevel: reply.diagnostics.thinkingLevel,
-      usage: reply.diagnostics.usage,
+      usage:
+        addAgentTurnUsage(
+          priorCheckpoint?.cumulativeUsage,
+          reply.diagnostics.usage,
+        ) ?? reply.diagnostics.usage,
     });
     await postSlackApiReplyPosts({
       channelId: args.channelId,
diff --git a/packages/junior/src/chat/services/turn-checkpoint.ts b/packages/junior/src/chat/services/turn-checkpoint.ts
index 6b3f42d2..21e02581 100644
--- a/packages/junior/src/chat/services/turn-checkpoint.ts
+++ b/packages/junior/src/chat/services/turn-checkpoint.ts
@@ -6,6 +6,7 @@ import {
 import { logException } from "@/chat/logging";
 import type { PiMessage } from "@/chat/pi/messages";
 import { trimTrailingAssistantMessages } from "@/chat/respond-helpers";
+import { addAgentTurnUsage, type AgentTurnUsage } from "@/chat/usage";
 
 export interface TurnCheckpointContext {
   conversationId?: string;
@@ -19,6 +20,19 @@ export interface TurnCheckpointState {
   existingCheckpoint?: AgentTurnSessionCheckpoint;
 }
 
+function addDurationMs(
+  prior: number | undefined,
+  current: number | undefined,
+): number | undefined {
+  const total = [prior, current].reduce<number | undefined>((sum, value) => {
+    if (typeof value !== "number" || !Number.isFinite(value)) {
+      return sum;
+    }
+    return (sum ?? 0) + Math.max(0, Math.floor(value));
+  }, undefined);
+  return total;
+}
+
 /** Load turn checkpoint state for a conversation/session pair. */
 export async function loadTurnCheckpoint(
   ctx: TurnCheckpointContext,
@@ -46,13 +60,27 @@ export async function loadTurnCheckpoint(
 /** Persist a completed turn checkpoint. */
 export async function persistCompletedCheckpoint(args: {
   conversationId: string;
+  currentDurationMs?: number;
+  currentUsage?: AgentTurnUsage;
   sessionId: string;
   sliceId: number;
   allMessages: PiMessage[];
   loadedSkillNames: string[];
 }): Promise<void> {
+  const latestCheckpoint = await getAgentTurnSessionCheckpoint(
+    args.conversationId,
+    args.sessionId,
+  );
   await upsertAgentTurnSessionCheckpoint({
     conversationId: args.conversationId,
+    cumulativeDurationMs: addDurationMs(
+      latestCheckpoint?.cumulativeDurationMs,
+      args.currentDurationMs,
+    ),
+    cumulativeUsage: addAgentTurnUsage(
+      latestCheckpoint?.cumulativeUsage,
+      args.currentUsage,
+    ),
     sessionId: args.sessionId,
     sliceId: args.sliceId,
     state: "completed",
@@ -69,6 +97,8 @@ export async function persistAuthPauseCheckpoint(args: {
   conversationId: string;
   sessionId: string;
   currentSliceId: number;
+  currentDurationMs?: number;
+  currentUsage?: AgentTurnUsage;
   messages: PiMessage[];
   loadedSkillNames: string[];
   errorMessage: string;
@@ -94,6 +124,14 @@ export async function persistAuthPauseCheckpoint(args: {
     );
     await upsertAgentTurnSessionCheckpoint({
       conversationId: args.conversationId,
+      cumulativeDurationMs: addDurationMs(
+        latestCheckpoint?.cumulativeDurationMs,
+        args.currentDurationMs,
+      ),
+      cumulativeUsage: addAgentTurnUsage(
+        latestCheckpoint?.cumulativeUsage,
+        args.currentUsage,
+      ),
       sessionId: args.sessionId,
       sliceId: nextSliceId,
       state: "awaiting_resume",
@@ -135,6 +173,8 @@ export async function persistTimeoutCheckpoint(args: {
   conversationId: string;
   sessionId: string;
   currentSliceId: number;
+  currentDurationMs?: number;
+  currentUsage?: AgentTurnUsage;
   messages: PiMessage[];
   loadedSkillNames: string[];
   errorMessage: string;
@@ -161,6 +201,14 @@ export async function persistTimeoutCheckpoint(args: {
     );
     return await upsertAgentTurnSessionCheckpoint({
       conversationId: args.conversationId,
+      cumulativeDurationMs: addDurationMs(
+        latestCheckpoint?.cumulativeDurationMs,
+        args.currentDurationMs,
+      ),
+      cumulativeUsage: addAgentTurnUsage(
+        latestCheckpoint?.cumulativeUsage,
+        args.currentUsage,
+      ),
       sessionId: args.sessionId,
       sliceId: nextSliceId,
       state: "awaiting_resume",
diff --git a/packages/junior/src/chat/state/turn-session-store.ts b/packages/junior/src/chat/state/turn-session-store.ts
index 11f87eec..e936fd8d 100644
--- a/packages/junior/src/chat/state/turn-session-store.ts
+++ b/packages/junior/src/chat/state/turn-session-store.ts
@@ -1,5 +1,6 @@
 import { isRecord } from "@/chat/coerce";
 import type { PiMessage } from "@/chat/pi/messages";
+import type { AgentTurnUsage } from "@/chat/usage";
 import { getStateAdapter } from "./adapter";
 
 const AGENT_TURN_SESSION_PREFIX = "junior:agent_turn_session";
@@ -17,6 +18,8 @@ export type AgentTurnResumeReason = "timeout" | "auth";
 export interface AgentTurnSessionCheckpoint {
   checkpointVersion: number;
   conversationId: string;
+  cumulativeDurationMs?: number;
+  cumulativeUsage?: AgentTurnUsage;
   errorMessage?: string;
   loadedSkillNames?: string[];
   piMessages: PiMessage[];
@@ -35,6 +38,34 @@ function agentTurnSessionKey(
   return `${AGENT_TURN_SESSION_PREFIX}:${conversationId}:${sessionId}`;
 }
 
+function toFiniteNonNegativeNumber(value: unknown): number | undefined {
+  return typeof value === "number" && Number.isFinite(value)
+    ? Math.max(0, Math.floor(value))
+    : undefined;
+}
+
+function parseAgentTurnUsage(value: unknown): AgentTurnUsage | undefined {
+  if (!isRecord(value)) {
+    return undefined;
+  }
+
+  const usage: AgentTurnUsage = {};
+  for (const field of [
+    "inputTokens",
+    "outputTokens",
+    "cachedInputTokens",
+    "cacheCreationTokens",
+    "totalTokens",
+  ] as const) {
+    const count = toFiniteNonNegativeNumber(value[field]);
+    if (count !== undefined) {
+      usage[field] = count;
+    }
+  }
+
+  return Object.keys(usage).length > 0 ? usage : undefined;
+}
+
 function parseAgentTurnSessionCheckpoint(
   value: unknown,
 ): AgentTurnSessionCheckpoint | undefined {
@@ -64,6 +95,10 @@ function parseAgentTurnSessionCheckpoint(
     const sliceId = parsed.sliceId;
     const checkpointVersion = parsed.checkpointVersion;
     const updatedAtMs = parsed.updatedAtMs;
+    const cumulativeDurationMs = toFiniteNonNegativeNumber(
+      parsed.cumulativeDurationMs,
+    );
+    const cumulativeUsage = parseAgentTurnUsage(parsed.cumulativeUsage);
     if (
       typeof conversationId !== "string" ||
       typeof sessionId !== "string" ||
@@ -81,6 +116,8 @@ function parseAgentTurnSessionCheckpoint(
       sliceId,
       state: status,
       updatedAtMs,
+      ...(cumulativeDurationMs !== undefined ? { cumulativeDurationMs } : {}),
+      ...(cumulativeUsage ? { cumulativeUsage } : {}),
       piMessages: Array.isArray(parsed.piMessages)
         ? (parsed.piMessages as PiMessage[])
         : [],
@@ -120,6 +157,8 @@ export async function getAgentTurnSessionCheckpoint(
 
 export async function upsertAgentTurnSessionCheckpoint(args: {
   conversationId: string;
+  cumulativeDurationMs?: number;
+  cumulativeUsage?: AgentTurnUsage;
   sessionId: string;
   sliceId: number;
   state: AgentTurnSessionStatus;
@@ -145,6 +184,16 @@ export async function upsertAgentTurnSessionCheckpoint(args: {
     state: args.state,
     updatedAtMs: Date.now(),
     piMessages: Array.isArray(args.piMessages) ? args.piMessages : [],
+    ...(typeof args.cumulativeDurationMs === "number" &&
+    Number.isFinite(args.cumulativeDurationMs)
+      ? {
+          cumulativeDurationMs: Math.max(
+            0,
+            Math.floor(args.cumulativeDurationMs),
+          ),
+        }
+      : {}),
+    ...(args.cumulativeUsage ? { cumulativeUsage: args.cumulativeUsage } : {}),
     ...(Array.isArray(args.loadedSkillNames)
       ? {
           loadedSkillNames: args.loadedSkillNames.filter(
@@ -192,6 +241,8 @@ export async function supersedeAgentTurnSessionCheckpoint(args: {
     sliceId: existing.sliceId,
     state: "superseded",
     piMessages: existing.piMessages,
+    cumulativeDurationMs: existing.cumulativeDurationMs,
+    cumulativeUsage: existing.cumulativeUsage,
     loadedSkillNames: existing.loadedSkillNames,
     resumeReason: existing.resumeReason,
     resumedFromSliceId: existing.resumedFromSliceId,
diff --git a/packages/junior/src/chat/usage.ts b/packages/junior/src/chat/usage.ts
index c707dacb..53448d2a 100644
--- a/packages/junior/src/chat/usage.ts
+++ b/packages/junior/src/chat/usage.ts
@@ -18,3 +18,31 @@ export interface AgentTurnUsage {
   /** Provider-reported total. May not equal the sum of individual counters across providers. */
   totalTokens?: number;
 }
+
+/** Return whether any token counter is present on a usage record. */
+export function hasAgentTurnUsage(
+  usage: AgentTurnUsage | undefined,
+): usage is AgentTurnUsage {
+  return Boolean(
+    usage &&
+    Object.values(usage).some(
+      (value) => typeof value === "number" && Number.isFinite(value),
+    ),
+  );
+}
+
+/** Sum token counters across turn slices while preserving absent fields. */
+export function addAgentTurnUsage(
+  ...usages: Array<AgentTurnUsage | undefined>
+): AgentTurnUsage | undefined {
+  const total: AgentTurnUsage = {};
+  for (const usage of usages) {
+    if (!usage) continue;
+    for (const field of Object.keys(usage) as (keyof AgentTurnUsage)[]) {
+      const value = usage[field];
+      if (typeof value !== "number" || !Number.isFinite(value)) continue;
+      total[field] = (total[field] ?? 0) + value;
+    }
+  }
+  return hasAgentTurnUsage(total) ? total : undefined;
+}
diff --git a/packages/junior/tests/unit/handlers/oauth-resume.test.ts b/packages/junior/tests/unit/handlers/oauth-resume.test.ts
index 39350f08..bf0a3223 100644
--- a/packages/junior/tests/unit/handlers/oauth-resume.test.ts
+++ b/packages/junior/tests/unit/handlers/oauth-resume.test.ts
@@ -182,6 +182,21 @@ describe("resumeAuthorizedRequest", () => {
     expect(onTimeoutPause).toHaveBeenCalledTimes(1);
     expect(postMessageMock).toHaveBeenCalledWith(
       expect.objectContaining({
+        blocks: [
+          {
+            type: "markdown",
+            text: buildTurnContinuationResponse(),
+          },
+          {
+            type: "context",
+            elements: [
+              {
+                type: "mrkdwn",
+                text: "*ID:* slack:C-test:1700000000.0002",
+              },
+            ],
+          },
+        ],
         channel: "C-test",
         thread_ts: "1700000000.0002",
         text: buildTurnContinuationResponse(),
@@ -189,6 +204,87 @@ describe("resumeAuthorizedRequest", () => {
     );
   });
 
+  it("uses cumulative checkpoint diagnostics for timeout resume footers", async () => {
+    const { upsertAgentTurnSessionCheckpoint } =
+      await import("@/chat/state/turn-session-store");
+
+    await upsertAgentTurnSessionCheckpoint({
+      conversationId: "conversation-1",
+      sessionId: "turn-1",
+      sliceId: 2,
+      state: "awaiting_resume",
+      piMessages: [],
+      resumeReason: "timeout",
+      cumulativeDurationMs: 1_000,
+      cumulativeUsage: {
+        inputTokens: 2,
+        outputTokens: 3,
+      },
+    });
+
+    await resumeSlackTurn({
+      messageText: "continue this turn",
+      channelId: "C-test",
+      threadTs: "1700000000.0005",
+      lockKey: "slack:C-test:1700000000.0005",
+      replyContext: {
+        requester: { userId: "U-test" },
+        correlation: {
+          conversationId: "conversation-1",
+          turnId: "turn-1",
+        },
+      },
+      generateReply: async () =>
+        ({
+          text: "done",
+          diagnostics: {
+            assistantMessageCount: 1,
+            durationMs: 500,
+            modelId: "fake-agent-model",
+            outcome: "success",
+            toolCalls: [],
+            toolErrorCount: 0,
+            toolResultCount: 0,
+            usage: {
+              outputTokens: 7,
+            },
+            usedPrimaryText: true,
+          },
+        }) as any,
+    });
+
+    expect(postMessageMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        channel: "C-test",
+        thread_ts: "1700000000.0005",
+        text: "done",
+        blocks: [
+          {
+            type: "markdown",
+            text: "done",
+          },
+          {
+            type: "context",
+            elements: expect.arrayContaining([
+              {
+                type: "mrkdwn",
+                text: "*ID:* conversation-1",
+              },
+              {
+                type: "mrkdwn",
+                text: "*Tokens:* 12",
+              },
+              {
+                type: "mrkdwn",
+                text: "*Time:* 1.5s",
+              },
+            ]),
+          },
+        ],
+      }),
+    );
+  });
+
   it("posts the canonical failure response when timeout pause handling throws", async () => {
     const onFailure = vi.fn(async () => undefined);
 
diff --git a/packages/junior/tests/unit/services/turn-checkpoint.test.ts b/packages/junior/tests/unit/services/turn-checkpoint.test.ts
index 268fc253..238da5cd 100644
--- a/packages/junior/tests/unit/services/turn-checkpoint.test.ts
+++ b/packages/junior/tests/unit/services/turn-checkpoint.test.ts
@@ -97,4 +97,55 @@ describe("persistAuthPauseCheckpoint", () => {
       piMessages: [priorMessages[0]],
     });
   });
+
+  it("carries cumulative diagnostics across pause checkpoints", async () => {
+    const { persistTimeoutCheckpoint } =
+      await import("@/chat/services/turn-checkpoint");
+    const { getAgentTurnSessionCheckpoint, upsertAgentTurnSessionCheckpoint } =
+      await import("@/chat/state/turn-session-store");
+
+    await upsertAgentTurnSessionCheckpoint({
+      conversationId: "conversation-1",
+      sessionId: "turn-1",
+      sliceId: 1,
+      state: "awaiting_resume",
+      piMessages: [],
+      resumeReason: "timeout",
+      cumulativeDurationMs: 1_500,
+      cumulativeUsage: {
+        inputTokens: 10,
+        outputTokens: 3,
+      },
+    });
+
+    await persistTimeoutCheckpoint({
+      conversationId: "conversation-1",
+      sessionId: "turn-1",
+      currentSliceId: 1,
+      currentDurationMs: 2_250,
+      currentUsage: {
+        outputTokens: 7,
+        cachedInputTokens: 2,
+      },
+      messages: [],
+      loadedSkillNames: [],
+      errorMessage: "timed out again",
+      logContext: {
+        modelId: "test-model",
+      },
+    });
+
+    const checkpoint = await getAgentTurnSessionCheckpoint(
+      "conversation-1",
+      "turn-1",
+    );
+    expect(checkpoint).toMatchObject({
+      cumulativeDurationMs: 3_750,
+      cumulativeUsage: {
+        inputTokens: 10,
+        outputTokens: 10,
+        cachedInputTokens: 2,
+      },
+    });
+  });
 });