diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index 023879bfb..c30b0c2f0 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -659,6 +659,20 @@ export namespace Telemetry { error_message?: string } // altimate_change end + // altimate_change start — plan-agent model tool-call refusal detection + | { + type: "plan_no_tool_generation" + timestamp: number + session_id: string + message_id: string + model_id: string + provider_id: string + /** finish reason of the step that triggered this event; the emitter in session/processor.ts only fires on "stop" with no tool calls counted — flags models that refuse to tool-call in plan mode */ + finish_reason: string + /** output tokens on the stop-without-tools generation — helps distinguish "refused" (low) from "wrote a long text plan" (high) */ + tokens_output: number + } + // altimate_change end /** SHA256 hash a masked error message for anonymous grouping. */ export function hashError(maskedMessage: string): string { diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 72c96311d..529f44dc0 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -49,6 +49,14 @@ export namespace SessionProcessor { // altimate_change start — per-step generation telemetry let stepStartTime = Date.now() // altimate_change end + // altimate_change start — plan-agent tool-call-refusal detection + // Some models (observed: qwen3-coder-next, occasionally gpt-5.4) end plan-agent + // steps with finish_reason=stop and never emit tool calls. Users abandon the + // session thinking it's stuck. Count the tool calls this processor has seen; + // if a plan-agent step finishes with stop before any was counted, warn once. 
+ let sessionToolCallsMade = 0 + let planNoToolWarningEmitted = false + // altimate_change end const result = { get message() { @@ -162,6 +170,9 @@ export namespace SessionProcessor { metadata: value.providerMetadata, }) toolcalls[value.toolCallId] = part as MessageV2.ToolPart + // altimate_change start — session has now tool-called; suppresses plan refusal warning + sessionToolCallsMade++ + // altimate_change end const parts = await MessageV2.parts(input.assistantMessage.id) const lastThree = parts.slice(-DOOM_LOOP_THRESHOLD) @@ -322,6 +333,52 @@ export namespace SessionProcessor { ...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }), }) // altimate_change end + // altimate_change start — detect plan-agent tool-call refusal + // A plan-agent step that ends with finish=stop while no tool call has + // been counted (sessionToolCallsMade === 0) means the model wrote text and gave up. + // Users read the text, see no progress, and abandon. Surface a one-time + // warning and telemetry so the pattern is measurable and the user + // knows to try a different model. NOTE(review): confirm the counter's lifetime spans the whole session. + if ( + input.assistantMessage.agent === "plan" && + value.finishReason === "stop" && + sessionToolCallsMade === 0 && + !planNoToolWarningEmitted + ) { + planNoToolWarningEmitted = true + Telemetry.track({ + type: "plan_no_tool_generation", + timestamp: Date.now(), + session_id: input.sessionID, + message_id: input.assistantMessage.id, + model_id: input.model.id, + provider_id: input.model.providerID, + finish_reason: value.finishReason, + tokens_output: usage.tokens.output, + }) + log.warn("plan agent stopped without tool calls — model may not be tool-calling properly", { + sessionID: input.sessionID, + modelID: input.model.id, + providerID: input.model.providerID, + tokensOutput: usage.tokens.output, + }) + // synthetic: true so this warning is shown in the TUI but + // excluded when the transcript is replayed to the LLM next turn + // (prompt.ts filters synthetic text parts — lines 648, 795 at time of writing; TODO re-check if prompt.ts changes). 
+ await Session.updatePart({ + id: PartID.ascending(), + messageID: input.assistantMessage.id, + sessionID: input.assistantMessage.sessionID, + type: "text", + synthetic: true, + text: + `⚠️ altimate-code: the \`plan\` agent is running on \`${input.model.providerID}/${input.model.id}\`, ` + + `which returned text without calling any tools. If you expected the plan agent to explore the ` + + `codebase, try switching to a model with stronger tool-use via \`/model\`.`, + time: { start: Date.now(), end: Date.now() }, + }) + } + // altimate_change end await Session.updatePart({ id: PartID.ascending(), reason: value.finishReason, diff --git a/packages/opencode/test/session/processor.test.ts b/packages/opencode/test/session/processor.test.ts index fd12e483d..e933032fb 100644 --- a/packages/opencode/test/session/processor.test.ts +++ b/packages/opencode/test/session/processor.test.ts @@ -454,6 +454,137 @@ describe("doom loop detection telemetry", () => { }) }) +// --------------------------------------------------------------------------- +// 4b. Plan-agent tool-call refusal detection +// --------------------------------------------------------------------------- +describe("plan-agent no-tool-generation detection", () => { + /** + * Re-implements (does not call) the plan-no-tool check from the processor.ts finish-step handler, + * so it must be kept in sync with that handler by hand. Inputs mirror the processor state: the + * tool-call counter and the one-shot warning flag. Returns { event, warningEmitted }: event is null + * when no warning should fire, otherwise the telemetry event that would be emitted. 
+ */ + function simulateFinishStep(opts: { + agent: string + finishReason: string + sessionToolCallsMade: number + planNoToolWarningEmitted: boolean + sessionID: string + messageID: string + modelID: string + providerID: string + tokensOutput: number + }): { event: Telemetry.Event | null; warningEmitted: boolean } { + if ( + opts.agent === "plan" && + opts.finishReason === "stop" && + opts.sessionToolCallsMade === 0 && + !opts.planNoToolWarningEmitted + ) { + return { + event: { + type: "plan_no_tool_generation", + timestamp: Date.now(), + session_id: opts.sessionID, + message_id: opts.messageID, + model_id: opts.modelID, + provider_id: opts.providerID, + finish_reason: opts.finishReason, + tokens_output: opts.tokensOutput, + }, + warningEmitted: true, + } + } + return { event: null, warningEmitted: opts.planNoToolWarningEmitted } + } + + const baseOpts = { + sessionID: "sess-plan-1", + messageID: "msg-plan-1", + modelID: "qwen3-coder-next", + providerID: "ollama-cloud", + tokensOutput: 293, + } + + test("fires when plan agent stops without tool calls", () => { + const result = simulateFinishStep({ + ...baseOpts, + agent: "plan", + finishReason: "stop", + sessionToolCallsMade: 0, + planNoToolWarningEmitted: false, + }) + expect(result.event).not.toBeNull() + expect(result.event?.type).toBe("plan_no_tool_generation") + expect(result.event?.model_id).toBe("qwen3-coder-next") + expect(result.event?.provider_id).toBe("ollama-cloud") + expect(result.event?.finish_reason).toBe("stop") + expect(result.event?.tokens_output).toBe(293) + expect(result.warningEmitted).toBe(true) + }) + + test("does not fire when session has already made tool calls", () => { + const result = simulateFinishStep({ + ...baseOpts, + agent: "plan", + finishReason: "stop", + sessionToolCallsMade: 3, + planNoToolWarningEmitted: false, + }) + expect(result.event).toBeNull() + expect(result.warningEmitted).toBe(false) + }) + + test("does not fire when finish_reason is tool-calls", () => { + const 
result = simulateFinishStep({ + ...baseOpts, + agent: "plan", + finishReason: "tool-calls", + sessionToolCallsMade: 0, + planNoToolWarningEmitted: false, + }) + expect(result.event).toBeNull() + }) + + test("does not fire for non-plan agents", () => { + for (const agent of ["builder", "analyst", "general", "explore"]) { + const result = simulateFinishStep({ + ...baseOpts, + agent, + finishReason: "stop", + sessionToolCallsMade: 0, + planNoToolWarningEmitted: false, + }) + expect(result.event).toBeNull() + } + }) + + test("fires at most once per session (one-shot flag)", () => { + const result = simulateFinishStep({ + ...baseOpts, + agent: "plan", + finishReason: "stop", + sessionToolCallsMade: 0, + planNoToolWarningEmitted: true, + }) + expect(result.event).toBeNull() + expect(result.warningEmitted).toBe(true) + }) + + test("does not fire when finish_reason is length/error/other", () => { + for (const reason of ["length", "error", "content-filter", "unknown"]) { + const result = simulateFinishStep({ + ...baseOpts, + agent: "plan", + finishReason: reason, + sessionToolCallsMade: 0, + planNoToolWarningEmitted: false, + }) + expect(result.event).toBeNull() + } + }) +}) + // --------------------------------------------------------------------------- // 5. Generation telemetry // ---------------------------------------------------------------------------