Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions packages/opencode/src/altimate/telemetry/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,20 @@ export namespace Telemetry {
error_message?: string
}
// altimate_change end
// altimate_change start — plan-agent model tool-call refusal detection
// Plan-agent tool-call refusal signal. The emit site visible in
// session/processor.ts tracks this event when a step run by the "plan" agent
// finishes with finish reason "stop" while that processor's tool-call counter
// is still zero, guarded by a one-shot flag so it is tracked at most once for
// the lifetime of that flag.
| {
type: "plan_no_tool_generation"
/** Wall-clock time of the emit, taken from Date.now() (epoch milliseconds). */
timestamp: number
/** Session the finishing step belongs to. */
session_id: string
/** Id of the assistant message being processed when the step finished. */
message_id: string
/** Model id of the model that produced the step (paired with provider_id). */
model_id: string
/** Provider id of the model that produced the step. */
provider_id: string
/** Finish reason of the step. The visible emit site only tracks this event when the value is "stop" and no tool call has been counted — flags models that refuse to tool-call in plan mode. */
finish_reason: string
/** Output tokens reported for the stop-without-tools step — helps distinguish "refused" (low) from "wrote a long text plan" (high). */
tokens_output: number
}
// altimate_change end

/** SHA256 hash a masked error message for anonymous grouping. */
export function hashError(maskedMessage: string): string {
Expand Down
57 changes: 57 additions & 0 deletions packages/opencode/src/session/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ export namespace SessionProcessor {
// altimate_change start — per-step generation telemetry
let stepStartTime = Date.now()
// altimate_change end
// altimate_change start — plan-agent tool-call-refusal detection
// Some models (observed: qwen3-coder-next, occasionally gpt-5.4) end plan-agent
// steps with finish_reason=stop and never emit tool calls. The user then abandons
// the session thinking it is stuck. These two locals drive a one-shot warning:
// the finish-step handler warns when a plan-agent step ends with "stop" while no
// tool call has been counted yet (any such step, not only the very first one).
// NOTE(review): both are plain locals declared next to stepStartTime, so
// "session" here really means "for as long as this enclosing scope lives" —
// confirm that this scope spans the whole session and not a single message.

// Number of tool-call parts recorded so far; incremented where a tool call is
// stored into `toolcalls`. Only ever compared against zero.
let sessionToolCallsMade = 0
// Set to true the first time the warning fires so it is never repeated.
let planNoToolWarningEmitted = false
// altimate_change end

const result = {
get message() {
Expand Down Expand Up @@ -162,6 +170,9 @@ export namespace SessionProcessor {
metadata: value.providerMetadata,
})
toolcalls[value.toolCallId] = part as MessageV2.ToolPart
// altimate_change start — session has now tool-called; suppresses plan refusal warning
sessionToolCallsMade++
// altimate_change end

const parts = await MessageV2.parts(input.assistantMessage.id)
const lastThree = parts.slice(-DOOM_LOOP_THRESHOLD)
Expand Down Expand Up @@ -322,6 +333,52 @@ export namespace SessionProcessor {
...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }),
})
// altimate_change end
// altimate_change start — detect plan-agent tool-call refusal
// A plan-agent step that ends with finish=stop while NO tool call has
// been counted so far means the model wrote text and gave up.
// Users read the text, see no progress, and abandon. Surface a
// warning + telemetry so the pattern is measurable and the user
// knows to try a different model.
//
// All four conditions must hold: the message belongs to the "plan"
// agent, the step finished with "stop", the tool-call counter is
// still zero, and the warning has not been emitted before.
if (
input.assistantMessage.agent === "plan" &&
value.finishReason === "stop" &&
sessionToolCallsMade === 0 &&
!planNoToolWarningEmitted
) {
// Flip the one-shot flag before anything else so the flag is already
// set by the time the awaited part write below runs (or rejects).
planNoToolWarningEmitted = true
Telemetry.track({
type: "plan_no_tool_generation",
timestamp: Date.now(),
session_id: input.sessionID,
message_id: input.assistantMessage.id,
model_id: input.model.id,
provider_id: input.model.providerID,
finish_reason: value.finishReason,
tokens_output: usage.tokens.output,
})
log.warn("plan agent stopped without tool calls — model may not be tool-calling properly", {
sessionID: input.sessionID,
modelID: input.model.id,
providerID: input.model.providerID,
tokensOutput: usage.tokens.output,
})
// synthetic: true so this warning is shown in the TUI but
// excluded when the transcript is replayed to the LLM next turn
// (prompt.ts is said to filter synthetic text parts — originally cited
// as lines 648 and 795; line numbers drift, so verify before relying on them).
// NOTE(review): the part takes its sessionID from input.assistantMessage
// while telemetry/log above use input.sessionID — presumably the same
// value; confirm.
await Session.updatePart({
id: PartID.ascending(),
messageID: input.assistantMessage.id,
sessionID: input.assistantMessage.sessionID,
type: "text",
synthetic: true,
text:
`⚠️ altimate-code: the \`plan\` agent is running on \`${input.model.providerID}/${input.model.id}\`, ` +
`which returned text without calling any tools. If you expected the plan agent to explore the ` +
`codebase, try switching to a model with stronger tool-use via \`/model\`.`,
// Start and end are two back-to-back Date.now() reads: the notice has no real duration.
time: { start: Date.now(), end: Date.now() },
})
}
// altimate_change end
await Session.updatePart({
id: PartID.ascending(),
reason: value.finishReason,
Expand Down
131 changes: 131 additions & 0 deletions packages/opencode/test/session/processor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,137 @@ describe("doom loop detection telemetry", () => {
})
})

// ---------------------------------------------------------------------------
// 4b. Plan-agent tool-call refusal detection
// ---------------------------------------------------------------------------
describe("plan-agent no-tool-generation detection", () => {
/**
 * Test-local stand-in for the plan-agent refusal check performed by the
 * finish-step handler in processor.ts.
 *
 * The caller passes in the two pieces of state the handler keeps (the
 * tool-call counter and the one-shot warning flag) together with the
 * identifiers that end up on the telemetry event.
 *
 * @param opts - Agent name, step finish reason, current state and event identifiers.
 * @returns `event` is the telemetry event that would be tracked, or `null`
 *   when the check does not fire; `warningEmitted` is the value the one-shot
 *   flag holds after the step.
 */
function simulateFinishStep(opts: {
  agent: string
  finishReason: string
  sessionToolCallsMade: number
  planNoToolWarningEmitted: boolean
  sessionID: string
  messageID: string
  modelID: string
  providerID: string
  tokensOutput: number
}): { event: Telemetry.Event | null; warningEmitted: boolean } {
  const { agent, finishReason, sessionToolCallsMade, planNoToolWarningEmitted } = opts

  // Any single failed precondition means nothing fires and the flag is passed through untouched.
  if (agent !== "plan" || finishReason !== "stop" || sessionToolCallsMade !== 0 || planNoToolWarningEmitted) {
    return { event: null, warningEmitted: planNoToolWarningEmitted }
  }

  const refusalEvent: Telemetry.Event = {
    type: "plan_no_tool_generation",
    timestamp: Date.now(),
    session_id: opts.sessionID,
    message_id: opts.messageID,
    model_id: opts.modelID,
    provider_id: opts.providerID,
    finish_reason: finishReason,
    tokens_output: opts.tokensOutput,
  }

  // Firing always leaves the one-shot flag set.
  return { event: refusalEvent, warningEmitted: true }
}

// Identifiers shared by every case; each test supplies only the agent,
// finish reason and state it wants to exercise.
const baseOpts = {
  sessionID: "sess-plan-1",
  messageID: "msg-plan-1",
  modelID: "qwen3-coder-next",
  providerID: "ollama-cloud",
  tokensOutput: 293,
}

test("fires when plan agent stops without tool calls", () => {
  const result = simulateFinishStep({
    ...baseOpts,
    agent: "plan",
    finishReason: "stop",
    sessionToolCallsMade: 0,
    planNoToolWarningEmitted: false,
  })
  const event = result.event
  expect(event).not.toBeNull()
  expect(event?.type).toBe("plan_no_tool_generation")
  // Narrow the event union by its tag before reading variant-specific fields,
  // so the assertions below type-check against exactly this variant.
  if (event?.type !== "plan_no_tool_generation") {
    throw new Error("expected a plan_no_tool_generation event")
  }
  expect(event.session_id).toBe("sess-plan-1")
  expect(event.message_id).toBe("msg-plan-1")
  expect(event.model_id).toBe("qwen3-coder-next")
  expect(event.provider_id).toBe("ollama-cloud")
  expect(event.finish_reason).toBe("stop")
  expect(event.tokens_output).toBe(293)
  expect(result.warningEmitted).toBe(true)
})

test("does not fire when session has already made tool calls", () => {
  const result = simulateFinishStep({
    ...baseOpts,
    agent: "plan",
    finishReason: "stop",
    sessionToolCallsMade: 3,
    planNoToolWarningEmitted: false,
  })
  expect(result.event).toBeNull()
  expect(result.warningEmitted).toBe(false)
})

test("does not fire when finish_reason is tool-calls", () => {
  const result = simulateFinishStep({
    ...baseOpts,
    agent: "plan",
    finishReason: "tool-calls",
    sessionToolCallsMade: 0,
    planNoToolWarningEmitted: false,
  })
  expect(result.event).toBeNull()
  // A step that does not fire must leave the one-shot flag untouched.
  expect(result.warningEmitted).toBe(false)
})

test("does not fire for non-plan agents", () => {
  for (const agent of ["builder", "analyst", "general", "explore"]) {
    const result = simulateFinishStep({
      ...baseOpts,
      agent,
      finishReason: "stop",
      sessionToolCallsMade: 0,
      planNoToolWarningEmitted: false,
    })
    expect(result.event).toBeNull()
    expect(result.warningEmitted).toBe(false)
  }
})

test("fires at most once per session (one-shot flag)", () => {
  const stopWithoutTools = {
    ...baseOpts,
    agent: "plan",
    finishReason: "stop",
    sessionToolCallsMade: 0,
  }

  // The first stop-without-tools step fires and reports the flag as set...
  const first = simulateFinishStep({ ...stopWithoutTools, planNoToolWarningEmitted: false })
  expect(first.event).not.toBeNull()
  expect(first.warningEmitted).toBe(true)

  // ...and threading that flag into an identical second step must suppress it.
  const second = simulateFinishStep({
    ...stopWithoutTools,
    planNoToolWarningEmitted: first.warningEmitted,
  })
  expect(second.event).toBeNull()
  expect(second.warningEmitted).toBe(true)
})

test("does not fire when finish_reason is length/error/other", () => {
  for (const reason of ["length", "error", "content-filter", "unknown"]) {
    const result = simulateFinishStep({
      ...baseOpts,
      agent: "plan",
      finishReason: reason,
      sessionToolCallsMade: 0,
      planNoToolWarningEmitted: false,
    })
    expect(result.event).toBeNull()
    expect(result.warningEmitted).toBe(false)
  }
})
})

// ---------------------------------------------------------------------------
// 5. Generation telemetry
// ---------------------------------------------------------------------------
Expand Down
Loading