Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 90 additions & 2 deletions packages/opencode/src/altimate/telemetry/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,48 @@ import os from "os"

const log = Log.create({ service: "telemetry" })

// altimate_change start — telemetry query reference for Azure App Insights (KQL)
/**
* Telemetry Module — Azure App Insights Integration
*
* QUERYING TELEMETRY DATA (KQL / Log Analytics):
*
* customDimensions → string fields (tool_name, model_id, provider_id, error_class, os, etc.)
* customMeasurements → numeric fields (tokens_input, cost, duration_ms, etc.)
*
* Serialization rules (see toAppInsightsEnvelopes):
* - typeof number → measurements map (customMeasurements)
* - typeof string → properties map (customDimensions)
* - typeof boolean → properties map (as "true"/"false")
* - typeof object → properties map (JSON.stringify)
* - session_id / project_id are lifted into envelope tags, not properties
* - cli_version is injected into every event's properties automatically
*
* Example KQL:
*
* // Token usage per model
* customEvents
* | where name == "generation"
* | extend model = tostring(customDimensions.model_id),
* tokens_in = todouble(customMeasurements.tokens_input),
* tokens_out = todouble(customMeasurements.tokens_output)
* | summarize avg(tokens_in), avg(tokens_out) by model
*
* // Error class distribution
* customEvents
* | where name == "core_failure"
* | extend err = tostring(customDimensions.error_class)
* | summarize count() by err
*/
// altimate_change end

export namespace Telemetry {
const FLUSH_INTERVAL_MS = 5_000
const MAX_BUFFER_SIZE = 200
const REQUEST_TIMEOUT_MS = 10_000

export type Event =
// altimate_change start — add os/arch/node_version for environment segmentation
| {
type: "session_start"
timestamp: number
Expand All @@ -23,7 +59,11 @@ export namespace Telemetry {
provider_id: string
agent: string
project_id: string
os: string
arch: string
node_version: string
}
// altimate_change end
| {
type: "session_end"
timestamp: number
Expand All @@ -48,6 +88,9 @@ export namespace Telemetry {
// No nested objects: Azure App Insights custom measures must be top-level numbers.
tokens_input: number
tokens_output: number
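// altimate_change start — total input tokens including cache-read and cache-write tokens (for providers like Anthropic that exclude cache from tokens_input); optional because the session processor omits it when it equals tokens_input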
tokens_input_total?: number
// altimate_change end
tokens_reasoning?: number // only for reasoning models
tokens_cache_read?: number // only when a cached prompt was reused
tokens_cache_write?: number // only when a new cache entry was written
Expand Down Expand Up @@ -432,7 +475,7 @@ export namespace Telemetry {
session_id: string
tool_name: string
tool_category: string
error_class: "parse_error" | "connection" | "timeout" | "validation" | "internal" | "permission" | "http_error" | "unknown"
error_class: "parse_error" | "connection" | "timeout" | "validation" | "internal" | "permission" | "http_error" | "file_not_found" | "edit_mismatch" | "not_configured" | "resource_exhausted" | "unknown"
error_message: string
input_signature: string
masked_args?: string
Expand Down Expand Up @@ -678,12 +721,44 @@ export namespace Telemetry {
"sasl",
"scram",
"password must be",
],
},
// altimate_change start — split not_configured out of connection for clearer triage
{
// Keywords for errors caused by missing setup (warehouse, driver, or connection not configured).
// Each keyword is listed exactly once.
class: "not_configured",
keywords: [
"no warehouse configured",
"driver not installed",
"not found. available:",
"unsupported database type",
"warehouse not configured",
"connection not configured",
],
},
// altimate_change end
// altimate_change start — file_not_found class for file system errors
{
class: "file_not_found",
keywords: [
"file not found",
"no such file",
"enoent",
"directory not found",
"path not found",
"file does not exist",
],
},
// altimate_change end
// altimate_change start — edit_mismatch class for edit tool failures
{
class: "edit_mismatch",
keywords: [
"could not find oldstring",
"no changes to apply",
"oldstring and newstring are identical",
],
},
// altimate_change end
{ class: "timeout", keywords: ["timeout", "etimedout", "bridge timeout", "timed out"] },
{ class: "permission", keywords: ["permission", "access denied", "permission denied", "unauthorized", "forbidden", "authentication"] },
{
Expand All @@ -700,6 +775,19 @@ export namespace Telemetry {
],
},
{ class: "internal", keywords: ["internal", "assertion"] },
// altimate_change start — resource_exhausted class for OOM/quota errors
{
class: "resource_exhausted",
keywords: [
"out of memory",
"resource limit",
"quota exceeded",
"disk i/o",
"enomem",
"heap out of memory",
],
},
// altimate_change end
{
class: "http_error",
keywords: ["status code: 4", "status code: 5", "request failed with status"],
Expand Down
3 changes: 3 additions & 0 deletions packages/opencode/src/session/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,9 @@ export namespace Session {
const tokens = {
total,
input: adjustedInputTokens,
// altimate_change start — inputTotal = adjusted input tokens + cache-read tokens + cache-write tokens, so telemetry can report the full input size
inputTotal: adjustedInputTokens + cacheReadInputTokens + cacheWriteInputTokens,
// altimate_change end
output: outputTokens,
reasoning: reasoningTokens,
cache: {
Expand Down
42 changes: 42 additions & 0 deletions packages/opencode/src/session/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ import { Telemetry } from "@/altimate/telemetry"

export namespace SessionProcessor {
const DOOM_LOOP_THRESHOLD = 3
// altimate_change start — per-tool repeat threshold to catch varied-input loops (e.g. todowrite 2,080x)
// Legitimate tool use rarely exceeds 20-25 calls per tool per session.
// 30 catches pathological patterns while avoiding false positives for power users.
const TOOL_REPEAT_THRESHOLD = 30
// altimate_change end
const log = Log.create({ service: "session.processor" })

export type Info = Awaited<ReturnType<typeof create>>
Expand All @@ -34,6 +39,9 @@ export namespace SessionProcessor {
abort: AbortSignal
}) {
const toolcalls: Record<string, MessageV2.ToolPart> = {}
// altimate_change start — per-tool call counter for varied-input loop detection
const toolCallCounts: Record<string, number> = {}
// altimate_change end
let snapshot: string | undefined
let blocked = false
let attempt = 0
Expand Down Expand Up @@ -181,6 +189,37 @@ export namespace SessionProcessor {
ruleset: agent.permission,
})
}

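// altimate_change start — per-tool repeat counter (catches varied-input loops like todowrite 2,080x)
// Counter is scoped to the processor lifetime (create() call), so it accumulates
// across multiple process() invocations within a session. This is intentional:
// cross-turn accumulation catches slow-burn loops that stay under the threshold
// per-turn but add up over the session.
// Once the threshold is reached, a doom_loop_detected event is tracked and the user is
// asked for permission; the counter then resets to 0, so the prompt recurs every
// TOOL_REPEAT_THRESHOLD calls of the same tool rather than on every subsequent call.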
toolCallCounts[value.toolName] = (toolCallCounts[value.toolName] ?? 0) + 1
if (toolCallCounts[value.toolName] >= TOOL_REPEAT_THRESHOLD) {
Telemetry.track({
type: "doom_loop_detected",
timestamp: Date.now(),
session_id: input.sessionID,
tool_name: value.toolName,
repeat_count: toolCallCounts[value.toolName],
})
const agent = await Agent.get(input.assistantMessage.agent)
await PermissionNext.ask({
permission: "doom_loop",
patterns: [value.toolName],
sessionID: input.assistantMessage.sessionID,
metadata: {
tool: value.toolName,
input: value.input,
repeat_count: toolCallCounts[value.toolName],
},
always: [value.toolName],
ruleset: agent.permission,
})
toolCallCounts[value.toolName] = 0
}
// altimate_change end
}
break
}
Expand Down Expand Up @@ -275,6 +314,9 @@ export namespace SessionProcessor {
duration_ms: Date.now() - stepStartTime,
tokens_input: usage.tokens.input,
tokens_output: usage.tokens.output,
// altimate_change start — include total input tokens (with cache) when they differ from tokens_input
...(usage.tokens.inputTotal !== usage.tokens.input && { tokens_input_total: usage.tokens.inputTotal }),
// altimate_change end
...(value.usage.reasoningTokens !== undefined && { tokens_reasoning: usage.tokens.reasoning }),
...(value.usage.cachedInputTokens !== undefined && { tokens_cache_read: usage.tokens.cache.read }),
...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }),
Expand Down
3 changes: 3 additions & 0 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,9 @@ export namespace SessionPrompt {
provider_id: model.providerID,
agent: lastUser.agent,
project_id: Instance.project?.id ?? "",
os: process.platform,
arch: process.arch,
node_version: process.version,
})
// altimate_change start — task intent classification (keyword/regex, zero LLM cost)
const userMsg = msgs.find((m) => m.info.id === lastUser!.id)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// @ts-nocheck
/**
* Integration tests for the 7 telemetry moat signals.
* Integration tests for the 7 telemetry signals.
*
* These tests verify that events actually fire through real code paths,
* not just that the type definitions compile or utility functions work.
Expand Down Expand Up @@ -739,6 +739,9 @@ describe("Full E2E session simulation", () => {
provider_id: "anthropic",
agent: "default",
project_id: "test",
os: "linux",
arch: "x64",
node_version: "v22.0.0",
})

// 2. task_classified
Expand Down
Loading
Loading