
Commit 0962531

timvisher-dd and claude committed
fix: use current model's context window for usage_update size
The original code took Math.min across all modelUsage entries, which reported a 200k context window when subagents (Sonnet/Haiku) were mixed with Opus 1M. Track the top-level assistant model and look up its context window specifically.

The SDK's streaming path keys modelUsage by the requested model alias (e.g. "claude-opus-4-6") while BetaMessage.model on assistant messages has the resolved API response model (e.g. "claude-opus-4-6-20250514"). Fall back to prefix matching (preferring the longest/most-specific match) when exact key lookup misses.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
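For reference, here is the lookup at the heart of this change pulled out of the diff below into a standalone sketch. The function name resolveContextWindow and the trimmed ModelUsage interface are illustrative only; the real SDK type carries more fields (token counts, cost, maxOutputTokens), and the shipped code inlines this logic rather than calling a helper.

// Sketch of the context-window lookup described above (hypothetical helper,
// not the literal shipped code). Assumes `modelUsage` values expose at least
// a `contextWindow` field, as the SDK result message does.
interface ModelUsage {
  contextWindow: number;
}

function resolveContextWindow(
  modelUsage: Record<string, ModelUsage>,
  lastAssistantModel: string | null,
): number {
  if (lastAssistantModel !== null) {
    // Exact match first: modelUsage may be keyed by the requested alias.
    const exact = modelUsage[lastAssistantModel];
    if (exact) return exact.contextWindow;
    // Otherwise prefix-match in either direction ("claude-opus-4-6" vs.
    // "claude-opus-4-6-20250514"), preferring the longest (most specific) key.
    const byPrefix = Object.entries(modelUsage)
      .filter(
        ([key]) =>
          lastAssistantModel.startsWith(key) ||
          key.startsWith(lastAssistantModel),
      )
      .sort((a, b) => b[0].length - a[0].length)[0]?.[1];
    if (byPrefix) return byPrefix.contextWindow;
  }
  // Conservative fallback when no model was tracked or nothing matched.
  return 200000;
}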
1 parent c13edf3 commit 0962531

2 files changed

Lines changed: 318 additions & 4 deletions

src/acp-agent.ts

Lines changed: 34 additions & 4 deletions
@@ -478,6 +478,7 @@ export class ClaudeAcpAgent implements Agent {
     };

     let lastAssistantTotalUsage: number | null = null;
+    let lastAssistantModel: string | null = null;

     const userMessage = promptToClaude(params);

@@ -576,10 +577,25 @@
       session.accumulatedUsage.cachedReadTokens += message.usage.cache_read_input_tokens;
       session.accumulatedUsage.cachedWriteTokens += message.usage.cache_creation_input_tokens;

-      // Calculate context window size from modelUsage (minimum across all models used)
-      const contextWindows = Object.values(message.modelUsage).map((m) => m.contextWindow);
-      const contextWindowSize =
-        contextWindows.length > 0 ? Math.min(...contextWindows) : 200000;
+      // Calculate context window size from the current model's usage.
+      // The modelUsage keys may use the requested model alias (e.g. "claude-opus-4-6")
+      // while message.model on assistant messages has the resolved API response model
+      // (e.g. "claude-opus-4-6-20250514"), so we fall back to prefix matching.
+      const currentModel = lastAssistantModel;
+      const matchingModelUsage = currentModel
+        ? message.modelUsage[currentModel] ??
+          Object.entries(message.modelUsage)
+            .filter(
+              ([key]) =>
+                currentModel.startsWith(key) || key.startsWith(currentModel),
+            )
+            .sort((a, b) => b[0].length - a[0].length)[0]?.[1]
+        : undefined;
+      // Fallback to 200k: this is hit when lastAssistantModel is null (e.g. the
+      // assistant message lacked a model field) or no modelUsage key matches.
+      // 200k is a conservative default — the Anthropic API should always populate
+      // BetaMessage.model, so this path is unlikely in practice.
+      const contextWindowSize = matchingModelUsage?.contextWindow ?? 200000;

       // Send usage_update notification
       if (lastAssistantTotalUsage !== null) {
@@ -690,6 +706,11 @@
       }

       // Store latest assistant usage (excluding subagents)
+      // Sum all token types as a proxy for post-turn context occupancy:
+      // current turn's output will become next turn's input.
+      // Note: per the Anthropic API, input_tokens excludes cache tokens —
+      // cache_read and cache_creation are reported separately, so summing
+      // all four fields is not double-counting.
       if ((message.message as any).usage && message.parent_tool_use_id === null) {
         const messageWithUsage = message.message as unknown as SDKResultMessage;
         lastAssistantTotalUsage =
@@ -698,6 +719,15 @@
           messageWithUsage.usage.cache_read_input_tokens +
           messageWithUsage.usage.cache_creation_input_tokens;
       }
+      // Track the current top-level model for context window size lookup
+      // (exclude subagent messages to stay in sync with lastAssistantTotalUsage)
+      if (
+        message.type === "assistant" &&
+        message.parent_tool_use_id === null &&
+        message.message.model
+      ) {
+        lastAssistantModel = message.message.model;
+      }

       // Slash commands like /compact can generate invalid output... doesn't match
       // their own docs: https://docs.anthropic.com/en/docs/claude-code/sdk/sdk-slash-commands#%2Fcompact-compact-conversation-history

src/tests/acp-agent.test.ts

Lines changed: 284 additions & 0 deletions
@@ -1416,3 +1416,287 @@ describe("stop reason propagation", () => {
     ).rejects.toThrow("Internal error");
   });
 });
+
+describe("usage_update computation", () => {
+  function createAssistantMessage(overrides: {
+    model: string;
+    usage?: { input_tokens: number; output_tokens: number; cache_read_input_tokens: number; cache_creation_input_tokens: number };
+  }) {
+    return {
+      type: "assistant" as const,
+      parent_tool_use_id: null,
+      uuid: randomUUID(),
+      session_id: "test-session",
+      message: {
+        model: overrides.model,
+        content: [{ type: "text", text: "hello" }],
+        usage: overrides.usage ?? {
+          input_tokens: 100,
+          output_tokens: 50,
+          cache_read_input_tokens: 20,
+          cache_creation_input_tokens: 10,
+        },
+      },
+    };
+  }
+
+  function createResultMessageWithModel(overrides: {
+    modelUsage: Record<string, { inputTokens: number; outputTokens: number; cacheReadInputTokens: number; cacheCreationInputTokens: number; webSearchRequests: number; costUSD: number; contextWindow: number; maxOutputTokens: number }>;
+  }) {
+    return {
+      type: "result" as const,
+      subtype: "success" as const,
+      stop_reason: "end_turn",
+      is_error: false,
+      result: "",
+      errors: [],
+      duration_ms: 0,
+      duration_api_ms: 0,
+      num_turns: 1,
+      total_cost_usd: 0.01,
+      usage: {
+        input_tokens: 10,
+        output_tokens: 5,
+        cache_read_input_tokens: 0,
+        cache_creation_input_tokens: 0,
+      },
+      modelUsage: overrides.modelUsage,
+      permission_denials: [],
+      uuid: randomUUID(),
+      session_id: "test-session",
+    };
+  }
+
+  function createMockAgentWithCapture() {
+    const updates: any[] = [];
+    const mockClient = {
+      sessionUpdate: async (notification: any) => {
+        updates.push(notification);
+      },
+    } as unknown as AgentSideConnection;
+    const agent = new ClaudeAcpAgent(mockClient, { log: () => {}, error: () => {} });
+    return { agent, updates };
+  }
+
+  function injectSession(agent: ClaudeAcpAgent, messages: any[]) {
+    const gen = (function* () { yield* messages; })();
+    agent.sessions["test-session"] = {
+      query: gen as any,
+      input: new Pushable(),
+      cancelled: false,
+      cwd: "/test",
+      permissionMode: "default",
+      settingsManager: {} as any,
+      accumulatedUsage: {
+        inputTokens: 0,
+        outputTokens: 0,
+        cachedReadTokens: 0,
+        cachedWriteTokens: 0,
+      },
+      configOptions: [],
+      promptRunning: false,
+      pendingMessages: new Map(),
+      nextPendingOrder: 0,
+    };
+  }
+
+  it("used sums all token types as post-turn context occupancy proxy", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    injectSession(agent, [
+      createAssistantMessage({
+        model: "claude-opus-4-20250514",
+        usage: { input_tokens: 1000, output_tokens: 500, cache_read_input_tokens: 200, cache_creation_input_tokens: 100 },
+      }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-20250514": {
+            inputTokens: 1000, outputTokens: 500, cacheReadInputTokens: 200,
+            cacheCreationInputTokens: 100, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // used = input(1000) + output(500) + cache_read(200) + cache_creation(100) = 1800
+    expect(usageUpdate.update.used).toBe(1800);
+  });
+
+  it("size reflects the current model's context window, not min across all", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-opus-4-20250514" }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-20250514": {
+            inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 20,
+            cacheCreationInputTokens: 10, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+          "claude-sonnet-4-20250514": {
+            inputTokens: 50, outputTokens: 25, cacheReadInputTokens: 10,
+            cacheCreationInputTokens: 5, webSearchRequests: 0, costUSD: 0.005,
+            contextWindow: 200000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // size should be 1000000 (Opus), not 200000 (min of both)
+    expect(usageUpdate.update.size).toBe(1000000);
+  });
+
+  it("after model switch, size updates to the new model's window", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    // Simulate: assistant on Sonnet with both models in modelUsage
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-sonnet-4-20250514" }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-20250514": {
+            inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 20,
+            cacheCreationInputTokens: 10, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+          "claude-sonnet-4-20250514": {
+            inputTokens: 50, outputTokens: 25, cacheReadInputTokens: 10,
+            cacheCreationInputTokens: 5, webSearchRequests: 0, costUSD: 0.005,
+            contextWindow: 200000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // size should be 200000 (Sonnet - the current model)
+    expect(usageUpdate.update.size).toBe(200000);
+  });
+
+  it("after switching back to original model, size returns to original window", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    // Last assistant message is Opus again
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-sonnet-4-20250514" }),
+      createAssistantMessage({ model: "claude-opus-4-20250514" }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-20250514": {
+            inputTokens: 200, outputTokens: 100, cacheReadInputTokens: 40,
+            cacheCreationInputTokens: 20, webSearchRequests: 0, costUSD: 0.02,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+          "claude-sonnet-4-20250514": {
+            inputTokens: 50, outputTokens: 25, cacheReadInputTokens: 10,
+            cacheCreationInputTokens: 5, webSearchRequests: 0, costUSD: 0.005,
+            contextWindow: 200000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // size should be 1000000 (Opus - switched back)
+    expect(usageUpdate.update.size).toBe(1000000);
+  });
+
+  it("subagent assistant messages do not affect size (top-level model is used)", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    // Top-level assistant on Opus, then subagent on Haiku (parent_tool_use_id set)
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-opus-4-20250514" }),
+      {
+        type: "assistant" as const,
+        parent_tool_use_id: "tool_use_123",
+        uuid: randomUUID(),
+        session_id: "test-session",
+        message: {
+          model: "claude-haiku-4-5-20251001",
+          content: [{ type: "text", text: "subagent response" }],
+          usage: { input_tokens: 50, output_tokens: 25, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 },
+        },
+      },
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-20250514": {
+            inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 20,
+            cacheCreationInputTokens: 10, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+          "claude-haiku-4-5-20251001": {
+            inputTokens: 50, outputTokens: 25, cacheReadInputTokens: 0,
+            cacheCreationInputTokens: 0, webSearchRequests: 0, costUSD: 0.001,
+            contextWindow: 200000, maxOutputTokens: 8192,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // size should be 1000000 (Opus - the top-level model), NOT 200000 (Haiku subagent)
+    expect(usageUpdate.update.size).toBe(1000000);
+  });
+
+  it("prefix-matches when assistant model has date suffix but modelUsage key does not", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    // The API response has the full versioned model ID on assistant messages,
+    // but the SDK's streaming path may key modelUsage by the shorter alias.
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-opus-4-6-20250514" }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-6": {
+            inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 20,
+            cacheCreationInputTokens: 10, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    // Should match via prefix: "claude-opus-4-6-20250514".startsWith("claude-opus-4-6")
+    expect(usageUpdate.update.size).toBe(1000000);
+  });
+
+  it("prefix-matches when modelUsage key has date suffix but assistant model does not", async () => {
+    const { agent, updates } = createMockAgentWithCapture();
+    injectSession(agent, [
+      createAssistantMessage({ model: "claude-opus-4-6" }),
+      createResultMessageWithModel({
+        modelUsage: {
+          "claude-opus-4-6-20250514": {
+            inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 20,
+            cacheCreationInputTokens: 10, webSearchRequests: 0, costUSD: 0.01,
+            contextWindow: 1000000, maxOutputTokens: 16384,
+          },
+        },
+      }),
+    ]);
+
+    await agent.prompt({ sessionId: "test-session", prompt: [{ type: "text", text: "test" }] });
+
+    const usageUpdate = updates.find((u: any) => u.update?.sessionUpdate === "usage_update");
+    expect(usageUpdate).toBeDefined();
+    expect(usageUpdate.update.size).toBe(1000000);
+  });
+});
