Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- **Token breakdown: decompose the "in" headline into fresh vs cached.** The headline
"NN in" is dominated by cheap `cache_read` tokens; a new `Input X new · Y cached`
line separates freshly-billed input (`input + cache_creation`) from cache reads, so the
cost line is interpretable (a big "in" at 97% cache is mostly $1.50/M reads, not $15/M
fresh input). Applied to the single-session and live views.

### Fixed

- **Token dedup: fall back to `message.id` when `requestId` is absent.** Streaming
assistant chunks share a `requestId`, and `deduplicateAssistant` collapses them so
their (identical) usage is counted once — without that collapse, token counts inflate
~2-3×. Previously, assistant rows that *omit* `requestId` (older Claude Code versions /
partial logs) bypassed the grouping entirely and re-introduced that inflation. Those
rows still share `message.id`, so dedup now keys on `requestId ?? message.id`. Rows with
neither key pass through unchanged.

## [1.0.0] - 2026-02-18

### Added
Expand Down
16 changes: 16 additions & 0 deletions src/formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,19 @@ describe('formatter: insights', () => {
expect(output).toContain('Warmup overhead');
});
});

// Checks the token section rendered by formatSession: an "Input" line that
// splits the headline into new vs cached tokens, alongside the unchanged
// "in" headline total.
describe('formatSession: input decomposed into fresh vs cached', () => {
  // Both cases render the default makeAnalysis() fixture and pass the result
  // through strip() before matching (presumably removing terminal styling —
  // confirm against the helper).
  const render = () => strip(formatSession(makeAnalysis()));

  it('shows freshly-billed input (input + cache writes) and cache reads separately', () => {
    // Per the fixture: input 5K + cacheCreation 10K = 15K fresh; cacheRead 50K cached.
    const rendered = render();
    for (const fragment of ['Input', '15.0K new', '50.0K cached']) {
      expect(rendered).toContain(fragment);
    }
  });

  it('the headline "in" remains the full context total (fresh + cached)', () => {
    // input + cacheRead + cacheCreation = 65K — the figure a user reads as scale.
    expect(render()).toContain('65.0K in');
  });
});
8 changes: 8 additions & 0 deletions src/formatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ export function formatSession(analysis: SessionAnalysis): string {
const inBar = Math.round(inFrac * BAR_WIDTH);
const outBar = BAR_WIDTH - inBar;
lines.push(` Tokens ${chalk.cyan('\u2588'.repeat(inBar))}${chalk.green('\u2588'.repeat(outBar))} ${chalk.cyan(formatTokens(tokens.input + tokens.cacheRead + tokens.cacheCreation) + ' in')} ${chalk.green(formatTokens(tokens.output) + ' out')}`);
// Decompose "in": the headline is dominated by cheap cache reads. Show the
// freshly-billed input (input + cache writes) separately so the cost line
// is interpretable (a huge "in" at 97% cache is mostly $1.50/M reads, not
// $15/M fresh input).
const freshIn = tokens.input + tokens.cacheCreation;
lines.push(` Input ${chalk.gray(formatTokens(freshIn) + ' new')} ${chalk.gray('\u00b7')} ${chalk.gray(formatTokens(tokens.cacheRead) + ' cached')}`);
}
const cachePct = Math.round(analysis.cacheHitRate * 100);
lines.push(` Cache ${renderCacheBar(analysis.cacheHitRate)} ${cachePct}% hit`);
Expand Down Expand Up @@ -263,6 +269,8 @@ export function formatSessionLive(analysis: SessionAnalysis): string {
const inBar = Math.round(inFrac * BAR_WIDTH);
const outBar = BAR_WIDTH - inBar;
lines.push(` Tokens ${chalk.cyan('\u2588'.repeat(inBar))}${chalk.green('\u2588'.repeat(outBar))} ${chalk.cyan(formatTokens(liveTok.input + liveTok.cacheRead + liveTok.cacheCreation) + ' in')} ${chalk.green(formatTokens(liveTok.output) + ' out')}`);
const liveFreshIn = liveTok.input + liveTok.cacheCreation;
lines.push(` Input ${chalk.gray(formatTokens(liveFreshIn) + ' new')} ${chalk.gray('\u00b7')} ${chalk.gray(formatTokens(liveTok.cacheRead) + ' cached')}`);
}
const liveCachePct = Math.round(analysis.cacheHitRate * 100);
lines.push(` Cache ${renderCacheBar(analysis.cacheHitRate)} ${liveCachePct}% hit`);
Expand Down
30 changes: 30 additions & 0 deletions src/parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ describe('parser: token deduplication', () => {
expect(assistants).toHaveLength(2);
});

it('should merge chunks that lack requestId by falling back to message.id', async () => {
  // The three assistant rows below carry no requestId but share message.id
  // 'msg_1'. The parser is expected to fold them into a single assistant
  // message; otherwise their usage would be summed downstream (the ~2-3x
  // inflation).
  setup();

  // Usage is the same on every chunk except for the output token count.
  const usage = (outputTokens: number) => ({
    input_tokens: 100,
    output_tokens: outputTokens,
    cache_read_input_tokens: 500,
    cache_creation_input_tokens: 0,
  });

  const transcriptPath = writeJsonl('dedup-no-reqid.jsonl', [
    {
      type: 'user',
      timestamp: '2026-01-01T00:00:00Z',
      uuid: 'u1',
      message: { role: 'user', content: 'hello' },
    },
    {
      type: 'assistant',
      timestamp: '2026-01-01T00:00:01Z',
      uuid: 'a1',
      message: {
        id: 'msg_1',
        role: 'assistant',
        model: 'claude-opus-4-6',
        usage: usage(10),
        content: [{ type: 'text', text: 'chunk1' }],
      },
    },
    {
      type: 'assistant',
      timestamp: '2026-01-01T00:00:02Z',
      uuid: 'a2',
      message: {
        id: 'msg_1',
        role: 'assistant',
        model: 'claude-opus-4-6',
        usage: usage(50),
        content: [{ type: 'text', text: 'chunk2' }],
      },
    },
    {
      type: 'assistant',
      timestamp: '2026-01-01T00:00:03Z',
      uuid: 'a3',
      message: {
        id: 'msg_1',
        role: 'assistant',
        model: 'claude-opus-4-6',
        usage: usage(80),
        content: [{ type: 'tool_use', id: 'tu_1', name: 'Read' }],
      },
    },
  ]);

  const assistants = (await parseSession(transcriptPath)).filter(
    (entry) => entry.type === 'assistant',
  );

  // One merged row, reporting the output token count of the final chunk.
  expect(assistants).toHaveLength(1);
  expect(assistants[0].message?.usage?.output_tokens).toBe(80);
});

it('should NOT merge no-requestId assistants with different message.ids', async () => {
  // Two assistant rows without requestId and with distinct message ids
  // ('msg_1', 'msg_2') must stay separate after parsing.
  setup();

  // Neither row touches the cache; only input/output counts differ.
  const usage = (inputTokens: number, outputTokens: number) => ({
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_read_input_tokens: 0,
    cache_creation_input_tokens: 0,
  });

  const transcriptPath = writeJsonl('no-dedup-msgid.jsonl', [
    {
      type: 'assistant',
      timestamp: '2026-01-01T00:00:01Z',
      uuid: 'a1',
      message: {
        id: 'msg_1',
        role: 'assistant',
        model: 'claude-opus-4-6',
        usage: usage(100, 50),
        content: [{ type: 'text', text: 'response 1' }],
      },
    },
    {
      type: 'assistant',
      timestamp: '2026-01-01T00:00:03Z',
      uuid: 'a2',
      message: {
        id: 'msg_2',
        role: 'assistant',
        model: 'claude-opus-4-6',
        usage: usage(200, 60),
        content: [{ type: 'text', text: 'response 2' }],
      },
    },
  ]);

  const assistants = (await parseSession(transcriptPath)).filter(
    (entry) => entry.type === 'assistant',
  );
  expect(assistants).toHaveLength(2);
});

it('should handle empty file', async () => {
setup();
const path = writeJsonl('empty.jsonl', []);
Expand Down
43 changes: 25 additions & 18 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,45 +65,52 @@ export async function parseSessionFrom(
/**
 * Group assistant messages by request, merge their chunks, keep a single usage.
 * Streaming chunks share a requestId but each reports the same usage — counting
 * all of them inflates tokens ~2-3x.
 *
 * Grouping key: `requestId ?? message.id`. When a transcript omits `requestId`
 * on assistant rows (older Claude Code versions, or partial logs), the streaming
 * chunks still share `message.id`, so that id is used as the fallback key.
 * Without the fallback, un-keyed rows would pass straight through and
 * re-introduce the very inflation this function exists to prevent.
 *
 * Ordering: pending groups are emitted, in first-seen order, immediately before
 * the next message that cannot be grouped (a non-assistant message, or an
 * assistant row with neither key), and once more at the end of the input.
 *
 * @param messages - Session messages in transcript order.
 * @returns A new array in which each run of same-key assistant rows is replaced
 *   by one merged message; every other message is passed through as-is.
 */
function deduplicateAssistant(messages: SessionMessage[]): SessionMessage[] {
  const result: SessionMessage[] = [];
  // A Map iterates in insertion order, so it doubles as the record of the
  // order in which groups were first seen — no parallel key list is needed.
  const pending = new Map<string, SessionMessage>();

  // Emit every pending group in first-seen order, then start a fresh batch.
  const flush = (): void => {
    for (const merged of pending.values()) {
      result.push(merged);
    }
    pending.clear();
  };

  for (const msg of messages) {
    const key = msg.type === 'assistant' ? (msg.requestId ?? msg.message?.id) : undefined;
    if (!key) {
      // Non-assistant message, or an assistant row with no usable group key:
      // flush any pending group (preserve ordering) and pass through as-is.
      flush();
      result.push(msg);
      continue;
    }

    const existing = pending.get(key);
    if (existing) {
      // Later chunk of a group already started: fold it into the stored clone.
      mergeAssistantChunk(existing, msg);
      continue;
    }

    // First chunk of a group: store a clone with its own copy of the content,
    // so later merges operate on the clone rather than on the input row.
    pending.set(key, {
      ...msg,
      message: msg.message
        ? { ...msg.message, content: msg.message.content ? copyContent(msg.message.content) : undefined }
        : undefined,
    });
  }

  flush();
  return result;
}

Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export interface SessionMessage {
parentUuid?: string | null;
isSidechain?: boolean;
message?: {
id?: string;
role?: string;
model?: string;
usage?: TokenUsage;
Expand Down