dioptx · wan-huiyan · May 29, 2026 · May 29, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Added
+
+- **Token breakdown: decompose the "in" headline into fresh vs cached.** The headline
+  "NN in" is dominated by cheap `cache_read` tokens; a new `Input  X new · Y cached`
+  line separates freshly-billed input (`input + cache_creation`) from cache reads, so the
+  cost line is interpretable (a big "in" at 97% cache is mostly $1.50/M reads, not $15/M
+  fresh input). Applied to the single-session and live views.
+
+### Fixed
+
+- **Token dedup: fall back to `message.id` when `requestId` is absent.** Streaming
+  assistant chunks share a `requestId`, and `deduplicateAssistant` collapses them so
+  their repeated usage is counted once; without that collapse, tokens inflate ~2-3×.
+  Assistant rows that *omit* `requestId` (older Claude Code versions / partial logs)
+  previously bypassed the grouping entirely and re-introduced that inflation. Such
+  chunks still share `message.id`, so dedup now groups by `requestId`, falling back to
+  `message.id` when it is missing. Rows with neither key pass through unchanged.
+
 ## [1.0.0] - 2026-02-18
 
 ### Added

diff --git a/src/formatter.test.ts b/src/formatter.test.ts
@@ -413,3 +413,19 @@ describe('formatter: insights', () => {
     expect(output).toContain('Warmup overhead');
   });
 });
+
+describe('formatSession: input decomposed into fresh vs cached', () => {
+  it('shows freshly-billed input (input + cache writes) and cache reads separately', () => {
+    // input 5K + cacheCreation 10K = 15K fresh; cacheRead 50K cached.
+    const output = strip(formatSession(makeAnalysis()));
+    expect(output).toContain('Input');
+    expect(output).toContain('15.0K new');
+    expect(output).toContain('50.0K cached');
+  });
+
+  it('the headline "in" remains the full context total (fresh + cached)', () => {
+    // input+cacheRead+cacheCreation = 65K — the line a user sees as scale.
+    const output = strip(formatSession(makeAnalysis()));
+    expect(output).toContain('65.0K in');
+  });
+});
diff --git a/src/formatter.ts b/src/formatter.ts
@@ -162,6 +162,12 @@ export function formatSession(analysis: SessionAnalysis): string {
     const inBar = Math.round(inFrac * BAR_WIDTH);
     const outBar = BAR_WIDTH - inBar;
     lines.push(` Tokens  ${chalk.cyan('\u2588'.repeat(inBar))}${chalk.green('\u2588'.repeat(outBar))}  ${chalk.cyan(formatTokens(tokens.input + tokens.cacheRead + tokens.cacheCreation) + ' in')} ${chalk.green(formatTokens(tokens.output) + ' out')}`);
+    // Decompose "in": the headline is dominated by cheap cache reads. Show the
+    // freshly-billed input (input + cache writes) separately so the cost line
+    // is interpretable (a huge "in" at 97% cache is mostly $1.50/M reads, not
+    // $15/M fresh input).
+    const freshIn = tokens.input + tokens.cacheCreation;
+    lines.push(` Input   ${chalk.gray(formatTokens(freshIn) + ' new')} ${chalk.gray('\u00b7')} ${chalk.gray(formatTokens(tokens.cacheRead) + ' cached')}`);
   }
   const cachePct = Math.round(analysis.cacheHitRate * 100);
   lines.push(` Cache   ${renderCacheBar(analysis.cacheHitRate)}  ${cachePct}% hit`);
@@ -263,6 +269,8 @@ export function formatSessionLive(analysis: SessionAnalysis): string {
     const inBar = Math.round(inFrac * BAR_WIDTH);
     const outBar = BAR_WIDTH - inBar;
     lines.push(` Tokens  ${chalk.cyan('\u2588'.repeat(inBar))}${chalk.green('\u2588'.repeat(outBar))}  ${chalk.cyan(formatTokens(liveTok.input + liveTok.cacheRead + liveTok.cacheCreation) + ' in')} ${chalk.green(formatTokens(liveTok.output) + ' out')}`);
+    const liveFreshIn = liveTok.input + liveTok.cacheCreation;
+    lines.push(` Input   ${chalk.gray(formatTokens(liveFreshIn) + ' new')} ${chalk.gray('\u00b7')} ${chalk.gray(formatTokens(liveTok.cacheRead) + ' cached')}`);
   }
   const liveCachePct = Math.round(analysis.cacheHitRate * 100);
   lines.push(` Cache   ${renderCacheBar(analysis.cacheHitRate)}  ${liveCachePct}% hit`);

diff --git a/src/parser.test.ts b/src/parser.test.ts
@@ -58,6 +58,36 @@ describe('parser: token deduplication', () => {
     expect(assistants).toHaveLength(2);
   });
 
+  it('should merge chunks that lack requestId by falling back to message.id', async () => {
+    // Some transcripts omit requestId on assistant rows; the streaming chunks
+    // still share message.id. Without the fallback these would NOT be merged and
+    // each chunk's repeated usage would be summed downstream — the ~2-3x inflation.
+    setup();
+    const path = writeJsonl('dedup-no-reqid.jsonl', [
+      { type: 'user', timestamp: '2026-01-01T00:00:00Z', uuid: 'u1', message: { role: 'user', content: 'hello' } },
+      { type: 'assistant', timestamp: '2026-01-01T00:00:01Z', uuid: 'a1', message: { id: 'msg_1', role: 'assistant', model: 'claude-opus-4-6', usage: { input_tokens: 100, output_tokens: 10, cache_read_input_tokens: 500, cache_creation_input_tokens: 0 }, content: [{ type: 'text', text: 'chunk1' }] } },
+      { type: 'assistant', timestamp: '2026-01-01T00:00:02Z', uuid: 'a2', message: { id: 'msg_1', role: 'assistant', model: 'claude-opus-4-6', usage: { input_tokens: 100, output_tokens: 50, cache_read_input_tokens: 500, cache_creation_input_tokens: 0 }, content: [{ type: 'text', text: 'chunk2' }] } },
+      { type: 'assistant', timestamp: '2026-01-01T00:00:03Z', uuid: 'a3', message: { id: 'msg_1', role: 'assistant', model: 'claude-opus-4-6', usage: { input_tokens: 100, output_tokens: 80, cache_read_input_tokens: 500, cache_creation_input_tokens: 0 }, content: [{ type: 'tool_use', id: 'tu_1', name: 'Read' }] } },
+    ]);
+
+    const messages = await parseSession(path);
+    const assistants = messages.filter(m => m.type === 'assistant');
+    expect(assistants).toHaveLength(1);
+    expect(assistants[0].message?.usage?.output_tokens).toBe(80);
+  });
+
+  it('should NOT merge no-requestId assistants with different message.ids', async () => {
+    setup();
+    const path = writeJsonl('no-dedup-msgid.jsonl', [
+      { type: 'assistant', timestamp: '2026-01-01T00:00:01Z', uuid: 'a1', message: { id: 'msg_1', role: 'assistant', model: 'claude-opus-4-6', usage: { input_tokens: 100, output_tokens: 50, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 }, content: [{ type: 'text', text: 'response 1' }] } },
+      { type: 'assistant', timestamp: '2026-01-01T00:00:03Z', uuid: 'a2', message: { id: 'msg_2', role: 'assistant', model: 'claude-opus-4-6', usage: { input_tokens: 200, output_tokens: 60, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 }, content: [{ type: 'text', text: 'response 2' }] } },
+    ]);
+
+    const messages = await parseSession(path);
+    const assistants = messages.filter(m => m.type === 'assistant');
+    expect(assistants).toHaveLength(2);
+  });
+
   it('should handle empty file', async () => {
     setup();
     const path = writeJsonl('empty.jsonl', []);

diff --git a/src/parser.ts b/src/parser.ts
@@ -65,45 +65,52 @@ export async function parseSessionFrom(
 /**
  * Group assistant messages by requestId, merge content arrays, keep single usage.
  * Streaming chunks share a requestId but each reports the same usage — counting
- * all of them inflates tokens ~3x.
+ * all of them inflates tokens ~2-3x.
+ *
+ * Fallback: when a transcript omits `requestId` on assistant rows (older Claude
+ * Code versions, or partial logs), the streaming chunks still share `message.id`,
+ * so we group by `requestId || message.id`; an empty `requestId` counts as absent.
+ * Without this, those un-keyed rows pass straight through and re-introduce the very
+ * inflation this function exists to prevent. Rows with neither key pass through.
  */
 function deduplicateAssistant(messages: SessionMessage[]): SessionMessage[] {
   const result: SessionMessage[] = [];
   const requestMap = new Map<string, SessionMessage>();
   const requestOrder: string[] = [];
 
+  const flush = () => {
+    for (const key of requestOrder) {
+      result.push(requestMap.get(key)!);
+    }
+    requestMap.clear();
+    requestOrder.length = 0;
+  };
+
   for (const msg of messages) {
-    if (msg.type !== 'assistant' || !msg.requestId) {
-      // Flush any pending request groups when we hit a non-assistant message
-      for (const reqId of requestOrder) {
-        result.push(requestMap.get(reqId)!);
-      }
-      requestMap.clear();
-      requestOrder.length = 0;
+    const key = msg.type === 'assistant' ? (msg.requestId || msg.message?.id) : undefined; // '||' so an empty requestId also falls back
+    if (!key) {
+      // Non-assistant message, or an assistant row with no usable group key:
+      // flush any pending group (preserve ordering) and pass through as-is.
+      flush();
       result.push(msg);
       continue;
     }
 
-    const reqId = msg.requestId;
-    if (requestMap.has(reqId)) {
-      mergeAssistantChunk(requestMap.get(reqId)!, msg);
+    if (requestMap.has(key)) {
+      mergeAssistantChunk(requestMap.get(key)!, msg);
     } else {
       const clone: SessionMessage = {
         ...msg,
         message: msg.message
           ? { ...msg.message, content: msg.message.content ? copyContent(msg.message.content) : undefined }
           : undefined,
       };
-      requestMap.set(reqId, clone);
-      requestOrder.push(reqId);
+      requestMap.set(key, clone);
+      requestOrder.push(key);
     }
   }
 
-  // Flush remaining
-  for (const reqId of requestOrder) {
-    result.push(requestMap.get(reqId)!);
-  }
-
+  flush();
   return result;
 }
 

diff --git a/src/types.ts b/src/types.ts
@@ -86,6 +86,7 @@ export interface SessionMessage {
   parentUuid?: string | null;
   isSidechain?: boolean;
   message?: {
+    id?: string;
     role?: string;
     model?: string;
     usage?: TokenUsage;