slkiser · slkiser · May 8, 2026 · May 6, 2026 · May 6, 2026 · May 8, 2026
diff --git a/README.md b/README.md
@@ -344,7 +344,7 @@ Existing `experimental.quotaToast` settings still work when no sidecar file exis
 | `formatStyle` | `singleWindow` | Shared quota reset-period display for popup toasts and the Sidebar panel: `singleWindow` shows one reset period per provider; `allWindows` shows all reset periods per provider. Legacy `classic`/`grouped` aliases are still accepted. |
 | `percentDisplayMode` | `remaining` | Shared quota percentage meaning for popup toasts and the Sidebar panel: `remaining` shows quota left; `used` shows quota consumed. `/quota` keeps its existing remaining-percent output. |
 | `onlyCurrentModel` | `false` | Filter quota rows to the current model/provider when that session selection can be resolved. |
-| `showSessionTokens` | `true` | Show the `Session input/output tokens` section when session token data is available. |
+| `showSessionTokens` | `true` | Show the `Session input/output tokens` section when session token data is available. When cached input tokens (cache reads) are present, the section keeps the legacy `in/out` layout and appends the cached amount in parentheses after the non-cached input amount, e.g. `372 (120) in`. |
 | `pricingSnapshot.source` | `"auto"` | Token pricing snapshot selection for `/tokens_*`: `auto`, `bundled`, or `runtime`. |
 | `pricingSnapshot.autoRefresh` | `7` | Refresh stale local pricing data after this many days. |
 

diff --git a/src/lib/entries.ts b/src/lib/entries.ts
@@ -69,13 +69,17 @@ export interface QuotaToastError {
 export interface SessionTokenModel {
   modelID: string;
   input: number;
+  cachedInput?: number;
+  totalInput?: number;
   output: number;
 }
 
 /** Session tokens data for toast display. */
 export interface SessionTokensData {
   models: SessionTokenModel[];
   totalInput: number;
+  totalCachedInput?: number;
+  totalCombinedInput?: number;
   totalOutput: number;
 }
 

diff --git a/src/lib/quota-stats.ts b/src/lib/quota-stats.ts
@@ -821,13 +821,17 @@ export async function aggregateUsage(params: {
 export type SessionTokenRow = {
   modelID: string;
   input: number;
+  cachedInput: number;
+  totalInput: number;
   output: number;
 };
 
 export type SessionTokenSummary = {
   sessionID: string;
   models: SessionTokenRow[];
   totalInput: number;
+  totalCachedInput: number;
+  totalCombinedInput: number;
   totalOutput: number;
 };
 
@@ -839,43 +843,59 @@ export async function getSessionTokenSummary(
 
   if (sessionMessages.length === 0) return null;
 
-  const byModel = new Map<string, { input: number; output: number }>();
+  const byModel = new Map<string, { input: number; cachedInput: number; totalInput: number; output: number }>();
   let totalInput = 0;
+  let totalCachedInput = 0;
+  let totalCombinedInput = 0;
   let totalOutput = 0;
 
   for (const msg of sessionMessages) {
     const tokens = msg.tokens;
     if (!tokens) continue;
 
     const input = typeof tokens.input === "number" ? tokens.input : 0;
+    const cachedInput = typeof tokens.cache?.read === "number" ? tokens.cache.read : 0;
+    const totalInputForMessage = input + cachedInput;
     const output = typeof tokens.output === "number" ? tokens.output : 0;
 
-    // Skip if both are 0
+    // Skip messages that carry no input (new or cached) and no output tokens
-    if (input === 0 && output === 0) continue;
+    if (totalInputForMessage === 0 && output === 0) continue;
 
     totalInput += input;
+    totalCachedInput += cachedInput;
+    totalCombinedInput += totalInputForMessage;
     totalOutput += output;
 
     const modelID = msg.modelID ?? "unknown";
     const existing = byModel.get(modelID);
     if (existing) {
       existing.input += input;
+      existing.cachedInput += cachedInput;
+      existing.totalInput += totalInputForMessage;
       existing.output += output;
     } else {
-      byModel.set(modelID, { input, output });
+      byModel.set(modelID, { input, cachedInput, totalInput: totalInputForMessage, output });
     }
   }
 
   // Sort by total tokens descending
   const models = Array.from(byModel.entries())
-    .map(([modelID, t]) => ({ modelID, input: t.input, output: t.output }))
-    .filter((m) => m.input > 0 || m.output > 0)
-    .sort((a, b) => b.input + b.output - (a.input + a.output));
+    .map(([modelID, t]) => ({
+      modelID,
+      input: t.input,
+      cachedInput: t.cachedInput,
+      totalInput: t.totalInput,
+      output: t.output,
+    }))
+    .filter((m) => m.totalInput > 0 || m.output > 0)
+    .sort((a, b) => b.totalInput + b.output - (a.totalInput + a.output));
 
   return {
     sessionID,
     models,
     totalInput,
+    totalCachedInput,
+    totalCombinedInput,
     totalOutput,
   };
 }
diff --git a/src/lib/session-tokens-format.ts b/src/lib/session-tokens-format.ts
@@ -25,13 +25,26 @@ function clampRenderedLine(line: string, maxWidth?: number): string {
   return width === undefined ? line : line.slice(0, width);
 }
 
-function buildWideSessionTokenSectionModel(sessionTokens: SessionTokensData): SessionTokenSectionModel {
+function formatInputWithCache(input: number, cachedInput?: number): string {
+  const inputStr = formatTokenCount(input);
+  const cached = cachedInput ?? 0;
+  return cached > 0 ? `${inputStr} (${formatTokenCount(cached)})` : inputStr;
+}
+
+function formatInputCell(input: number, cachedInput?: number): string {
+  const value = formatInputWithCache(input, cachedInput);
+  return value.length > 6 ? value : padLeft(value, 6);
+}
+
+function buildWideSessionTokenSectionModel(
+  sessionTokens: SessionTokensData,
+): SessionTokenSectionModel {
   const lines: string[] = [];
   for (const model of sessionTokens.models) {
     const shortName = shortenModelName(model.modelID, 20);
-    const inStr = formatTokenCount(model.input);
+    const inStr = formatInputCell(model.input, model.cachedInput);
     const outStr = formatTokenCount(model.output);
-    lines.push(`  ${padRight(shortName, 20)}  ${padLeft(inStr, 6)} in  ${padLeft(outStr, 6)} out`);
+    lines.push(`  ${padRight(shortName, 20)}  ${inStr} in  ${padLeft(outStr, 6)} out`);
   }
 
   return {
@@ -51,7 +64,7 @@ function buildCompactSessionTokenSectionModel(
     const modelIndent = width > 2 ? "  " : "";
     const modelLineWidth = Math.max(1, width - modelIndent.length);
     const detailIndent = width > 4 ? "    " : width > 2 ? "  " : "";
-    const inStr = formatTokenCount(model.input);
+    const inStr = formatInputWithCache(model.input, model.cachedInput);
     const outStr = formatTokenCount(model.output);
     const compactCounts = `${inStr} in  ${outStr} out`;
 
@@ -76,7 +89,8 @@ function buildSidebarSessionTokenSummaryModel(
   sessionTokens: SessionTokensData,
   options?: { maxWidth?: number },
 ): SessionTokenSectionModel {
-  const summaryLine = `  ${formatTokenCount(sessionTokens.totalInput)} in  ${formatTokenCount(sessionTokens.totalOutput)} out`;
+  const totalCached = sessionTokens.totalCachedInput ?? 0;
+  const summaryLine = `  ${formatInputWithCache(sessionTokens.totalInput, totalCached)} in  ${formatTokenCount(sessionTokens.totalOutput)} out`;
   return {
     heading: clampRenderedLine(SESSION_TOKEN_SECTION_HEADING, options?.maxWidth),
     lines: [clampRenderedLine(summaryLine, options?.maxWidth)],
@@ -98,7 +112,12 @@ export function buildSessionTokenSectionModel(
     return buildCompactSessionTokenSectionModel(sessionTokens, maxWidth);
   }
 
-  return buildWideSessionTokenSectionModel(sessionTokens);
+  const wideSection = buildWideSessionTokenSectionModel(sessionTokens);
+  if (maxWidth !== undefined && wideSection.lines.some((line) => line.length > maxWidth)) {
+    return buildCompactSessionTokenSectionModel(sessionTokens, maxWidth);
+  }
+
+  return wideSection;
 }
 
 /**

diff --git a/src/lib/session-tokens.ts b/src/lib/session-tokens.ts
@@ -35,6 +35,8 @@ export async function fetchSessionTokensForDisplay(params: {
         sessionTokens: {
           models: summary.models,
           totalInput: summary.totalInput,
+          totalCachedInput: summary.totalCachedInput,
+          totalCombinedInput: summary.totalCombinedInput,
           totalOutput: summary.totalOutput,
         },
       };

diff --git a/src/lib/tui-compact-format.ts b/src/lib/tui-compact-format.ts
@@ -168,11 +168,17 @@ function formatCompactSessionTokensSegment(data: QuotaRenderData): string | null
   const hasTokenData =
     sessionTokens.models.length > 0 ||
     sessionTokens.totalInput > 0 ||
+    (sessionTokens.totalCachedInput ?? 0) > 0 ||
     sessionTokens.totalOutput > 0;
   if (!hasTokenData) return null;
 
+  const totalCached = sessionTokens.totalCachedInput ?? 0;
+  const inputSegment = totalCached > 0
+    ? `${formatCompactTokenCount(sessionTokens.totalInput)} (${formatCompactTokenCount(totalCached)})`
+    : formatCompactTokenCount(sessionTokens.totalInput);
+
   return compactText(
-    `tok ${formatCompactTokenCount(sessionTokens.totalInput)} in / ${formatCompactTokenCount(
+    `tok ${inputSegment} in / ${formatCompactTokenCount(
       sessionTokens.totalOutput,
     )} out`,
   );

diff --git a/tests/format.test.ts b/tests/format.test.ts
@@ -469,6 +469,35 @@ describe("formatQuotaRows", () => {
     expect(out).not.toContain("openai/gpt-5.4-mini");
   });
 
+  it("renders single-window session tokens with new and cached input totals when available", () => {
+    const out = formatQuotaRows({
+      version: "1.0.0",
+      style: "singleWindow",
+      layout: { maxWidth: 80, narrowAt: 32, tinyAt: 20 },
+      entries: [],
+      sessionTokens: {
+        totalInput: 372,
+        totalCachedInput: 120,
+        totalCombinedInput: 492,
+        totalOutput: 41,
+        models: [
+          {
+            modelID: "openai/gpt-5.4-mini",
+            input: 372,
+            cachedInput: 120,
+            totalInput: 492,
+            output: 41,
+          },
+        ],
+      },
+    });
+
+    expect(out.split("\n")).toEqual([
+      SESSION_TOKEN_SECTION_HEADING,
+      "  372 (120) in  41 out",
+    ]);
+  });
+
   it("renders all-window session tokens with detailed per-model rows", () => {
     const out = formatQuotaRows({
       version: "1.0.0",
@@ -489,6 +518,35 @@ describe("formatQuotaRows", () => {
     ]);
   });
 
+  it("renders all-window session tokens with separate new and cached input when available", () => {
+    const out = formatQuotaRows({
+      version: "1.0.0",
+      style: "allWindows",
+      layout: { maxWidth: 80, narrowAt: 32, tinyAt: 20 },
+      entries: [],
+      sessionTokens: {
+        totalInput: 372,
+        totalCachedInput: 120,
+        totalCombinedInput: 492,
+        totalOutput: 41,
+        models: [
+          {
+            modelID: "openai/gpt-5.4-mini",
+            input: 372,
+            cachedInput: 120,
+            totalInput: 492,
+            output: 41,
+          },
+        ],
+      },
+    });
+
+    expect(out.split("\n")).toEqual([
+      SESSION_TOKEN_SECTION_HEADING,
+      "  openai/gpt-5.4-mini   372 (120) in      41 out",
+    ]);
+  });
+
   it("keeps legacy style aliases working for direct formatter calls", () => {
     const aliasOutput = formatQuotaRows({
       version: "1.0.0",

diff --git a/tests/quota-command-format.test.ts b/tests/quota-command-format.test.ts
@@ -52,10 +52,12 @@ describe("formatQuotaCommand", () => {
       errors: [{ label: "Z.ai", message: "Authentication expired" }],
       sessionTokens: {
         models: [
-          { modelID: "openai/gpt-5", input: 1234, output: 567 },
+          { modelID: "openai/gpt-5", input: 1234, cachedInput: 456, totalInput: 1690, output: 567 },
           { modelID: "github-copilot/claude-sonnet-4.5", input: 987, output: 654 },
         ],
         totalInput: 2221,
+        totalCachedInput: 456,
+        totalCombinedInput: 2677,
         totalOutput: 1221,
       },
     });
@@ -78,7 +80,7 @@ describe("formatQuotaCommand", () => {
         Claude:          ████████████░░░░░░  67% left (resets in 3h)
 
       Session input/output tokens
-        openai/gpt-5            1.2K in     567 out
+        openai/gpt-5          1.2K (456) in     567 out
         github-copilot/clau…     987 in     654 out
 
       Z.ai: Authentication expired"

diff --git a/tests/quota-stats.test.ts b/tests/quota-stats.test.ts
@@ -24,9 +24,13 @@ vi.mock("../src/lib/opencode-storage.js", () => {
 });
 
 vi.mock("../src/lib/modelsdev-pricing.js", () => ({
-  hasCost: vi.fn((providerID: string, modelID: string) => providerID === "openai" && modelID === "gpt-5"),
+  hasCost: vi.fn(
+    (providerID: string, modelID: string) => providerID === "openai" && modelID === "gpt-5",
+  ),
   hasProvider: vi.fn((providerID: string) => providerID === "openai"),
-  hasModel: vi.fn((providerID: string, modelID: string) => providerID === "openai" && modelID === "gpt-5"),
+  hasModel: vi.fn(
+    (providerID: string, modelID: string) => providerID === "openai" && modelID === "gpt-5",
+  ),
   isModelsDevProviderId: vi.fn((providerID: string) => providerID === "openai"),
   listProvidersForModelId: vi.fn((modelID: string) => (modelID === "gpt-5" ? ["openai"] : [])),
   lookupCost: vi.fn((providerID: string, modelID: string) =>
@@ -45,12 +49,25 @@ vi.mock("../src/lib/cursor-pricing.js", () => ({
 
 vi.mock("../src/lib/token-cost.js", () => ({
   calculateUsdFromTokenBuckets: vi.fn(
-    (_rates: unknown, tokens: { input: number; output: number; reasoning: number; cache_read: number; cache_write: number }) =>
-      tokens.input + tokens.output + tokens.reasoning + tokens.cache_read + tokens.cache_write,
+    (
+      _rates: unknown,
+      tokens: {
+        input: number;
+        output: number;
+        reasoning: number;
+        cache_read: number;
+        cache_write: number;
+      },
+    ) => tokens.input + tokens.output + tokens.reasoning + tokens.cache_read + tokens.cache_write,
   ),
 }));
 
-import { aggregateUsage, resolveSessionTree, SessionNotFoundError } from "../src/lib/quota-stats.js";
+import {
+  aggregateUsage,
+  getSessionTokenSummary,
+  resolveSessionTree,
+  SessionNotFoundError,
+} from "../src/lib/quota-stats.js";
 
 describe("quota stats session tree", () => {
   beforeEach(() => {
@@ -130,6 +147,51 @@ describe("quota stats session tree", () => {
   });
 });
 
+describe("session token summary", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("returns new and cached input token totals separately", async () => {
+    const storage = await import("../src/lib/opencode-storage.js");
+    (storage.iterAssistantMessagesForSession as any).mockResolvedValue([
+      {
+        sessionID: "ses_root",
+        role: "assistant",
+        providerID: "openai",
+        modelID: "gpt-5",
+        tokens: { input: 10, output: 5, reasoning: 0, cache: { read: 4, write: 0 } },
+      },
+      {
+        sessionID: "ses_root",
+        role: "assistant",
+        providerID: "openai",
+        modelID: "gpt-5",
+        tokens: { input: 2, output: 3, reasoning: 0, cache: { read: 6, write: 0 } },
+      },
+    ]);
+
+    const result = await getSessionTokenSummary("ses_root");
+
+    expect(result).toEqual({
+      sessionID: "ses_root",
+      totalInput: 12,
+      totalCachedInput: 10,
+      totalCombinedInput: 22,
+      totalOutput: 8,
+      models: [
+        {
+          modelID: "gpt-5",
+          input: 12,
+          cachedInput: 10,
+          totalInput: 22,
+          output: 8,
+        },
+      ],
+    });
+  });
+});
+
 describe("aggregateUsage session scoping", () => {
   beforeEach(async () => {
     vi.clearAllMocks();