diff --git a/README.md b/README.md index d669af2..de282b0 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,9 @@ vibecosting --top 1 --json | jq -r '.rows[0].costUsd' | **$** | USD spend at Anthropic's published rates (or your currency) | | **bar / %** | share of total | | **out** | output tokens (the expensive ones) | +| **fresh input** | uncached input tokens (`input_tokens`) reported in Anthropic's response usage | +| **cache read** | cached input tokens reused by Claude Code | +| **cache write** | input tokens written to the prompt cache (5-minute and 1-hour writes are priced separately) | | **ev** | "events" — assistant turns + user turns | | **sess** | distinct sessions in this bucket | | **cache N%** | cache_read / (cache_read + cache_create + tokens_in) | @@ -181,6 +184,7 @@ A high cache hit rate (≥70%, green) means most context is being reused — pay 2. **Pricing is hardcoded** at ship time. If Anthropic changes rates, output drifts until next release. 3. **Currency rates are approximate** — set `CLAUDE_COST_RATE` for live FX. 4. **Model name matching is fuzzy** — `claude-3-5-sonnet` and `claude-sonnet-4-7` both get sonnet rates. Off by 10–30% on legacy sessions. +5. **Claude Code transcript shape can change.** vibecosting de-duplicates repeated assistant snapshots by request id (falling back to the message id, then the entry uuid) and recursively includes nested subagent transcripts, but the source of truth for actual charges remains Anthropic's billing page. Run `vibecosting --show-pricing` to see the exact rate table being used. 
diff --git a/src/cli.ts b/src/cli.ts index 66c9ab3..7cb3d7f 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -225,6 +225,7 @@ function fmtCost(usd: number): string { return `${CURRENCY_SYMBOL}${n.toFixed(3)}`; } function fmtTok(n: number): string { + if (n >= 1_000_000_000) return (n/1_000_000_000).toFixed(1) + 'B'; if (n >= 1_000_000) return (n/1_000_000).toFixed(1) + 'M'; if (n >= 1_000) return (n/1_000).toFixed(1) + 'K'; return String(n); @@ -249,13 +250,15 @@ type Bucket = { tokensOut: number; cacheRead: number; cacheCreate: number; + cacheCreate5m: number; + cacheCreate1h: number; events: number; sessions: number; models?: Set; }; function newBucket(key: string, label: string): Bucket { - return { key, label, costUsd: 0, tokensIn: 0, tokensOut: 0, cacheRead: 0, cacheCreate: 0, events: 0, sessions: 0, models: new Set() }; + return { key, label, costUsd: 0, tokensIn: 0, tokensOut: 0, cacheRead: 0, cacheCreate: 0, cacheCreate5m: 0, cacheCreate1h: 0, events: 0, sessions: 0, models: new Set() }; } function bucket(sessions: Session[], by: Args['groupBy']): Bucket[] { @@ -321,6 +324,8 @@ function bucket(sessions: Session[], by: Args['groupBy']): Bucket[] { b.tokensOut += s.tokensOut; b.cacheRead += s.cacheRead; b.cacheCreate += s.cacheCreate; + b.cacheCreate5m += s.cacheCreate5m; + b.cacheCreate1h += s.cacheCreate1h; b.events += s.events; b.sessions += 1; if (s.dominantModel) b.models!.add(s.dominantModel); @@ -387,7 +392,8 @@ function render(buckets: Bucket[], totals: Bucket, args: Args, label: string) { if (args.plan === 'api') { // Pay-per-token: the totals.costUsd IS what you're paying. 
rows.push(`${C.bold}${C.green}${fmtCost(totals.costUsd)}${C.reset} ${C.dim}total spend (API rates)${C.reset}`); - rows.push(`${C.bold}${C.cyan}${fmtTok(totals.tokensOut)}${C.reset} ${C.dim}output tokens · ${fmtTok(totals.tokensIn)} input${C.reset}`); + rows.push(`${C.bold}${C.cyan}${fmtTok(totals.tokensOut)}${C.reset} ${C.dim}output tokens · ${fmtTok(totals.tokensIn)} fresh input${C.reset}`); + rows.push(`${C.bold}${C.cyan}${fmtTok(totals.cacheRead)}${C.reset} ${C.dim}cache read · ${fmtTok(totals.cacheCreate)} cache write${C.reset}`); rows.push(`${cacheColor}${(totalCacheRatio*100).toFixed(0)}%${C.reset} ${C.dim}cache hit · ${totals.sessions} sessions · ${buckets.length} ${args.groupBy}${args.groupBy === 'session' ? '' : 's'}${C.reset}`); if ((args as any)._previousLabel) { rows.push(`${(args as any)._previousColor}${(args as any)._previousLabel}${C.reset} ${C.dim}vs previous period${C.reset}`); @@ -411,6 +417,7 @@ function render(buckets: Bucket[], totals: Bucket, args: Args, label: string) { rows.push(`${C.bold}${C.green}${fmtCost(actualPaid)}${C.reset} ${C.dim}what you actually pay${overage > 0 ? 
` (${fmtCost(planUsd)} plan + ${fmtCost(overage)} overage)` : ` (${plan.name})`}${C.reset}`); rows.push(`${C.bold}${C.cyan}${fmtCost(apiEquiv)}${C.reset} ${C.dim}token-cost at raw API rates (same model, different billing)${C.reset}`); + rows.push(`${C.bold}${C.cyan}${fmtTok(totals.cacheRead)}${C.reset} ${C.dim}cache read · ${fmtTok(totals.cacheCreate)} cache write${C.reset}`); if (ratio > 1) { rows.push(`${C.dim}${ratio.toFixed(1)}×${C.reset} ${C.dim}per-token cost ratio (not a value/capability ratio)${C.reset}`); } @@ -579,10 +586,10 @@ async function main() { console.log(); console.log(` ${C.bold}${C.yellow}◆ pricing table${C.reset} ${C.dim}· per 1M tokens · in ${CURRENCY_CODE}${C.reset}`); console.log(); - const cols = `${pad('model', 10)} ${lpad('input', 8)} ${lpad('output', 8)} ${lpad('cache R', 8)} ${lpad('cache W', 8)}`; + const cols = `${pad('model', 10)} ${lpad('input', 8)} ${lpad('output', 8)} ${lpad('cache R', 8)} ${lpad('cache W5', 8)} ${lpad('cache W1h', 9)}`; console.log(` ${C.dim}${cols}${C.reset}`); for (const [name, p] of Object.entries(PRICING)) { - console.log(` ${pad(name, 10)} ${lpad(fmtCost(p.in), 8)} ${lpad(fmtCost(p.out), 8)} ${lpad(fmtCost(p.cacheRead), 8)} ${lpad(fmtCost(p.cacheWrite), 8)}`); + console.log(` ${pad(name, 10)} ${lpad(fmtCost(p.in), 8)} ${lpad(fmtCost(p.out), 8)} ${lpad(fmtCost(p.cacheRead), 8)} ${lpad(fmtCost(p.cacheWrite5m), 8)} ${lpad(fmtCost(p.cacheWrite1h), 9)}`); } console.log(); console.log(` ${C.dim}rates as of ship time. 
update src/parse.ts if Anthropic changes.${C.reset}`); @@ -660,10 +667,12 @@ async function main() { acc.tokensOut += s.tokensOut; acc.cacheRead += s.cacheRead; acc.cacheCreate += s.cacheCreate; + acc.cacheCreate5m += s.cacheCreate5m; + acc.cacheCreate1h += s.cacheCreate1h; acc.events += s.events; acc.sessions += 1; return acc; - }, { key: 'all', label: 'total', costUsd: 0, tokensIn: 0, tokensOut: 0, cacheRead: 0, cacheCreate: 0, events: 0, sessions: 0 }); + }, { key: 'all', label: 'total', costUsd: 0, tokensIn: 0, tokensOut: 0, cacheRead: 0, cacheCreate: 0, cacheCreate5m: 0, cacheCreate1h: 0, events: 0, sessions: 0 }); if (args.json) { const plan = PLANS[args.plan] ?? PLANS.api; @@ -685,6 +694,9 @@ async function main() { tokensOut: totals.tokensOut, cacheRead: totals.cacheRead, cacheCreate: totals.cacheCreate, + cacheCreate5m: totals.cacheCreate5m, + cacheCreate1h: totals.cacheCreate1h, + totalInputTokens: totals.tokensIn + totals.cacheRead + totals.cacheCreate, events: totals.events, sessions: totals.sessions, }, @@ -697,6 +709,9 @@ async function main() { tokensOut: b.tokensOut, cacheRead: b.cacheRead, cacheCreate: b.cacheCreate, + cacheCreate5m: b.cacheCreate5m, + cacheCreate1h: b.cacheCreate1h, + totalInputTokens: b.tokensIn + b.cacheRead + b.cacheCreate, events: b.events, sessions: b.sessions, models: b.models ? Array.from(b.models) : undefined, diff --git a/src/parse.ts b/src/parse.ts index c7124d3..2e2b7bb 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -5,16 +5,15 @@ import { join, basename } from 'node:path'; import { homedir } from 'node:os'; // Pricing (USD per 1M tokens). Per Anthropic's published rates as of 2026-04. -// Override at runtime via ~/.vibecosting.json or CLAUDE_COST_PRICING env var. // // SUBSCRIPTION USERS NOTE: if you're on Claude Max/Team flat-rate, what you // actually pay is $200/month (or whatever your plan is), not these rates. 
// This tool computes the API-equivalent cost — useful for "am I overusing // my plan" or "what would this cost without the subscription". -export const PRICING: Record = { - haiku: { in: 1, out: 5, cacheRead: 0.10, cacheWrite: 1.25 }, - sonnet: { in: 3, out: 15, cacheRead: 0.30, cacheWrite: 3.75 }, - opus: { in: 15, out: 75, cacheRead: 1.50, cacheWrite: 18.75 }, +export const PRICING: Record = { + haiku: { in: 0.80, out: 4, cacheRead: 0.08, cacheWrite5m: 1.00, cacheWrite1h: 1.60 }, + sonnet: { in: 3.00, out: 15, cacheRead: 0.30, cacheWrite5m: 3.75, cacheWrite1h: 6.00 }, + opus: { in: 15.00, out: 75, cacheRead: 1.50, cacheWrite5m: 18.75, cacheWrite1h: 30.00 }, }; export function priceFor(model: string | undefined) { @@ -79,6 +78,8 @@ export type Session = { tokensOut: number; cacheRead: number; cacheCreate: number; + cacheCreate5m: number; + cacheCreate1h: number; costUsd: number; events: number; toolUses: number; @@ -120,6 +121,7 @@ export function parseSession(filePath: string): Session | null { models: new Set(), modelEvents: new Map(), tokensIn: 0, tokensOut: 0, cacheRead: 0, cacheCreate: 0, + cacheCreate5m: 0, cacheCreate1h: 0, costUsd: 0, events: 0, toolUses: 0, errors: 0, toolCounts: new Map(), firstTs: Number.POSITIVE_INFINITY, @@ -129,6 +131,8 @@ export function parseSession(filePath: string): Session | null { topMessageTs: 0, }; + const assistantEvents: any[] = []; + for (const line of text.split('\n')) { if (!line) continue; let obj: any; @@ -148,48 +152,7 @@ export function parseSession(filePath: string): Session | null { } if (obj.type === 'assistant') { - s.events += 1; - const msg = obj.message; - let turnCost = 0; - if (msg?.usage) { - const u = msg.usage; - const model = msg.model || 'unknown'; - s.model = model; - s.models.add(model); - s.modelEvents.set(model, (s.modelEvents.get(model) ?? 
0) + 1); - const p = priceFor(model); - const inT = Number(u.input_tokens) || 0; - const outT = Number(u.output_tokens) || 0; - const cR = Number(u.cache_read_input_tokens) || 0; - const cC = Number(u.cache_creation_input_tokens) || 0; - s.tokensIn += inT; - s.tokensOut += outT; - s.cacheRead += cR; - s.cacheCreate += cC; - turnCost = (inT * p.in + outT * p.out + cR * p.cacheRead + cC * p.cacheWrite) / 1_000_000; - s.costUsd += turnCost; - } - // Track most expensive single message - if (turnCost > s.topMessageCostUsd) { - s.topMessageCostUsd = turnCost; - s.topMessageTs = ts; - } - // Bucket cost by hour-of-day (local time of the timestamp) - if (ts) { - const h = new Date(ts).getHours(); - s.hourBuckets[h] += turnCost; - } - // tool_use sub-blocks - const content = msg?.content; - if (Array.isArray(content)) { - for (const b of content) { - if (b?.type === 'tool_use') { - s.toolUses += 1; - const tn = (b.name || 'unknown') as string; - s.toolCounts.set(tn, (s.toolCounts.get(tn) ?? 0) + 1); - } - } - } + assistantEvents.push(obj); } else if (obj.type === 'user') { s.events += 1; const content = obj.message?.content; @@ -203,6 +166,47 @@ export function parseSession(filePath: string): Session | null { } } + for (const obj of dedupeAssistantEvents(assistantEvents)) { + s.events += 1; + const msg = obj.message; + const ts = obj.timestamp ? Date.parse(obj.timestamp) : 0; + let turnCost = 0; + if (msg?.usage) { + const u = msg.usage; + const model = msg.model || 'unknown'; + s.model = model; + s.models.add(model); + s.modelEvents.set(model, (s.modelEvents.get(model) ?? 
0) + 1); + const usage = normalizeUsage(u); + s.tokensIn += usage.input; + s.tokensOut += usage.output; + s.cacheRead += usage.cacheRead; + s.cacheCreate += usage.cacheCreate; + s.cacheCreate5m += usage.cacheCreate5m; + s.cacheCreate1h += usage.cacheCreate1h; + turnCost = costForUsage(model, usage); + s.costUsd += turnCost; + } + if (turnCost > s.topMessageCostUsd) { + s.topMessageCostUsd = turnCost; + s.topMessageTs = ts; + } + if (ts) { + const h = new Date(ts).getHours(); + s.hourBuckets[h] += turnCost; + } + const content = msg?.content; + if (Array.isArray(content)) { + for (const b of content) { + if (b?.type === 'tool_use') { + s.toolUses += 1; + const tn = (b.name || 'unknown') as string; + s.toolCounts.set(tn, (s.toolCounts.get(tn) ?? 0) + 1); + } + } + } + } + if (!Number.isFinite(s.firstTs)) s.firstTs = 0; // Compute dominantModel: model with most events let topN = 0; @@ -212,6 +216,63 @@ export function parseSession(filePath: string): Session | null { return s; } +type NormalizedUsage = { + input: number; + output: number; + cacheRead: number; + cacheCreate: number; + cacheCreate5m: number; + cacheCreate1h: number; +}; + +function normalizeUsage(u: any): NormalizedUsage { + const input = Number(u.input_tokens) || 0; + const output = Number(u.output_tokens) || 0; + const cacheRead = Number(u.cache_read_input_tokens) || 0; + const cacheCreate = Number(u.cache_creation_input_tokens) || 0; + const cacheCreate5m = Number(u.cache_creation?.ephemeral_5m_input_tokens) || 0; + const cacheCreate1h = Number(u.cache_creation?.ephemeral_1h_input_tokens) || 0; + const knownCacheCreate = cacheCreate5m + cacheCreate1h; + const unknownCacheCreate = Math.max(0, cacheCreate - knownCacheCreate); + return { + input, + output, + cacheRead, + cacheCreate, + cacheCreate5m: cacheCreate5m + unknownCacheCreate, + cacheCreate1h, + }; +} + +function usageTokenTotal(u: any): number { + const n = normalizeUsage(u); + return n.input + n.output + n.cacheRead + n.cacheCreate; +} + 
+function dedupeAssistantEvents(events: any[]): any[] { + const byResponse = new Map(); + for (let i = 0; i < events.length; i++) { + const obj = events[i]; + const key = String(obj.requestId || obj.message?.id || obj.uuid || i); + const prev = byResponse.get(key); + if (!prev || usageTokenTotal(obj.message?.usage) >= usageTokenTotal(prev.message?.usage)) { + byResponse.set(key, obj); + } + } + return Array.from(byResponse.values()); +} + +function costForUsage(model: string | undefined, u: NormalizedUsage): number { + const p = priceFor(model); + return ( + u.input * p.in + + u.output * p.out + + u.cacheRead * p.cacheRead + + u.cacheCreate5m * p.cacheWrite5m + + u.cacheCreate1h * p.cacheWrite1h + ) / 1_000_000; +} + // Short-display name for a model: "claude-opus-4-7-20251201" → "opus-4-7" export function shortModel(m: string | undefined): string { if (!m) return '?'; @@ -224,17 +285,22 @@ export function shortModel(m: string | undefined): string { export function loadAllSessions(): Session[] { if (!existsSync(PROJECTS_DIR)) return []; const out: Session[] = []; - for (const dir of readdirSync(PROJECTS_DIR, { withFileTypes: true })) { - if (!dir.isDirectory()) continue; - const dirPath = join(PROJECTS_DIR, dir.name); + + function visit(dirPath: string) { let entries; try { entries = readdirSync(dirPath, { withFileTypes: true }); } - catch { continue; } + catch { return; } for (const entry of entries) { - if (!entry.isFile() || !entry.name.endsWith('.jsonl')) continue; - const s = parseSession(join(dirPath, entry.name)); - if (s) out.push(s); + const p = join(dirPath, entry.name); + if (entry.isDirectory()) { + visit(p); + } else if (entry.isFile() && entry.name.endsWith('.jsonl')) { + const s = parseSession(p); + if (s) out.push(s); + } } } + + visit(PROJECTS_DIR); return out; } diff --git a/tests/parse.test.ts b/tests/parse.test.ts index 22a1834..fcc29af 100644 --- a/tests/parse.test.ts +++ b/tests/parse.test.ts @@ -19,7 +19,7 @@ function writeFixture(events: 
any[]): { filePath: string } { describe('priceFor', () => { test('opus', () => expect(priceFor('claude-opus-4-7').in).toBe(15)); - test('haiku', () => expect(priceFor('claude-haiku-4-5').in).toBe(1)); + test('haiku', () => expect(priceFor('claude-haiku-4-5').in).toBe(0.8)); test('sonnet default', () => expect(priceFor('claude-sonnet-4-6').in).toBe(3)); test('unknown model defaults to sonnet', () => expect(priceFor('mystery').in).toBe(3)); test('undefined defaults to sonnet', () => expect(priceFor(undefined).in).toBe(3)); @@ -85,7 +85,7 @@ describe('parseSession — pricing math', () => { expect(s.costUsd).toBeCloseTo(75, 2); }); - test('haiku: 100K cache_read = $0.01', () => { + test('haiku: 100K cache_read = $0.008', () => { const { filePath } = writeFixture([ { type: 'assistant', @@ -101,8 +101,65 @@ describe('parseSession — pricing math', () => { }, ]); const s = parseSession(filePath)!; - // 100K * $0.10 / 1M = $0.01 - expect(s.costUsd).toBeCloseTo(0.01, 4); + // 100K * $0.08 / 1M = $0.008 + expect(s.costUsd).toBeCloseTo(0.008, 4); + }); + + test('prices 1-hour cache creation separately when present', () => { + const { filePath } = writeFixture([ + { + type: 'assistant', + sessionId: 'abc', + timestamp: '2026-04-26T00:00:00Z', + requestId: 'req-1', + message: { + model: 'claude-sonnet-4-6', + usage: { + input_tokens: 0, + output_tokens: 0, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 1_000_000, + cache_creation: { + ephemeral_5m_input_tokens: 250_000, + ephemeral_1h_input_tokens: 750_000, + }, + }, + }, + }, + ]); + const s = parseSession(filePath)!; + // 250K * $3.75 + 750K * $6 = $5.4375 + expect(s.costUsd).toBeCloseTo(5.4375, 4); + expect(s.cacheCreate5m).toBe(250_000); + expect(s.cacheCreate1h).toBe(750_000); + }); + + test('deduplicates repeated assistant snapshots for the same request', () => { + const { filePath } = writeFixture([ + { + type: 'assistant', + sessionId: 'abc', + timestamp: '2026-04-26T00:00:00Z', + requestId: 'req-1', + 
message: { + model: 'claude-sonnet-4-6', + usage: { input_tokens: 1_000_000, output_tokens: 0, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 }, + }, + }, + { + type: 'assistant', + sessionId: 'abc', + timestamp: '2026-04-26T00:00:01Z', + requestId: 'req-1', + message: { + model: 'claude-sonnet-4-6', + usage: { input_tokens: 1_000_000, output_tokens: 0, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 }, + }, + }, + ]); + const s = parseSession(filePath)!; + expect(s.costUsd).toBeCloseTo(3, 2); + expect(s.events).toBe(1); }); test('sums across multiple assistant events', () => {