From b7b791a4d0b474f6816a4664dde04328091f1d88 Mon Sep 17 00:00:00 2001
From: Claude Code
Date: Sat, 23 May 2026 18:39:26 +0800
Subject: [PATCH 1/2] fix(sessionStorage): sanitize binary control chars from tool output

Prevent null bytes (e.g. from `cat /usr/bin/ls`) from corrupting JSONL
session logs by filtering control characters via a JSON.stringify
replacer. Replaces \x00-\x08, \x0b, \x0c, \x0e-\x1f with safe [U+XXXX]
text while preserving \n, \r, \t.

Co-Authored-By: Claude Opus 4.7
---
Review notes (text in this section is ignored when the patch is applied):
- Re-flowed from a whitespace-collapsed copy. Indentation and blank
  context lines are best-effort; verify against the tree before applying.
- Type arguments lost in that copy were restored as `Promise<string>` and
  `Record<string, unknown>`; the latter is a context line - TODO confirm.
- The binary-output test now builds its own 256-byte buffer instead of
  reading `/usr/bin/ls`, so it no longer depends on the host platform.
- `sanitizeJsonlReplacer` no longer calls `.test()` on the shared global
  regex, and `[U+XXXX]` placeholders use upper-case hex digits.
- Blob ids on the `index` lines predate these edits.

 .../__tests__/sessionStorageSanitizer.test.ts | 245 ++++++++++++++++++
 src/utils/sessionStorage.ts                   |  31 ++-
 2 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 src/utils/__tests__/sessionStorageSanitizer.test.ts

diff --git a/src/utils/__tests__/sessionStorageSanitizer.test.ts b/src/utils/__tests__/sessionStorageSanitizer.test.ts
new file mode 100644
index 000000000..f0ec859ce
--- /dev/null
+++ b/src/utils/__tests__/sessionStorageSanitizer.test.ts
@@ -0,0 +1,245 @@
+import { beforeEach, afterEach, describe, expect, it } from 'bun:test'
+import * as fs from 'node:fs/promises'
+import * as os from 'node:os'
+import * as path from 'node:path'
+
+import {
+  enqueueSessionEntryAfterPendingForTesting,
+  flushSessionStorage,
+  resetProjectForTesting,
+} from '../sessionStorage.js'
+import type { CustomTitleMessage } from '../../types/logs.js'
+
+const originalConfigDir = process.env.CLAUDE_CONFIG_DIR
+
+async function createTmpDir(): Promise<string> {
+  const dir = path.join(
+    os.tmpdir(),
+    `session-storage-flush-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+  )
+  await fs.mkdir(dir, { recursive: true })
+  return dir
+}
+
+describe('sessionStorage flush', () => {
+  let tmpDir: string
+
+  beforeEach(async () => {
+    tmpDir = await createTmpDir()
+    process.env.CLAUDE_CONFIG_DIR = tmpDir
+    resetProjectForTesting()
+  })
+
+  afterEach(async () => {
+    resetProjectForTesting()
+    if (originalConfigDir === undefined) {
+      delete process.env.CLAUDE_CONFIG_DIR
+    } else {
+      process.env.CLAUDE_CONFIG_DIR = originalConfigDir
+    }
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {})
+  })
+
+  it('drains writes that are queued by pending operations during flush', async () => {
+    const transcriptPath = path.join(tmpDir, 'late-enqueue.jsonl')
+    const entry: CustomTitleMessage = {
+      type: 'custom-title',
+      customTitle: 'late enqueue',
+      sessionId: '11111111-1111-4111-8111-111111111111',
+    }
+    const writePromise = enqueueSessionEntryAfterPendingForTesting(
+      transcriptPath,
+      entry,
+      10,
+    )
+
+    await flushSessionStorage()
+    await writePromise
+
+    const content = await fs.readFile(transcriptPath, 'utf-8')
+    expect(content).toContain('"customTitle":"late enqueue"')
+  })
+})
+
+describe('sanitizeJsonlReplacer', () => {
+  let tmpDir: string
+
+  beforeEach(async () => {
+    tmpDir = await createTmpDir()
+    process.env.CLAUDE_CONFIG_DIR = tmpDir
+    resetProjectForTesting()
+  })
+
+  afterEach(async () => {
+    resetProjectForTesting()
+    if (originalConfigDir === undefined) {
+      delete process.env.CLAUDE_CONFIG_DIR
+    } else {
+      process.env.CLAUDE_CONFIG_DIR = originalConfigDir
+    }
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {})
+  })
+
+  it('removes null bytes from tool output', async () => {
+    const transcriptPath = path.join(tmpDir, 'sanitize-null.jsonl')
+    // Simulate Bash tool output with embedded null bytes (like cat /usr/bin/ls)
+    const entry = {
+      type: 'assistant',
+      message: {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use',
+            name: 'Bash',
+            input: {
+              command: 'cat /usr/bin/ls',
+              stdout: '\x00\x00\x00ELF\x00\x00\x01\x02\x00',
+              stderr: '',
+            },
+          },
+        ],
+      },
+      sessionId: '22222222-2222-4222-8222-222222222222',
+      timestamp: new Date().toISOString(),
+      version: 'test',
+    }
+    const writePromise = enqueueSessionEntryAfterPendingForTesting(
+      transcriptPath,
+      entry,
+      10,
+    )
+    await flushSessionStorage()
+    await writePromise
+
+    // Read the file back as UTF-8 text
+    const raw = await fs.readFile(transcriptPath, 'utf-8')
+    expect(raw).not.toContain('\x00')
+
+    // Verify the line is valid JSON
+    const line = raw.trim()
+    const parsed = JSON.parse(line)
+    expect(parsed.message.content[0].input.stdout).toContain('[U+0000]')
+    expect(parsed.message.content[0].input.stdout).toContain('ELF')
+  })
+
+  it('preserves newlines, carriage returns, and tabs', async () => {
+    const transcriptPath = path.join(tmpDir, 'sanitize-preserve.jsonl')
+    const entry = {
+      type: 'assistant',
+      message: {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use',
+            name: 'Bash',
+            input: {
+              stdout: 'line1\nline2\r\nline3\ttabbed',
+            },
+          },
+        ],
+      },
+      sessionId: '33333333-3333-4333-8333-333333333333',
+      timestamp: new Date().toISOString(),
+      version: 'test',
+    }
+    const writePromise = enqueueSessionEntryAfterPendingForTesting(
+      transcriptPath,
+      entry,
+      10,
+    )
+    await flushSessionStorage()
+    await writePromise
+
+    const raw = await fs.readFile(transcriptPath, 'utf-8')
+    const parsed = JSON.parse(raw.trim())
+    expect(parsed.message.content[0].input.stdout).toBe(
+      'line1\nline2\r\nline3\ttabbed',
+    )
+  })
+
+  it('handles mixed control characters', async () => {
+    const transcriptPath = path.join(tmpDir, 'sanitize-mixed.jsonl')
+    const entry = {
+      type: 'assistant',
+      message: {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use',
+            name: 'Bash',
+            input: {
+              stdout: 'Hello\x00World\x01\x02\x03OK\nEnd',
+            },
+          },
+        ],
+      },
+      sessionId: '44444444-4444-4444-8444-444444444444',
+      timestamp: new Date().toISOString(),
+      version: 'test',
+    }
+    const writePromise = enqueueSessionEntryAfterPendingForTesting(
+      transcriptPath,
+      entry,
+      10,
+    )
+    await flushSessionStorage()
+    await writePromise
+
+    const raw = await fs.readFile(transcriptPath, 'utf-8')
+    expect(raw).not.toContain('\x00')
+    expect(raw).not.toContain('\x01')
+    expect(raw).not.toContain('\x02')
+    expect(raw).not.toContain('\x03')
+
+    const parsed = JSON.parse(raw.trim())
+    expect(parsed.message.content[0].input.stdout).toContain('[U+0000]')
+    expect(parsed.message.content[0].input.stdout).toContain('[U+0001]')
+    expect(parsed.message.content[0].input.stdout).toContain('[U+0002]')
+    expect(parsed.message.content[0].input.stdout).toContain('[U+0003]')
+    // Newline should be preserved
+    expect(parsed.message.content[0].input.stdout).toContain('\n')
+  })
+
+  it('handles arbitrary binary output', async () => {
+    // Hermetic stand-in for a real binary: one byte of every value 0x00-0xff
+    const binRaw = Buffer.from(Array.from({ length: 256 }, (_, i) => i))
+    const binStr = binRaw.toString('binary') // Convert to string preserving all bytes
+
+    const transcriptPath = path.join(tmpDir, 'sanitize-elf.jsonl')
+    const entry = {
+      type: 'assistant',
+      message: {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use',
+            name: 'Bash',
+            input: {
+              command: 'cat /usr/bin/ls',
+              stdout: binStr,
+              stderr: '',
+            },
+          },
+        ],
+      },
+      sessionId: '55555555-5555-4555-8555-555555555555',
+      timestamp: new Date().toISOString(),
+      version: 'test',
+    }
+    const writePromise = enqueueSessionEntryAfterPendingForTesting(
+      transcriptPath,
+      entry,
+      10,
+    )
+    await flushSessionStorage()
+    await writePromise
+
+    // Verify no null bytes in output file
+    const raw = await fs.readFile(transcriptPath)
+    expect(raw.includes(Buffer.from('\x00'))).toBe(false)
+
+    // Verify the line is valid JSON
+    const rawStr = raw.toString('utf-8')
+    expect(() => JSON.parse(rawStr.trim())).not.toThrow()
+  })
+})
diff --git a/src/utils/sessionStorage.ts b/src/utils/sessionStorage.ts
index dad2db70d..4c10e6f50 100644
--- a/src/utils/sessionStorage.ts
+++ b/src/utils/sessionStorage.ts
@@ -94,6 +94,33 @@ import { jsonParse, jsonStringify } from './slowOperations.js'
 import type { ContentReplacementRecord } from './toolResultStorage.js'
 import { validateUuid } from './uuid.js'
 
+/**
+ * JSON.stringify replacer that sanitizes strings before they enter
+ * the JSONL session log. Replaces null bytes and other C0 control
+ * characters (except \n, \r, \t) with visible `[U+XXXX]` text.
+ *
+ * Root cause: `cat /usr/bin/foo` dumps ELF binary → null bytes in
+ * Bash tool stdout → breaks JSONL parser on resume.
+ *
+ * The regex is global (stateful), so it is only ever handed to
+ * String.prototype.replace, which resets lastIndex on each call.
+ */
+const CONTROL_CHAR_RE = /[\x00-\x08\x0b\x0c\x0e-\x1f]/g
+
+function sanitizeJsonlReplacer(
+  _key: string,
+  value: unknown,
+): unknown {
+  if (typeof value !== 'string') {
+    return value
+  }
+  // replace() hands back the input unchanged when nothing matches
+  return value.replace(CONTROL_CHAR_RE, (match) => {
+    const hex = match.charCodeAt(0).toString(16).toUpperCase()
+    return `[U+${hex.padStart(4, '0')}]`
+  })
+}
+
 // Cache MACRO.VERSION at module level to work around bun --define bug in async contexts
 // See: https://github.com/oven-sh/bun/issues/26168
 const VERSION = typeof MACRO !== 'undefined' ? MACRO.VERSION : 'unknown'
@@ -671,7 +698,7 @@ class Project {
       const resolvers: Array<() => void> = []
 
       for (const { entry, resolve } of batch) {
-        const line = jsonStringify(entry) + '\n'
+        const line = jsonStringify(entry, sanitizeJsonlReplacer) + '\n'
 
         if (content.length + line.length >= this.MAX_CHUNK_BYTES) {
           // Flush chunk and resolve its entries before starting a new one
@@ -2603,7 +2630,7 @@ function appendEntryToFile(
   entry: Record<string, unknown>,
 ): void {
   const fs = getFsImplementation()
-  const line = jsonStringify(entry) + '\n'
+  const line = jsonStringify(entry, sanitizeJsonlReplacer) + '\n'
   try {
     fs.appendFileSync(fullPath, line, { mode: 0o600 })
   } catch {
From 0dba0a01a358cf0ff2631f817004a473c05d58e0 Mon Sep 17 00:00:00 2001
From: Claude Code
Date: Sun, 24 May 2026 15:24:48 +0800
Subject: [PATCH 2/2] fix(messages): guard against non-array content in normalizeMessagesForAPI

---
 src/utils/messages.ts | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/utils/messages.ts b/src/utils/messages.ts
index c9bd5ca9c..ce90c9f15 100644 --- a/src/utils/messages.ts +++ b/src/utils/messages.ts @@ -1751,11 +1751,13 @@ export function stripCallerFieldFromAssistantMessage( return message } + // Guard: content may not be an array + const contentArr = Array.isArray(message.message?.content) ? message.message.content : [] return { ...message, message: { ...message.message, - content: message.message.content.map(block => { + content: contentArr.map(block => { if (block.type !== 'tool_use') { return block } @@ -2204,11 +2206,15 @@ export function normalizeMessagesForAPI( // like 'caller' from tool_use blocks, as these are only valid with the // tool search beta header const toolSearchEnabled = isToolSearchEnabledOptimistic() + // Guard: content may not be an array (e.g. error response from LiteLLM proxy) + const contentBlocks = Array.isArray(message.message?.content) + ? message.message.content + : [] const normalizedMessage: AssistantMessage = { ...message, message: { ...message.message, - content: message.message.content.map(block => { + content: contentBlocks.map(block => { if (block.type === 'tool_use') { const tool = tools.find(t => toolMatchesName(t, block.name)) const normalizedInput = tool