diff --git a/internal/cli/run.go b/internal/cli/run.go index a6685edce..6fcc167c1 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -1335,16 +1335,17 @@ func scanRepoContextFiles(repoDir string) []security.Finding { return allFindings } -// scanOutputFiles runs the secret redactor on extracted output files, -// recursively walking all subdirectories (iteration-N/output/, etc.). +// scanOutputFiles runs the output security pipeline (unicode normalization and +// secret redaction) on extracted output files, recursively walking all +// subdirectories (iteration-N/output/, etc.). func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error { if _, err := os.Stat(outputDir); os.IsNotExist(err) { printer.StepInfo("No output files to scan") return nil } - redactor := security.NewSecretRedactor() - redacted := 0 + pipeline := security.OutputPipeline() + findingCount := 0 findingsPath := filepath.Join(outputDir, "security", "findings.jsonl") err := filepath.WalkDir(outputDir, func(path string, d os.DirEntry, err error) error { @@ -1365,12 +1366,13 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error { return nil } - result := redactor.Scan(string(content)) + text := string(content) + result := pipeline.Scan(text) if len(result.Findings) > 0 { - redacted += len(result.Findings) + findingCount += len(result.Findings) relPath, _ := filepath.Rel(outputDir, path) for _, f := range result.Findings { - printer.StepWarn(fmt.Sprintf("Redacted [%s] in %s: %s", f.Name, relPath, f.Detail)) + printer.StepWarn(fmt.Sprintf("Sanitized [%s] in %s: %s", f.Name, relPath, f.Detail)) security.AppendFinding(findingsPath, security.TracedFinding{ TraceID: traceID, @@ -1379,8 +1381,12 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error { Finding: f, }) } - if writeErr := os.WriteFile(path, []byte(result.Sanitized), 0o644); writeErr != nil { - printer.StepWarn(fmt.Sprintf("Could not write redacted %s: %v", relPath, writeErr)) + out := result.Sanitized + if out == "" { + out = text + } + if writeErr := os.WriteFile(path, []byte(out), 0o644); writeErr != nil { + printer.StepWarn(fmt.Sprintf("Could not write sanitized %s: %v", relPath, writeErr)) } } return nil @@ -1389,10 +1395,10 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error { return err } - if redacted > 0 { - printer.StepWarn(fmt.Sprintf("Redacted %d secret(s) from output files", redacted)) + if findingCount > 0 { + printer.StepWarn(fmt.Sprintf("Sanitized %d finding(s) in output files", findingCount)) } else { - printer.StepDone("Output files clean — no secrets found") + printer.StepDone("Output files clean — no issues found") } return nil } diff --git a/internal/cli/scan.go b/internal/cli/scan.go index 61d8968c6..c2886eb3d 100644 --- a/internal/cli/scan.go +++ b/internal/cli/scan.go @@ -160,8 +160,9 @@ func newScanOutputCmd() *cobra.Command { return &cobra.Command{ Use: "output", Short: "Scan agent output for leaked secrets before posting", - Long: `Reads text from stdin and scans for API keys, tokens, credentials, -and sensitive patterns. Outputs the redacted version to stdout. + Long: `Reads text from stdin, normalizes invisible Unicode characters, and +scans for API keys, tokens, credentials, and sensitive patterns. Outputs the +sanitized version to stdout. Usage in a workflow step: echo "$AGENT_OUTPUT" | fullsend scan output > safe_output.txt`, @@ -186,15 +187,19 @@ Usage in a workflow step: return nil } - redactor := security.NewSecretRedactor() - result := redactor.Scan(text) + pipeline := security.OutputPipeline() + result := pipeline.Scan(text) if len(result.Findings) > 0 { - printer.StepWarn(fmt.Sprintf("Redacted %d secret(s) from agent output", len(result.Findings))) + printer.StepWarn(fmt.Sprintf("Sanitized %d finding(s) in agent output", len(result.Findings))) for _, f := range result.Findings { printer.StepWarn(fmt.Sprintf(" %s: %s", f.Name, f.Detail)) } - fmt.Fprint(os.Stdout, result.Sanitized) + out := result.Sanitized + if out == "" { + out = text + } + fmt.Fprint(os.Stdout, out) } else { printer.StepDone("No secrets found in output") fmt.Fprint(os.Stdout, text) diff --git a/internal/security/hooks.go b/internal/security/hooks.go index 16b03da3b..8667f4c61 100644 --- a/internal/security/hooks.go +++ b/internal/security/hooks.go @@ -110,22 +110,24 @@ func GenerateClaudeSettings(h *harness.Harness) ([]byte, error) { // PostToolUse hooks for Bash|WebFetch|Read. Combined into a single matcher // so Claude Code chains them sequentially (separate matchers run in parallel // on the original result, which would cause modifications to conflict). - // Order: context suppress (compacts verbose success output) → secret redact - // → unicode scan. Suppressing first avoids scanning text we'd discard. + // Order: context suppress (compacts verbose success output) → unicode normalize + // → secret redact. Suppressing first avoids scanning text we'd discard. + // Invariant: unicode normalization must run before secret redaction so + // zero-width characters cannot break prefix regexes and reconstruct secrets. var postToolHooks []hookEntry if contextSuppressPostToolEnabled(sec) { postToolHooks = append(postToolHooks, hookEntry{ Type: "command", Command: "python3 " + SandboxHooksDir + "/context_suppress_posttool.py", }) } - if secretRedactPostToolEnabled(sec) { + if unicodePostToolEnabled(sec) { postToolHooks = append(postToolHooks, hookEntry{ - Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py", + Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py", }) } - if unicodePostToolEnabled(sec) { + if secretRedactPostToolEnabled(sec) { postToolHooks = append(postToolHooks, hookEntry{ - Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py", + Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py", }) } if len(postToolHooks) > 0 { diff --git a/internal/security/hooks/posttool_chain_test.py b/internal/security/hooks/posttool_chain_test.py new file mode 100644 index 000000000..68cf4d50f --- /dev/null +++ b/internal/security/hooks/posttool_chain_test.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +"""Integration tests for post-tool hook chain ordering (unicode before secret redact).""" + +from __future__ import annotations + +import json +import subprocess +import sys +import unittest +from pathlib import Path + +HOOKS_DIR = Path(__file__).parent +UNICODE_HOOK = str(HOOKS_DIR / "unicode_posttool.py") +SECRET_HOOK = str(HOOKS_DIR / "secret_redact_posttool.py") + +PLAIN_PAT = "ghp_FAKEtesttoken000000000000000000000000" + + +def obfuscate_with_char(text: str, char: str) -> str: + """Insert invisible character between each codepoint.""" + return char.join(text) + + +def run_hook(script: str, tool_result: str) -> tuple[int, str, str]: + proc = subprocess.run( + [sys.executable, script], + input=json.dumps({"tool_name": "Read", "tool_result": tool_result}), + capture_output=True, + text=True, + timeout=10, + ) + return proc.returncode, proc.stdout, proc.stderr + + +def run_chain(tool_result: str) -> str: + """Run unicode_posttool then secret_redact_posttool (correct sandbox order).""" + rc, stdout, stderr = run_hook(UNICODE_HOOK, tool_result) + if rc != 0: + raise RuntimeError(f"unicode hook failed: rc={rc}, stderr={stderr}") + if stdout.strip(): + out = json.loads(stdout) + tool_result = out["tool_result"] + + rc, stdout, stderr = run_hook(SECRET_HOOK, tool_result) + if rc != 0: + raise RuntimeError(f"secret_redact hook failed: rc={rc}, stderr={stderr}") + if stdout.strip(): + out = json.loads(stdout) + return out["tool_result"] + return tool_result + + +class TestPostToolChain(unittest.TestCase): + def test_plain_pat_redacted_by_chain(self): + result = run_chain(PLAIN_PAT) + self.assertNotIn("ghp_FAKEtest", result) + self.assertIn("...", result) + + def test_zero_width_obfuscated_pat_redacted_by_chain(self): + obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200c") + result = run_chain(obfuscated) + self.assertNotIn("ghp_FAKEtest", result) + self.assertIn("...", result) + + def test_ltr_mark_obfuscated_pat_redacted_by_chain(self): + obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200e") + result = run_chain(obfuscated) + self.assertNotIn("ghp_FAKEtest", result) + self.assertIn("...", result) + + def test_redact_alone_misses_zero_width_obfuscated_pat(self): + obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200c") + rc, stdout, _ = run_hook(SECRET_HOOK, obfuscated) + self.assertEqual(rc, 0) + # secret_redact alone does not modify output when regex cannot match + self.assertEqual(stdout.strip(), "") + # Obfuscated token still present in source (would leak after unicode strips ZWNJ) + self.assertIn("\u200c", obfuscated) + + +if __name__ == "__main__": + unittest.main() diff --git a/internal/security/hooks/unicode_posttool.py b/internal/security/hooks/unicode_posttool.py index 98d38a9d6..20d1dee9a 100644 --- a/internal/security/hooks/unicode_posttool.py +++ b/internal/security/hooks/unicode_posttool.py @@ -39,7 +39,9 @@ ( "zero_width", "high", - re.compile("[\u200b-\u200d\ufeff\u00ad\u2060-\u2064]+"), + re.compile( + "[\u00ad\u034f\u180e\u200b-\u200f\ufeff\u2060-\u2064\u206a-\u206f\ufff9-\ufffb]+" + ), ), ( "bidi_override", @@ -129,6 +131,18 @@ def scan_text(text: str) -> tuple[str, list[dict]]: result = pattern.sub("", result) + # Supplementary variation selectors (VS17-VS256, U+E0100-U+E01EF). + supp_vs = [c for c in result if 0xE0100 <= ord(c) <= 0xE01EF] + if supp_vs: + findings.append( + { + "name": "variation_selector", + "severity": "medium", + "detail": (f"{len(supp_vs)} supplementary variation selector character(s) removed"), + } + ) + result = "".join(c for c in result if not (0xE0100 <= ord(c) <= 0xE01EF)) + # NFKC normalization (fullwidth -> ASCII, compatibility decomposition). nfkc = unicodedata.normalize("NFKC", result) if nfkc != result: diff --git a/internal/security/hooks_test.go b/internal/security/hooks_test.go index abf456b59..170f1401e 100644 --- a/internal/security/hooks_test.go +++ b/internal/security/hooks_test.go @@ -2,6 +2,7 @@ package security import ( "encoding/json" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -32,7 +33,7 @@ func TestGenerateClaudeSettings_AllDefaults(t *testing.T) { matcher := postTools[0].(map[string]any) assert.Equal(t, "Bash|WebFetch|Read", matcher["matcher"]) chainedHooks := matcher["hooks"].([]any) - assert.Len(t, chainedHooks, 3) // context_suppress → secret_redact → unicode + assert.Len(t, chainedHooks, 3) // context_suppress → unicode → secret_redact // Verify canary hook has its own * matcher. canaryMatcher := postTools[1].(map[string]any) @@ -222,10 +223,46 @@ func TestGenerateClaudeSettings_ContextSuppressDisabled(t *testing.T) { postTools := hooks["PostToolUse"].([]any) assert.Len(t, postTools, 2) // chain matcher + canary matcher - // With context_suppress disabled: secret_redact + unicode in the chain. + // With context_suppress disabled: unicode + secret_redact in the chain. matcher := postTools[0].(map[string]any) chainedHooks := matcher["hooks"].([]any) - assert.Len(t, chainedHooks, 2) // secret_redact + unicode + assert.Len(t, chainedHooks, 2) // unicode + secret_redact +} + +func TestGenerateClaudeSettings_PostToolSanitizeHookOrder(t *testing.T) { + h := &harness.Harness{Agent: "test.md"} + data, err := GenerateClaudeSettings(h) + require.NoError(t, err) + + var settings map[string]any + require.NoError(t, json.Unmarshal(data, &settings)) + + postTools := settings["hooks"].(map[string]any)["PostToolUse"].([]any) + matcher := postTools[0].(map[string]any) + require.Equal(t, "Bash|WebFetch|Read", matcher["matcher"]) + + chainedHooks := matcher["hooks"].([]any) + commands := make([]string, len(chainedHooks)) + for i, h := range chainedHooks { + commands[i] = h.(map[string]any)["command"].(string) + } + + hookIndex := func(substr string) int { + for i, cmd := range commands { + if strings.Contains(cmd, substr) { + return i + } + } + t.Fatalf("hook containing %q not found in %v", substr, commands) + return -1 + } + + suppressIdx := hookIndex("context_suppress_posttool.py") + unicodeIdx := hookIndex("unicode_posttool.py") + redactIdx := hookIndex("secret_redact_posttool.py") + + assert.Less(t, suppressIdx, unicodeIdx, "context_suppress must run before unicode") + assert.Less(t, unicodeIdx, redactIdx, "unicode must run before secret_redact") } func TestGenerateClaudeSettings_CanaryPostToolDisabled(t *testing.T) { diff --git a/internal/security/scanner.go b/internal/security/scanner.go index 4334569d7..a793cdee5 100644 --- a/internal/security/scanner.go +++ b/internal/security/scanner.go @@ -92,9 +92,11 @@ func InputPipeline() *Pipeline { // OutputPipeline returns the standard output scanning pipeline for // agent-generated text before posting to the forge. -// 1. SecretRedactor — redact API keys, tokens, credentials +// 1. UnicodeNormalizer — strip invisible chars, normalize fullwidth +// 2. SecretRedactor — redact API keys, tokens, credentials func OutputPipeline() *Pipeline { return NewPipeline( + NewUnicodeNormalizer(), NewSecretRedactor(), ) } diff --git a/internal/security/scanner_test.go b/internal/security/scanner_test.go index 0c6301388..361850aa6 100644 --- a/internal/security/scanner_test.go +++ b/internal/security/scanner_test.go @@ -228,6 +228,36 @@ func TestPipeline(t *testing.T) { assert.NotContains(t, r.Sanitized, "ghp_FAKEtest") }) + t.Run("normalize then redact catches zero-width obfuscated PAT", func(t *testing.T) { + p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor()) + plain := "ghp_FAKEtesttoken000000000000000000000000" + var obfuscated strings.Builder + for _, r := range plain { + obfuscated.WriteRune(r) + obfuscated.WriteRune('\u200c') + } + r := p.Scan(obfuscated.String()) + assert.False(t, r.Safe) + assert.True(t, hasFinding(r, "zero_width")) + assert.True(t, hasFinding(r, "github_pat")) + assert.NotContains(t, r.Sanitized, "ghp_FAKEtest") + }) + + t.Run("normalize then redact catches LTR mark obfuscated PAT", func(t *testing.T) { + p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor()) + plain := "ghp_FAKEtesttoken000000000000000000000000" + var obfuscated strings.Builder + for _, r := range plain { + obfuscated.WriteRune(r) + obfuscated.WriteRune('\u200e') + } + r := p.Scan(obfuscated.String()) + assert.False(t, r.Safe) + assert.True(t, hasFinding(r, "zero_width")) + assert.True(t, hasFinding(r, "github_pat")) + assert.NotContains(t, r.Sanitized, "ghp_FAKEtest") + }) + t.Run("clean text passes both", func(t *testing.T) { p := InputPipeline() r := p.Scan("Normal commit message fixing a null pointer bug.") diff --git a/internal/security/unicode.go b/internal/security/unicode.go index 2a1625301..0c5983bd7 100644 --- a/internal/security/unicode.go +++ b/internal/security/unicode.go @@ -22,9 +22,9 @@ func NewUnicodeNormalizer() *UnicodeNormalizer { func (u *UnicodeNormalizer) Name() string { return "unicode_normalizer" } var ( - // Zero-width and invisible characters. + // Zero-width and invisible format characters (aligned with unicode_posttool.py). reZeroWidth = regexp.MustCompile( - "[\u200B\u200C\u200D\uFEFF\u00AD\u2060-\u2064]+", + "[\u00AD\u034F\u180E\u200B-\u200F\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]+", ) // Bidirectional override characters. @@ -32,16 +32,37 @@ var ( "[\u202A-\u202E\u2066-\u2069]+", ) - // ANSI escape sequences. - reANSI = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`) + // ANSI CSI escape sequences (ECMA-48 compliant). + reANSI = regexp.MustCompile(`\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]`) + + // OSC escape sequences (terminal hyperlinks, title injection). + reOSC = regexp.MustCompile(`\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)`) // Null bytes. reNull = regexp.MustCompile("\x00+") - // Variation selectors. + // BMP variation selectors (VS1-VS16). reVariation = regexp.MustCompile("[\uFE00-\uFE0F]+") ) +// stripTerminalEscapes removes ANSI CSI and OSC sequences from text. +func stripTerminalEscapes(text string) (string, int, int) { + current := text + ansiCount := 0 + oscCount := 0 + + if matches := reANSI.FindAllString(current, -1); len(matches) > 0 { + ansiCount = len(matches) + current = reANSI.ReplaceAllString(current, "") + } + if matches := reOSC.FindAllString(current, -1); len(matches) > 0 { + oscCount = len(matches) + current = reOSC.ReplaceAllString(current, "") + } + + return current, ansiCount, oscCount +} + func (u *UnicodeNormalizer) Scan(text string) ScanResult { result := ScanResult{Safe: true, Sanitized: text} current := text @@ -62,15 +83,25 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult { current = reNull.ReplaceAllString(current, "") } - // ANSI escapes - if matches := reANSI.FindAllString(current, -1); len(matches) > 0 { - findings = append(findings, Finding{ - Scanner: "unicode_normalizer", - Name: "ansi_escape", - Severity: "medium", - Detail: fmt.Sprintf("%d ANSI escape sequences removed", len(matches)), - }) - current = reANSI.ReplaceAllString(current, "") + // ANSI and OSC escapes + if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 { + if ansiCount > 0 { + findings = append(findings, Finding{ + Scanner: "unicode_normalizer", + Name: "ansi_escape", + Severity: "medium", + Detail: fmt.Sprintf("%d ANSI escape sequences removed", ansiCount), + }) + } + if oscCount > 0 { + findings = append(findings, Finding{ + Scanner: "unicode_normalizer", + Name: "osc_escape", + Severity: "medium", + Detail: fmt.Sprintf("%d OSC escape sequences removed", oscCount), + }) + } + current = stripped } // Zero-width characters @@ -130,7 +161,7 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult { current = tagStripped.String() } - // Variation selectors + // BMP variation selectors (VS1-VS16) if locs := reVariation.FindAllStringIndex(current, -1); len(locs) > 0 { count := 0 for _, loc := range locs { @@ -145,6 +176,26 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult { current = reVariation.ReplaceAllString(current, "") } + // Supplementary variation selectors (VS17-VS256, U+E0100-U+E01EF). + var suppVSStripped strings.Builder + suppVSCount := 0 + for _, r := range current { + if r >= 0xE0100 && r <= 0xE01EF { + suppVSCount++ + } else { + suppVSStripped.WriteRune(r) + } + } + if suppVSCount > 0 { + findings = append(findings, Finding{ + Scanner: "unicode_normalizer", + Name: "variation_selector", + Severity: "medium", + Detail: fmt.Sprintf("%d supplementary variation selectors removed", suppVSCount), + }) + current = suppVSStripped.String() + } + // NFKC normalization (fullwidth -> ASCII, compatibility decomposition) nfkc := norm.NFKC.String(current) if nfkc != current { @@ -177,6 +228,27 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult { Detail: fmt.Sprintf("NFKC normalization applied (%d characters affected)", diffCount), }) current = nfkc + + // NFKC can reconstruct escape sequences from fullwidth characters. + if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 { + if ansiCount > 0 { + findings = append(findings, Finding{ + Scanner: "unicode_normalizer", + Name: "ansi_escape", + Severity: "medium", + Detail: fmt.Sprintf("%d ANSI escape sequences removed (post-NFKC)", ansiCount), + }) + } + if oscCount > 0 { + findings = append(findings, Finding{ + Scanner: "unicode_normalizer", + Name: "osc_escape", + Severity: "medium", + Detail: fmt.Sprintf("%d OSC escape sequences removed (post-NFKC)", oscCount), + }) + } + current = stripped + } } result.Findings = findings