Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions internal/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -1343,7 +1343,7 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
return nil
}

redactor := security.NewSecretRedactor()
pipeline := security.OutputPipeline()
Comment thread
ifireball marked this conversation as resolved.
redacted := 0
findingsPath := filepath.Join(outputDir, "security", "findings.jsonl")

Expand All @@ -1365,7 +1365,7 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
return nil
}

result := redactor.Scan(string(content))
result := pipeline.Scan(string(content))
Comment thread
ifireball marked this conversation as resolved.
Outdated
Comment thread
ifireball marked this conversation as resolved.
Outdated
if len(result.Findings) > 0 {
redacted += len(result.Findings)
relPath, _ := filepath.Rel(outputDir, path)
Expand Down
12 changes: 8 additions & 4 deletions internal/cli/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,19 @@ Usage in a workflow step:
return nil
}

redactor := security.NewSecretRedactor()
result := redactor.Scan(text)
pipeline := security.OutputPipeline()
result := pipeline.Scan(text)

if len(result.Findings) > 0 {
printer.StepWarn(fmt.Sprintf("Redacted %d secret(s) from agent output", len(result.Findings)))
printer.StepWarn(fmt.Sprintf("Sanitized %d finding(s) in agent output", len(result.Findings)))
for _, f := range result.Findings {
printer.StepWarn(fmt.Sprintf(" %s: %s", f.Name, f.Detail))
}
fmt.Fprint(os.Stdout, result.Sanitized)
out := result.Sanitized
if out == "" {
out = text
}
fmt.Fprint(os.Stdout, out)
} else {
printer.StepDone("No secrets found in output")
fmt.Fprint(os.Stdout, text)
Expand Down
14 changes: 8 additions & 6 deletions internal/security/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,24 @@ func GenerateClaudeSettings(h *harness.Harness) ([]byte, error) {
// PostToolUse hooks for Bash|WebFetch|Read. Combined into a single matcher
// so Claude Code chains them sequentially (separate matchers run in parallel
// on the original result, which would cause modifications to conflict).
// Order: context suppress (compacts verbose success output) → secret redact
// → unicode scan. Suppressing first avoids scanning text we'd discard.
// Order: context suppress (compacts verbose success output) → unicode normalize
// → secret redact. Suppressing first avoids scanning text we'd discard.
// Invariant: unicode normalization must run before secret redaction so
// zero-width characters cannot break prefix regexes and reconstruct secrets.
var postToolHooks []hookEntry
if contextSuppressPostToolEnabled(sec) {
postToolHooks = append(postToolHooks, hookEntry{
Type: "command", Command: "python3 " + SandboxHooksDir + "/context_suppress_posttool.py",
})
}
if secretRedactPostToolEnabled(sec) {
if unicodePostToolEnabled(sec) {
postToolHooks = append(postToolHooks, hookEntry{
Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py",
Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py",
})
}
if unicodePostToolEnabled(sec) {
if secretRedactPostToolEnabled(sec) {
postToolHooks = append(postToolHooks, hookEntry{
Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py",
Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py",
})
}
if len(postToolHooks) > 0 {
Expand Down
76 changes: 76 additions & 0 deletions internal/security/hooks/posttool_chain_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python3
"""Integration tests for post-tool hook chain ordering (unicode before secret redact)."""

from __future__ import annotations

import json
import subprocess
import sys
import unittest
from pathlib import Path

HOOKS_DIR = Path(__file__).parent
UNICODE_HOOK = str(HOOKS_DIR / "unicode_posttool.py")
SECRET_HOOK = str(HOOKS_DIR / "secret_redact_posttool.py")

PLAIN_PAT = "ghp_FAKEtesttoken000000000000000000000000"


def obfuscate_with_zwnj(text: str) -> str:
"""Insert zero-width non-joiner (U+200C) between each character."""
return "\u200c".join(text)


def run_hook(script: str, tool_result: str) -> tuple[int, str]:
proc = subprocess.run(
[sys.executable, script],
input=json.dumps({"tool_name": "Read", "tool_result": tool_result}),
capture_output=True,
text=True,
timeout=10,
)
return proc.returncode, proc.stdout
Comment thread
ifireball marked this conversation as resolved.
Outdated


def run_chain(tool_result: str) -> str:
"""Run unicode_posttool then secret_redact_posttool (correct sandbox order)."""
rc, stdout = run_hook(UNICODE_HOOK, tool_result)
if rc != 0:
raise RuntimeError(f"unicode hook failed: rc={rc}")
if stdout.strip():
out = json.loads(stdout)
tool_result = out["tool_result"]

rc, stdout = run_hook(SECRET_HOOK, tool_result)
if rc != 0:
raise RuntimeError(f"secret_redact hook failed: rc={rc}")
if stdout.strip():
out = json.loads(stdout)
return out["tool_result"]
return tool_result


class TestPostToolChain(unittest.TestCase):
def test_plain_pat_redacted_by_chain(self):
result = run_chain(PLAIN_PAT)
self.assertNotIn("ghp_FAKEtest", result)
self.assertIn("...", result)

def test_zero_width_obfuscated_pat_redacted_by_chain(self):
obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
result = run_chain(obfuscated)
self.assertNotIn("ghp_FAKEtest", result)
self.assertIn("...", result)

def test_redact_alone_misses_zero_width_obfuscated_pat(self):
obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
rc, stdout = run_hook(SECRET_HOOK, obfuscated)
self.assertEqual(rc, 0)
# secret_redact alone does not modify output when regex cannot match
self.assertEqual(stdout.strip(), "")
# Obfuscated token still present in source (would leak after unicode strips ZWNJ)
self.assertIn("\u200c", obfuscated)


if __name__ == "__main__":
unittest.main()
43 changes: 40 additions & 3 deletions internal/security/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package security

import (
"encoding/json"
"strings"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -32,7 +33,7 @@ func TestGenerateClaudeSettings_AllDefaults(t *testing.T) {
matcher := postTools[0].(map[string]any)
assert.Equal(t, "Bash|WebFetch|Read", matcher["matcher"])
chainedHooks := matcher["hooks"].([]any)
assert.Len(t, chainedHooks, 3) // context_suppress → secret_redactunicode
assert.Len(t, chainedHooks, 3) // context_suppress → unicodesecret_redact

// Verify canary hook has its own * matcher.
canaryMatcher := postTools[1].(map[string]any)
Expand Down Expand Up @@ -222,10 +223,46 @@ func TestGenerateClaudeSettings_ContextSuppressDisabled(t *testing.T) {
postTools := hooks["PostToolUse"].([]any)
assert.Len(t, postTools, 2) // chain matcher + canary matcher

// With context_suppress disabled: secret_redact + unicode in the chain.
// With context_suppress disabled: unicode + secret_redact in the chain.
matcher := postTools[0].(map[string]any)
chainedHooks := matcher["hooks"].([]any)
assert.Len(t, chainedHooks, 2) // secret_redact + unicode
assert.Len(t, chainedHooks, 2) // unicode + secret_redact
}

func TestGenerateClaudeSettings_PostToolSanitizeHookOrder(t *testing.T) {
h := &harness.Harness{Agent: "test.md"}
data, err := GenerateClaudeSettings(h)
require.NoError(t, err)

var settings map[string]any
require.NoError(t, json.Unmarshal(data, &settings))

postTools := settings["hooks"].(map[string]any)["PostToolUse"].([]any)
matcher := postTools[0].(map[string]any)
require.Equal(t, "Bash|WebFetch|Read", matcher["matcher"])

chainedHooks := matcher["hooks"].([]any)
commands := make([]string, len(chainedHooks))
for i, h := range chainedHooks {
commands[i] = h.(map[string]any)["command"].(string)
}

hookIndex := func(substr string) int {
for i, cmd := range commands {
if strings.Contains(cmd, substr) {
return i
}
}
t.Fatalf("hook containing %q not found in %v", substr, commands)
return -1
}

suppressIdx := hookIndex("context_suppress_posttool.py")
unicodeIdx := hookIndex("unicode_posttool.py")
redactIdx := hookIndex("secret_redact_posttool.py")

assert.Less(t, suppressIdx, unicodeIdx, "context_suppress must run before unicode")
assert.Less(t, unicodeIdx, redactIdx, "unicode must run before secret_redact")
}

func TestGenerateClaudeSettings_CanaryPostToolDisabled(t *testing.T) {
Expand Down
4 changes: 3 additions & 1 deletion internal/security/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,11 @@ func InputPipeline() *Pipeline {

// OutputPipeline returns the standard output scanning pipeline for
// agent-generated text before posting to the forge.
Comment thread
ifireball marked this conversation as resolved.
// 1. SecretRedactor — redact API keys, tokens, credentials
// 1. UnicodeNormalizer — strip invisible chars, normalize fullwidth
// 2. SecretRedactor — redact API keys, tokens, credentials
func OutputPipeline() *Pipeline {
return NewPipeline(
Comment thread
ifireball marked this conversation as resolved.
NewUnicodeNormalizer(),
NewSecretRedactor(),
)
}
Expand Down
15 changes: 15 additions & 0 deletions internal/security/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,21 @@ func TestPipeline(t *testing.T) {
assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
})

t.Run("normalize then redact catches zero-width obfuscated PAT", func(t *testing.T) {
p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor())
plain := "ghp_FAKEtesttoken000000000000000000000000"
var obfuscated strings.Builder
for _, r := range plain {
obfuscated.WriteRune(r)
obfuscated.WriteRune('\u200c')
}
r := p.Scan(obfuscated.String())
assert.False(t, r.Safe)
assert.True(t, hasFinding(r, "zero_width"))
assert.True(t, hasFinding(r, "github_pat"))
assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
})

t.Run("clean text passes both", func(t *testing.T) {
p := InputPipeline()
r := p.Scan("Normal commit message fixing a null pointer bug.")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM] Missing wrong-order negative test (2/8 agents flagged)

These tests prove the correct pipeline order catches obfuscated tokens, but there's no test proving the wrong order (NewPipeline(NewSecretRedactor(), NewUnicodeNormalizer())) fails to catch them. A wrong-order test directly validates the ordering invariant documented in hooks.go:125-127 and would catch regressions if someone accidentally swaps the pipeline composition in OutputPipeline().

Suggestion:

t.Run("wrong order leaks zero-width obfuscated PAT", func(t *testing.T) {
    p := NewPipeline(NewSecretRedactor(), NewUnicodeNormalizer())
    plain := "ghp_FAKEtesttoken000000000000000000000000"
    var obfuscated strings.Builder
    for _, r := range plain {
        obfuscated.WriteRune(r)
        obfuscated.WriteRune('‌')
    }
    r := p.Scan(obfuscated.String())
    // Redactor runs first, sees obfuscated token, misses it
    assert.True(t, hasFinding(r, "zero_width"))
    assert.False(t, hasFinding(r, "github_pat"), "wrong order must NOT catch the obfuscated token")
})

Expand Down
Loading