From 431d9092ec742e0020212fb556f2f4b459b877ff Mon Sep 17 00:00:00 2001
From: Barak Korren <bkorren@redhat.com>
Date: Tue, 19 May 2026 15:56:51 +0300
Subject: [PATCH 1/5] fix(security): run unicode normalization before secret
 redaction

Zero-width characters interleaved in token-shaped strings bypassed
secret regexes when redaction ran before unicode stripping in sandbox
post-tool hooks. Reorder hooks to normalize first, extend OutputPipeline
and output scanning paths, and add regression tests.

Fixes #444

Co-authored-by: Cursor <cursoragent@cursor.com>
Signed-off-by: Barak Korren <bkorren@redhat.com>
---
 internal/cli/run.go                           |  4 +-
 internal/cli/scan.go                          | 12 ++-
 internal/security/hooks.go                    | 14 ++--
 .../security/hooks/posttool_chain_test.py     | 76 +++++++++++++++++++
 internal/security/hooks_test.go               | 43 ++++++++++-
 internal/security/scanner.go                  |  4 +-
 internal/security/scanner_test.go             | 15 ++++
 7 files changed, 152 insertions(+), 16 deletions(-)
 create mode 100644 internal/security/hooks/posttool_chain_test.py

diff --git a/internal/cli/run.go b/internal/cli/run.go
index 763213002..c30d9c3f2 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -1408,7 +1408,7 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 		return nil
 	}
 
-	redactor := security.NewSecretRedactor()
+	pipeline := security.OutputPipeline()
 	redacted := 0
 	findingsPath := filepath.Join(outputDir, "security", "findings.jsonl")
 
@@ -1430,7 +1430,7 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 			return nil
 		}
 
-		result := redactor.Scan(string(content))
+		result := pipeline.Scan(string(content))
 		if len(result.Findings) > 0 {
 			redacted += len(result.Findings)
 			relPath, _ := filepath.Rel(outputDir, path)
diff --git a/internal/cli/scan.go b/internal/cli/scan.go
index 61d8968c6..ab290c0bf 100644
--- a/internal/cli/scan.go
+++ b/internal/cli/scan.go
@@ -186,15 +186,19 @@ Usage in a workflow step:
 				return nil
 			}
 
-			redactor := security.NewSecretRedactor()
-			result := redactor.Scan(text)
+			pipeline := security.OutputPipeline()
+			result := pipeline.Scan(text)
 
 			if len(result.Findings) > 0 {
-				printer.StepWarn(fmt.Sprintf("Redacted %d secret(s) from agent output", len(result.Findings)))
+				printer.StepWarn(fmt.Sprintf("Sanitized %d finding(s) in agent output", len(result.Findings)))
 				for _, f := range result.Findings {
 					printer.StepWarn(fmt.Sprintf("  %s: %s", f.Name, f.Detail))
 				}
-				fmt.Fprint(os.Stdout, result.Sanitized)
+				out := result.Sanitized
+				if out == "" {
+					out = text
+				}
+				fmt.Fprint(os.Stdout, out)
 			} else {
 				printer.StepDone("No secrets found in output")
 				fmt.Fprint(os.Stdout, text)
diff --git a/internal/security/hooks.go b/internal/security/hooks.go
index 16b03da3b..8667f4c61 100644
--- a/internal/security/hooks.go
+++ b/internal/security/hooks.go
@@ -110,22 +110,24 @@ func GenerateClaudeSettings(h *harness.Harness) ([]byte, error) {
 	// PostToolUse hooks for Bash|WebFetch|Read. Combined into a single matcher
 	// so Claude Code chains them sequentially (separate matchers run in parallel
 	// on the original result, which would cause modifications to conflict).
-	// Order: context suppress (compacts verbose success output) → secret redact
-	// → unicode scan. Suppressing first avoids scanning text we'd discard.
+	// Order: context suppress (compacts verbose success output) → unicode normalize
+	// → secret redact. Suppressing first avoids scanning text we'd discard.
+	// Invariant: unicode normalization must run before secret redaction so
+	// zero-width characters cannot break prefix regexes and reconstruct secrets.
 	var postToolHooks []hookEntry
 	if contextSuppressPostToolEnabled(sec) {
 		postToolHooks = append(postToolHooks, hookEntry{
 			Type: "command", Command: "python3 " + SandboxHooksDir + "/context_suppress_posttool.py",
 		})
 	}
-	if secretRedactPostToolEnabled(sec) {
+	if unicodePostToolEnabled(sec) {
 		postToolHooks = append(postToolHooks, hookEntry{
-			Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py",
+			Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py",
 		})
 	}
-	if unicodePostToolEnabled(sec) {
+	if secretRedactPostToolEnabled(sec) {
 		postToolHooks = append(postToolHooks, hookEntry{
-			Type: "command", Command: "python3 " + SandboxHooksDir + "/unicode_posttool.py",
+			Type: "command", Command: "python3 " + SandboxHooksDir + "/secret_redact_posttool.py",
 		})
 	}
 	if len(postToolHooks) > 0 {
diff --git a/internal/security/hooks/posttool_chain_test.py b/internal/security/hooks/posttool_chain_test.py
new file mode 100644
index 000000000..a0ff78bae
--- /dev/null
+++ b/internal/security/hooks/posttool_chain_test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""Integration tests for post-tool hook chain ordering (unicode before secret redact)."""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+
+HOOKS_DIR = Path(__file__).parent
+UNICODE_HOOK = str(HOOKS_DIR / "unicode_posttool.py")
+SECRET_HOOK = str(HOOKS_DIR / "secret_redact_posttool.py")
+
+PLAIN_PAT = "ghp_FAKEtesttoken000000000000000000000000"
+
+
+def obfuscate_with_zwnj(text: str) -> str:
+    """Insert zero-width non-joiner (U+200C) between each character."""
+    return "\u200c".join(text)
+
+
+def run_hook(script: str, tool_result: str) -> tuple[int, str]:
+    proc = subprocess.run(
+        [sys.executable, script],
+        input=json.dumps({"tool_name": "Read", "tool_result": tool_result}),
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+    return proc.returncode, proc.stdout
+
+
+def run_chain(tool_result: str) -> str:
+    """Run unicode_posttool then secret_redact_posttool (correct sandbox order)."""
+    rc, stdout = run_hook(UNICODE_HOOK, tool_result)
+    if rc != 0:
+        raise RuntimeError(f"unicode hook failed: rc={rc}")
+    if stdout.strip():
+        out = json.loads(stdout)
+        tool_result = out["tool_result"]
+
+    rc, stdout = run_hook(SECRET_HOOK, tool_result)
+    if rc != 0:
+        raise RuntimeError(f"secret_redact hook failed: rc={rc}")
+    if stdout.strip():
+        out = json.loads(stdout)
+        return out["tool_result"]
+    return tool_result
+
+
+class TestPostToolChain(unittest.TestCase):
+    def test_plain_pat_redacted_by_chain(self):
+        result = run_chain(PLAIN_PAT)
+        self.assertNotIn("ghp_FAKEtest", result)
+        self.assertIn("...", result)
+
+    def test_zero_width_obfuscated_pat_redacted_by_chain(self):
+        obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
+        result = run_chain(obfuscated)
+        self.assertNotIn("ghp_FAKEtest", result)
+        self.assertIn("...", result)
+
+    def test_redact_alone_misses_zero_width_obfuscated_pat(self):
+        obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
+        rc, stdout = run_hook(SECRET_HOOK, obfuscated)
+        self.assertEqual(rc, 0)
+        # secret_redact alone does not modify output when regex cannot match
+        self.assertEqual(stdout.strip(), "")
+        # Obfuscated token still present in source (would leak after unicode strips ZWNJ)
+        self.assertIn("\u200c", obfuscated)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/internal/security/hooks_test.go b/internal/security/hooks_test.go
index abf456b59..170f1401e 100644
--- a/internal/security/hooks_test.go
+++ b/internal/security/hooks_test.go
@@ -2,6 +2,7 @@ package security
 
 import (
 	"encoding/json"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -32,7 +33,7 @@ func TestGenerateClaudeSettings_AllDefaults(t *testing.T) {
 	matcher := postTools[0].(map[string]any)
 	assert.Equal(t, "Bash|WebFetch|Read", matcher["matcher"])
 	chainedHooks := matcher["hooks"].([]any)
-	assert.Len(t, chainedHooks, 3) // context_suppress → secret_redact → unicode
+	assert.Len(t, chainedHooks, 3) // context_suppress → unicode → secret_redact
 
 	// Verify canary hook has its own * matcher.
 	canaryMatcher := postTools[1].(map[string]any)
@@ -222,10 +223,46 @@ func TestGenerateClaudeSettings_ContextSuppressDisabled(t *testing.T) {
 	postTools := hooks["PostToolUse"].([]any)
 	assert.Len(t, postTools, 2) // chain matcher + canary matcher
 
-	// With context_suppress disabled: secret_redact + unicode in the chain.
+	// With context_suppress disabled: unicode + secret_redact in the chain.
 	matcher := postTools[0].(map[string]any)
 	chainedHooks := matcher["hooks"].([]any)
-	assert.Len(t, chainedHooks, 2) // secret_redact + unicode
+	assert.Len(t, chainedHooks, 2) // unicode + secret_redact
+}
+
+func TestGenerateClaudeSettings_PostToolSanitizeHookOrder(t *testing.T) {
+	h := &harness.Harness{Agent: "test.md"}
+	data, err := GenerateClaudeSettings(h)
+	require.NoError(t, err)
+
+	var settings map[string]any
+	require.NoError(t, json.Unmarshal(data, &settings))
+
+	postTools := settings["hooks"].(map[string]any)["PostToolUse"].([]any)
+	matcher := postTools[0].(map[string]any)
+	require.Equal(t, "Bash|WebFetch|Read", matcher["matcher"])
+
+	chainedHooks := matcher["hooks"].([]any)
+	commands := make([]string, len(chainedHooks))
+	for i, h := range chainedHooks {
+		commands[i] = h.(map[string]any)["command"].(string)
+	}
+
+	hookIndex := func(substr string) int {
+		for i, cmd := range commands {
+			if strings.Contains(cmd, substr) {
+				return i
+			}
+		}
+		t.Fatalf("hook containing %q not found in %v", substr, commands)
+		return -1
+	}
+
+	suppressIdx := hookIndex("context_suppress_posttool.py")
+	unicodeIdx := hookIndex("unicode_posttool.py")
+	redactIdx := hookIndex("secret_redact_posttool.py")
+
+	assert.Less(t, suppressIdx, unicodeIdx, "context_suppress must run before unicode")
+	assert.Less(t, unicodeIdx, redactIdx, "unicode must run before secret_redact")
 }
 
 func TestGenerateClaudeSettings_CanaryPostToolDisabled(t *testing.T) {
diff --git a/internal/security/scanner.go b/internal/security/scanner.go
index 4334569d7..a793cdee5 100644
--- a/internal/security/scanner.go
+++ b/internal/security/scanner.go
@@ -92,9 +92,11 @@ func InputPipeline() *Pipeline {
 
 // OutputPipeline returns the standard output scanning pipeline for
 // agent-generated text before posting to the forge.
-//  1. SecretRedactor — redact API keys, tokens, credentials
+//  1. UnicodeNormalizer — strip invisible chars, normalize fullwidth
+//  2. SecretRedactor — redact API keys, tokens, credentials
 func OutputPipeline() *Pipeline {
 	return NewPipeline(
+		NewUnicodeNormalizer(),
 		NewSecretRedactor(),
 	)
 }
diff --git a/internal/security/scanner_test.go b/internal/security/scanner_test.go
index 2606c792e..39991af5c 100644
--- a/internal/security/scanner_test.go
+++ b/internal/security/scanner_test.go
@@ -297,6 +297,21 @@ func TestPipeline(t *testing.T) {
 		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
 	})
 
+	t.Run("normalize then redact catches zero-width obfuscated PAT", func(t *testing.T) {
+		p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor())
+		plain := "ghp_FAKEtesttoken000000000000000000000000"
+		var obfuscated strings.Builder
+		for _, r := range plain {
+			obfuscated.WriteRune(r)
+			obfuscated.WriteRune('\u200c')
+		}
+		r := p.Scan(obfuscated.String())
+		assert.False(t, r.Safe)
+		assert.True(t, hasFinding(r, "zero_width"))
+		assert.True(t, hasFinding(r, "github_pat"))
+		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
+	})
+
 	t.Run("clean text passes both", func(t *testing.T) {
 		p := InputPipeline()
 		r := p.Scan("Normal commit message fixing a null pointer bug.")

From 6ec05a220bb95535048dede0ef965f7b52f62535 Mon Sep 17 00:00:00 2001
From: Barak Korren <bkorren@redhat.com>
Date: Wed, 20 May 2026 09:09:38 +0300
Subject: [PATCH 2/5] fix(security): address PR review feedback on unicode
 coverage

Expand invisible-character stripping (U+200E/F, U+180E, etc.), align Go
ANSI/OSC handling with Python including post-NFKC rescan, add empty
Sanitized guards and consistent scan output logging.

Co-authored-by: Cursor <cursoragent@cursor.com>
Signed-off-by: Barak Korren <bkorren@redhat.com>
---
 internal/cli/run.go                           |  28 +++--
 internal/cli/scan.go                          |   5 +-
 .../security/hooks/posttool_chain_test.py     |  30 +++---
 internal/security/hooks/unicode_posttool.py   |  18 +++-
 internal/security/scanner_test.go             |  15 +++
 internal/security/unicode.go                  | 102 +++++++++++++++---
 6 files changed, 157 insertions(+), 41 deletions(-)

diff --git a/internal/cli/run.go b/internal/cli/run.go
index c30d9c3f2..7775ee404 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -1400,8 +1400,9 @@ func scanRepoContextFiles(repoDir string) []security.Finding {
 	return allFindings
 }
 
-// scanOutputFiles runs the secret redactor on extracted output files,
-// recursively walking all subdirectories (iteration-N/output/, etc.).
+// scanOutputFiles runs the output security pipeline (unicode normalization and
+// secret redaction) on extracted output files, recursively walking all
+// subdirectories (iteration-N/output/, etc.).
 func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 	if _, err := os.Stat(outputDir); os.IsNotExist(err) {
 		printer.StepInfo("No output files to scan")
@@ -1409,7 +1410,7 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 	}
 
 	pipeline := security.OutputPipeline()
-	redacted := 0
+	findingCount := 0
 	findingsPath := filepath.Join(outputDir, "security", "findings.jsonl")
 
 	err := filepath.WalkDir(outputDir, func(path string, d os.DirEntry, err error) error {
@@ -1430,12 +1431,13 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 			return nil
 		}
 
-		result := pipeline.Scan(string(content))
+		text := string(content)
+		result := pipeline.Scan(text)
 		if len(result.Findings) > 0 {
-			redacted += len(result.Findings)
+			findingCount += len(result.Findings)
 			relPath, _ := filepath.Rel(outputDir, path)
 			for _, f := range result.Findings {
-				printer.StepWarn(fmt.Sprintf("Redacted [%s] in %s: %s", f.Name, relPath, f.Detail))
+				printer.StepWarn(fmt.Sprintf("Sanitized [%s] in %s: %s", f.Name, relPath, f.Detail))
 				security.AppendFinding(findingsPath,
 					security.TracedFinding{
 						TraceID:   traceID,
@@ -1444,8 +1446,12 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 						Finding:   f,
 					})
 			}
-			if writeErr := os.WriteFile(path, []byte(result.Sanitized), 0o644); writeErr != nil {
-				printer.StepWarn(fmt.Sprintf("Could not write redacted %s: %v", relPath, writeErr))
+			out := result.Sanitized
+			if out == "" {
+				out = text
+			}
+			if writeErr := os.WriteFile(path, []byte(out), 0o644); writeErr != nil {
+				printer.StepWarn(fmt.Sprintf("Could not write sanitized %s: %v", relPath, writeErr))
 			}
 		}
 		return nil
@@ -1454,10 +1460,10 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 		return err
 	}
 
-	if redacted > 0 {
-		printer.StepWarn(fmt.Sprintf("Redacted %d secret(s) from output files", redacted))
+	if findingCount > 0 {
+		printer.StepWarn(fmt.Sprintf("Sanitized %d finding(s) in output files", findingCount))
 	} else {
-		printer.StepDone("Output files clean — no secrets found")
+		printer.StepDone("Output files clean — no issues found")
 	}
 	return nil
 }
diff --git a/internal/cli/scan.go b/internal/cli/scan.go
index ab290c0bf..c2886eb3d 100644
--- a/internal/cli/scan.go
+++ b/internal/cli/scan.go
@@ -160,8 +160,9 @@ func newScanOutputCmd() *cobra.Command {
 	return &cobra.Command{
 		Use:   "output",
 		Short: "Scan agent output for leaked secrets before posting",
-		Long: `Reads text from stdin and scans for API keys, tokens, credentials,
-and sensitive patterns. Outputs the redacted version to stdout.
+		Long: `Reads text from stdin, normalizes invisible Unicode characters, and
+scans for API keys, tokens, credentials, and sensitive patterns. Outputs the
+sanitized version to stdout.
 
 Usage in a workflow step:
   echo "$AGENT_OUTPUT" | fullsend scan output > safe_output.txt`,
diff --git a/internal/security/hooks/posttool_chain_test.py b/internal/security/hooks/posttool_chain_test.py
index a0ff78bae..68cf4d50f 100644
--- a/internal/security/hooks/posttool_chain_test.py
+++ b/internal/security/hooks/posttool_chain_test.py
@@ -16,12 +16,12 @@
 PLAIN_PAT = "ghp_FAKEtesttoken000000000000000000000000"
 
 
-def obfuscate_with_zwnj(text: str) -> str:
-    """Insert zero-width non-joiner (U+200C) between each character."""
-    return "\u200c".join(text)
+def obfuscate_with_char(text: str, char: str) -> str:
+    """Insert invisible character between each codepoint."""
+    return char.join(text)
 
 
-def run_hook(script: str, tool_result: str) -> tuple[int, str]:
+def run_hook(script: str, tool_result: str) -> tuple[int, str, str]:
     proc = subprocess.run(
         [sys.executable, script],
         input=json.dumps({"tool_name": "Read", "tool_result": tool_result}),
@@ -29,21 +29,21 @@ def run_hook(script: str, tool_result: str) -> tuple[int, str]:
         text=True,
         timeout=10,
     )
-    return proc.returncode, proc.stdout
+    return proc.returncode, proc.stdout, proc.stderr
 
 
 def run_chain(tool_result: str) -> str:
     """Run unicode_posttool then secret_redact_posttool (correct sandbox order)."""
-    rc, stdout = run_hook(UNICODE_HOOK, tool_result)
+    rc, stdout, stderr = run_hook(UNICODE_HOOK, tool_result)
     if rc != 0:
-        raise RuntimeError(f"unicode hook failed: rc={rc}")
+        raise RuntimeError(f"unicode hook failed: rc={rc}, stderr={stderr}")
     if stdout.strip():
         out = json.loads(stdout)
         tool_result = out["tool_result"]
 
-    rc, stdout = run_hook(SECRET_HOOK, tool_result)
+    rc, stdout, stderr = run_hook(SECRET_HOOK, tool_result)
     if rc != 0:
-        raise RuntimeError(f"secret_redact hook failed: rc={rc}")
+        raise RuntimeError(f"secret_redact hook failed: rc={rc}, stderr={stderr}")
     if stdout.strip():
         out = json.loads(stdout)
         return out["tool_result"]
@@ -57,14 +57,20 @@ def test_plain_pat_redacted_by_chain(self):
         self.assertIn("...", result)
 
     def test_zero_width_obfuscated_pat_redacted_by_chain(self):
-        obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
+        obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200c")
+        result = run_chain(obfuscated)
+        self.assertNotIn("ghp_FAKEtest", result)
+        self.assertIn("...", result)
+
+    def test_ltr_mark_obfuscated_pat_redacted_by_chain(self):
+        obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200e")
         result = run_chain(obfuscated)
         self.assertNotIn("ghp_FAKEtest", result)
         self.assertIn("...", result)
 
     def test_redact_alone_misses_zero_width_obfuscated_pat(self):
-        obfuscated = obfuscate_with_zwnj(PLAIN_PAT)
-        rc, stdout = run_hook(SECRET_HOOK, obfuscated)
+        obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200c")
+        rc, stdout, _ = run_hook(SECRET_HOOK, obfuscated)
         self.assertEqual(rc, 0)
         # secret_redact alone does not modify output when regex cannot match
         self.assertEqual(stdout.strip(), "")
diff --git a/internal/security/hooks/unicode_posttool.py b/internal/security/hooks/unicode_posttool.py
index 98d38a9d6..044c8d4cc 100644
--- a/internal/security/hooks/unicode_posttool.py
+++ b/internal/security/hooks/unicode_posttool.py
@@ -39,7 +39,9 @@
     (
         "zero_width",
         "high",
-        re.compile("[\u200b-\u200d\ufeff\u00ad\u2060-\u2064]+"),
+        re.compile(
+            "[\u00ad\u034f\u180e\u200b-\u200f\ufeff\u2060-\u2064\u206a-\u206f\ufff9-\ufffb]+"
+        ),
     ),
     (
         "bidi_override",
@@ -129,6 +131,20 @@ def scan_text(text: str) -> tuple[str, list[dict]]:
 
         result = pattern.sub("", result)
 
+    # Supplementary variation selectors (VS17-VS256, U+E0100-U+E01EF).
+    supp_vs = [c for c in result if 0xE0100 <= ord(c) <= 0xE01EF]
+    if supp_vs:
+        findings.append(
+            {
+                "name": "variation_selector",
+                "severity": "medium",
+                "detail": (
+                    f"{len(supp_vs)} supplementary variation selector character(s) removed"
+                ),
+            }
+        )
+        result = "".join(c for c in result if not (0xE0100 <= ord(c) <= 0xE01EF))
+
     # NFKC normalization (fullwidth -> ASCII, compatibility decomposition).
     nfkc = unicodedata.normalize("NFKC", result)
     if nfkc != result:
diff --git a/internal/security/scanner_test.go b/internal/security/scanner_test.go
index 39991af5c..8677649fc 100644
--- a/internal/security/scanner_test.go
+++ b/internal/security/scanner_test.go
@@ -312,6 +312,21 @@ func TestPipeline(t *testing.T) {
 		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
 	})
 
+	t.Run("normalize then redact catches LTR mark obfuscated PAT", func(t *testing.T) {
+		p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor())
+		plain := "ghp_FAKEtesttoken000000000000000000000000"
+		var obfuscated strings.Builder
+		for _, r := range plain {
+			obfuscated.WriteRune(r)
+			obfuscated.WriteRune('\u200e')
+		}
+		r := p.Scan(obfuscated.String())
+		assert.False(t, r.Safe)
+		assert.True(t, hasFinding(r, "zero_width"))
+		assert.True(t, hasFinding(r, "github_pat"))
+		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
+	})
+
 	t.Run("clean text passes both", func(t *testing.T) {
 		p := InputPipeline()
 		r := p.Scan("Normal commit message fixing a null pointer bug.")
diff --git a/internal/security/unicode.go b/internal/security/unicode.go
index 2a1625301..0c5983bd7 100644
--- a/internal/security/unicode.go
+++ b/internal/security/unicode.go
@@ -22,9 +22,9 @@ func NewUnicodeNormalizer() *UnicodeNormalizer {
 func (u *UnicodeNormalizer) Name() string { return "unicode_normalizer" }
 
 var (
-	// Zero-width and invisible characters.
+	// Zero-width and invisible format characters (aligned with unicode_posttool.py).
 	reZeroWidth = regexp.MustCompile(
-		"[\u200B\u200C\u200D\uFEFF\u00AD\u2060-\u2064]+",
+		"[\u00AD\u034F\u180E\u200B-\u200F\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]+",
 	)
 
 	// Bidirectional override characters.
@@ -32,16 +32,37 @@ var (
 		"[\u202A-\u202E\u2066-\u2069]+",
 	)
 
-	// ANSI escape sequences.
-	reANSI = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
+	// ANSI CSI escape sequences (ECMA-48 compliant).
+	reANSI = regexp.MustCompile(`\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]`)
+
+	// OSC escape sequences (terminal hyperlinks, title injection).
+	reOSC = regexp.MustCompile(`\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)`)
 
 	// Null bytes.
 	reNull = regexp.MustCompile("\x00+")
 
-	// Variation selectors.
+	// BMP variation selectors (VS1-VS16).
 	reVariation = regexp.MustCompile("[\uFE00-\uFE0F]+")
 )
 
+// stripTerminalEscapes removes ANSI CSI and OSC sequences from text.
+func stripTerminalEscapes(text string) (string, int, int) {
+	current := text
+	ansiCount := 0
+	oscCount := 0
+
+	if matches := reANSI.FindAllString(current, -1); len(matches) > 0 {
+		ansiCount = len(matches)
+		current = reANSI.ReplaceAllString(current, "")
+	}
+	if matches := reOSC.FindAllString(current, -1); len(matches) > 0 {
+		oscCount = len(matches)
+		current = reOSC.ReplaceAllString(current, "")
+	}
+
+	return current, ansiCount, oscCount
+}
+
 func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 	result := ScanResult{Safe: true, Sanitized: text}
 	current := text
@@ -62,15 +83,25 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = reNull.ReplaceAllString(current, "")
 	}
 
-	// ANSI escapes
-	if matches := reANSI.FindAllString(current, -1); len(matches) > 0 {
-		findings = append(findings, Finding{
-			Scanner:  "unicode_normalizer",
-			Name:     "ansi_escape",
-			Severity: "medium",
-			Detail:   fmt.Sprintf("%d ANSI escape sequences removed", len(matches)),
-		})
-		current = reANSI.ReplaceAllString(current, "")
+	// ANSI and OSC escapes
+	if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 {
+		if ansiCount > 0 {
+			findings = append(findings, Finding{
+				Scanner:  "unicode_normalizer",
+				Name:     "ansi_escape",
+				Severity: "medium",
+				Detail:   fmt.Sprintf("%d ANSI escape sequences removed", ansiCount),
+			})
+		}
+		if oscCount > 0 {
+			findings = append(findings, Finding{
+				Scanner:  "unicode_normalizer",
+				Name:     "osc_escape",
+				Severity: "medium",
+				Detail:   fmt.Sprintf("%d OSC escape sequences removed", oscCount),
+			})
+		}
+		current = stripped
 	}
 
 	// Zero-width characters
@@ -130,7 +161,7 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = tagStripped.String()
 	}
 
-	// Variation selectors
+	// BMP variation selectors (VS1-VS16)
 	if locs := reVariation.FindAllStringIndex(current, -1); len(locs) > 0 {
 		count := 0
 		for _, loc := range locs {
@@ -145,6 +176,26 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = reVariation.ReplaceAllString(current, "")
 	}
 
+	// Supplementary variation selectors (VS17-VS256, U+E0100-U+E01EF).
+	var suppVSStripped strings.Builder
+	suppVSCount := 0
+	for _, r := range current {
+		if r >= 0xE0100 && r <= 0xE01EF {
+			suppVSCount++
+		} else {
+			suppVSStripped.WriteRune(r)
+		}
+	}
+	if suppVSCount > 0 {
+		findings = append(findings, Finding{
+			Scanner:  "unicode_normalizer",
+			Name:     "variation_selector",
+			Severity: "medium",
+			Detail:   fmt.Sprintf("%d supplementary variation selectors removed", suppVSCount),
+		})
+		current = suppVSStripped.String()
+	}
+
 	// NFKC normalization (fullwidth -> ASCII, compatibility decomposition)
 	nfkc := norm.NFKC.String(current)
 	if nfkc != current {
@@ -177,6 +228,27 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 			Detail:   fmt.Sprintf("NFKC normalization applied (%d characters affected)", diffCount),
 		})
 		current = nfkc
+
+		// NFKC can reconstruct escape sequences from fullwidth characters.
+		if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 {
+			if ansiCount > 0 {
+				findings = append(findings, Finding{
+					Scanner:  "unicode_normalizer",
+					Name:     "ansi_escape",
+					Severity: "medium",
+					Detail:   fmt.Sprintf("%d ANSI escape sequences removed (post-NFKC)", ansiCount),
+				})
+			}
+			if oscCount > 0 {
+				findings = append(findings, Finding{
+					Scanner:  "unicode_normalizer",
+					Name:     "osc_escape",
+					Severity: "medium",
+					Detail:   fmt.Sprintf("%d OSC escape sequences removed (post-NFKC)", oscCount),
+				})
+			}
+			current = stripped
+		}
 	}
 
 	result.Findings = findings

From 2a0be483640b25485cbe4daa04edf5405fabe3f5 Mon Sep 17 00:00:00 2001
From: Barak Korren <bkorren@redhat.com>
Date: Wed, 20 May 2026 09:12:20 +0300
Subject: [PATCH 3/5] style: ruff-format unicode_posttool.py

Co-authored-by: Cursor <cursoragent@cursor.com>
Signed-off-by: Barak Korren <bkorren@redhat.com>
---
 internal/security/hooks/unicode_posttool.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/internal/security/hooks/unicode_posttool.py b/internal/security/hooks/unicode_posttool.py
index 044c8d4cc..20d1dee9a 100644
--- a/internal/security/hooks/unicode_posttool.py
+++ b/internal/security/hooks/unicode_posttool.py
@@ -138,9 +138,7 @@ def scan_text(text: str) -> tuple[str, list[dict]]:
             {
                 "name": "variation_selector",
                 "severity": "medium",
-                "detail": (
-                    f"{len(supp_vs)} supplementary variation selector character(s) removed"
-                ),
+                "detail": (f"{len(supp_vs)} supplementary variation selector character(s) removed"),
             }
         )
         result = "".join(c for c in result if not (0xE0100 <= ord(c) <= 0xE01EF))

From 0cb6fa3a8c46d709a4b37314790727da58633426 Mon Sep 17 00:00:00 2001
From: Barak Korren <bkorren@redhat.com>
Date: Tue, 26 May 2026 11:45:24 +0300
Subject: [PATCH 4/5] fix(security): address post-rebase review feedback

Only fall back to original text when the pipeline reports no findings,
use single-pass terminal escape stripping, and add wrong-order pipeline test.

Co-authored-by: Cursor <cursoragent@cursor.com>
Signed-off-by: Barak Korren <bkorren@redhat.com>
---
 internal/cli/run.go               |  4 +++-
 internal/cli/scan.go              |  2 +-
 internal/security/scanner_test.go | 13 +++++++++++++
 internal/security/unicode.go      | 19 ++++++++-----------
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/internal/cli/run.go b/internal/cli/run.go
index 7775ee404..eac07dd7c 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -1447,7 +1447,9 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 					})
 			}
 			out := result.Sanitized
-			if out == "" {
+			// Only fall back when the pipeline made no changes; empty Sanitized with
+			// findings means content was fully stripped (e.g. all invisible chars).
+			if out == "" && len(result.Findings) == 0 {
 				out = text
 			}
 			if writeErr := os.WriteFile(path, []byte(out), 0o644); writeErr != nil {
diff --git a/internal/cli/scan.go b/internal/cli/scan.go
index c2886eb3d..1c7ba6884 100644
--- a/internal/cli/scan.go
+++ b/internal/cli/scan.go
@@ -196,7 +196,7 @@ Usage in a workflow step:
 					printer.StepWarn(fmt.Sprintf("  %s: %s", f.Name, f.Detail))
 				}
 				out := result.Sanitized
-				if out == "" {
+				if out == "" && len(result.Findings) == 0 {
 					out = text
 				}
 				fmt.Fprint(os.Stdout, out)
diff --git a/internal/security/scanner_test.go b/internal/security/scanner_test.go
index 8677649fc..d72cd18d1 100644
--- a/internal/security/scanner_test.go
+++ b/internal/security/scanner_test.go
@@ -327,6 +327,19 @@ func TestPipeline(t *testing.T) {
 		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
 	})
 
+	t.Run("wrong order leaks zero-width obfuscated PAT", func(t *testing.T) {
+		p := NewPipeline(NewSecretRedactor(), NewUnicodeNormalizer())
+		plain := "ghp_FAKEtesttoken000000000000000000000000"
+		var obfuscated strings.Builder
+		for _, r := range plain {
+			obfuscated.WriteRune(r)
+			obfuscated.WriteRune('\u200c')
+		}
+		r := p.Scan(obfuscated.String())
+		assert.True(t, hasFinding(r, "zero_width"))
+		assert.False(t, hasFinding(r, "github_pat"), "redact-before-normalize must not catch obfuscated token")
+	})
+
 	t.Run("clean text passes both", func(t *testing.T) {
 		p := InputPipeline()
 		r := p.Scan("Normal commit message fixing a null pointer bug.")
diff --git a/internal/security/unicode.go b/internal/security/unicode.go
index 0c5983bd7..c3fc078ea 100644
--- a/internal/security/unicode.go
+++ b/internal/security/unicode.go
@@ -47,19 +47,16 @@ var (
 
 // stripTerminalEscapes removes ANSI CSI and OSC sequences from text.
 func stripTerminalEscapes(text string) (string, int, int) {
-	current := text
 	ansiCount := 0
+	current := reANSI.ReplaceAllStringFunc(text, func(string) string {
+		ansiCount++
+		return ""
+	})
 	oscCount := 0
-
-	if matches := reANSI.FindAllString(current, -1); len(matches) > 0 {
-		ansiCount = len(matches)
-		current = reANSI.ReplaceAllString(current, "")
-	}
-	if matches := reOSC.FindAllString(current, -1); len(matches) > 0 {
-		oscCount = len(matches)
-		current = reOSC.ReplaceAllString(current, "")
-	}
-
+	current = reOSC.ReplaceAllStringFunc(current, func(string) string {
+		oscCount++
+		return ""
+	})
 	return current, ansiCount, oscCount
 }
 

From a4f164e8741012461019224ff7c41c261a8eac4e Mon Sep 17 00:00:00 2001
From: Barak Korren <bkorren@redhat.com>
Date: Tue, 26 May 2026 21:10:28 +0300
Subject: [PATCH 5/5] fix(security): address latest PR review on unicode
 normalization

Remove dead Sanitized fallback guards, expand invisible-character coverage
(U+061C, line/paragraph separators, Cf sweep), generalize ST-terminated
escape stripping, and add wrong-order/fullwidth chain tests.

Signed-off-by: Barak Korren <bkorren@redhat.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
---
 internal/cli/run.go                           |  6 +--
 internal/cli/scan.go                          |  7 +--
 .../security/hooks/posttool_chain_test.py     | 41 ++++++++++++++++
 internal/security/hooks/unicode_posttool.py   |  9 ++--
 internal/security/scanner_test.go             | 33 +++++++++++++
 internal/security/unicode.go                  | 49 +++++++++++++------
 6 files changed, 117 insertions(+), 28 deletions(-)

diff --git a/internal/cli/run.go b/internal/cli/run.go
index eac07dd7c..d1b2b986d 100644
--- a/internal/cli/run.go
+++ b/internal/cli/run.go
@@ -1446,12 +1446,8 @@ func scanOutputFiles(outputDir, traceID string, printer *ui.Printer) error {
 						Finding:   f,
 					})
 			}
+			// Sanitized may be empty when all content was invisible characters.
 			out := result.Sanitized
-			// Only fall back when the pipeline made no changes; empty Sanitized with
-			// findings means content was fully stripped (e.g. all invisible chars).
-			if out == "" && len(result.Findings) == 0 {
-				out = text
-			}
 			if writeErr := os.WriteFile(path, []byte(out), 0o644); writeErr != nil {
 				printer.StepWarn(fmt.Sprintf("Could not write sanitized %s: %v", relPath, writeErr))
 			}
diff --git a/internal/cli/scan.go b/internal/cli/scan.go
index 1c7ba6884..4b96f3157 100644
--- a/internal/cli/scan.go
+++ b/internal/cli/scan.go
@@ -195,11 +195,8 @@ Usage in a workflow step:
 				for _, f := range result.Findings {
 					printer.StepWarn(fmt.Sprintf("  %s: %s", f.Name, f.Detail))
 				}
-				out := result.Sanitized
-				if out == "" && len(result.Findings) == 0 {
-					out = text
-				}
-				fmt.Fprint(os.Stdout, out)
+				// Sanitized may be empty when all content was invisible characters.
+				fmt.Fprint(os.Stdout, result.Sanitized)
 			} else {
 				printer.StepDone("No secrets found in output")
 				fmt.Fprint(os.Stdout, text)
diff --git a/internal/security/hooks/posttool_chain_test.py b/internal/security/hooks/posttool_chain_test.py
index 68cf4d50f..d52b1ddb8 100644
--- a/internal/security/hooks/posttool_chain_test.py
+++ b/internal/security/hooks/posttool_chain_test.py
@@ -32,6 +32,36 @@ def run_hook(script: str, tool_result: str) -> tuple[int, str, str]:
     return proc.returncode, proc.stdout, proc.stderr
 
 
+def run_wrong_chain(tool_result: str) -> str:
+    """Run secret_redact then unicode (wrong sandbox order — leaks obfuscated tokens)."""
+    rc, stdout, stderr = run_hook(SECRET_HOOK, tool_result)
+    if rc != 0:
+        raise RuntimeError(f"secret_redact hook failed: rc={rc}, stderr={stderr}")
+    if stdout.strip():
+        out = json.loads(stdout)
+        tool_result = out["tool_result"]
+
+    rc, stdout, stderr = run_hook(UNICODE_HOOK, tool_result)
+    if rc != 0:
+        raise RuntimeError(f"unicode hook failed: rc={rc}, stderr={stderr}")
+    if stdout.strip():
+        out = json.loads(stdout)
+        return out["tool_result"]
+    return tool_result
+
+
+def to_fullwidth_ascii(text: str) -> str:
+    """Convert printable ASCII to fullwidth compatibility forms."""
+    out: list[str] = []
+    for c in text:
+        o = ord(c)
+        if 0x21 <= o <= 0x7E:
+            out.append(chr(o + 0xFF00 - 0x20))
+        else:
+            out.append(c)
+    return "".join(out)
+
+
 def run_chain(tool_result: str) -> str:
     """Run unicode_posttool then secret_redact_posttool (correct sandbox order)."""
     rc, stdout, stderr = run_hook(UNICODE_HOOK, tool_result)
@@ -77,6 +107,17 @@ def test_redact_alone_misses_zero_width_obfuscated_pat(self):
         # Obfuscated token still present in source (would leak after unicode strips ZWNJ)
         self.assertIn("\u200c", obfuscated)
 
+    def test_wrong_order_chain_leaks_obfuscated_pat(self):
+        obfuscated = obfuscate_with_char(PLAIN_PAT, "\u200c")
+        result = run_wrong_chain(obfuscated)
+        self.assertIn("ghp_FAKEtest", result)
+
+    def test_fullwidth_obfuscated_pat_redacted_by_chain(self):
+        fullwidth = to_fullwidth_ascii(PLAIN_PAT)
+        result = run_chain(fullwidth)
+        self.assertNotIn("ghp_FAKEtest", result)
+        self.assertIn("...", result)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/internal/security/hooks/unicode_posttool.py b/internal/security/hooks/unicode_posttool.py
index 20d1dee9a..ba9418aee 100644
--- a/internal/security/hooks/unicode_posttool.py
+++ b/internal/security/hooks/unicode_posttool.py
@@ -40,7 +40,8 @@
         "zero_width",
         "high",
         re.compile(
-            "[\u00ad\u034f\u180e\u200b-\u200f\ufeff\u2060-\u2064\u206a-\u206f\ufff9-\ufffb]+"
+            "[\u00ad\u034f\u061c\u0600-\u0605\u070f\u0890-\u0891\u08e2\u180e"
+            "\u200b-\u200f\u2028\u2029\u2060-\u2064\u206a-\u206f\ufeff\ufff9-\ufffb]+"
         ),
     ),
     (
@@ -59,13 +60,13 @@
         "medium",
         re.compile(r"\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]"),
     ),
-    # OSC: terminal hyperlinks, window title injection.
+    # ST-terminated: OSC (ESC ]), DCS (ESC P), APC (ESC _), PM (ESC ^).
     # Uses negated class [^\x1b\x07]* instead of .*? to avoid O(n^2)
-    # backtracking on dense unterminated ESC] sequences.
+    # backtracking on dense unterminated sequences.
     (
         "osc_escape",
         "medium",
-        re.compile(r"\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)"),
+        re.compile(r"\x1b[\]P_^][^\x1b\x07]*(?:\x1b\\|\x07)"),
     ),
     (
         "null_byte",
diff --git a/internal/security/scanner_test.go b/internal/security/scanner_test.go
index d72cd18d1..0b627c1c9 100644
--- a/internal/security/scanner_test.go
+++ b/internal/security/scanner_test.go
@@ -340,6 +340,39 @@ func TestPipeline(t *testing.T) {
 		assert.False(t, hasFinding(r, "github_pat"), "redact-before-normalize must not catch obfuscated token")
 	})
 
+	t.Run("normalize then redact catches ALM obfuscated PAT", func(t *testing.T) {
+		p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor())
+		plain := "ghp_FAKEtesttoken000000000000000000000000"
+		var obfuscated strings.Builder
+		for _, r := range plain {
+			obfuscated.WriteRune(r)
+			obfuscated.WriteRune('\u061c')
+		}
+		r := p.Scan(obfuscated.String())
+		assert.False(t, r.Safe)
+		assert.True(t, hasFinding(r, "zero_width"))
+		assert.True(t, hasFinding(r, "github_pat"))
+		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
+	})
+
+	t.Run("normalize then redact catches fullwidth obfuscated PAT", func(t *testing.T) {
+		p := NewPipeline(NewUnicodeNormalizer(), NewSecretRedactor())
+		plain := "ghp_FAKEtesttoken000000000000000000000000"
+		var fullwidth strings.Builder
+		for _, r := range plain {
+			if r >= 0x21 && r <= 0x7e {
+				fullwidth.WriteRune(r + 0xff00 - 0x20)
+			} else {
+				fullwidth.WriteRune(r)
+			}
+		}
+		r := p.Scan(fullwidth.String())
+		assert.False(t, r.Safe)
+		assert.True(t, hasFinding(r, "fullwidth"))
+		assert.True(t, hasFinding(r, "github_pat"))
+		assert.NotContains(t, r.Sanitized, "ghp_FAKEtest")
+	})
+
 	t.Run("clean text passes both", func(t *testing.T) {
 		p := InputPipeline()
 		r := p.Scan("Normal commit message fixing a null pointer bug.")
diff --git a/internal/security/unicode.go b/internal/security/unicode.go
index c3fc078ea..ffda77941 100644
--- a/internal/security/unicode.go
+++ b/internal/security/unicode.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"regexp"
 	"strings"
+	"unicode"
 	"unicode/utf8"
 
 	"golang.org/x/text/unicode/norm"
@@ -24,7 +25,7 @@ func (u *UnicodeNormalizer) Name() string { return "unicode_normalizer" }
 var (
 	// Zero-width and invisible format characters (aligned with unicode_posttool.py).
 	reZeroWidth = regexp.MustCompile(
-		"[\u00AD\u034F\u180E\u200B-\u200F\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]+",
+		"[\u00AD\u034F\u061C\u0600-\u0605\u070F\u0890-\u0891\u08E2\u180E\u200B-\u200F\u2028\u2029\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]+",
 	)
 
 	// Bidirectional override characters.
@@ -35,8 +36,8 @@ var (
 	// ANSI CSI escape sequences (ECMA-48 compliant).
 	reANSI = regexp.MustCompile(`\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]`)
 
-	// OSC escape sequences (terminal hyperlinks, title injection).
-	reOSC = regexp.MustCompile(`\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)`)
+	// ST-terminated escape sequences: OSC (ESC ]), DCS (ESC P), APC (ESC _), PM (ESC ^).
+	reSTTerminated = regexp.MustCompile(`\x1b[\]P_^][^\x1b\x07]*(?:\x1b\\|\x07)`)
 
 	// Null bytes.
 	reNull = regexp.MustCompile("\x00+")
@@ -52,12 +53,12 @@ func stripTerminalEscapes(text string) (string, int, int) {
 		ansiCount++
 		return ""
 	})
-	oscCount := 0
-	current = reOSC.ReplaceAllStringFunc(current, func(string) string {
-		oscCount++
+	stCount := 0
+	current = reSTTerminated.ReplaceAllStringFunc(current, func(string) string {
+		stCount++
 		return ""
 	})
-	return current, ansiCount, oscCount
+	return current, ansiCount, stCount
 }
 
 func (u *UnicodeNormalizer) Scan(text string) ScanResult {
@@ -80,8 +81,8 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = reNull.ReplaceAllString(current, "")
 	}
 
-	// ANSI and OSC escapes
-	if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 {
+	// ANSI and ST-terminated escapes (OSC, DCS, APC, PM).
+	if stripped, ansiCount, stCount := stripTerminalEscapes(current); ansiCount > 0 || stCount > 0 {
 		if ansiCount > 0 {
 			findings = append(findings, Finding{
 				Scanner:  "unicode_normalizer",
@@ -90,12 +91,12 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 				Detail:   fmt.Sprintf("%d ANSI escape sequences removed", ansiCount),
 			})
 		}
-		if oscCount > 0 {
+		if stCount > 0 {
 			findings = append(findings, Finding{
 				Scanner:  "unicode_normalizer",
 				Name:     "osc_escape",
 				Severity: "medium",
-				Detail:   fmt.Sprintf("%d OSC escape sequences removed", oscCount),
+				Detail:   fmt.Sprintf("%d ST-terminated escape sequences removed", stCount),
 			})
 		}
 		current = stripped
@@ -193,6 +194,26 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = suppVSStripped.String()
 	}
 
+	// Remaining Cf (format) category characters (e.g. U+061C Arabic Letter Mark).
+	var cfStripped strings.Builder
+	cfCount := 0
+	for _, r := range current {
+		if unicode.Is(unicode.Cf, r) {
+			cfCount++
+		} else {
+			cfStripped.WriteRune(r)
+		}
+	}
+	if cfCount > 0 {
+		findings = append(findings, Finding{
+			Scanner:  "unicode_normalizer",
+			Name:     "zero_width",
+			Severity: "high",
+			Detail:   fmt.Sprintf("%d format (Cf) characters removed", cfCount),
+		})
+		current = cfStripped.String()
+	}
+
 	// NFKC normalization (fullwidth -> ASCII, compatibility decomposition)
 	nfkc := norm.NFKC.String(current)
 	if nfkc != current {
@@ -227,7 +248,7 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 		current = nfkc
 
 		// NFKC can reconstruct escape sequences from fullwidth characters.
-		if stripped, ansiCount, oscCount := stripTerminalEscapes(current); ansiCount > 0 || oscCount > 0 {
+		if stripped, ansiCount, stCount := stripTerminalEscapes(current); ansiCount > 0 || stCount > 0 {
 			if ansiCount > 0 {
 				findings = append(findings, Finding{
 					Scanner:  "unicode_normalizer",
@@ -236,12 +257,12 @@ func (u *UnicodeNormalizer) Scan(text string) ScanResult {
 					Detail:   fmt.Sprintf("%d ANSI escape sequences removed (post-NFKC)", ansiCount),
 				})
 			}
-			if oscCount > 0 {
+			if stCount > 0 {
 				findings = append(findings, Finding{
 					Scanner:  "unicode_normalizer",
 					Name:     "osc_escape",
 					Severity: "medium",
-					Detail:   fmt.Sprintf("%d OSC escape sequences removed (post-NFKC)", oscCount),
+					Detail:   fmt.Sprintf("%d ST-terminated escape sequences removed (post-NFKC)", stCount),
 				})
 			}
 			current = stripped