Skip to content

Commit d520e91

Browse files
committed
feat: redact full secret file content
For patterns.secret_files, also import full file contents as keyword patterns (with line-ending normalization/trimming) to redact pasted whole-file content.
1 parent d684a64 commit d520e91

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

internal/secretsources/secret_files.go

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ import (
1313
"github.com/inkdust2021/vibeguard/internal/pii_next/keywords"
1414
)
1515

16-
const maxSecretFileBytes = 1 << 20 // 1 MiB
16+
const maxSecretFileBytes = 1 << 20 // 1 MiB
17+
const maxSecretPatternBytes = 64 << 10 // 64 KiB (avoid huge patterns impacting memory/CPU)
1718

1819
var dotenvKeyRe = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_]*$`)
1920

@@ -70,6 +71,21 @@ func LoadKeywords(sources []config.SecretFileConfig) ([]keywords.Keyword, []erro
7071
minLen = 8
7172
}
7273

74+
// Also treat the entire file content as a keyword (best-effort).
75+
// This helps when users paste/send a whole file into a prompt (or parts that include newlines),
76+
// without requiring them to enumerate individual secrets.
77+
for _, v := range fullContentVariants(data, minLen) {
78+
if len(v) > maxSecretPatternBytes {
79+
warns = append(warns, fmt.Errorf("secret_files: %q content pattern too large (%d bytes > %d bytes), skipped", path, len(v), maxSecretPatternBytes))
80+
continue
81+
}
82+
if _, ok := seen[v]; ok {
83+
continue
84+
}
85+
seen[v] = struct{}{}
86+
out = append(out, keywords.Keyword{Text: v, Category: cat})
87+
}
88+
7389
var values []string
7490
switch format {
7591
case "dotenv":
@@ -100,6 +116,45 @@ func LoadKeywords(sources []config.SecretFileConfig) ([]keywords.Keyword, []erro
100116
return out, warns
101117
}
102118

119+
func fullContentVariants(data []byte, minLen int) []string {
120+
if len(data) == 0 {
121+
return nil
122+
}
123+
if minLen <= 0 {
124+
minLen = 1
125+
}
126+
127+
s0 := string(data)
128+
// Drop UTF-8 BOM if present.
129+
s0 = strings.TrimPrefix(s0, "\ufeff")
130+
s1 := normalizeLineEndings(s0)
131+
s2 := strings.TrimRight(s1, " \t\r\n")
132+
s3 := strings.TrimLeft(s2, " \t\r\n")
133+
134+
var out []string
135+
for _, s := range []string{s0, s1, s2, s3} {
136+
if len(s) < minLen {
137+
continue
138+
}
139+
// Avoid patterns that are "mostly whitespace".
140+
if strings.TrimSpace(s) == "" {
141+
continue
142+
}
143+
out = append(out, s)
144+
}
145+
return out
146+
}
147+
148+
// normalizeLineEndings rewrites every CRLF pair and every lone CR in s to a
// single LF, so that content matches regardless of which line-ending
// convention a client used. Strings without a CR are returned unchanged.
func normalizeLineEndings(s string) string {
	// Fast path: nothing to rewrite (this also covers the empty string).
	if !strings.Contains(s, "\r") {
		return s
	}
	// Fold CRLF first so a pair becomes one LF rather than two.
	crlfFolded := strings.ReplaceAll(s, "\r\n", "\n")
	return strings.ReplaceAll(crlfFolded, "\r", "\n")
}
157+
103158
func parseLineValues(data []byte, minLen int) ([]string, error) {
104159
var out []string
105160
s := bufio.NewScanner(bytes.NewReader(data))

internal/secretsources/secret_files_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,24 @@ func TestParseLineValues(t *testing.T) {
6868
t.Fatalf("unexpected values: %#v", values)
6969
}
7070
}
71+
72+
func TestFullContentVariants_NormalizesLineEndingsAndTrims(t *testing.T) {
73+
in := []byte("\ufeffline1\r\nline2\r\n\r\n")
74+
out := fullContentVariants(in, 1)
75+
if len(out) == 0 {
76+
t.Fatalf("expected variants, got none")
77+
}
78+
seen := map[string]bool{}
79+
for _, v := range out {
80+
seen[v] = true
81+
}
82+
if !seen["line1\r\nline2\r\n\r\n"] {
83+
t.Fatalf("missing raw variant, got: %#v", out)
84+
}
85+
if !seen["line1\nline2\n\n"] {
86+
t.Fatalf("missing normalized variant, got: %#v", out)
87+
}
88+
if !seen["line1\nline2"] {
89+
t.Fatalf("missing trimmed variant, got: %#v", out)
90+
}
91+
}

0 commit comments

Comments
 (0)