Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Playground PR for verification caching exploration #3457

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6f54591
initial sketch
rosecodym Oct 17, 2024
0a9a30b
allow forced cache misses
rosecodym Oct 17, 2024
0f0917b
rearrange
rosecodym Oct 17, 2024
d074f6d
plug into engine
rosecodym Oct 17, 2024
6e9d760
clear stuff in cached copy
rosecodym Oct 17, 2024
61a70a3
fiddle with pointers more
rosecodym Oct 17, 2024
3e5ff9c
rename to verificationCache
rosecodym Oct 17, 2024
5ac0139
inject getCacheKey
rosecodym Oct 17, 2024
a4e092c
optimize when forcing a cache miss
rosecodym Oct 17, 2024
8d83daa
tweak
rosecodym Oct 17, 2024
fd58380
tweak more
rosecodym Oct 17, 2024
c119f2b
flag when cache was used
rosecodym Oct 17, 2024
15ddd63
store key builder in engine
rosecodym Oct 17, 2024
ea3a559
rename
rosecodym Oct 17, 2024
bf3170d
copy verification errors
rosecodym Oct 17, 2024
3229f3b
Merge remote-tracking branch 'origin/main' into detection-caching-pla…
rosecodym Nov 21, 2024
b126569
re-remove decodertype
rosecodym Dec 2, 2024
8ef29ec
remove cached decoder type
rosecodym Dec 2, 2024
193f5fa
Merge remote-tracking branch 'origin/main' into detection-caching-pla…
rosecodym Dec 6, 2024
20aa4ea
add tests and fix bugs
rosecodym Dec 6, 2024
328e121
tweak formatting
rosecodym Dec 6, 2024
817ea15
update engine
rosecodym Dec 6, 2024
ca07df2
calculate cache key by value
rosecodym Dec 6, 2024
e2924de
update comment
rosecodym Dec 6, 2024
3d097de
Merge remote-tracking branch 'origin/main' into detection-caching-pla…
rosecodym Dec 18, 2024
d5f8f31
add janky metrics + an actual cache
rosecodym Dec 18, 2024
abaa8dc
fix typo
rosecodym Dec 18, 2024
e950bc9
add flag
rosecodym Dec 18, 2024
add0c0a
add to printers
rosecodym Dec 18, 2024
076696a
add new metric
rosecodym Dec 18, 2024
48d9535
create struct
rosecodym Dec 19, 2024
8482b4d
rewrite metrics
rosecodym Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 41 additions & 13 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import (
"github.com/go-logr/logr"
"github.com/jpillora/overseer"
"github.com/mattn/go-isatty"
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching"
"go.uber.org/automaxprocs/maxprocs"

"github.com/trufflesecurity/trufflehog/v3/pkg/analyzer"
Expand Down Expand Up @@ -76,6 +79,8 @@ var (
excludeDetectors = cli.Flag("exclude-detectors", "Comma separated list of detector types to exclude. Protobuf name or IDs may be used, as well as ranges. IDs defined here take precedence over the include list.").String()
jobReportFile = cli.Flag("output-report", "Write a scan report to the provided path.").Hidden().OpenFile(os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)

noVerificationCache = cli.Flag("no-verification-cache", "Disable verification caching").Bool()

// Add feature flags
forceSkipBinaries = cli.Flag("force-skip-binaries", "Force skipping binaries.").Bool()
forceSkipArchives = cli.Flag("force-skip-archives", "Force skipping archives.").Bool()
Expand Down Expand Up @@ -480,25 +485,33 @@ func run(state overseer.State) {
logFatal(err, "failed to configure results flag")
}

verificationCacheMetrics := verificationcaching.InMemoryMetrics{}

engConf := engine.Config{
Concurrency: *concurrency,
// The engine must always be configured with the list of
// default detectors, which can be further filtered by the
// user. The filters are applied by the engine and are only
// subtractive.
Detectors: append(defaults.DefaultDetectors(), conf.Detectors...),
Verify: !*noVerification,
IncludeDetectors: *includeDetectors,
ExcludeDetectors: *excludeDetectors,
CustomVerifiersOnly: *customVerifiersOnly,
VerifierEndpoints: *verifiers,
Dispatcher: engine.NewPrinterDispatcher(printer),
FilterUnverified: *filterUnverified,
FilterEntropy: *filterEntropy,
VerificationOverlap: *allowVerificationOverlap,
Results: parsedResults,
PrintAvgDetectorTime: *printAvgDetectorTime,
ShouldScanEntireChunk: *scanEntireChunk,
Detectors: append(defaults.DefaultDetectors(), conf.Detectors...),
Verify: !*noVerification,
IncludeDetectors: *includeDetectors,
ExcludeDetectors: *excludeDetectors,
CustomVerifiersOnly: *customVerifiersOnly,
VerifierEndpoints: *verifiers,
Dispatcher: engine.NewPrinterDispatcher(printer),
FilterUnverified: *filterUnverified,
FilterEntropy: *filterEntropy,
VerificationOverlap: *allowVerificationOverlap,
Results: parsedResults,
PrintAvgDetectorTime: *printAvgDetectorTime,
ShouldScanEntireChunk: *scanEntireChunk,
VerificationCacheMetrics: &verificationCacheMetrics,
}

if !*noVerificationCache {
engConf.VerificationResultCache = simple.NewCache[detectors.Result]()
engConf.GetVerificationCacheKey = func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) }
}

if *compareDetectionStrategies {
Expand All @@ -518,6 +531,20 @@ func run(state overseer.State) {
logFatal(err, "error running scan")
}

verificationCacheMetrics := struct {
Hits int32
Misses int32
HitsWasted int32
AttemptsSaved int32
VerificationTimeSpentMS int64
}{
Hits: verificationCacheMetrics.ResultCacheHits.Load(),
Misses: verificationCacheMetrics.ResultCacheMisses.Load(),
HitsWasted: verificationCacheMetrics.ResultCacheHitsWasted.Load(),
AttemptsSaved: verificationCacheMetrics.CredentialVerificationsSaved.Load(),
VerificationTimeSpentMS: verificationCacheMetrics.FromDataVerifyTimeSpentMS.Load(),
}

// Print results.
logger.Info("finished scanning",
"chunks", metrics.ChunksScanned,
Expand All @@ -526,6 +553,7 @@ func run(state overseer.State) {
"unverified_secrets", metrics.UnverifiedSecretsFound,
"scan_duration", metrics.ScanDuration.String(),
"trufflehog_version", version.BuildVersion,
"verification_caching", verificationCacheMetrics,
)

if metrics.hasFoundResults && *fail {
Expand Down
13 changes: 12 additions & 1 deletion pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ type Result struct {
// DetectorName is the name of the Detector. Used for custom detectors.
DetectorName string
Verified bool
// VerificationFromCache indicates whether this result's verification result came from the verification cache rather
// than an actual remote request.
VerificationFromCache bool
// Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing.
Raw []byte
// RawV2 contains the raw secret identifier that is a combination of both the ID and the secret.
Expand All @@ -111,7 +114,15 @@ type Result struct {
AnalysisInfo map[string]string
}

// SetVerificationError is the only way to set a verification error. Any sensitive values should be passed-in as secrets to be redacted.
// CopyVerificationInfo overwrites r's verification status and verification error with the ones held by from. It
// exists so that verification info loaded from a verification cache can be applied to another Result: the
// verification error field is not exported (to prevent the accidental storage of sensitive information in it), so a
// method is the only way to carry it across.
func (r *Result) CopyVerificationInfo(from *Result) {
	r.verificationError, r.Verified = from.verificationError, from.Verified
}

// SetVerificationError is the only way to set a new verification error. Any sensitive values should be passed-in as secrets to be redacted.
func (r *Result) SetVerificationError(err error, secrets ...string) {
if err != nil {
r.verificationError = redactSecrets(err, secrets...)
Expand Down
26 changes: 22 additions & 4 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"github.com/adrg/strutil"
"github.com/adrg/strutil/metrics"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
"github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching"
"google.golang.org/protobuf/proto"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
Expand Down Expand Up @@ -145,6 +147,10 @@ type Config struct {

// VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn.
VerificationOverlapWorkerMultiplier int

VerificationResultCache cache.Cache[detectors.Result]
GetVerificationCacheKey func(result detectors.Result) string
VerificationCacheMetrics verificationcaching.MetricsReporter
}

// Engine represents the core scanning engine responsible for detecting secrets in input data.
Expand All @@ -153,9 +159,10 @@ type Config struct {
// customization through various options and configurations.
type Engine struct {
// CLI flags.
concurrency int
decoders []decoders.Decoder
detectors []detectors.Detector
concurrency int
decoders []decoders.Decoder
detectors []detectors.Detector
verificationCache verificationcaching.VerificationCache
// Any detectors configured to override sources' verification flags
detectorVerificationOverrides map[config.DetectorID]bool

Expand Down Expand Up @@ -216,10 +223,16 @@ type Engine struct {

// NewEngine creates a new Engine instance with the provided configuration.
func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) {
verificationCache := verificationcaching.New(
cfg.VerificationResultCache,
cfg.GetVerificationCacheKey,
cfg.VerificationCacheMetrics)

engine := &Engine{
concurrency: cfg.Concurrency,
decoders: cfg.Decoders,
detectors: cfg.Detectors,
verificationCache: verificationCache,
dispatcher: cfg.Dispatcher,
verify: cfg.Verify,
filterUnverified: cfg.FilterUnverified,
Expand Down Expand Up @@ -1056,7 +1069,12 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
t := time.AfterFunc(detectionTimeout+1*time.Second, func() {
ctx.Logger().Error(nil, "a detector ignored the context timeout")
})
results, err := data.detector.Detector.FromData(ctx, data.chunk.Verify, matchBytes)
results, err := e.verificationCache.FromData(
ctx,
data.detector.Detector,
data.chunk.Verify,
data.chunk.SecretID != 0,
matchBytes)
t.Stop()
cancel()
if err != nil {
Expand Down
38 changes: 20 additions & 18 deletions pkg/output/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ func (p *JSONPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
// DetectorDescription is the description of the Detector.
DetectorDescription string
// DecoderName is the string name of the DecoderType.
DecoderName string
Verified bool
VerificationError string `json:",omitempty"`
DecoderName string
Verified bool
VerificationError string `json:",omitempty"`
VerificationFromCache bool
// Raw contains the raw secret data.
Raw string
// RawV2 contains the raw secret identifier that is a combination of both the ID and the secret.
Expand All @@ -54,21 +55,22 @@ func (p *JSONPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
ExtraData map[string]string
StructuredData *detectorspb.StructuredData
}{
SourceMetadata: r.SourceMetadata,
SourceID: r.SourceID,
SourceType: r.SourceType,
SourceName: r.SourceName,
DetectorType: r.DetectorType,
DetectorName: r.DetectorType.String(),
DetectorDescription: r.DetectorDescription,
DecoderName: r.DecoderType.String(),
Verified: r.Verified,
VerificationError: verificationErr,
Raw: string(r.Raw),
RawV2: string(r.RawV2),
Redacted: r.Redacted,
ExtraData: r.ExtraData,
StructuredData: r.StructuredData,
SourceMetadata: r.SourceMetadata,
SourceID: r.SourceID,
SourceType: r.SourceType,
SourceName: r.SourceName,
DetectorType: r.DetectorType,
DetectorName: r.DetectorType.String(),
DetectorDescription: r.DetectorDescription,
DecoderName: r.DecoderType.String(),
Verified: r.Verified,
VerificationError: verificationErr,
VerificationFromCache: r.VerificationFromCache,
Raw: string(r.Raw),
RawV2: string(r.RawV2),
Redacted: r.Redacted,
ExtraData: r.ExtraData,
StructuredData: r.StructuredData,
}
out, err := json.Marshal(v)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions pkg/output/plain.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ var (
boldGreenPrinter = color.New(color.Bold, color.FgHiGreen)
whitePrinter = color.New(color.FgWhite)
boldWhitePrinter = color.New(color.Bold, color.FgWhite)
cyanPrinter = color.New(color.FgCyan)
)

// PlainPrinter is a printer that prints results in plain text format.
Expand Down Expand Up @@ -56,6 +57,9 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
yellowPrinter.Printf("Verification issue: %s\n", out.VerificationError)
}
}
if r.VerificationFromCache {
cyanPrinter.Print("(Verification info cached)\n")
}
printer.Printf("Detector Type: %s\n", out.DetectorType)
printer.Printf("Decoder Type: %s\n", out.DecoderType)
printer.Printf("Raw result: %s\n", whitePrinter.Sprint(out.Raw))
Expand Down
33 changes: 33 additions & 0 deletions pkg/verificationcaching/in_memory_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package verificationcaching

import "sync/atomic"

// InMemoryMetrics is a MetricsReporter that accumulates every reported value in an atomic counter held in memory.
// Callers read the totals directly from the exported fields (for example with Load).
type InMemoryMetrics struct {
// CredentialVerificationsSaved is the running total of the counts passed to AddCredentialVerificationsSaved.
CredentialVerificationsSaved atomic.Int32
// FromDataVerifyTimeSpentMS is the running total, in milliseconds, of the values passed to
// AddFromDataVerifyTimeSpent.
FromDataVerifyTimeSpentMS atomic.Int64
// ResultCacheHits is the running total of the counts passed to AddResultCacheHits.
ResultCacheHits atomic.Int32
// ResultCacheHitsWasted is the running total of the counts passed to AddResultCacheHitsWasted.
ResultCacheHitsWasted atomic.Int32
// ResultCacheMisses is the running total of the counts passed to AddResultCacheMisses.
ResultCacheMisses atomic.Int32
}

var _ MetricsReporter = (*InMemoryMetrics)(nil)

// AddCredentialVerificationsSaved increases the CredentialVerificationsSaved counter by count. The counter is 32 bits
// wide, so count is narrowed to int32 before it is added.
func (m *InMemoryMetrics) AddCredentialVerificationsSaved(count int) {
	delta := int32(count)
	m.CredentialVerificationsSaved.Add(delta)
}

// AddFromDataVerifyTimeSpent increases the FromDataVerifyTimeSpentMS counter by ms milliseconds.
func (m *InMemoryMetrics) AddFromDataVerifyTimeSpent(ms int64) {
	total := &m.FromDataVerifyTimeSpentMS
	total.Add(ms)
}

// AddResultCacheHits increases the ResultCacheHits counter by count. The counter is 32 bits wide, so count is
// narrowed to int32 before it is added.
func (m *InMemoryMetrics) AddResultCacheHits(count int) {
	delta := int32(count)
	m.ResultCacheHits.Add(delta)
}

// AddResultCacheMisses increases the ResultCacheMisses counter by count. The counter is 32 bits wide, so count is
// narrowed to int32 before it is added.
func (m *InMemoryMetrics) AddResultCacheMisses(count int) {
	delta := int32(count)
	m.ResultCacheMisses.Add(delta)
}

// AddResultCacheHitsWasted increases the ResultCacheHitsWasted counter by count. The counter is 32 bits wide, so
// count is narrowed to int32 before it is added.
func (m *InMemoryMetrics) AddResultCacheHitsWasted(count int) {
	delta := int32(count)
	m.ResultCacheHitsWasted.Add(delta)
}
9 changes: 9 additions & 0 deletions pkg/verificationcaching/metrics_reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package verificationcaching

// MetricsReporter is the sink to which VerificationCache reports its cache activity and timing measurements.
type MetricsReporter interface {
// AddCredentialVerificationsSaved adds count to the number of results that were returned using cached
// verification info instead of a verifying detector call.
AddCredentialVerificationsSaved(count int)
// AddFromDataVerifyTimeSpent adds ms milliseconds to the time spent inside detector FromData calls that were made
// with verification requested.
AddFromDataVerifyTimeSpent(ms int64)
// AddResultCacheHits adds count to the number of result cache lookups that found an entry.
AddResultCacheHits(count int)
// AddResultCacheMisses adds count to the number of result cache lookups that found no entry.
AddResultCacheMisses(count int)
// AddResultCacheHitsWasted adds count to the number of cache hits that were thrown away because a later lookup
// for the same input missed.
AddResultCacheHitsWasted(count int)
}
98 changes: 98 additions & 0 deletions pkg/verificationcaching/verification_cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package verificationcaching

import (
"context"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
)

// VerificationCache wraps calls to a detector's FromData with a cache of detector results, so that verification info
// already obtained for a result can be reused. Construct it with New and invoke detectors through its FromData
// method.
type VerificationCache struct {
// getResultCacheKey derives the cache key under which a result is looked up and stored.
getResultCacheKey func(result detectors.Result) string
// metrics receives cache hit/miss counts and verification timing.
metrics MetricsReporter
// resultCache holds previously obtained results. FromData treats a nil resultCache as "caching disabled".
resultCache cache.Cache[detectors.Result]
}

// New assembles a VerificationCache from its collaborators:
//   - resultCache stores detector results; passing nil makes FromData bypass caching entirely.
//   - getResultCacheKey derives the key used to look up and store a result.
//   - metrics receives the cache statistics reported by FromData.
//
// The VerificationCache is returned by value.
func New(
	resultCache cache.Cache[detectors.Result],
	getResultCacheKey func(result detectors.Result) string,
	metrics MetricsReporter,
) VerificationCache {
	var vc VerificationCache
	vc.resultCache = resultCache
	vc.getResultCacheKey = getResultCacheKey
	vc.metrics = metrics
	return vc
}

// FromData runs detector against data, consulting the result cache (when one is configured) so that remote
// verification can be skipped for results whose verification info is already known.
//
// Behavior by case:
//   - No result cache is configured: the call is forwarded to detector.FromData unchanged. If verify is true the
//     elapsed time is reported to the metrics.
//   - verify is false: the detector is run without verification and the cache is neither read nor written. Results
//     that were never verified must not be stored, because a later verifying call would otherwise receive them as
//     cache hits and report the secret as unverified without contacting the remote.
//   - verify is true and forceCacheUpdate is false: the detector is first run without verification. If every result
//     has a cache entry, the cached verification info is copied onto the results, each is flagged with
//     VerificationFromCache, and they are returned without any remote call. If any result is missing from the
//     cache, the partial hits are counted as wasted and the call falls through to the next case.
//   - Otherwise: the detector is run with verification, the elapsed time is reported, and every result is stored in
//     the cache with Raw and RawV2 cleared.
//
// On a detector error the returned slice is nil, except in the no-cache case where the detector's return values are
// passed through as-is. Metrics reporting is skipped when no MetricsReporter was supplied.
func (v *VerificationCache) FromData(
	ctx context.Context,
	detector detectors.Detector,
	verify bool,
	forceCacheUpdate bool,
	data []byte,
) ([]detectors.Result, error) {

	if v.resultCache == nil {
		if verify {
			start := time.Now()
			defer func() {
				if v.metrics != nil {
					v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds())
				}
			}()
		}

		return detector.FromData(ctx, verify, data)
	}

	// Without verification there is nothing to look up and nothing worth caching. Returning here keeps unverified
	// results out of the cache even when forceCacheUpdate is set.
	if !verify {
		withoutRemoteVerification, err := detector.FromData(ctx, false, data)
		if err != nil {
			return nil, err
		}
		return withoutRemoteVerification, nil
	}

	if !forceCacheUpdate {
		withoutRemoteVerification, err := detector.FromData(ctx, false, data)
		if err != nil {
			return nil, err
		}

		isEverythingCached := true
		var cacheHitsInCurrentChunk int
		for i, r := range withoutRemoteVerification {
			cacheHit, ok := v.resultCache.Get(v.getResultCacheKey(r))
			if !ok {
				// One miss forces a full verifying run, so the hits gathered so far are discarded.
				isEverythingCached = false
				if v.metrics != nil {
					v.metrics.AddResultCacheMisses(1)
					v.metrics.AddResultCacheHitsWasted(cacheHitsInCurrentChunk)
				}
				break
			}

			withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit)
			withoutRemoteVerification[i].VerificationFromCache = true
			if v.metrics != nil {
				v.metrics.AddResultCacheHits(1)
			}
			cacheHitsInCurrentChunk++
		}

		if isEverythingCached {
			if v.metrics != nil {
				v.metrics.AddCredentialVerificationsSaved(len(withoutRemoteVerification))
			}
			return withoutRemoteVerification, nil
		}
	}

	start := time.Now()
	withRemoteVerification, err := detector.FromData(ctx, true, data)
	if v.metrics != nil {
		v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds())
	}
	if err != nil {
		return nil, err
	}

	for _, r := range withRemoteVerification {
		copyForCaching := r
		// Do not persist raw secret values in a long-lived cache
		copyForCaching.Raw = nil
		copyForCaching.RawV2 = nil
		v.resultCache.Set(v.getResultCacheKey(r), copyForCaching)
	}

	return withRemoteVerification, nil
}
Loading
Loading