Skip to content

Commit

Permalink
Polite Verification (#2356)
Browse files Browse the repository at this point in the history
* draft reverify chunks

* remove

* remove

* reduce dupe map cap

* do not verify chunk

* cli arg and use val for dupe lut

* remove counter

* skip empty results

* working on test and normalizing val for comparison

* forgot to save file

* optimize normalize

* reuse map

* remove print

* use levenshtein distance to check dupes

* forgot to leave in emptying map

* use slice

* small tweak

* comment

* use bytes

* praise

* use ctx logger

* add len check

* add comments

* use 8x concurrency for reverifier workers

* revert worker count

* use more workers

* process result directly for any collisions

* continue after decoder match for reverifying

* use map

* use map

* optimization and fix the bug.

* revert worker count

* better option naming

* handle identical secrets in chunks

* update comment

* update comment

* fix test

* use DetectorKey

* rm out of scope tests and testdata

* rename all reverification elements

* don't re-write map entry

* use correct key

* rename worker, remove log val

* test likelydupe, add eq detector check in loop

* add test

* add comment

* add test

* Set verification error

* Update tests

---------

Co-authored-by: Zachary Rice <[email protected]>
Co-authored-by: Dustin Decker <[email protected]>
  • Loading branch information
3 people authored Feb 2, 2024
1 parent c2ae31d commit b2074ad
Show file tree
Hide file tree
Showing 9 changed files with 530 additions and 34 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.1
github.com/BobuSumisu/aho-corasick v1.0.3
github.com/TheZeroSlave/zapsentry v1.19.0
github.com/adrg/strutil v0.3.1
github.com/alecthomas/kingpin/v2 v2.4.0
github.com/aws/aws-sdk-go v1.50.0
github.com/aymanbagabas/go-osc52 v1.2.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ github.com/ProtonMail/go-crypto v0.0.0-20230828082145-3c4c8a2d2371 h1:kkhsdkhsCv
github.com/ProtonMail/go-crypto v0.0.0-20230828082145-3c4c8a2d2371/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0=
github.com/TheZeroSlave/zapsentry v1.19.0 h1:/FVdMrq/w7bYt98m49ImZgmCTybXWbGc8/hOT0nLmyc=
github.com/TheZeroSlave/zapsentry v1.19.0/go.mod h1:D1YMfSuu6xnkhwFXxrronesmsiyDhIqo+86I3Ok+r64=
github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4=
github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY=
Expand Down
32 changes: 17 additions & 15 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,22 @@ import (
)

var (
cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.")
cmd string
debug = cli.Flag("debug", "Run in debug mode.").Bool()
trace = cli.Flag("trace", "Run in trace mode.").Bool()
profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool()
localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool()
jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool()
jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources.").Bool()
gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool()
concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int()
noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool()
onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool()
filterUnverified = cli.Flag("filter-unverified", "Only output first unverified result per chunk per detector if there are more than one results.").Bool()
filterEntropy = cli.Flag("filter-entropy", "Filter unverified results with Shannon entropy. Start with 3.0.").Float64()
configFilename = cli.Flag("config", "Path to configuration file.").ExistingFile()
cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.")
cmd string
debug = cli.Flag("debug", "Run in debug mode.").Bool()
trace = cli.Flag("trace", "Run in trace mode.").Bool()
profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool()
localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool()
jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool()
jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources.").Bool()
gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool()
concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int()
noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool()
onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool()
allowVerificationOverlap = cli.Flag("allow-verification-overlap", "Allow verification of similar credentials across detectors").Bool()
filterUnverified = cli.Flag("filter-unverified", "Only output first unverified result per chunk per detector if there are more than one results.").Bool()
filterEntropy = cli.Flag("filter-entropy", "Filter unverified results with Shannon entropy. Start with 3.0.").Float64()
configFilename = cli.Flag("config", "Path to configuration file.").ExistingFile()
// rules = cli.Flag("rules", "Path to file with custom rules.").String()
printAvgDetectorTime = cli.Flag("print-avg-detector-time", "Print the average time spent on each detector.").Bool()
noUpdate = cli.Flag("no-update", "Don't check for updates.").Bool()
Expand Down Expand Up @@ -411,6 +412,7 @@ func run(state overseer.State) {
engine.WithPrintAvgDetectorTime(*printAvgDetectorTime),
engine.WithPrinter(printer),
engine.WithFilterEntropy(*filterEntropy),
engine.WithVerificationOverlap(*allowVerificationOverlap),
)
if err != nil {
logFatal(err, "error initializing engine")
Expand Down
38 changes: 32 additions & 6 deletions pkg/engine/ahocorasick/ahocorasickcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
detectorsByKey := make(map[DetectorKey]detectors.Detector, len(allDetectors))
var keywords []string
for _, d := range allDetectors {
key := createDetectorKey(d)
key := CreateDetectorKey(d)
detectorsByKey[key] = d
for _, kw := range d.Keywords() {
kwLower := strings.ToLower(kw)
Expand All @@ -63,20 +63,46 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
}
}

// DetectorInfo describes a detector that matched a chunk of data: the key that
// identifies the detector, together with the detector instance itself.
// The detector is embedded, so its methods can be called directly on a DetectorInfo value.
type DetectorInfo struct {
// Key is the DetectorKey under which the detector is stored in the AhoCorasickCore.
Key DetectorKey
// Detector is the matched detector instance (embedded).
detectors.Detector
}

// PopulateMatchingDetectors populates the given detector map with all the detectors matching the
// provided input. This method populates an existing map rather than allocating a new one because
// it will be called once per chunk and that many allocations has a noticeable performance cost.
func (ac *AhoCorasickCore) PopulateMatchingDetectors(chunkData string, detectors map[DetectorKey]detectors.Detector) {
for _, m := range ac.prefilter.MatchString(strings.ToLower(chunkData)) {
// It also returns a slice of unique DetectorInfo values, one per matched detector, in the order in
// which the detectors were first encountered. A per-call set of keys keeps a detector that is
// reached through several keyword matches from being appended more than once.
func (ac *AhoCorasickCore) PopulateMatchingDetectors(chunkData string, dts map[DetectorKey]detectors.Detector) []DetectorInfo {
// The chunk is lower-cased before it is handed to the keyword prefilter.
matches := ac.prefilter.MatchString(strings.ToLower(chunkData))

// Use a set of keys to avoid adding duplicate detectors to the slice.
// NOTE(review): this set and the slice below are allocated on every call, even though the
// caller-supplied map is reused to avoid per-chunk allocations — confirm the cost is acceptable.
addedDetectors := make(map[DetectorKey]struct{})
// len(matches) is only an initial capacity; one match can map to several detectors.
uniqueDetectors := make([]DetectorInfo, 0, len(matches))

for _, m := range matches {
for _, k := range ac.keywordsToDetectors[m.MatchString()] {
detectors[k] = ac.detectorsByKey[k]
if _, exists := addedDetectors[k]; exists {
continue
}
// Record the key so later matches for the same detector are skipped.
addedDetectors[k] = struct{}{}

// Add the detector to the caller's map and to the returned slice.
detector := ac.detectorsByKey[k]
dts[k] = detector
uniqueDetectors = append(uniqueDetectors, DetectorInfo{Key: k, Detector: detector})
}
}

return uniqueDetectors
}

// createDetectorKey creates a unique key for each detector from its type, version, and, for
// CreateDetectorKey creates a unique key for each detector from its type, version, and, for
// custom regex detectors, its name.
func createDetectorKey(d detectors.Detector) DetectorKey {
func CreateDetectorKey(d detectors.Detector) DetectorKey {
detectorType := d.Type()
var version int
if v, ok := d.(detectors.Versioner); ok {
Expand Down
Loading

0 comments on commit b2074ad

Please sign in to comment.