Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Polite Verification #2356

Merged
merged 58 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
eca32b2
draft reverify chunks
ahrav Jan 25, 2024
c7691ae
remove
ahrav Jan 25, 2024
3a7a308
remove
ahrav Jan 25, 2024
b4deb04
reduce dupe map cap
ahrav Jan 25, 2024
89e0330
do not verify chunk
ahrav Jan 25, 2024
59421a1
cli arg and use val for dupe lut
zricethezav Jan 25, 2024
cd615f0
remove counter
zricethezav Jan 25, 2024
83e6c8d
skipp empty results]
ahrav Jan 25, 2024
6809ed5
working on test and normalizing val for comparison
zricethezav Jan 25, 2024
2609152
Merge branch 'reverify-chunks' of github.com:trufflesecurity/truffleh…
zricethezav Jan 25, 2024
eaa0e7c
forgot to save file
zricethezav Jan 25, 2024
43cf8d0
optimize normalize
ahrav Jan 25, 2024
0aee157
reuse map
ahrav Jan 26, 2024
cfa9b55
remove print
ahrav Jan 26, 2024
c38107b
use levenshtein distance to check dupes
zricethezav Jan 26, 2024
31ebcaf
Merge branch 'reverify-chunks' of github.com:trufflesecurity/truffleh…
zricethezav Jan 26, 2024
ef4861c
forgot to leave in emptying map
zricethezav Jan 26, 2024
da90938
use slice
ahrav Jan 26, 2024
61fd8cb
small tweak
zricethezav Jan 26, 2024
543624e
Merge branch 'reverify-chunks' of github.com:trufflesecurity/truffleh…
zricethezav Jan 26, 2024
744a401
comment
zricethezav Jan 26, 2024
9d52160
use bytes
ahrav Jan 26, 2024
ca45af1
update
ahrav Jan 26, 2024
6d8c309
praise
zricethezav Jan 26, 2024
eea3088
convert to string once
ahrav Jan 26, 2024
c6bfc1c
use ctx logger
ahrav Jan 26, 2024
398dfb9
add len check
ahrav Jan 26, 2024
c415880
add comments
ahrav Jan 26, 2024
8b21a71
use 8x concurrency for reverifier workers
ahrav Jan 26, 2024
f15f9ab
revert worker count
ahrav Jan 30, 2024
2ba36ec
Merge branch 'main' into reverify-chunks
ahrav Jan 30, 2024
0c56de5
use more workers
ahrav Jan 30, 2024
c98bff1
process result directly for any collisions
ahrav Jan 30, 2024
428b051
continue after decoder match for reverifying
ahrav Jan 30, 2024
4ea40b0
use map
dustin-decker Jan 30, 2024
6ca05ef
use map
ahrav Jan 31, 2024
404b2b6
use maps
ahrav Jan 31, 2024
76c2b20
otimization and fix the bug.
ahrav Jan 31, 2024
76bcb51
revert worker count
ahrav Jan 31, 2024
5bf61da
better option naming
zricethezav Jan 31, 2024
96c6cd5
handle identical secrets in chunks
ahrav Jan 31, 2024
a72201c
update comment
ahrav Jan 31, 2024
8ad9859
update comment
ahrav Jan 31, 2024
c098a6d
fix test
ahrav Jan 31, 2024
46a5828
use DetecotrKey
ahrav Jan 31, 2024
c3125f3
rm out of scope tests and testdata
zricethezav Jan 31, 2024
b47b9cf
Merge branch 'main' of github.com:trufflesecurity/trufflehog into rev…
zricethezav Jan 31, 2024
a89933a
rename all reverification elements
zricethezav Jan 31, 2024
2cc3f29
don't re-write map entry
ahrav Feb 1, 2024
c7c6678
use correct key
ahrav Feb 1, 2024
9d2c2b2
rename worker, remove log val
zricethezav Feb 1, 2024
bf31cab
test likelydupe, add eq detector check in loop
zricethezav Feb 1, 2024
093410b
Merge branch 'main' of github.com:trufflesecurity/trufflehog into rev…
zricethezav Feb 1, 2024
1e5b773
add test
ahrav Feb 2, 2024
482c005
add comment
ahrav Feb 2, 2024
c82850a
add test
ahrav Feb 2, 2024
893a8b8
Set verification error
dustin-decker Feb 2, 2024
4bd6450
Update tests
dustin-decker Feb 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.1
github.com/BobuSumisu/aho-corasick v1.0.3
github.com/TheZeroSlave/zapsentry v1.19.0
github.com/adrg/strutil v0.3.1
github.com/alecthomas/kingpin/v2 v2.4.0
github.com/aws/aws-sdk-go v1.50.0
github.com/aymanbagabas/go-osc52 v1.2.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ github.com/ProtonMail/go-crypto v0.0.0-20230828082145-3c4c8a2d2371 h1:kkhsdkhsCv
github.com/ProtonMail/go-crypto v0.0.0-20230828082145-3c4c8a2d2371/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0=
github.com/TheZeroSlave/zapsentry v1.19.0 h1:/FVdMrq/w7bYt98m49ImZgmCTybXWbGc8/hOT0nLmyc=
github.com/TheZeroSlave/zapsentry v1.19.0/go.mod h1:D1YMfSuu6xnkhwFXxrronesmsiyDhIqo+86I3Ok+r64=
github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4=
github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA=
github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY=
Expand Down
32 changes: 17 additions & 15 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,22 @@ import (
)

var (
cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.")
cmd string
debug = cli.Flag("debug", "Run in debug mode.").Bool()
trace = cli.Flag("trace", "Run in trace mode.").Bool()
profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool()
localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool()
jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool()
jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources.").Bool()
gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool()
concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int()
noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool()
onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool()
filterUnverified = cli.Flag("filter-unverified", "Only output first unverified result per chunk per detector if there are more than one results.").Bool()
filterEntropy = cli.Flag("filter-entropy", "Filter unverified results with Shannon entropy. Start with 3.0.").Float64()
configFilename = cli.Flag("config", "Path to configuration file.").ExistingFile()
cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.")
cmd string
debug = cli.Flag("debug", "Run in debug mode.").Bool()
trace = cli.Flag("trace", "Run in trace mode.").Bool()
profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool()
localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool()
jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool()
jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources.").Bool()
gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool()
concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int()
noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool()
onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool()
allowVerificationOverlap = cli.Flag("allow-verification-overlap", "Allow verification of similar credentials across detectors").Bool()
filterUnverified = cli.Flag("filter-unverified", "Only output first unverified result per chunk per detector if there are more than one results.").Bool()
filterEntropy = cli.Flag("filter-entropy", "Filter unverified results with Shannon entropy. Start with 3.0.").Float64()
configFilename = cli.Flag("config", "Path to configuration file.").ExistingFile()
// rules = cli.Flag("rules", "Path to file with custom rules.").String()
printAvgDetectorTime = cli.Flag("print-avg-detector-time", "Print the average time spent on each detector.").Bool()
noUpdate = cli.Flag("no-update", "Don't check for updates.").Bool()
Expand Down Expand Up @@ -411,6 +412,7 @@ func run(state overseer.State) {
engine.WithPrintAvgDetectorTime(*printAvgDetectorTime),
engine.WithPrinter(printer),
engine.WithFilterEntropy(*filterEntropy),
engine.WithVerificationOverlap(*allowVerificationOverlap),
)
if err != nil {
logFatal(err, "error initializing engine")
Expand Down
38 changes: 32 additions & 6 deletions pkg/engine/ahocorasick/ahocorasickcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
detectorsByKey := make(map[DetectorKey]detectors.Detector, len(allDetectors))
var keywords []string
for _, d := range allDetectors {
key := createDetectorKey(d)
key := CreateDetectorKey(d)
detectorsByKey[key] = d
for _, kw := range d.Keywords() {
kwLower := strings.ToLower(kw)
Expand All @@ -63,20 +63,46 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
}
}

// DetectorInfo represents a detected pattern's metadata in a data chunk.
// It encapsulates the key identifying a specific detector and the detector instance itself.
type DetectorInfo struct {
// Key is the DetectorKey that identifies the detector.
Key DetectorKey
// Detector is embedded, so its methods are promoted and can be called
// directly on a DetectorInfo value.
detectors.Detector
}

// PopulateMatchingDetectors populates the given detector map with all the detectors matching the
// provided input. This method populates an existing map rather than allocating a new one because
// it will be called once per chunk and that many allocations have a noticeable performance cost.
func (ac *AhoCorasickCore) PopulateMatchingDetectors(chunkData string, detectors map[DetectorKey]detectors.Detector) {
for _, m := range ac.prefilter.MatchString(strings.ToLower(chunkData)) {
// It returns a slice of unique 'DetectorInfo' corresponding to the matched detectors. This slice is
// constructed to prevent duplications by utilizing an internal map to track already processed detectors.
func (ac *AhoCorasickCore) PopulateMatchingDetectors(chunkData string, dts map[DetectorKey]detectors.Detector) []DetectorInfo {
matches := ac.prefilter.MatchString(strings.ToLower(chunkData))

// Use a map to avoid adding duplicate detectors to the slice.
addedDetectors := make(map[DetectorKey]struct{})
uniqueDetectors := make([]DetectorInfo, 0, len(matches))

for _, m := range matches {
for _, k := range ac.keywordsToDetectors[m.MatchString()] {
detectors[k] = ac.detectorsByKey[k]
if _, exists := addedDetectors[k]; exists {
continue
}
// Add to the map to track already added detectors.
addedDetectors[k] = struct{}{}

// Add the detector to the map and slice.
detector := ac.detectorsByKey[k]
dts[k] = detector
uniqueDetectors = append(uniqueDetectors, DetectorInfo{Key: k, Detector: detector})
}
}

return uniqueDetectors
}

// createDetectorKey creates a unique key for each detector from its type, version, and, for
// CreateDetectorKey creates a unique key for each detector from its type, version, and, for
// custom regex detectors, its name.
func createDetectorKey(d detectors.Detector) DetectorKey {
func CreateDetectorKey(d detectors.Detector) DetectorKey {
detectorType := d.Type()
var version int
if v, ok := d.(detectors.Versioner); ok {
Expand Down
Loading
Loading