Skip to content

Commit

Permalink
Re-add detector version (#2060)
Browse files Browse the repository at this point in the history
#2010 mistakenly removed detector version tracking from the Aho Corasick wrapper. This PR re-adds it.
  • Loading branch information
rosecodym authored Oct 30, 2023
1 parent 3c2270a commit 4505986
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 12 deletions.
44 changes: 32 additions & 12 deletions pkg/engine/ahocorasickcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// detectorKey is used to identify a detector in the keywordsToDetectors map.
// Multiple detectors can have the same detector type but different versions.
// This allows us to identify a detector by its type and version.
// It is also the key type of the detectorsByKey map.
type detectorKey struct {
// detectorType is the value returned by the detector's Type method.
detectorType detectorspb.DetectorType
// version is the value returned by the detector's Version method, or 0 when
// the detector does not implement detectors.Versioner.
version int
}

// AhoCorasickCore encapsulates the operations and data structures used for keyword matching via the
// Aho-Corasick algorithm. It is responsible for constructing and managing the trie for efficient
// substring searches, as well as mapping keywords to their associated detectors for rapid lookups.
Expand All @@ -21,30 +29,31 @@ type AhoCorasickCore struct {
// type and then again from detector type to detector. We could
// go straight from keywords to detectors but doing it this way makes
// some consuming code a little cleaner.)
keywordsToDetectorTypes map[string][]detectorspb.DetectorType
detectorsByType map[detectorspb.DetectorType]detectors.Detector
keywordsToDetectors map[string][]detectorKey
detectorsByKey map[detectorKey]detectors.Detector
}

// NewAhoCorasickCore allocates and initializes a new instance of AhoCorasickCore. It uses the
// provided detector slice to create a map from keywords to detectors and build the Aho-Corasick
// prefilter trie.
//
// Every keyword is lowercased before it is stored, so both the trie and the keyword lookup map
// hold lowercase keywords only.
func NewAhoCorasickCore(allDetectors []detectors.Detector) *AhoCorasickCore {
keywordsToDetectorTypes := make(map[string][]detectorspb.DetectorType)
detectorsByType := make(map[detectorspb.DetectorType]detectors.Detector, len(allDetectors))
keywordsToDetectors := make(map[string][]detectorKey)
detectorsByKey := make(map[detectorKey]detectors.Detector, len(allDetectors))
var keywords []string
for _, d := range allDetectors {
// Keyed by type alone: a later detector with the same type replaces an earlier one.
detectorsByType[d.Type()] = d
// Keyed by type and version: detectors that share a type but differ in version are kept
// as separate entries.
key := createDetectorKey(d)
detectorsByKey[key] = d
for _, kw := range d.Keywords() {
// Lowercase once and reuse the result for both the trie input and the lookup map key.
kwLower := strings.ToLower(kw)
// A keyword shared by several detectors is appended once per detector.
keywords = append(keywords, kwLower)
keywordsToDetectorTypes[kwLower] = append(keywordsToDetectorTypes[kwLower], d.Type())
keywordsToDetectors[kwLower] = append(keywordsToDetectors[kwLower], key)
}
}

return &AhoCorasickCore{
keywordsToDetectorTypes: keywordsToDetectorTypes,
detectorsByType: detectorsByType,
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
keywordsToDetectors: keywordsToDetectors,
detectorsByKey: detectorsByKey,
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
}
}

Expand All @@ -58,12 +67,23 @@ func (ac *AhoCorasickCore) MatchString(input string) []*ahocorasick.Match {
// This method is designed to reuse the same map for performance optimization,
// reducing the need for repeated allocations within each detector worker in the engine.
func (ac *AhoCorasickCore) PopulateDetectorsByMatch(match *ahocorasick.Match, detectors map[detectorspb.DetectorType]detectors.Detector) bool {
matchedDetectorTypes, ok := ac.keywordsToDetectorTypes[match.MatchString()]
matchedDetectorKeys, ok := ac.keywordsToDetectors[match.MatchString()]
// The matched string is not a registered keyword: leave the caller's map untouched and
// report false.
if !ok {
return false
}
for _, t := range matchedDetectorTypes {
detectors[t] = ac.detectorsByType[t]
for _, key := range matchedDetectorKeys {
// NOTE(review): the destination map is keyed by detector type only, so two keys that share
// a type but differ in version write to the same entry and the later one wins.
detectors[key.detectorType] = ac.detectorsByKey[key]
}
return true
}

// createDetectorKey builds the detectorKey for d from its detector type and its version.
// The version is taken from d only when d implements detectors.Versioner; otherwise it
// stays at the zero value.
func createDetectorKey(d detectors.Detector) detectorKey {
	key := detectorKey{detectorType: d.Type()}
	if versioned, isVersioned := d.(detectors.Versioner); isVersioned {
		key.version = versioned.Version()
	}
	return key
}
88 changes: 88 additions & 0 deletions pkg/engine/ahocorasickcore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package engine

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestDetectorType is the detector type reported by both test detectors in this file.
// NOTE(review): the negative value is presumably chosen so it cannot collide with a real
// detectorspb.DetectorType value; confirm against the generated enum.
const TestDetectorType = -1

// testDetectorV1 is a stub detector that reports TestDetectorType at version 1, is keyed on
// the keyword "a", and never produces results.
type testDetectorV1 struct{}

// FromData ignores its arguments and returns an empty, non-nil result slice and no error.
func (testDetectorV1) FromData(ctx context.Context, verify bool, data []byte) ([]detectors.Result, error) {
	return []detectors.Result{}, nil
}

// Keywords returns the single keyword "a".
func (testDetectorV1) Keywords() []string {
	return []string{"a"}
}

// Type returns TestDetectorType.
func (testDetectorV1) Type() detectorspb.DetectorType {
	return TestDetectorType
}

// Version returns 1.
func (testDetectorV1) Version() int {
	return 1
}

// testDetectorV2 is a stub detector that reports TestDetectorType at version 2, is keyed on
// the keyword "b", and never produces results.
type testDetectorV2 struct{}

// FromData ignores its arguments and returns an empty, non-nil result slice and no error.
func (testDetectorV2) FromData(ctx context.Context, verify bool, data []byte) ([]detectors.Result, error) {
	return []detectors.Result{}, nil
}

// Keywords returns the single keyword "b".
func (testDetectorV2) Keywords() []string {
	return []string{"b"}
}

// Type returns TestDetectorType.
func (testDetectorV2) Type() detectorspb.DetectorType {
	return TestDetectorType
}

// Version returns 2.
func (testDetectorV2) Version() int {
	return 2
}

// Compile-time checks that both test detectors satisfy detectors.Detector and
// detectors.Versioner.
var (
	_ detectors.Detector  = (*testDetectorV1)(nil)
	_ detectors.Detector  = (*testDetectorV2)(nil)
	_ detectors.Versioner = (*testDetectorV1)(nil)
	_ detectors.Versioner = (*testDetectorV2)(nil)
)

// TestAhoCorasickCore_MultipleDetectorVersionsMatchable verifies that two detectors sharing a
// detector type but reporting different versions are both kept by AhoCorasickCore, and that
// each one can be retrieved through its own keyword.
func TestAhoCorasickCore_MultipleDetectorVersionsMatchable(t *testing.T) {
	testCases := []struct {
		matchString string
		detector    detectors.Detector
	}{
		{
			matchString: "a",
			detector:    testDetectorV1{},
		},
		{
			matchString: "b",
			detector:    testDetectorV2{},
		},
	}

	allDetectors := make([]detectors.Detector, 0, len(testCases))
	for _, tt := range testCases {
		allDetectors = append(allDetectors, tt.detector)
	}

	ac := NewAhoCorasickCore(allDetectors)

	for _, tt := range testCases {
		matches := ac.MatchString(tt.matchString)
		// assert does not stop the test on failure, so skip the rest of this case when the
		// match count is wrong: indexing matches[0] on an empty slice would panic and bury
		// the assertion message under a stack trace.
		if !assert.Len(t, matches, 1) {
			continue
		}

		matchingDetectors := make(map[detectorspb.DetectorType]detectors.Detector)
		// The matched keyword must be known to the core, so population must report success.
		assert.True(t, ac.PopulateDetectorsByMatch(matches[0], matchingDetectors))
		assert.Len(t, matchingDetectors, 1)
		assert.Equal(t, tt.detector, matchingDetectors[TestDetectorType])
	}
}

0 comments on commit 4505986

Please sign in to comment.