From 28d079bdad93787452175c0c4a8506a475f13fb7 Mon Sep 17 00:00:00 2001 From: ahrav Date: Mon, 5 Feb 2024 06:53:08 -0800 Subject: [PATCH] use only the DetectorKey as a map field (#2374) --- pkg/engine/ahocorasick/ahocorasickcore.go | 3 +++ pkg/engine/engine.go | 8 ++++---- pkg/engine/engine_test.go | 24 +++++++++++------------ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pkg/engine/ahocorasick/ahocorasickcore.go b/pkg/engine/ahocorasick/ahocorasickcore.go index b083715507db..e5eb3f60df59 100644 --- a/pkg/engine/ahocorasick/ahocorasickcore.go +++ b/pkg/engine/ahocorasick/ahocorasickcore.go @@ -23,6 +23,9 @@ type DetectorKey struct { customDetectorName string } +// Type returns the detector type of the key. +func (k DetectorKey) Type() detectorspb.DetectorType { return k.detectorType } + // AhoCorasickCore encapsulates the operations and data structures used for keyword matching via the // Aho-Corasick algorithm. It is responsible for constructing and managing the trie for efficient // substring searches, as well as mapping keywords to their associated detectors for rapid lookups. diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index eda8f3e4c0a4..3d822e4bb2e2 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -598,8 +598,8 @@ func (e *Engine) detectorWorker(ctx context.Context) { // by the same detector in the chunk. Exact matches on lookup indicate a duplicate secret for a detector // in that chunk - which is expected and not malicious. Those intra-detector dupes are still verified. type chunkSecretKey struct { - secret string - detectorInfo ahocorasick.DetectorInfo + secret string + detectorKey ahocorasick.DetectorKey } func likelyDuplicate(ctx context.Context, val chunkSecretKey, dupes map[chunkSecretKey]struct{}) bool { @@ -615,7 +615,7 @@ func likelyDuplicate(ctx context.Context, val chunkSecretKey, dupes map[chunkSec // If the detector type is the same, we don't need to compare the strings. // These are not duplicates, and should be verified. - if val.detectorInfo.Type() == dupeKey.detectorInfo.Type() { + if val.detectorKey.Type() == dupeKey.detectorKey.Type() { continue } @@ -674,7 +674,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) { // Ex: // - postman api key: PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r // - malicious detector "api key": qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r - key := chunkSecretKey{secret: string(val), detectorInfo: detector} + key := chunkSecretKey{secret: string(val), detectorKey: detector.Key} if _, ok := chunkSecrets[key]; ok { continue } diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go index ea4b5807b4f5..39120d18c1cb 100644 --- a/pkg/engine/engine_test.go +++ b/pkg/engine/engine_test.go @@ -562,47 +562,47 @@ func TestLikelyDuplicate(t *testing.T) { }{ { name: "exact duplicate different detector", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB}: {}, + {"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB.Key}: {}, }, expected: true, }, { name: "non-duplicate length outside range", - val: chunkSecretKey{"short", detectorA}, + val: chunkSecretKey{"short", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"muchlongerthanthevalstring", detectorB}: {}, + {"muchlongerthanthevalstring", detectorB.Key}: {}, }, expected: false, }, { name: "similar within threshold", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB}: {}, + {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorB.Key}: {}, }, expected: true, }, { name: "similar outside threshold", - val: chunkSecretKey{"anotherkey", detectorA}, + val: chunkSecretKey{"anotherkey", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"completelydifferent", detectorB}: {}, + {"completelydifferent", detectorB.Key}: {}, }, expected: false, }, { name: "empty strings", - val: chunkSecretKey{"", detectorA}, - dupes: map[chunkSecretKey]struct{}{{"", detectorB}: {}}, + val: chunkSecretKey{"", detectorA.Key}, + dupes: map[chunkSecretKey]struct{}{{"", detectorB.Key}: {}}, expected: true, }, { name: "similar within threshold same detector", - val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}, + val: chunkSecretKey{"PMAK-qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}, dupes: map[chunkSecretKey]struct{}{ - {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA}: {}, + {"qnwfsLyRSyfCwfpHaQP1UzDhrgpWvHjbYzjpRCMshjt417zWcrzyHUArs7r", detectorA.Key}: {}, }, expected: false, },