Skip to content

Commit

Permalink
[feat] - Update span calculation logic to use offset magnitude (truff…
Browse files Browse the repository at this point in the history
…lesecurity#2957)

* Add a default start offset

* update

* use keywordIdx
  • Loading branch information
ahrav authored Jun 11, 2024
1 parent 68bea57 commit bf77251
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 42 deletions.
6 changes: 6 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ type MaxSecretSizeProvider interface {
MaxSecretSize() int64
}

// StartOffsetProvider is an optional interface that a detector can implement to
// provide a custom start offset for the secret it finds.
//
// The offset is a distance measured backwards from the index of the matched
// keyword: the span calculator subtracts it from the keyword index, clamping
// the result at zero, to decide where the match span begins.
type StartOffsetProvider interface {
// StartOffset returns how far before the matched keyword the span should begin.
StartOffset() int64
}

// MultiPartCredentialProvider is an optional interface that a detector can implement
// to indicate its compatibility with multi-part credentials and provide the maximum
// secret size for the credential it finds.
Expand Down
12 changes: 9 additions & 3 deletions pkg/detectors/gcp/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Scanner struct{}
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var _ detectors.MaxSecretSizeProvider = (*Scanner)(nil)
var _ detectors.StartOffsetProvider = (*Scanner)(nil)

var (
keyPat = regexp.MustCompile(`\{[^{]+auth_provider_x509_cert_url[^}]+\}`)
Expand Down Expand Up @@ -50,10 +51,15 @@ func (s Scanner) Keywords() []string {
return []string{"provider_x509"}
}

const maxGCPKeySize = 4096
const maxGCPKeySize = 2048

// ProvideMaxSecretSize returns the maximum size of a secret that this detector can find.
func (s Scanner) MaxSecretSize() int64 { return maxGCPKeySize }
// MaxSecretSize returns the maximum size of a secret that this detector can find.
func (Scanner) MaxSecretSize() int64 { return maxGCPKeySize }

// startOffset is the distance before the matched keyword that this detector
// asks the span calculator to include in the match span.
const startOffset = 4096

// StartOffset reports the start offset requested by this detector, which is
// the package-level startOffset constant.
func (Scanner) StartOffset() int64 {
	return startOffset
}

// FromData will find and optionally verify GCP secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type Scanner struct {
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.MaxSecretSizeProvider = (*Scanner)(nil)
var _ detectors.StartOffsetProvider = (*Scanner)(nil)

var (
defaultClient = common.SaneHttpClient()
Expand All @@ -48,8 +49,13 @@ func (s Scanner) Keywords() []string {

const maxGCPADCKeySize = 1024

// ProvideMaxSecretSize returns the maximum size of a secret that this detector can find.
func (s Scanner) MaxSecretSize() int64 { return maxGCPADCKeySize }
// MaxSecretSize returns the maximum size of a secret that this detector can find.
func (Scanner) MaxSecretSize() int64 { return maxGCPADCKeySize }

// startOffset is the distance before the matched keyword that this detector
// asks the span calculator to include in the match span. It is deliberately
// tied to maxGCPADCKeySize so the two values stay equal.
const startOffset = maxGCPADCKeySize

// StartOffset reports the start offset requested by this detector, which is
// the package-level startOffset constant.
func (Scanner) StartOffset() int64 {
	return startOffset
}

// FromData will find and optionally verify Gcpapplicationdefaultcredentials secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
Expand Down
78 changes: 44 additions & 34 deletions pkg/engine/ahocorasick/ahocorasickcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ type spanCalculator interface {
}

// spanCalculationParams provides the necessary context for calculating match spans,
// including the starting index in the chunk, the chunk data itself, and the detector being used.
// including the keyword index in the chunk, the chunk data itself, and the detector being used.
type spanCalculationParams struct {
startIdx int64
chunkData []byte
detector detectors.Detector
keywordIdx int64 // Index of the keyword in the chunk data
chunkData []byte
detector detectors.Detector
}

// EntireChunkSpanCalculator is a strategy that calculates the match span to use the entire chunk data.
Expand All @@ -51,34 +51,44 @@ func (e *EntireChunkSpanCalculator) calculateSpan(params spanCalculationParams)
return matchSpan{startOffset: 0, endOffset: int64(len(params.chunkData))}
}

// maxMatchLengthSpanCalculator is a strategy that calculates match spans based on a default max
// match length or values provided by detectors. This allows for more granular control over the match span.
type maxMatchLengthSpanCalculator struct{ maxMatchLength int64 }
// adjustableSpanCalculator is a strategy that calculates match spans around the index of a
// matched keyword. offsetMagnitude is the default distance applied on both sides of the keyword;
// detectors that implement the optional provider interfaces can override the start and end
// indices of the span, allowing for more granular control over the match.
type adjustableSpanCalculator struct{ offsetMagnitude int64 }

// newMaxMatchLengthSpanCalculator creates a new instance of maxMatchLengthSpanCalculator with the
// specified max match length.
func newMaxMatchLengthSpanCalculator(maxMatchLength int64) *maxMatchLengthSpanCalculator {
return &maxMatchLengthSpanCalculator{maxMatchLength: maxMatchLength}
// newAdjustableSpanCalculator returns a pointer to a fresh adjustableSpanCalculator
// whose offsetMagnitude is set to offsetRadius.
func newAdjustableSpanCalculator(offsetRadius int64) *adjustableSpanCalculator {
	calc := new(adjustableSpanCalculator)
	calc.offsetMagnitude = offsetRadius
	return calc
}

// calculateSpans computes the match spans based on the start index and the max match length.
// If the detector provides an override value, it uses that instead of the default max match length.
func (m *maxMatchLengthSpanCalculator) calculateSpan(params spanCalculationParams) matchSpan {
maxSize := m.maxMatchLength

switch d := params.detector.(type) {
case detectors.MultiPartCredentialProvider:
maxSize = d.MaxCredentialSpan()
case detectors.MaxSecretSizeProvider:
maxSize = d.MaxSecretSize()
default: // Use the default max match length
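// calculateSpan computes the match span around the keyword index. By default the span extends
// offsetMagnitude on either side of the keyword. A detector can override the end of the span via
// MultiPartCredentialProvider or MaxSecretSizeProvider, and the start of the span via
// StartOffsetProvider (MultiPartCredentialProvider also moves the start back by MaxCredentialSpan).
// When a detector implements several of these, MaxSecretSize wins for the end and StartOffset
// wins for the start. The resulting span is clamped to the bounds of the chunk data.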
func (m *adjustableSpanCalculator) calculateSpan(params spanCalculationParams) matchSpan {
keywordIdx := params.keywordIdx

maxSize := keywordIdx + m.offsetMagnitude
startOffset := keywordIdx - m.offsetMagnitude

// Check if the detector implements each interface and update values accordingly.
// This CAN'T be done in a switch statement because a detector can implement multiple interfaces.
if provider, ok := params.detector.(detectors.MultiPartCredentialProvider); ok {
maxSize = provider.MaxCredentialSpan() + keywordIdx
startOffset = keywordIdx - provider.MaxCredentialSpan()
}
endIdx := params.startIdx + maxSize
if endIdx > int64(len(params.chunkData)) {
endIdx = int64(len(params.chunkData))
if provider, ok := params.detector.(detectors.MaxSecretSizeProvider); ok {
maxSize = provider.MaxSecretSize() + keywordIdx
}
if provider, ok := params.detector.(detectors.StartOffsetProvider); ok {
startOffset = keywordIdx - provider.StartOffset()
}

startIdx := max(startOffset, 0)
endIdx := min(maxSize, int64(len(params.chunkData)))

return matchSpan{startOffset: params.startIdx, endOffset: endIdx}
return matchSpan{startOffset: startIdx, endOffset: endIdx}
}

// CoreOption is a functional option type for configuring an AhoCorasickCore instance.
Expand Down Expand Up @@ -123,19 +133,19 @@ func NewAhoCorasickCore(allDetectors []detectors.Detector, opts ...CoreOption) *
}
}

const maxMatchLength int64 = 512
ac := &Core{
const defaultOffsetRadius int64 = 512
core := &Core{
keywordsToDetectors: keywordsToDetectors,
detectorsByKey: detectorsByKey,
prefilter: *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build(),
spanCalculator: newMaxMatchLengthSpanCalculator(maxMatchLength), // Default span calculator
spanCalculator: newAdjustableSpanCalculator(defaultOffsetRadius), // Default span calculator
}

for _, opt := range opts {
opt(ac)
opt(core)
}

return ac
return core
}

// DetectorMatch represents a detected pattern's metadata in a data chunk.
Expand Down Expand Up @@ -234,9 +244,9 @@ func (ac *Core) FindDetectorMatches(chunkData []byte) []*DetectorMatch {
startIdx := m.Pos()
span := ac.spanCalculator.calculateSpan(
spanCalculationParams{
startIdx: startIdx,
chunkData: chunkData,
detector: detectorMatch.Detector,
keywordIdx: startIdx,
chunkData: chunkData,
detector: detectorMatch.Detector,
},
)
detectorMatch.addMatchSpan(span)
Expand Down
164 changes: 161 additions & 3 deletions pkg/engine/ahocorasick/ahocorasickcore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,64 @@ func (testDetectorV3) Type() detectorspb.DetectorType {

func (testDetectorV3) Version() int { return 1 }

var _ detectors.Detector = (*testDetectorV4)(nil)
var _ detectors.MultiPartCredentialProvider = (*testDetectorV4)(nil)
var _ detectors.StartOffsetProvider = (*testDetectorV4)(nil)

// testDetectorV4 is a stub detector keyed on "password" that implements both
// MultiPartCredentialProvider (span of 15) and StartOffsetProvider (offset of 5),
// so tests can exercise span calculation when both overrides are present.
type testDetectorV4 struct{}

// FromData returns an empty, non-nil result slice and a nil error.
func (testDetectorV4) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
return make([]detectors.Result, 0), nil
}

// Keywords returns the single keyword "password".
func (testDetectorV4) Keywords() []string { return []string{"password"} }

// Type returns TestDetectorType.
func (testDetectorV4) Type() detectorspb.DetectorType { return TestDetectorType }

// Version returns 1.
func (testDetectorV4) Version() int { return 1 }

// MaxCredentialSpan returns 15.
func (testDetectorV4) MaxCredentialSpan() int64 { return 15 }

// StartOffset returns 5.
func (testDetectorV4) StartOffset() int64 { return 5 }

var _ detectors.Detector = (*testDetectorV5)(nil)
var _ detectors.MaxSecretSizeProvider = (*testDetectorV5)(nil)
var _ detectors.StartOffsetProvider = (*testDetectorV5)(nil)

// testDetectorV5 is a stub detector keyed on "password" that implements both
// MaxSecretSizeProvider (size of 10) and StartOffsetProvider (offset of 3),
// so tests can exercise span calculation when both overrides are present.
type testDetectorV5 struct{}

// FromData returns an empty, non-nil result slice and a nil error.
func (testDetectorV5) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
return make([]detectors.Result, 0), nil
}

// Keywords returns the single keyword "password".
func (testDetectorV5) Keywords() []string { return []string{"password"} }

// Type returns TestDetectorType.
func (testDetectorV5) Type() detectorspb.DetectorType { return TestDetectorType }

// Version returns 1.
func (testDetectorV5) Version() int { return 1 }

// MaxSecretSize returns 10.
func (testDetectorV5) MaxSecretSize() int64 { return 10 }

// StartOffset returns 3.
func (testDetectorV5) StartOffset() int64 { return 3 }

var _ detectors.Detector = (*testDetectorV6)(nil)
var _ detectors.StartOffsetProvider = (*testDetectorV6)(nil)

// testDetectorV6 is a stub detector keyed on "password" that implements only
// StartOffsetProvider (offset of 1). It provides no size override, so tests using
// it exercise a custom span start combined with the calculator's default end.
type testDetectorV6 struct{}

// FromData returns an empty, non-nil result slice and a nil error.
func (testDetectorV6) FromData(context.Context, bool, []byte) ([]detectors.Result, error) {
	return make([]detectors.Result, 0), nil
}

// Keywords returns the single keyword "password".
func (testDetectorV6) Keywords() []string { return []string{"password"} }

// Type returns TestDetectorType.
func (testDetectorV6) Type() detectorspb.DetectorType { return TestDetectorType }

// Version returns 1.
func (testDetectorV6) Version() int { return 1 }

// StartOffset returns 1.
func (testDetectorV6) StartOffset() int64 { return 1 }

var _ detectors.Detector = (*testDetectorV1)(nil)
var _ detectors.Detector = (*testDetectorV2)(nil)
var _ detectors.Versioner = (*testDetectorV1)(nil)
Expand Down Expand Up @@ -141,7 +199,7 @@ func TestFindDetectorMatches(t *testing.T) {
},
sampleData: "This is a sample data containing keyword truffle",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV3{}): {{41, 48}},
CreateDetectorKey(testDetectorV3{}): {{0, 48}},
},
},
{
Expand All @@ -151,7 +209,7 @@ func TestFindDetectorMatches(t *testing.T) {
},
sampleData: "This is a sample data containing keyword a",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV1{}): {{8, 42}},
CreateDetectorKey(testDetectorV1{}): {{0, 42}},
},
},
{
Expand All @@ -172,7 +230,7 @@ func TestFindDetectorMatches(t *testing.T) {
eget ultricies ugue ugue id ugue. Meens liquet libero
c libero molestie, nec mlesud ugue ugue eget. This is the second occurrence of the letter a.`,
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV2{}): {{43, 555}, {854, 856}},
CreateDetectorKey(testDetectorV2{}): {{0, 856}},
},
},
{
Expand Down Expand Up @@ -219,6 +277,106 @@ func TestFindDetectorMatches(t *testing.T) {
CreateDetectorKey(testDetectorV2{}): {{0, 856}},
},
},
{
name: "keyword in the middle of the credential; MultiPartCredentialProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV4{},
},
sampleData: "This is a password in the middle of some data",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV4{}): {{5, 25}},
},
},
{
name: "keyword at the end of the credential; MultiPartCredentialProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV4{},
},
sampleData: "This data ends with a password",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV4{}): {{17, 30}},
},
},
{
name: "keyword near the start of the data; MultiPartCredentialProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV4{},
},
sampleData: "a password at the start",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV4{}): {{0, 17}},
},
},
{
name: "keyword in the middle of the credential; MaxSecretSizeProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV5{},
},
sampleData: "This is a password in the middle of some data",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV5{}): {{7, 20}},
},
},
{
name: "keyword at the end of the credential; MaxSecretSizeProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV5{},
},
sampleData: "This data ends with a password",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV5{}): {{19, 30}},
},
},
{
name: "keyword near the start of the data; MaxSecretSizeProvider, StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV5{},
},
sampleData: "a password at the start",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV5{}): {{0, 12}},
},
},
{
name: "keyword in the middle of the credential; StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV6{},
},
sampleData: "This is a password in the middle of some data",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV6{}): {{9, 45}},
},
},
{
name: "keyword at the end of the credential; StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV6{},
},
sampleData: "This data ends with a password",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV6{}): {{21, 30}},
},
},
{
name: "keyword near the start of the data; StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV6{},
},
sampleData: "a password at the start",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV6{}): {{1, 23}},
},
},
{
name: "multiple keyword in the middle of the credential; StartOffsetProvider",
detectors: []detectors.Detector{
testDetectorV6{},
},
sampleData: "This is a password in the middle of some data, and another password at the end!",
expectedResult: map[DetectorKey][][]int64{
CreateDetectorKey(testDetectorV6{}): {{9, 79}},
},
},
{
name: "No matches",
detectors: []detectors.Detector{
Expand Down

0 comments on commit bf77251

Please sign in to comment.