Skip to content

Commit

Permalink
remove _ from npm_ and update ahocorasick to our fork
Browse files Browse the repository at this point in the history
  • Loading branch information
zricethezav committed Jul 25, 2023
1 parent f393034 commit ebe7450
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ require (
cloud.google.com/go/secretmanager v1.11.1
cloud.google.com/go/storage v1.31.0
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11
github.com/BobuSumisu/aho-corasick v1.0.3
github.com/TheZeroSlave/zapsentry v1.17.0
github.com/aws/aws-sdk-go v1.44.83
github.com/bill-rich/disk-buffer-reader v0.1.7
Expand Down Expand Up @@ -151,6 +150,7 @@ require (
github.com/sirupsen/logrus v1.9.0 // indirect
github.com/skeema/knownhosts v1.1.1 // indirect
github.com/therootcompany/xz v1.0.1 // indirect
github.com/trufflesecurity/aho-corasick v0.1.0
github.com/ulikunitz/xz v0.5.10 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUM
github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
Expand Down Expand Up @@ -447,6 +445,8 @@ github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502 h1:34icjjmqJ2HP
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8=
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
github.com/trufflesecurity/aho-corasick v0.1.0 h1:bwtdjWFkuzDRLWjJi4fCxkBSv2axhYXJryCqsSgMHSg=
github.com/trufflesecurity/aho-corasick v0.1.0/go.mod h1:JKKZLgcVlyWn0LsPezrqEIQPi76i2URv44hC0gJY4Vo=
github.com/trufflesecurity/overseer v1.1.7-custom5 h1:xu+Fg6fkSRifUPzUCl7N8HmobJ6WGOkIApGnM7mJS6w=
github.com/trufflesecurity/overseer v1.1.7-custom5/go.mod h1:nT9w37AiO1Nop2VhVhNfzAFaPjthvxgpDV3XKsxYkcI=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/npmtokenv2/npmtokenv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ var (
// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
return []string{"npm_"}
return []string{"npm"}
}

// FromData will find and optionally verify NpmTokenV2 secrets in a given set of bytes.
Expand Down
17 changes: 11 additions & 6 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"sync/atomic"
"time"

ahocorasick "github.com/BobuSumisu/aho-corasick"
ahocorasick "github.com/trufflesecurity/aho-corasick"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -43,7 +43,7 @@ type Engine struct {

// prefilter is an ahocorasick struct used for doing efficient string
// matching given a set of words (keywords from the rules in the config)
prefilter ahocorasick.Trie
prefilter ahocorasick.AhoCorasick
}

type EngineOption func(*Engine)
Expand Down Expand Up @@ -150,7 +150,13 @@ func Start(ctx context.Context, options ...EngineOption) *Engine {
for _, d := range e.detectors[true] {
keywords = append(keywords, d.Keywords()...)
}
e.prefilter = *ahocorasick.NewTrieBuilder().AddStrings(keywords).Build()
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{
AsciiCaseInsensitive: true,
MatchOnlyWholeWords: false,
MatchKind: ahocorasick.StandardMatch,
DFA: false,
})
e.prefilter = builder.Build(keywords)

ctx.Logger().Info("loaded decoders", "count", len(e.decoders))
ctx.Logger().Info("loaded detectors",
Expand Down Expand Up @@ -291,8 +297,8 @@ func (e *Engine) detectorWorker(ctx context.Context) {
}

// build a map of all keywords that were matched in the chunk
for _, m := range e.prefilter.MatchString(string(decoded.Data)) {
matchedKeywords[strings.ToLower(m.MatchString())] = struct{}{}
for _, m := range e.prefilter.FindAll(string(decoded.Data)) {
matchedKeywords[strings.ToLower(string(decoded.Data[m.Start():m.End()]))] = struct{}{}
}

for verify, detectorsSet := range e.detectors {
Expand All @@ -301,7 +307,6 @@ func (e *Engine) detectorWorker(ctx context.Context) {
for _, kw := range detector.Keywords() {
if _, ok := matchedKeywords[strings.ToLower(kw)]; ok {
chunkContainsKeyword = true
break
}
}

Expand Down

0 comments on commit ebe7450

Please sign in to comment.