From 232032410ccda9b25a9ed64eb0fd9d23837d7f1a Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:21:23 -0500 Subject: [PATCH] feat(detectors): update template (#2342) --- hack/generate/generate.go | 6 +- pkg/detectors/alchemy/alchemy.go | 75 +++++++++++-------- pkg/detectors/alchemy/alchemy_test.go | 63 +++++++++++++++- .../{ => ahocorasick}/ahocorasickcore.go | 3 +- .../{ => ahocorasick}/ahocorasickcore_test.go | 3 +- pkg/engine/engine.go | 7 +- 6 files changed, 118 insertions(+), 39 deletions(-) rename pkg/engine/{ => ahocorasick}/ahocorasickcore.go (99%) rename pkg/engine/{ => ahocorasick}/ahocorasickcore_test.go (99%) diff --git a/hack/generate/generate.go b/hack/generate/generate.go index 81acd6775066..e6286bed144c 100644 --- a/hack/generate/generate.go +++ b/hack/generate/generate.go @@ -85,8 +85,10 @@ func mustWriteTemplates(jobs []templateJob) { tmplRaw := string(tmplBytes) for _, rplString := range job.ReplaceString { + rplTitle := cases.Title(language.AmericanEnglish).String(rplString) + tmplRaw = strings.ReplaceAll(tmplRaw, "DetectorType_"+rplTitle, "DetectorType_<<.Name>>") tmplRaw = strings.ReplaceAll(tmplRaw, strings.ToLower(rplString), "<<.NameLower>>") - tmplRaw = strings.ReplaceAll(tmplRaw, cases.Title(language.AmericanEnglish).String(rplString), "<<.NameTitle>>") + tmplRaw = strings.ReplaceAll(tmplRaw, rplTitle, "<<.NameTitle>>") tmplRaw = strings.ReplaceAll(tmplRaw, strings.ToUpper(rplString), "<<.NameUpper>>") } @@ -98,6 +100,7 @@ func mustWriteTemplates(jobs []templateJob) { log.Fatal(err) } err = tmpl.Execute(f, templateData{ + Name: *name, NameTitle: nameTitle, NameLower: nameLower, NameUpper: nameUpper, @@ -109,6 +112,7 @@ func mustWriteTemplates(jobs []templateJob) { } type templateData struct { + Name string NameTitle string NameLower string NameUpper string diff --git a/pkg/detectors/alchemy/alchemy.go b/pkg/detectors/alchemy/alchemy.go index d9699dc0b71b..e32fd44499df 100644 --- a/pkg/detectors/alchemy/alchemy.go +++ b/pkg/detectors/alchemy/alchemy.go @@ -3,9 +3,10 @@ package alchemy import ( "context" "fmt" - regexp "github.com/wasilibs/go-re2" + "io" "net/http" - "strings" + + regexp "github.com/wasilibs/go-re2" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -22,7 +23,7 @@ var _ detectors.Detector = (*Scanner)(nil) var ( defaultClient = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"alchemy"}) + `\b([0-9a-zA-Z]{23}_[0-9a-zA-Z]{8})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"alchemy"}) + `\b([a-zA-Z0-9]{23}_[a-zA-Z0-9]{8})\b`) ) // Keywords are used for efficiently pre-filtering chunks. @@ -35,17 +36,15 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueMatches := make(map[string]struct{}) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uniqueMatches[match[1]] = struct{}{} + } + for match := range uniqueMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Alchemy, - Raw: []byte(resMatch), + Raw: []byte(match), } if verify { @@ -53,35 +52,49 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if client == nil { client = defaultClient } - req, err := http.NewRequestWithContext(ctx, "GET", "https://eth-mainnet.g.alchemy.com/v2/"+resMatch+"/getNFTs/?owner=vitalik.eth", nil) - if err != nil { - continue - } - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } else if res.StatusCode == 401 { - // The secret is determinately not verified (nothing to do) - } else { - err = fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) - s1.SetVerificationError(err, resMatch) - } - } else { - s1.SetVerificationError(err, resMatch) - } + + isVerified, extraData, verificationErr := verifyMatch(ctx, client, match) + s1.Verified = isVerified + s1.ExtraData = extraData + s1.SetVerificationError(verificationErr, match) } // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. - if !s1.Verified && detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) { + if !s1.Verified && detectors.IsKnownFalsePositive(match, detectors.DefaultFalsePositives, true) { continue } results = append(results, s1) } - return results, nil + return +} + +func verifyMatch(ctx context.Context, client *http.Client, token string) (bool, map[string]string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://eth-mainnet.g.alchemy.com/v2/"+token+"/getNFTs/?owner=vitalik.eth", nil) + if err != nil { + return false, nil, nil + } + + res, err := client.Do(req) + if err != nil { + return false, nil, err + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + if res.StatusCode >= 200 && res.StatusCode < 300 { + // If the endpoint returns useful information, we can return it as a map. + return true, nil, nil + } else if res.StatusCode == 401 { + // The secret is determinately not verified (nothing to do) + return false, nil, nil + } else { + err = fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) + return false, nil, err + } } func (s Scanner) Type() detectorspb.DetectorType { diff --git a/pkg/detectors/alchemy/alchemy_test.go b/pkg/detectors/alchemy/alchemy_test.go index 6bf24a625abf..e3465e5e9e1a 100644 --- a/pkg/detectors/alchemy/alchemy_test.go +++ b/pkg/detectors/alchemy/alchemy_test.go @@ -12,12 +12,71 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) +func TestAlchemy_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + tests := []struct { + name string + input string + want []string + }{ + { + name: "typical pattern", + input: "alchemy_token = '3aBcDFE5678901234567890_1a2b3c4d'", + want: []string{"3aBcDFE5678901234567890_1a2b3c4d"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, 2) + ahoCorasickCore.PopulateMatchingDetectors(test.input, chunkSpecificDetectors) + if len(chunkSpecificDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } + }) + } +} + func TestAlchemy_FromChunk(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() diff --git a/pkg/engine/ahocorasickcore.go b/pkg/engine/ahocorasick/ahocorasickcore.go similarity index 99% rename from pkg/engine/ahocorasickcore.go rename to pkg/engine/ahocorasick/ahocorasickcore.go index 19fda9d90f35..1dd35e29c994 100644 --- a/pkg/engine/ahocorasickcore.go +++ b/pkg/engine/ahocorasick/ahocorasickcore.go @@ -1,9 +1,10 @@ -package engine +package ahocorasick import ( "strings" ahocorasick "github.com/BobuSumisu/aho-corasick" + "github.com/trufflesecurity/trufflehog/v3/pkg/custom_detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" diff --git a/pkg/engine/ahocorasickcore_test.go b/pkg/engine/ahocorasick/ahocorasickcore_test.go similarity index 99% rename from pkg/engine/ahocorasickcore_test.go rename to pkg/engine/ahocorasick/ahocorasickcore_test.go index 3bf56a754de5..4e5229740385 100644 --- a/pkg/engine/ahocorasickcore_test.go +++ b/pkg/engine/ahocorasick/ahocorasickcore_test.go @@ -1,10 +1,11 @@ -package engine +package ahocorasick import ( "context" "testing" "github.com/stretchr/testify/assert" + "github.com/trufflesecurity/trufflehog/v3/pkg/custom_detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb" diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 38f0da44371a..ab33110f4f20 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -17,6 +17,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/decoders" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" "github.com/trufflesecurity/trufflehog/v3/pkg/giturl" "github.com/trufflesecurity/trufflehog/v3/pkg/output" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -65,7 +66,7 @@ type Engine struct { printAvgDetectorTime bool // ahoCorasickHandler manages the Aho-Corasick trie and related keyword lookups. - ahoCorasickCore *AhoCorasickCore + ahoCorasickCore *ahocorasick.AhoCorasickCore // Engine synchronization primitives. sourceManager *sources.SourceManager @@ -314,7 +315,7 @@ func (e *Engine) initialize(ctx context.Context, options ...Option) error { ctx.Logger().V(4).Info("engine initialized") ctx.Logger().V(4).Info("setting up aho-corasick core") - e.ahoCorasickCore = NewAhoCorasickCore(e.detectors) + e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors) ctx.Logger().V(4).Info("set up aho-corasick core") return nil @@ -463,7 +464,7 @@ func (e *Engine) detectorWorker(ctx context.Context) { // Reuse the same map to avoid allocations. const avgDetectorsPerChunk = 2 - chunkSpecificDetectors := make(map[DetectorKey]detectors.Detector, avgDetectorsPerChunk) + chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, avgDetectorsPerChunk) for originalChunk := range e.ChunksChan() { for chunk := range sources.Chunker(originalChunk) { atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data)))