From 3b9ecaa7040803e837ad42b95969e5ce763ff244 Mon Sep 17 00:00:00 2001
From: Corben Leo <19563282+lc@users.noreply.github.com>
Date: Fri, 3 Nov 2023 10:15:53 -0500
Subject: [PATCH] Detector-Competition-Fix: Fix ScraperSite (deprecated) (#2074)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: āh̳̕mͭͭͨͩ̐e̘ͬ́͋ͬ̊̓͂d <13666360+0x1@users.noreply.github.com>
---
 pkg/detectors/scrapersite/scrapersite.go      |  89 -------------
 pkg/detectors/scrapersite/scrapersite_test.go | 120 ------------------
 pkg/engine/defaults.go                        |   2 -
 pkg/pb/detectorspb/detectors.pb.go            |  51 ++++----
 proto/detectors.proto                         |   2 +-
 5 files changed, 27 insertions(+), 237 deletions(-)
 delete mode 100644 pkg/detectors/scrapersite/scrapersite.go
 delete mode 100644 pkg/detectors/scrapersite/scrapersite_test.go

diff --git a/pkg/detectors/scrapersite/scrapersite.go b/pkg/detectors/scrapersite/scrapersite.go
deleted file mode 100644
index 1295c831a6be..000000000000
--- a/pkg/detectors/scrapersite/scrapersite.go
+++ /dev/null
@@ -1,89 +0,0 @@
-package scrapersite
-
-import (
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"regexp"
-	"strings"
-	"time"
-
-	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
-)
-
-type Scanner struct{}
-
-// Ensure the Scanner satisfies the interface at compile time.
-var _ detectors.Detector = (*Scanner)(nil)
-
-var (
-	client = common.SaneHttpClientTimeOut(10 * time.Second)
-
-	// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
-	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"scrapersite"}) + `\b([a-zA-Z0-9]{45})\b`)
-)
-
-// Keywords are used for efficiently pre-filtering chunks.
-// Use identifiers in the secret preferably, or the provider name.
-func (s Scanner) Keywords() []string {
-	return []string{"scrapersite"}
-}
-
-// FromData will find and optionally verify ScraperSite secrets in a given set of bytes.
-func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
-	dataStr := string(data)
-
-	matches := keyPat.FindAllStringSubmatch(dataStr, -1)
-
-	for _, match := range matches {
-		if len(match) != 2 {
-			continue
-		}
-		resMatch := strings.TrimSpace(match[1])
-
-		s1 := detectors.Result{
-			DetectorType: detectorspb.DetectorType_ScraperSite,
-			Raw:          []byte(resMatch),
-		}
-
-		if verify {
-			req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://scrapersite.com/api-v1?api_key=%s&url=https://google.com", resMatch), nil)
-			if err != nil {
-				continue
-			}
-			res, err := client.Do(req)
-			if err == nil {
-				bodyBytes, err := io.ReadAll(res.Body)
-				if err != nil {
-					continue
-				}
-				bodyString := string(bodyBytes)
-				validResponse := strings.Contains(bodyString, `"status":true`)
-				defer res.Body.Close()
-				if res.StatusCode >= 200 && res.StatusCode < 300 {
-					if validResponse {
-						s1.Verified = true
-					} else {
-						s1.Verified = false
-					}
-				} else {
-					// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
-					if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
-						continue
-					}
-				}
-			}
-		}
-
-		results = append(results, s1)
-	}
-
-	return results, nil
-}
-
-func (s Scanner) Type() detectorspb.DetectorType {
-	return detectorspb.DetectorType_ScraperSite
-}
diff --git a/pkg/detectors/scrapersite/scrapersite_test.go b/pkg/detectors/scrapersite/scrapersite_test.go
deleted file mode 100644
index 187d85e06812..000000000000
--- a/pkg/detectors/scrapersite/scrapersite_test.go
+++ /dev/null
@@ -1,120 +0,0 @@
-//go:build detectors
-// +build detectors
-
-package scrapersite
-
-import (
-	"context"
-	"fmt"
-	"testing"
-	"time"
-
-	"github.com/kylelemons/godebug/pretty"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
-
-	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
-)
-
-func TestScraperSite_FromChunk(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
-	defer cancel()
-	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
-	if err != nil {
-		t.Fatalf("could not get test secrets from GCP: %s", err)
-	}
-	secret := testSecrets.MustGetField("SCRAPERSITE")
-	inactiveSecret := testSecrets.MustGetField("SCRAPERSITE_INACTIVE")
-
-	type args struct {
-		ctx    context.Context
-		data   []byte
-		verify bool
-	}
-	tests := []struct {
-		name    string
-		s       Scanner
-		args    args
-		want    []detectors.Result
-		wantErr bool
-	}{
-		{
-			name: "found, verified",
-			s:    Scanner{},
-			args: args{
-				ctx:    context.Background(),
-				data:   []byte(fmt.Sprintf("You can find a scrapersite secret %s within", secret)),
-				verify: true,
-			},
-			want: []detectors.Result{
-				{
-					DetectorType: detectorspb.DetectorType_ScraperSite,
-					Verified:     true,
-				},
-			},
-			wantErr: false,
-		},
-		{
-			name: "found, unverified",
-			s:    Scanner{},
-			args: args{
-				ctx:    context.Background(),
-				data:   []byte(fmt.Sprintf("You can find a scrapersite secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
-				verify: true,
-			},
-			want: []detectors.Result{
-				{
-					DetectorType: detectorspb.DetectorType_ScraperSite,
-					Verified:     false,
-				},
-			},
-			wantErr: false,
-		},
-		{
-			name: "not found",
-			s:    Scanner{},
-			args: args{
-				ctx:    context.Background(),
-				data:   []byte("You cannot find the secret within"),
-				verify: true,
-			},
-			want:    nil,
-			wantErr: false,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			s := Scanner{}
-			got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
-			if (err != nil) != tt.wantErr {
-				t.Errorf("ScraperSite.FromData() error = %v, wantErr %v", err, tt.wantErr)
-				return
-			}
-			for i := range got {
-				if len(got[i].Raw) == 0 {
-					t.Fatalf("no raw secret present: \n %+v", got[i])
-				}
-				got[i].Raw = nil
-			}
-			if diff := pretty.Compare(got, tt.want); diff != "" {
-				t.Errorf("ScraperSite.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
-			}
-		})
-	}
-}
-
-func BenchmarkFromData(benchmark *testing.B) {
-	ctx := context.Background()
-	s := Scanner{}
-	for name, data := range detectors.MustGetBenchmarkData() {
-		benchmark.Run(name, func(b *testing.B) {
-			b.ResetTimer()
-			for n := 0; n < b.N; n++ {
-				_, err := s.FromData(ctx, false, data)
-				if err != nil {
-					b.Fatal(err)
-				}
-			}
-		})
-	}
-}
diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go
index a3d4ebfce1fd..e5c78afcbe4e 100644
--- a/pkg/engine/defaults.go
+++ b/pkg/engine/defaults.go
@@ -569,7 +569,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapeowl" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scraperapi" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scraperbox" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapersite" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapestack" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapfly" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/scrapingant" @@ -1315,7 +1314,6 @@ func DefaultDetectors() []detectors.Detector { zenkitapi.Scanner{}, sherpadesk.Scanner{}, shotstack.Scanner{}, - scrapersite.Scanner{}, luno.Scanner{}, apacta.Scanner{}, fmfw.Scanner{}, diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go index 9ded143acf6f..75c9e507b537 100644 --- a/pkg/pb/detectorspb/detectors.pb.go +++ b/pkg/pb/detectorspb/detectors.pb.go @@ -713,31 +713,32 @@ const ( DetectorType_Apacta DetectorType = 632 DetectorType_GetSandbox DetectorType = 633 // Deprecated: Do not use. - DetectorType_Happi DetectorType = 634 - DetectorType_Oanda DetectorType = 635 - DetectorType_FastForex DetectorType = 636 - DetectorType_APIMatic DetectorType = 637 - DetectorType_VersionEye DetectorType = 638 - DetectorType_EagleEyeNetworks DetectorType = 639 - DetectorType_ThousandEyes DetectorType = 640 - DetectorType_SelectPDF DetectorType = 641 - DetectorType_Flightstats DetectorType = 642 - DetectorType_ChecIO DetectorType = 643 - DetectorType_Manifest DetectorType = 644 - DetectorType_ApiScience DetectorType = 645 - DetectorType_AppSynergy DetectorType = 646 - DetectorType_Caflou DetectorType = 647 - DetectorType_Caspio DetectorType = 648 - DetectorType_ChecklyHQ DetectorType = 649 - DetectorType_CloudElements DetectorType = 650 - DetectorType_DronaHQ DetectorType = 651 - DetectorType_Enablex DetectorType = 652 - DetectorType_Fmfw DetectorType = 653 - DetectorType_GoodDay DetectorType = 654 - DetectorType_Luno DetectorType = 655 - DetectorType_Meistertask DetectorType = 656 - DetectorType_Mindmeister DetectorType = 657 - DetectorType_PeopleDataLabs DetectorType = 658 + DetectorType_Happi DetectorType = 634 + DetectorType_Oanda DetectorType = 635 + DetectorType_FastForex DetectorType = 636 + DetectorType_APIMatic DetectorType = 637 + DetectorType_VersionEye DetectorType = 638 + DetectorType_EagleEyeNetworks DetectorType = 639 + DetectorType_ThousandEyes DetectorType = 640 + DetectorType_SelectPDF DetectorType = 641 + DetectorType_Flightstats DetectorType = 642 + DetectorType_ChecIO DetectorType = 643 + DetectorType_Manifest DetectorType = 644 + DetectorType_ApiScience DetectorType = 645 + DetectorType_AppSynergy DetectorType = 646 + DetectorType_Caflou DetectorType = 647 + DetectorType_Caspio DetectorType = 648 + DetectorType_ChecklyHQ DetectorType = 649 + DetectorType_CloudElements DetectorType = 650 + DetectorType_DronaHQ DetectorType = 651 + DetectorType_Enablex DetectorType = 652 + DetectorType_Fmfw DetectorType = 653 + DetectorType_GoodDay DetectorType = 654 + DetectorType_Luno DetectorType = 655 + DetectorType_Meistertask DetectorType = 656 + DetectorType_Mindmeister DetectorType = 657 + DetectorType_PeopleDataLabs DetectorType = 658 + // Deprecated: Do not use. 
DetectorType_ScraperSite DetectorType = 659 DetectorType_Scrapfly DetectorType = 660 DetectorType_SimplyNoted DetectorType = 661 diff --git a/proto/detectors.proto b/proto/detectors.proto index 20045f9b41d7..0d19ddcf66f9 100644 --- a/proto/detectors.proto +++ b/proto/detectors.proto @@ -667,7 +667,7 @@ enum DetectorType { Meistertask = 656; Mindmeister = 657; PeopleDataLabs = 658; - ScraperSite = 659; + ScraperSite = 659 [deprecated = true]; Scrapfly = 660; SimplyNoted = 661; TravelPayouts = 662;