From e81ff7630cc44f9ceefda31cb7659343961c51ff Mon Sep 17 00:00:00 2001 From: Sahil Silare <32628578+sahil9001@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:10:49 +0530 Subject: [PATCH] feat: added `v3` API version for the detector `captaindatago` (#3484) * feat: added `v3` API version for the detector `captaindatago` * chore: renamed captaindata to captaindatav2 * fix: fixed v3 detector to follow new pattern * feat: added versioner impl Signed-off-by: Sahil Silare * feat: changed the name from v2 -> v1 and v3 -> v2 Signed-off-by: Sahil Silare --------- Signed-off-by: Sahil Silare Co-authored-by: Kashif Khan <70996046+kashifkhan0771@users.noreply.github.com> --- .../captaindata/{ => v1}/captaindata.go | 8 +- .../captaindata/{ => v1}/captaindata_test.go | 0 pkg/detectors/captaindata/v2/captaindata.go | 113 +++++++++++++++ .../v2/captaindata_integration_test.go | 129 ++++++++++++++++++ .../captaindata/v2/captaindata_test.go | 84 ++++++++++++ pkg/engine/defaults.go | 6 +- 6 files changed, 336 insertions(+), 4 deletions(-) rename pkg/detectors/captaindata/{ => v1}/captaindata.go (95%) rename pkg/detectors/captaindata/{ => v1}/captaindata_test.go (100%) create mode 100644 pkg/detectors/captaindata/v2/captaindata.go create mode 100644 pkg/detectors/captaindata/v2/captaindata_integration_test.go create mode 100644 pkg/detectors/captaindata/v2/captaindata_test.go diff --git a/pkg/detectors/captaindata/captaindata.go b/pkg/detectors/captaindata/v1/captaindata.go similarity index 95% rename from pkg/detectors/captaindata/captaindata.go rename to pkg/detectors/captaindata/v1/captaindata.go index e2d51616ca38..dbc811545cd4 100644 --- a/pkg/detectors/captaindata/captaindata.go +++ b/pkg/detectors/captaindata/v1/captaindata.go @@ -2,21 +2,25 @@ package captaindata import ( "context" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } +func (s Scanner) Version() int { return 1 } + // Ensure the Scanner satisfies the interface at compile time. var _ detectors.Detector = (*Scanner)(nil) +var _ detectors.Versioner = (*Scanner)(nil) var ( client = common.SaneHttpClient() diff --git a/pkg/detectors/captaindata/captaindata_test.go b/pkg/detectors/captaindata/v1/captaindata_test.go similarity index 100% rename from pkg/detectors/captaindata/captaindata_test.go rename to pkg/detectors/captaindata/v1/captaindata_test.go diff --git a/pkg/detectors/captaindata/v2/captaindata.go b/pkg/detectors/captaindata/v2/captaindata.go new file mode 100644 index 000000000000..3d79cea9d5ee --- /dev/null +++ b/pkg/detectors/captaindata/v2/captaindata.go @@ -0,0 +1,113 @@ +package captaindata + +import ( + "context" + "fmt" + "io" + "net/http" + + regexp "github.com/wasilibs/go-re2" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type Scanner struct { + client *http.Client +} + +// Ensure the Scanner satisfies the interface at compile time. +var _ detectors.Detector = (*Scanner)(nil) +var _ detectors.Versioner = (*Scanner)(nil) + +func (Scanner) Version() int { return 2 } + +var ( + defaultClient = common.SaneHttpClient() + // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{64})\b`) + projIdPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12})\b`) +) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s Scanner) Keywords() []string { + return []string{"captaindata"} +} + +// FromData will find and optionally verify CaptainData secrets in a given set of bytes. +func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + + uniqueMatches := make(map[string]struct{}) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uniqueMatches[match[1]] = struct{}{} + } + + uniqueProjIdMatches := make(map[string]struct{}) + for _, match := range projIdPat.FindAllStringSubmatch(dataStr, -1) { + uniqueProjIdMatches[match[1]] = struct{}{} + } + + for projId := range uniqueProjIdMatches { + for apiKey := range uniqueMatches { + s1 := detectors.Result{ + DetectorType: detectorspb.DetectorType_CaptainData, + Raw: []byte(apiKey), + RawV2: []byte(projId + apiKey), + } + + if verify { + client := s.client + if client == nil { + client = defaultClient + } + + isVerified, extraData, verificationErr := verifyMatch(ctx, client, projId, apiKey) + s1.Verified = isVerified + s1.ExtraData = extraData + s1.SetVerificationError(verificationErr, apiKey) + } + + results = append(results, s1) + } + } + + return +} + +func verifyMatch(ctx context.Context, client *http.Client, projId, apiKey string) (bool, map[string]string, error) { + req, err := http.NewRequestWithContext(ctx, "GET", "https://api.captaindata.co/v3/workspace", nil) + if err != nil { + return false, nil, nil + } + req.Header.Set("Authorization", "x-api-key "+apiKey) + req.Header.Set("x-project-id", projId) + + res, err := client.Do(req) + if err != nil { + return false, nil, err + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + switch res.StatusCode { + case http.StatusOK: + return true, nil, nil + case http.StatusUnauthorized: + return false, nil, nil + default: + return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) + } +} + +func (s Scanner) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_CaptainData +} + +func (s Scanner) Description() string { + return "CaptainData is a service for automating data extraction and processing. The API keys can be used to access and control these automation processes." +} diff --git a/pkg/detectors/captaindata/v2/captaindata_integration_test.go b/pkg/detectors/captaindata/v2/captaindata_integration_test.go new file mode 100644 index 000000000000..b6efcb16e337 --- /dev/null +++ b/pkg/detectors/captaindata/v2/captaindata_integration_test.go @@ -0,0 +1,129 @@ +//go:build detectors +// +build detectors + +package captaindata + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestCaptainData_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + projId := testSecrets.MustGetField("CAPTAINDATA_PROJID") + secret := testSecrets.MustGetField("CAPTAINDATA") + inactiveSecret := testSecrets.MustGetField("CAPTAINDATA_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within", projId, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CaptainData, + Verified: true, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within but not valid", projId, inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CaptainData, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("CaptainData.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "ExtraData", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("CaptainData.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/captaindata/v2/captaindata_test.go b/pkg/detectors/captaindata/v2/captaindata_test.go new file mode 100644 index 000000000000..71bc022a862c --- /dev/null +++ b/pkg/detectors/captaindata/v2/captaindata_test.go @@ -0,0 +1,84 @@ +package captaindata + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +func TestCaptainData_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + tests := []struct { + name string + input string + want []string + }{ + { + name: "typical pattern", + input: "captaindata_project = '12345678-1234-1234-1234-123456789012' captaindata_api_key = '1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef'", + want: []string{"12345678-1234-1234-1234-1234567890121234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"}, + }, + { + name: "finds all matches", + input: `captaindata_project1 = '12345678-1234-1234-1234-123456789012' captaindata_api_key1 = '1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef' +captaindata_project2 = '87654321-4321-4321-4321-210987654321' captaindata_api_key2 = 'fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321'`, + want: []string{ + "12345678-1234-1234-1234-1234567890121234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + "12345678-1234-1234-1234-123456789012fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321", + "87654321-4321-4321-4321-210987654321fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321", + "87654321-4321-4321-4321-2109876543211234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + }, + }, + { + name: "invalid pattern", + input: "captaindata_project = '123456' captaindata_api_key = '1234567890'", + want: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } + }) + } +} diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go index e98f2d239ba5..443378418844 100644 --- a/pkg/engine/defaults.go +++ b/pkg/engine/defaults.go @@ -113,7 +113,8 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/campayn" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/cannyio" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/capsulecrm" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata" + captainDataV1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata/v1" + captainDataV2 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata/v2" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/carboninterface" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/cashboard" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/caspio" @@ -1412,7 +1413,6 @@ func DefaultDetectors() []detectors.Detector { checklyhq.Scanner{}, teamworkspaces.Scanner{}, cloudelements.Scanner{}, - captaindata.Scanner{}, uploadcare.Scanner{}, moderation.Scanner{}, myintervals.Scanner{}, @@ -1642,6 +1642,8 @@ func DefaultDetectors() []detectors.Detector { saladcloudapikey.Scanner{}, boxoauth.Scanner{}, apimetrics.Scanner{}, + captainDataV1.Scanner{}, + captainDataV2.Scanner{}, } // Automatically initialize all detectors that implement