From faf46175e4b0e9d24aa99be98a242805cec0ac3e Mon Sep 17 00:00:00 2001
From: Ankush Goel
Date: Wed, 27 Sep 2023 22:39:39 +0530
Subject: [PATCH] added Web3 Storage detector (#1789)

* added Web3 Storage detector

* fixed the regex

* removed test and disabled token
---
 pkg/detectors/web3storage/web3storage.go      | 110 ++++++++++++
 pkg/detectors/web3storage/web3storage_test.go | 161 ++++++++++++++++++
 pkg/engine/defaults.go                        |   2 +
 pkg/pb/detectorspb/detectors.pb.go            |  18 +-
 proto/detectors.proto                         |   1 +
 5 files changed, 285 insertions(+), 7 deletions(-)
 create mode 100644 pkg/detectors/web3storage/web3storage.go
 create mode 100644 pkg/detectors/web3storage/web3storage_test.go

diff --git a/pkg/detectors/web3storage/web3storage.go b/pkg/detectors/web3storage/web3storage.go
new file mode 100644
index 000000000000..359362e93f66
--- /dev/null
+++ b/pkg/detectors/web3storage/web3storage.go
@@ -0,0 +1,110 @@
+package web3storage
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"regexp"
+	"strings"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+)
+
+// Scanner detects Web3 Storage API tokens and can verify them over HTTP.
+type Scanner struct {
+	// client is the HTTP client used for verification; defaultClient is used when it is nil.
+	client *http.Client
+}
+
+// Ensure the Scanner satisfies the interface at compile time.
+var _ detectors.Detector = (*Scanner)(nil)
+
+var (
+	defaultClient = common.SaneHttpClient()
+	// keyPat captures (group 1) a three-segment token whose first segment is the
+	// base64url encoding of {"alg":"HS256","typ":"JWT"}. The expression is prefixed
+	// with detectors.PrefixRegex for the keyword "web3" and wrapped in \b boundaries.
+	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"web3"}) + `\b(eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9\.eyJ[A-Za-z0-9-_]{100,300}\.[A-Za-z0-9-_]{25,100})\b`)
+)
+
+// Keywords are used for efficiently pre-filtering chunks.
+// Use identifiers in the secret preferably, or the provider name.
+func (s Scanner) Keywords() []string {
+	return []string{"web3"}
+}
+
+// FromData will find and optionally verify Web3storage secrets in a given set of bytes.
+// When verify is true each candidate is checked with verifyToken. A candidate that is
+// not verified is dropped if detectors.IsKnownFalsePositive flags it. The returned
+// error is always nil; verification problems are reported per result.
+func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
+	dataStr := string(data)
+
+	// Resolve the client once; it does not change between matches.
+	client := s.client
+	if client == nil {
+		client = defaultClient
+	}
+
+	for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
+		if len(match) != 2 {
+			continue
+		}
+		resMatch := strings.TrimSpace(match[1])
+
+		s1 := detectors.Result{
+			DetectorType: detectorspb.DetectorType_Web3Storage,
+			Raw:          []byte(resMatch),
+		}
+
+		if verify {
+			s1.Verified, s1.VerificationError = verifyToken(ctx, client, resMatch)
+		}
+
+		// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
+		if !s1.Verified && detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
+			continue
+		}
+
+		results = append(results, s1)
+	}
+
+	return results, nil
+}
+
+// verifyToken asks the Web3 Storage API whether token is accepted.
+// It returns (true, nil) for a 2xx response and (false, nil) for 401. Any other
+// status, or a failure to build or send the request, yields (false, err) because
+// the validity of the token could not be determined.
+func verifyToken(ctx context.Context, client *http.Client, token string) (bool, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.web3.storage/user/uploads", nil)
+	if err != nil {
+		return false, err
+	}
+	req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", token))
+
+	res, err := client.Do(req)
+	if err != nil {
+		return false, err
+	}
+	// The defer lives in this per-token helper so that each response body is closed
+	// before the next candidate is verified, rather than when FromData returns.
+	defer res.Body.Close()
+
+	switch {
+	case res.StatusCode >= 200 && res.StatusCode < 300:
+		return true, nil
+	case res.StatusCode == http.StatusUnauthorized:
+		// The secret is determinately not verified.
+		return false, nil
+	default:
+		return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
+	}
+}
+
+// Type returns the detector type reported for Web3 Storage findings.
+func (s Scanner) Type() detectorspb.DetectorType {
+	return detectorspb.DetectorType_Web3Storage
+}
diff --git a/pkg/detectors/web3storage/web3storage_test.go b/pkg/detectors/web3storage/web3storage_test.go
new file mode 100644
index 000000000000..a0983d11282f
--- /dev/null
+++ b/pkg/detectors/web3storage/web3storage_test.go
@@ -0,0 +1,161 @@
+//go:build detectors
+// +build detectors
+
+package web3storage
+
+import (
+	"context"
+	"fmt"
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"testing"
+	"time"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+)
+
+func TestWeb3Storage_FromChunk(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
+	defer cancel()
+	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4")
+	if err != nil {
+		t.Fatalf("could not get test secrets from GCP: %s", err)
+	}
+	secret := testSecrets.MustGetField("WEB3STORAGE")
+	inactiveSecret := testSecrets.MustGetField("WEB3STORAGE_INACTIVE")
+
+	type args struct {
+		ctx    context.Context
+		data   []byte
+		verify bool
+	}
+	tests := []struct {
+		name                string
+		s                   Scanner
+		args                args
+		want                []detectors.Result
+		wantErr             bool
+		wantVerificationErr bool
+	}{
+		{
+			name: "found, verified",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a web3storage secret %s within", secret)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_Web3Storage,
+					Verified:     true,
+				},
+			},
+			wantErr:             false,
+			wantVerificationErr: false,
+		},
+		{
+			name: "found, unverified",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a web3storage secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_Web3Storage,
+					Verified:     false,
+				},
+			},
+			wantErr:             false,
+			wantVerificationErr: false,
+		},
+		{
+			name: "not found",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte("You cannot find the secret within"),
+				verify: true,
+			},
+			want:                nil,
+			wantErr:             false,
+			wantVerificationErr: false,
+		},
+		{
+			name: "found, would be verified if not for timeout",
+			s:    Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a web3storage secret %s within", secret)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_Web3Storage,
+					Verified:     false,
+				},
+			},
+			wantErr:             false,
+			wantVerificationErr: true,
+		},
+		{
+			name: "found, verified but unexpected api surface",
+			s:    Scanner{client: common.ConstantResponseHttpClient(404, "")},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a web3storage secret %s within", secret)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_Web3Storage,
+					Verified:     false,
+				},
+			},
+			wantErr:             false,
+			wantVerificationErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Web3Storage.FromData() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			for i := range got {
+				if len(got[i].Raw) == 0 {
+					t.Fatalf("no raw secret present: \n %+v", got[i])
+				}
+				if (got[i].VerificationError != nil) != tt.wantVerificationErr {
+					t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError)
+				}
+			}
+			ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "VerificationError")
+			if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
+				t.Errorf("Web3Storage.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
+			}
+		})
+	}
+}
+
+func BenchmarkFromData(benchmark *testing.B) {
+	ctx := context.Background()
+	s := Scanner{}
+	for name, data := range detectors.MustGetBenchmarkData() {
+		benchmark.Run(name, func(b *testing.B) {
+			b.ResetTimer()
+			for n := 0; n < b.N; n++ {
+				_, err := s.FromData(ctx, false, data)
+				if err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go
index c2d3ffe207d3..53613d31c643 100644
--- a/pkg/engine/defaults.go
+++ b/pkg/engine/defaults.go
@@ -728,6 +728,7 @@ import (
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/walkscore"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/weatherbit"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/weatherstack"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/web3storage"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/webex"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/webflow"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/webscraper"
@@ -1544,6 +1545,7 @@ func DefaultDetectors() []detectors.Detector {
 		salesforce.Scanner{},
 		sourcegraph.Scanner{},
 		tailscale.Scanner{},
+		web3storage.Scanner{},
 	}
 }
 
diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go
index b95045f1b2a1..aea67e7b64ab 100644
--- a/pkg/pb/detectorspb/detectors.pb.go
+++ b/pkg/pb/detectorspb/detectors.pb.go
@@ -1001,6 +1001,7 @@ const (
 	DetectorType_Snowflake DetectorType = 927
 	DetectorType_Sourcegraph DetectorType = 928
 	DetectorType_Tailscale DetectorType = 929
+	DetectorType_Web3Storage DetectorType = 930
 )
 
 // Enum value maps for DetectorType.
@@ -1932,6 +1933,7 @@ var ( 927: "Snowflake", 928: "Sourcegraph", 929: "Tailscale", + 930: "Web3Storage", } DetectorType_value = map[string]int32{ "Alibaba": 0, @@ -2860,6 +2862,7 @@ var ( "Snowflake": 927, "Sourcegraph": 928, "Tailscale": 929, + "Web3Storage": 930, } ) @@ -3238,7 +3241,7 @@ var file_detectors_proto_rawDesc = []byte{ 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34, 0x10, 0x02, 0x12, - 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xae, 0x74, 0x0a, 0x0c, 0x44, + 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xc0, 0x74, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57, 0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41, @@ -4169,12 +4172,13 @@ var file_detectors_proto_rawDesc = []byte{ 0x6e, 0x67, 0x46, 0x61, 0x63, 0x65, 0x10, 0x9e, 0x07, 0x12, 0x0e, 0x0a, 0x09, 0x53, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x10, 0x9f, 0x07, 0x12, 0x10, 0x0a, 0x0b, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x10, 0xa0, 0x07, 0x12, 0x0e, 0x0a, 0x09, 0x54, - 0x61, 0x69, 0x6c, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x10, 0xa1, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, - 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, - 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, - 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x61, 0x69, 0x6c, 0x73, 0x63, 
0x61, 0x6c, 0x65, 0x10, 0xa1, 0x07, 0x12, 0x10, 0x0a, 0x0b, 0x57, + 0x65, 0x62, 0x33, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x10, 0xa2, 0x07, 0x42, 0x3d, 0x5a, + 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, + 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, + 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, + 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/detectors.proto b/proto/detectors.proto index a702754ffb90..0742a8129d79 100644 --- a/proto/detectors.proto +++ b/proto/detectors.proto @@ -938,6 +938,7 @@ enum DetectorType { Snowflake = 927; Sourcegraph = 928; Tailscale = 929; + Web3Storage = 930; } message Result {