Skip to content

Commit

Permalink
Merge branch 'main' into impl-data-model-gitlab
Browse files Browse the repository at this point in the history
  • Loading branch information
abmussani authored Oct 30, 2024
2 parents 81eece6 + e81ff76 commit 58bba02
Show file tree
Hide file tree
Showing 6 changed files with 336 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,25 @@ package captaindata

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

// Version reports the version number of this detector implementation (1).
func (s Scanner) Version() int { return 1 }

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)

var (
client = common.SaneHttpClient()
Expand Down
113 changes: 113 additions & 0 deletions pkg/detectors/captaindata/v2/captaindata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package captaindata

import (
"context"
"fmt"
"io"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// Scanner detects CaptainData project-ID / API-key pairs.
type Scanner struct {
// client is the HTTP client used for verification requests; when it is nil,
// FromData falls back to the package-level defaultClient.
client *http.Client
}

// Compile-time assertions that *Scanner implements the detector interfaces.
var (
	_ detectors.Detector  = (*Scanner)(nil)
	_ detectors.Versioner = (*Scanner)(nil)
)

// Version reports the version number of this detector implementation (2).
func (Scanner) Version() int {
	return 2
}

var (
// defaultClient is the shared HTTP client used for verification when a
// Scanner has no client of its own.
defaultClient = common.SaneHttpClient()
// keyPat captures a 64-character lowercase-hex API key, delimited by word
// boundaries to reduce false positives, appended to the expression built by
// detectors.PrefixRegex for "captaindata".
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{64})\b`)
// projIdPat captures a UUID-shaped (8-4-4-4-12 lowercase hex) project ID,
// delimited by word boundaries, appended to the same prefix expression.
projIdPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12})\b`)
)

// Keywords returns the substrings used to efficiently pre-filter chunks.
// Only the provider name is used for this detector.
func (s Scanner) Keywords() []string {
	keywords := []string{"captaindata"}
	return keywords
}

// FromData scans data for CaptainData credentials and returns one result for
// every (project ID, API key) combination found. Raw holds the API key and
// RawV2 holds the project ID immediately followed by the API key.
//
// When verify is true, each pair is checked with verifyMatch using the
// Scanner's client, or defaultClient when the Scanner has none. The returned
// error is always nil; verification failures are recorded on the individual
// results instead.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	text := string(data)

	// Sets are used so that repeated occurrences of a value yield one entry.
	apiKeys := make(map[string]struct{})
	for _, m := range keyPat.FindAllStringSubmatch(text, -1) {
		apiKeys[m[1]] = struct{}{}
	}

	projectIDs := make(map[string]struct{})
	for _, m := range projIdPat.FindAllStringSubmatch(text, -1) {
		projectIDs[m[1]] = struct{}{}
	}

	// Pick the HTTP client once; it does not change between pairs.
	httpClient := s.client
	if httpClient == nil {
		httpClient = defaultClient
	}

	for projectID := range projectIDs {
		for key := range apiKeys {
			result := detectors.Result{
				DetectorType: detectorspb.DetectorType_CaptainData,
				Raw:          []byte(key),
				RawV2:        []byte(projectID + key),
			}

			if verify {
				verified, extra, verifyErr := verifyMatch(ctx, httpClient, projectID, key)
				result.Verified = verified
				result.ExtraData = extra
				result.SetVerificationError(verifyErr, key)
			}

			results = append(results, result)
		}
	}

	return results, nil
}

// verifyMatch checks a project ID / API key pair by issuing a GET request to
// the CaptainData workspace endpoint with the key in the Authorization header
// and the project ID in the x-project-id header.
//
// It returns (true, nil, nil) on HTTP 200 and (false, nil, nil) on HTTP 401.
// A non-nil error is returned when the request cannot be built or sent, or
// when the server answers with any other status code. The map result is always
// nil.
func verifyMatch(ctx context.Context, client *http.Client, projId, apiKey string) (bool, map[string]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.captaindata.co/v3/workspace", nil)
	if err != nil {
		// Report the failure instead of swallowing it, so the caller records an
		// indeterminate verification rather than a definitive "not verified".
		return false, nil, err
	}
	req.Header.Set("Authorization", "x-api-key "+apiKey)
	req.Header.Set("x-project-id", projId)

	res, err := client.Do(req)
	if err != nil {
		return false, nil, err
	}
	defer func() {
		// Drain before closing so the transport can reuse the connection.
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		return true, nil, nil
	case http.StatusUnauthorized:
		// The credentials were rejected: definitively not verified.
		return false, nil, nil
	default:
		return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
	}
}

// Type returns the detector type constant for CaptainData.
func (Scanner) Type() detectorspb.DetectorType {
	return detectorspb.DetectorType_CaptainData
}

// Description returns a short human-readable summary of the CaptainData
// service and what its API keys grant access to.
func (Scanner) Description() string {
	const description = "CaptainData is a service for automating data extraction and processing. The API keys can be used to access and control these automation processes."
	return description
}
129 changes: 129 additions & 0 deletions pkg/detectors/captaindata/v2/captaindata_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//go:build detectors
// +build detectors

package captaindata

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestCaptainData_FromChunk is an integration test: it loads live and inactive
// CaptainData credentials from the test secret store and runs FromData with
// verification enabled against chunks that embed them.
//
// Each case runs against its own table-provided Scanner (tt.s), so a case can
// supply a scanner with a custom configuration.
func TestCaptainData_FromChunk(t *testing.T) {
	// The timeout only bounds the secret lookup; each case carries its own context.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	projId := testSecrets.MustGetField("CAPTAINDATA_PROJID")
	secret := testSecrets.MustGetField("CAPTAINDATA")
	inactiveSecret := testSecrets.MustGetField("CAPTAINDATA_INACTIVE")

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name                string
		s                   Scanner
		args                args
		want                []detectors.Result
		wantErr             bool
		wantVerificationErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within", projId, secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_CaptainData,
					Verified:     true,
				},
			},
			wantErr:             false,
			wantVerificationErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within but not valid", projId, inactiveSecret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_CaptainData,
					Verified:     false,
				},
			},
			wantErr:             false,
			wantVerificationErr: true,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:                nil,
			wantErr:             false,
			wantVerificationErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use the scanner declared by the test case rather than a fresh one,
			// so the table's s field is actually honored.
			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("CaptainData.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
					t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
				}
			}
			// Secret material and the verification error are checked above, so
			// they are excluded from the structural comparison.
			ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "ExtraData", "verificationError")
			if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
				t.Errorf("CaptainData.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}

// BenchmarkFromData measures FromData with verification disabled, running one
// sub-benchmark per named payload returned by detectors.MustGetBenchmarkData.
func BenchmarkFromData(benchmark *testing.B) {
	var (
		scanner Scanner
		ctx     = context.Background()
	)
	for name, payload := range detectors.MustGetBenchmarkData() {
		benchmark.Run(name, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := scanner.FromData(ctx, false, payload); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
84 changes: 84 additions & 0 deletions pkg/detectors/captaindata/v2/captaindata_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package captaindata

import (
"context"
"testing"

"github.com/google/go-cmp/cmp"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
)

// TestCaptainData_Pattern checks, without verification, that the scanner's
// keywords match each input and that FromData reports exactly the expected set
// of project-ID + API-key concatenations (RawV2), including every cross
// combination when several IDs and keys appear in the same chunk.
func TestCaptainData_Pattern(t *testing.T) {
	scanner := Scanner{}
	core := ahocorasick.NewAhoCorasickCore([]detectors.Detector{scanner})

	type testCase struct {
		name  string
		input string
		want  []string
	}
	cases := []testCase{
		{
			name:  "typical pattern",
			input: "captaindata_project = '12345678-1234-1234-1234-123456789012' captaindata_api_key = '1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef'",
			want:  []string{"12345678-1234-1234-1234-1234567890121234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"},
		},
		{
			name: "finds all matches",
			input: `captaindata_project1 = '12345678-1234-1234-1234-123456789012' captaindata_api_key1 = '1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef'
captaindata_project2 = '87654321-4321-4321-4321-210987654321' captaindata_api_key2 = 'fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321'`,
			want: []string{
				"12345678-1234-1234-1234-1234567890121234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
				"12345678-1234-1234-1234-123456789012fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321",
				"87654321-4321-4321-4321-210987654321fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321",
				"87654321-4321-4321-4321-2109876543211234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
			},
		},
		{
			name:  "invalid pattern",
			input: "captaindata_project = '123456' captaindata_api_key = '1234567890'",
			want:  []string{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The keyword pre-filter must select this detector for the input.
			if matched := core.FindDetectorMatches([]byte(tc.input)); len(matched) == 0 {
				t.Errorf("keywords '%v' not matched by: %s", scanner.Keywords(), tc.input)
				return
			}

			results, err := scanner.FromData(context.Background(), false, []byte(tc.input))
			if err != nil {
				t.Errorf("error = %v", err)
				return
			}

			switch {
			case len(results) == len(tc.want):
				// Counts agree; compare contents below.
			case len(results) == 0:
				t.Errorf("did not receive result")
				return
			default:
				t.Errorf("expected %d results, only received %d", len(tc.want), len(results))
				return
			}

			// Compare as sets: the order of results is not part of the contract.
			got := make(map[string]struct{}, len(results))
			for _, res := range results {
				key := string(res.Raw)
				if len(res.RawV2) > 0 {
					key = string(res.RawV2)
				}
				got[key] = struct{}{}
			}
			want := make(map[string]struct{}, len(tc.want))
			for _, w := range tc.want {
				want[w] = struct{}{}
			}

			if diff := cmp.Diff(want, got); diff != "" {
				t.Errorf("%s diff: (-want +got)\n%s", tc.name, diff)
			}
		})
	}
}
6 changes: 4 additions & 2 deletions pkg/engine/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/campayn"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/cannyio"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/capsulecrm"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata"
captainDataV1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata/v1"
captainDataV2 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/captaindata/v2"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/carboninterface"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/cashboard"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/caspio"
Expand Down Expand Up @@ -1412,7 +1413,6 @@ func DefaultDetectors() []detectors.Detector {
checklyhq.Scanner{},
teamworkspaces.Scanner{},
cloudelements.Scanner{},
captaindata.Scanner{},
uploadcare.Scanner{},
moderation.Scanner{},
myintervals.Scanner{},
Expand Down Expand Up @@ -1642,6 +1642,8 @@ func DefaultDetectors() []detectors.Detector {
saladcloudapikey.Scanner{},
boxoauth.Scanner{},
apimetrics.Scanner{},
captainDataV1.Scanner{},
captainDataV2.Scanner{},
}

// Automatically initialize all detectors that implement
Expand Down

0 comments on commit 58bba02

Please sign in to comment.