Skip to content

Commit

Permalink
Detector-Competition-Fix: fixed regex for databricks domain and fixed…
Browse files Browse the repository at this point in the history
… tests (#1965)

* fixed regex for domain and fixed tests

* fixed regex

* fixed an issue with regex subgrouping

* made recommended changes

* made recommended changes

* fixed RawV2
  • Loading branch information
ankushgoel27 authored Nov 2, 2023
1 parent b6469f2 commit 965a274
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 34 deletions.
34 changes: 19 additions & 15 deletions pkg/detectors/databrickstoken/databrickstoken.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{}
// Scanner is the Databricks token detector. It is asserted below to satisfy
// detectors.Detector.
type Scanner struct{
// client is an optional HTTP client used for verification requests.
// When it is nil, FromData falls back to the package-level defaultClient.
client *http.Client
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)

var (
client = common.SaneHttpClient()
defaultClient = common.SaneHttpClient()

// Make sure that your group is surrounded by boundary characters, as below, to reduce false positives.
domain = regexp.MustCompile(`\b(https:\/\/[a-z0-9-]+\.cloud\.databricks\.com)\b`)
keyPat = regexp.MustCompile(`\b(dapi[a-z0-9]{32})\b`)
domain = regexp.MustCompile(`\b([a-z0-9-]+(?:\.[a-z0-9-]+)*\.(cloud\.databricks\.com|gcp\.databricks\.com|azurewebsites\.net))\b`)
keyPat = regexp.MustCompile(`\b(dapi[0-9a-f]{32})(-\d)?\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand All @@ -39,15 +41,9 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
domainMatches := domain.FindAllStringSubmatch(dataStr, -1)

for _, match := range matches {
if len(match) != 2 {
continue
}
resMatch := strings.TrimSpace(match[1])

for _, domainmatch := range domainMatches {
if len(domainmatch) != 2 {
continue
}
resDomainMatch := strings.TrimSpace(domainmatch[1])

s1 := detectors.Result{
Expand All @@ -57,7 +53,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}

if verify {
req, err := http.NewRequestWithContext(ctx, "GET", resDomainMatch + "/api/2.0/clusters/list", nil)
client := s.client
if client == nil {
client = defaultClient
}
req, err := http.NewRequestWithContext(ctx, "GET", "https://" + resDomainMatch + "/api/2.0/clusters/list", nil)
if err != nil {
continue
}
Expand All @@ -67,14 +67,18 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
defer res.Body.Close()
if res.StatusCode >= 200 && res.StatusCode < 300 {
s1.Verified = true
} else if res.StatusCode == 403 {
// nothing to do here
} else {
// IsKnownFalsePositive filters out common test words and also makes sure the key appears 'random' enough to be a real key.
if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
continue
}
s1.VerificationError = fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
}
} else {
s1.VerificationError = err
}
}
if !s1.Verified && detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) {
continue
}

results = append(results, s1)
}
Expand Down
80 changes: 61 additions & 19 deletions pkg/detectors/databrickstoken/databrickstoken_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ package databrickstoken
import (
"context"
"fmt"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"testing"
"time"

"github.com/kylelemons/godebug/pretty"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

func TestDatabrickstoken_FromChunk(t *testing.T) {
func TestDatabricksToken_FromChunk(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
Expand All @@ -25,25 +26,27 @@ func TestDatabrickstoken_FromChunk(t *testing.T) {
}
secret := testSecrets.MustGetField("DATABRICKSTOKEN")
inactiveSecret := testSecrets.MustGetField("DATABRICKSTOKEN_INACTIVE")
domain := testSecrets.MustGetField("DATABRICKSTOKEN_DOMAIN")

type args struct {
ctx context.Context
data []byte
verify bool
}
tests := []struct {
name string
s Scanner
args args
want []detectors.Result
wantErr bool
name string
s Scanner
args args
want []detectors.Result
wantErr bool
wantVerificationErr bool
}{
{
name: "found, verified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within", secret)),
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)),
verify: true,
},
want: []detectors.Result{
Expand All @@ -52,14 +55,15 @@ func TestDatabrickstoken_FromChunk(t *testing.T) {
Verified: true,
},
},
wantErr: false,
wantErr: false,
wantVerificationErr: false,
},
{
name: "found, unverified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s but not valid", inactiveSecret, domain)), // the secret would satisfy the regex but not pass validation
verify: true,
},
want: []detectors.Result{
Expand All @@ -68,7 +72,8 @@ func TestDatabrickstoken_FromChunk(t *testing.T) {
Verified: false,
},
},
wantErr: false,
wantErr: false,
wantVerificationErr: false,
},
{
name: "not found",
Expand All @@ -78,14 +83,48 @@ func TestDatabrickstoken_FromChunk(t *testing.T) {
data: []byte("You cannot find the secret within"),
verify: true,
},
want: nil,
wantErr: false,
want: nil,
wantErr: false,
wantVerificationErr: false,
},
{
name: "found, would be verified if not for timeout",
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_DatabricksToken,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: true,
},
{
name: "found, verified but unexpected api surface",
s: Scanner{client: common.ConstantResponseHttpClient(404, "")},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_DatabricksToken,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s := Scanner{}
got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
if (err != nil) != tt.wantErr {
t.Errorf("Databrickstoken.FromData() error = %v, wantErr %v", err, tt.wantErr)
return
Expand All @@ -94,10 +133,13 @@ func TestDatabrickstoken_FromChunk(t *testing.T) {
if len(got[i].Raw) == 0 {
t.Fatalf("no raw secret present: \n %+v", got[i])
}
got[i].Raw = nil
if (got[i].VerificationError != nil) != tt.wantVerificationErr {
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError)
}
}
if diff := pretty.Compare(got, tt.want); diff != "" {
t.Errorf("Databrickstoken.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "VerificationError")
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
t.Errorf("DatabricksToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
}
})
}
Expand Down

0 comments on commit 965a274

Please sign in to comment.