Skip to content

Commit

Permalink
added azurefunctionkey detector (#2337)
Browse files Browse the repository at this point in the history
* added azurefunctionkey detector

* update raw field to include url

* clean up and added prefix on key pattern

* update bench script

* update imports, snifftest, and gen proto

---------

Co-authored-by: Dustin Decker <[email protected]>
  • Loading branch information
roxanne-tampus and dustin-decker authored Jan 29, 2024
1 parent fa1c5fa commit 83dc986
Show file tree
Hide file tree
Showing 12 changed files with 364 additions and 71 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PROTOS_IMAGE ?= trufflesecurity/protos:1.21-0
PROTOS_IMAGE ?= trufflesecurity/protos:1.21-1

.PHONY: check
.PHONY: lint
Expand Down Expand Up @@ -59,7 +59,7 @@ protos-windows:

release-protos-image:
docker buildx build --push --platform=linux/amd64,linux/arm64 \
-t trufflesecurity/protos:1.21-0 -f hack/Dockerfile.protos .
-t trufflesecurity/protos:1.21-1 -f hack/Dockerfile.protos .

snifftest:
./hack/snifftest/snifftest.sh
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ require (
github.com/stretchr/testify v1.8.4
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502
github.com/trufflesecurity/disk-buffer-reader v0.2.1
github.com/wasilibs/go-re2 v1.4.1
github.com/xanzy/go-gitlab v0.94.0
go.mongodb.org/mongo-driver v1.12.1
go.uber.org/mock v0.3.0
Expand Down Expand Up @@ -239,7 +240,6 @@ require (
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.11 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/wasilibs/go-re2 v1.4.1 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
github.com/xdg-go/scram v1.1.2 // indirect
Expand Down
48 changes: 4 additions & 44 deletions go.sum

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions hack/Dockerfile.protos
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ RUN GO111MODULE=off go get -d "github.com/envoyproxy/protoc-gen-validate" && \
git checkout v0.6.7 && \
ln -s /usr/local/protoc/include/google google && \
make build
RUN go install github.com/chrusty/protoc-gen-jsonschema/cmd/protoc-gen-jsonschema@latest
CMD ["bash"]
4 changes: 2 additions & 2 deletions hack/bench/versions.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ git clone --quiet "$test_repo" $repo_tmp


# Get list of git tags, sorted from newest to oldest
tags=$(git tag --sort=-creatordate)
tags=$(echo $(git describe --tags --always --dirty --match='v*') $(git tag --sort=-creatordate))

# Counter to keep track of number of tags checked out
count=0
Expand All @@ -40,7 +40,7 @@ do
fi

# Skip alpha tags
if [[ $tag == *"alpha"* ]]; then
if [[ $tag == *"alpha"* ]]; then
continue
fi

Expand Down
14 changes: 4 additions & 10 deletions hack/snifftest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,25 @@
See the help pages with this command, or look further below to get started quickly.

```
go run snifftest/main.go
go run hack/snifftest/main.go
```

## Show available secret scanners

```
go run snifftest/main.go show-scanners
```

## Load a repo into a DB

```
go run snifftest/main.go load --db ~/sdb --repo https://github.com/Netflix/Hystrix.git
go run hack/snifftest/main.go show-scanners
```

## Scan

All scanners

```
go run snifftest/main.go scan --db ~/sdb --scanner all --print
go run hack/snifftest/main.go scan --db ~/sdb --scanner all --print
```

Particular scanner

```
go run hack/snifftest/main.go scan --db ~/sdb --scanner github --print --print-chunk --fail-threshold 5
```
```
4 changes: 3 additions & 1 deletion hack/snifftest/snifftest.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#!/usr/bin/env bash

REPO_ARRAY=(
"https://github.com/Netflix/Hystrix.git"
# "https://github.com/Netflix/Hystrix.git"
# "https://github.com/facebook/flow.git"
# "https://github.com/Netflix/vizceral.git"
# "https://github.com/Netflix/metaflow.git"
# "https://github.com/Netflix/dgs-framework.git"
# "https://github.com/Netflix/vector.git"
# "https://github.com/expressjs/express.git"
"https://github.com/Azure/azure-sdk-for-net"
"https://github.com/Azure/azure-cli"
)
REPOS=$(printf "%s," "${REPO_ARRAY[@]}" | cut -d "," -f 1-${#REPO_ARRAY[@]})
go run hack/snifftest/main.go scan --exclude privatekey --exclude uri --exclude github_old --repo "$REPOS" --detector all --print --fail-threshold 99
89 changes: 89 additions & 0 deletions pkg/detectors/azurefunctionkey/azurefunctionkey.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package azurefunctionkey

import (
"context"
"fmt"
"net/http"
"regexp"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// Scanner is the Azure Function key detector. It pairs key candidates with
// Azure Function URLs found in the same data and can optionally verify each
// pair over HTTP.
type Scanner struct {
// client is the HTTP client used for verification requests. When it is nil,
// FromData falls back to the package-level defaultClient.
client *http.Client
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)

var (
// defaultClient is the shared HTTP client used for verification when a
// Scanner has no client of its own.
defaultClient = common.SaneHttpClient()
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
// keyPat matches the "azure" keyword prefix (built by detectors.PrefixRegex)
// followed by a captured run of 20-56 characters from [a-zA-Z0-9_-]. Up to two
// trailing '=' characters are matched as well but sit outside the capture
// group, so they only appear in the full match.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"azure"}) + `\b([a-zA-Z0-9_-]{20,56})\b={0,2}`)
// azureUrlPat matches https://<app>.azurewebsites.net/api/<function>, where
// both captured segments are 2-30 characters of letters, digits or hyphens.
azureUrlPat = regexp.MustCompile(`\bhttps:\/\/([a-zA-Z0-9-]{2,30})\.azurewebsites\.net\/api\/([a-zA-Z0-9-]{2,30})\b`)
)

// Keywords lists the substrings used to cheaply pre-filter chunks before the
// regular expressions are applied. Only the provider name "azure" is used,
// matching the prefix required by keyPat.
func (s Scanner) Keywords() []string {
	keywords := []string{"azure"}
	return keywords
}

// FromData will find and optionally verify azure secrets in a given set of bytes.
//
// Every key candidate matched by keyPat is paired with every function URL
// matched by azureUrlPat in the same data, and one result is produced per
// pair. Raw holds the key immediately followed by the URL. When verify is
// true, each pair is checked with a GET request to "<url>?code=<key>":
//   - a 2xx status marks the result as verified,
//   - a 401 status leaves it unverified with no verification error,
//   - any other status, or a transport error, leaves it unverified and
//     records a verification error on the result.
//
// Unverified results whose key is a known false positive are dropped. The
// returned error is always nil.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	dataStr := string(data)
	keyMatches := keyPat.FindAllStringSubmatch(dataStr, -1)
	urlMatches := azureUrlPat.FindAllStringSubmatch(dataStr, -1)

	// The client choice does not depend on the individual match, so resolve it once.
	client := s.client
	if client == nil {
		client = defaultClient
	}

	for _, keyMatch := range keyMatches {
		// The full match still carries the keyword prefix. The key itself,
		// including any trailing '=' that sits outside the capture group, is
		// taken as the last space-separated token of the full match.
		tokens := strings.Split(strings.TrimSpace(keyMatch[0]), " ")
		resMatch := tokens[len(tokens)-1]

		for _, urlMatch := range urlMatches {
			resUrl := strings.TrimSpace(urlMatch[0])
			s1 := detectors.Result{
				DetectorType: detectorspb.DetectorType_AzureFunctionKey,
				Raw:          []byte(resMatch + resUrl),
			}

			if verify {
				req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, resUrl+"?code="+resMatch, nil)
				if reqErr != nil {
					// A request that cannot even be built yields no result for this pair.
					continue
				}
				verified, verificationErr := verifyFunctionKey(client, req)
				s1.Verified = verified
				if verificationErr != nil {
					s1.SetVerificationError(verificationErr, resMatch)
				}
			}

			// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
			if !s1.Verified && detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) {
				continue
			}

			results = append(results, s1)
		}
	}

	return results, nil
}

// verifyFunctionKey sends req with client and interprets the response status.
// It returns (true, nil) for a 2xx status, (false, nil) for 401 (the key is
// determinately invalid), and (false, err) for a transport failure or any
// other status. The response body is closed before returning, so bodies do
// not accumulate while FromData iterates over many key/URL pairs.
func verifyFunctionKey(client *http.Client, req *http.Request) (bool, error) {
	res, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer res.Body.Close()

	switch {
	case res.StatusCode >= 200 && res.StatusCode < 300:
		return true, nil
	case res.StatusCode == http.StatusUnauthorized:
		// The secret is determinately not verified (nothing to do).
		return false, nil
	default:
		return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
	}
}

// Type returns the detector type reported by this scanner,
// detectorspb.DetectorType_AzureFunctionKey.
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_AzureFunctionKey
}
163 changes: 163 additions & 0 deletions pkg/detectors/azurefunctionkey/azurefunctionkey_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
//go:build detectors
// +build detectors

package azurefunctionkey

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestAzurefunctionkey_FromChunk runs Scanner.FromData through a table of
// scenarios using an active key, an inactive key and a function URL loaded
// with common.GetSecret. Each case checks the detector type, the Verified
// flag, and whether a verification error was recorded.
func TestAzurefunctionkey_FromChunk(t *testing.T) {
// The 5-second timeout bounds only the secret lookup; the individual cases
// pass their own context to FromData.
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
if err != nil {
t.Fatalf("could not get test secrets from GCP: %s", err)
}
secret := testSecrets.MustGetField("AZURE_FUNCTION_KEY")
inactiveSecret := testSecrets.MustGetField("AZURE_FUNCTION_KEY_INACTIVE")
url := testSecrets.MustGetField("AZURE_FUNCTION_URL")

// args bundles the parameters passed to FromData for one case.
type args struct {
ctx context.Context
data []byte
verify bool
}
// Each case supplies the scanner to use (optionally with a custom HTTP
// client), the input, the expected results, and whether a verification
// error is expected on every returned result.
tests := []struct {
name string
s Scanner
args args
want []detectors.Result
wantErr bool
wantVerificationErr bool
}{
{
name: "found, verified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a azure secret %s azure url %s", secret, url)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_AzureFunctionKey,
Verified: true,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "found, unverified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a azure secret %s azure url %s but not valid", inactiveSecret, url)), // the secret would satisfy the regex but not pass validation
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_AzureFunctionKey,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "not found",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte("You cannot find the secret within"),
verify: true,
},
want: nil,
wantErr: false,
wantVerificationErr: false,
},
{
// A 1µs client timeout is expected to make the verification request
// fail, so the result stays unverified with a verification error.
name: "found, would be verified if not for timeout",
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a azure secret %s azure url %s", secret, url)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_AzureFunctionKey,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: true,
},
{
// The stub client always answers 404, which is neither 2xx nor 401, so
// a verification error is expected.
name: "found, verified but unexpected api surface",
s: Scanner{client: common.ConstantResponseHttpClient(404, "")},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a azure secret %s azure url %s", secret, url)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_AzureFunctionKey,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
if (err != nil) != tt.wantErr {
t.Errorf("Azurefunctionkey.FromData() error = %v, wantErr %v", err, tt.wantErr)
return
}
// Raw and the verification error are checked here because they are
// excluded from the structural comparison below.
for i := range got {
if len(got[i].Raw) == 0 {
t.Fatalf("no raw secret present: \n %+v", got[i])
}
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
}
}
// Compare everything except Raw and verificationError.
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
t.Errorf("Azurefunctionkey.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
}
})
}
}

// BenchmarkFromData measures Scanner.FromData with verification disabled,
// running one sub-benchmark per entry returned by
// detectors.MustGetBenchmarkData.
func BenchmarkFromData(benchmark *testing.B) {
	var (
		scanner = Scanner{}
		bgCtx   = context.Background()
	)
	for caseName, payload := range detectors.MustGetBenchmarkData() {
		benchmark.Run(caseName, func(b *testing.B) {
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				if _, scanErr := scanner.FromData(bgCtx, false, payload); scanErr != nil {
					b.Fatal(scanErr)
				}
			}
		})
	}
}
Loading

0 comments on commit 83dc986

Please sign in to comment.