Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex boundary clean and various tweaks to reduce FPs (part 1) #2407

Closed
wants to merge 10 commits into from
2 changes: 1 addition & 1 deletion pkg/detectors/accuweather/accuweather.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ var (
defaultClient = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"accuweather"}) + `([a-z0-9A-Z\%]{35})\b`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"accuweather"}) + `\b([a-z0-9A-Z\%]{35})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/aeroworkflow/aeroworkflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ var (
defaultClient = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"aeroworkflow"}) + `([a-zA-Z0-9^!?#:*;]{20})\b`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"aeroworkflow"}) + `\b([a-zA-Z0-9^!?#:*;]{20})\b`)
idPat = regexp.MustCompile(detectors.PrefixRegex([]string{"aeroworkflow"}) + `\b([0-9]{1,})\b`)
)

Expand Down
5 changes: 3 additions & 2 deletions pkg/detectors/aiven/aiven.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package aiven
import (
"context"
"fmt"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -21,7 +22,7 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"aiven"}) + `([a-zA-Z0-9/+=]{372})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"aiven"}) + `?\b([a-zA-Z0-9/+=]{350,380})`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/alibaba/alibaba.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ var (

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(`\b([a-zA-Z0-9]{30})\b`)
idPat = regexp.MustCompile(`\b(LTAI[a-zA-Z0-9]{17,21})[\"';\s]*`)
idPat = regexp.MustCompile(`\b(LTAI[a-zA-Z0-9]{17,21})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
9 changes: 7 additions & 2 deletions pkg/detectors/appointedd/appointedd.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package appointedd

import (
"context"
regexp "github.com/wasilibs/go-re2"
"fmt"
"io"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -21,7 +23,7 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"appointedd"}) + `\b([a-zA-Z0-9=+]{88})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"appointedd"}) + `\b([a-zA-Z0-9=+]{88)`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand All @@ -33,6 +35,8 @@ func (s Scanner) Keywords() []string {
// FromData will find and optionally verify appointedd secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
fmt.Println(keyPat.String())
fmt.Println("data", dataStr)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)

Expand All @@ -41,6 +45,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
continue
}
resMatch := strings.TrimSpace(match[1])
fmt.Println(resMatch)

s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_Appointedd,
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/artifactory/artifactory.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var (
defaultClient = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(`\b([a-zA-Z0-9]{73}|\b[a-zA-Z0-9]{64})`)
keyPat = regexp.MustCompile(`\b([a-zA-Z0-9]{73}|[a-zA-Z0-9]{64})\b`)
URLPat = regexp.MustCompile(`\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])\.jfrog\.io)`)
)

Expand Down
6 changes: 4 additions & 2 deletions pkg/detectors/azurebatch/azurebatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ var _ detectors.Detector = (*Scanner)(nil)
var (
defaultClient = common.SaneHttpClient()
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
urlPat = regexp.MustCompile(`https://(.{1,50})\.(.{1,50})\.batch\.azure\.com`)
secretPat = regexp.MustCompile(`[A-Za-z0-9+/=]{88}`)
urlPat = regexp.MustCompile(`https://(.{1,50})\.(.{1,50})\.batch\.azure\.com`)

// The examples we have end with "==", so \b won't work for the end boundary; we have to match the trailing "==" explicitly.
secretPat = regexp.MustCompile(`\b([A-Za-z0-9+/\=]{86}==)`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
3 changes: 2 additions & 1 deletion pkg/detectors/beamer/beamer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ package beamer

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand Down
7 changes: 4 additions & 3 deletions pkg/detectors/bitmex/bitmex.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
regexp "github.com/wasilibs/go-re2"
"net/http"
"net/url"
"strconv"
"strings"
"time"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -26,8 +27,8 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"bitmex"}) + `([ \r\n]{1}[0-9a-zA-Z\-\_]{24}[ \r\n]{1})`)
secretPat = regexp.MustCompile(detectors.PrefixRegex([]string{"bitmex"}) + `([ \r\n]{1}[0-9a-zA-Z\-\_]{48}[ \r\n]{1})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"bitmex"}) + `\b([0-9a-zA-Z\-\_]{24})\b`)
secretPat = regexp.MustCompile(detectors.PrefixRegex([]string{"bitmex"}) + `\b([0-9a-zA-Z\-\_]{48})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
5 changes: 3 additions & 2 deletions pkg/detectors/circleci/circleci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package circleci

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"

Expand All @@ -17,7 +18,7 @@ type Scanner struct{}
var _ detectors.Detector = (*Scanner)(nil)

var (
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"circle"}) + `([a-fA-F0-9]{40})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"circle"}) + `\b([a-fA-F0-9]{40})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ package cloudflareglobalapikey

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -19,7 +20,7 @@ var _ detectors.Detector = (*Scanner)(nil)
var (
client = common.SaneHttpClient()

apiKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudflare"}) + `([A-Za-z0-9_-]{37})`)
apiKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudflare"}) + `\b([A-Za-z0-9_-]{37})\b`)

// email pattern thanks https://golangcode.com/validate-an-email-address/
// emailPat = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
Expand Down
3 changes: 2 additions & 1 deletion pkg/detectors/d7network/d7network.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ package d7network

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
Expand Down
7 changes: 4 additions & 3 deletions pkg/detectors/datadogtoken/datadogtoken.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ package datadogtoken

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -20,8 +21,8 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
appPat = regexp.MustCompile(detectors.PrefixRegex([]string{"datadog", "dd"}) + `\b([a-zA-Z-0-9]{40})\b`)
apiPat = regexp.MustCompile(detectors.PrefixRegex([]string{"datadog", "dd"}) + `\b([a-zA-Z-0-9]{32})\b`)
appPat = regexp.MustCompile(detectors.PrefixRegex([]string{"datadog", "dd_", "dd-"}) + `\b([a-zA-Z-0-9]{40})\b`)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the reviewer: I don't feel strongly about this change. However, using "dd" alone as a prefix keyword seems likely to introduce a fair number of false positives (FPs). Open for discussion!

apiPat = regexp.MustCompile(detectors.PrefixRegex([]string{"datadog", "dd_", "dd-"}) + `\b([a-zA-Z-0-9]{32})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
7 changes: 4 additions & 3 deletions pkg/detectors/dovico/dovico.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package dovico
import (
"context"
"fmt"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -21,8 +22,8 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"dovico"}) + `\b([0-9a-z]{32}\.[0-9a-z]{1,}\b)`)
userPat = regexp.MustCompile(detectors.PrefixRegex([]string{"dovico"}) + `\b([0-9a-z]{32}\.[0-9a-z]{1,}\b)`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"dovico"}) + `\b([0-9a-z]{32}\.[0-9a-z]{1,100})\b`)
Copy link
Collaborator Author

@zricethezav zricethezav Feb 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the reviewer: the backend probably won't issue more than 100 alphanumeric characters after the dot here, but 🤷🏻 I'm open to discussion.

userPat = regexp.MustCompile(detectors.PrefixRegex([]string{"dovico"}) + `\b([0-9a-z]{32}\.[0-9a-z]{1,100})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
5 changes: 3 additions & 2 deletions pkg/detectors/formio/formio.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ package formio

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -20,7 +21,7 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"formio"}) + `\b(eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9\.[0-9A-Za-z]{220,310}\.[0-9A-Z-a-z\-_]{43}[ \r\n]{1})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"formio"}) + `\b(eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9\.[0-9A-Za-z]{220,310}\.[0-9A-Z-a-z\-_]{43})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
9 changes: 5 additions & 4 deletions pkg/detectors/gemini/gemini.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ import (
"encoding/base64"
"encoding/hex"
"encoding/json"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"
"time"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -33,14 +34,14 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(`\b((?:master-|account-)[0-9A-Za-z]{20})\b`)
secretPat = regexp.MustCompile(`[A-Za-z0-9]{27,28}`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gemini"}) + `\b((?:master-|account-)[0-9A-Za-z]{20})\b`)
secretPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gemini"}) + `\b([A-Za-z0-9]{27,28})\b`)
)

// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
return []string{"master-", "account-"}
return []string{"master-", "account-", "gemini"}
}

// FromData will find and optionally verify Gemini secrets in a given set of bytes.
Expand Down
8 changes: 5 additions & 3 deletions pkg/detectors/gengo/gengo.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
regexp "github.com/wasilibs/go-re2"
"io"
"net/http"
"strconv"
"strings"
"time"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -29,8 +30,8 @@ var (

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
// Removed bounds since there are some cases where the start or end of the token is a special character
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gengo"}) + `([ ]{0,1}[0-9a-zA-Z\[\]\-\(\)\{\}|_^@$=~]{64}[ \r\n]{1})`)
secretPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gengo"}) + `([ ]{0,1}[0-9a-zA-Z\[\]\-\(\)\{\}|_^@$=~]{64}[ \r\n]{1})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gengo"}) + `([0-9a-zA-Z\[\]\-\(\)\{\}|_^@$=~]{64})`)
secretPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gengo"}) + `([0-9a-zA-Z\[\]\-\(\)\{\}|_^@$=~]{64})`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand All @@ -42,6 +43,7 @@ func (s Scanner) Keywords() []string {
// FromData will find and optionally verify Gengo secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
fmt.Println(keyPat.String())

matches := keyPat.FindAllStringSubmatch(dataStr, -1)
secretMatches := secretPat.FindAllStringSubmatch(dataStr, -1)
Expand Down
5 changes: 3 additions & 2 deletions pkg/detectors/getgist/getgist.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package getgist
import (
"context"
"fmt"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -21,7 +22,7 @@ var (
client = common.SaneHttpClient()

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"getgist"}) + `\b([a-z0-9A-Z+=]{68})`)
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"getgist"}) + `\b([a-z0-9A-Z+=]{67}=)`)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand Down
3 changes: 2 additions & 1 deletion pkg/detectors/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import (
"context"
"encoding/json"
"fmt"
regexp "github.com/wasilibs/go-re2"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"

Expand Down
Loading
Loading