From 2918ab29a21c8aef48e5320be7686b16778f0dfc Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 20 Dec 2023 10:09:03 -0500 Subject: [PATCH] cleanup, fix postgres detector. add tests --- pkg/common/patterns.go | 3 +- pkg/common/patterns_test.go | 13 +- pkg/detectors/postgres/postgres.go | 215 ++++++++-------- pkg/detectors/postgres/postgres_test.go | 317 ++++++++++++++++++++++++ 4 files changed, 436 insertions(+), 112 deletions(-) create mode 100644 pkg/detectors/postgres/postgres_test.go diff --git a/pkg/common/patterns.go b/pkg/common/patterns.go index ed0812291b59..7c5b5961401c 100644 --- a/pkg/common/patterns.go +++ b/pkg/common/patterns.go @@ -20,7 +20,6 @@ type RegexState struct { compiledRegex *regexp.Regexp } - // Custom Regex functions func BuildRegex(pattern string, specialChar string, length int) string { return fmt.Sprintf(`\b([%s%s]{%s})\b`, pattern, specialChar, strconv.Itoa(length)) @@ -64,7 +63,7 @@ func UsernameRegexCheck(pattern string) RegexState { return RegexState{regexp.MustCompile(raw)} } -// PasswordRegexCheck constructs an username usernameRegex pattern from a given pattern of excluded characters. +// PasswordRegexCheck constructs a password passwordRegex pattern from a given pattern of excluded characters. 
func PasswordRegexCheck(pattern string) RegexState { raw := fmt.Sprintf(`(?im)(?:pass)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:%+v'"\s]{4,40})`, pattern) diff --git a/pkg/common/patterns_test.go b/pkg/common/patterns_test.go index 8eaeb6abde2f..aa3504dc57f1 100644 --- a/pkg/common/patterns_test.go +++ b/pkg/common/patterns_test.go @@ -1,16 +1,17 @@ package common import ( - "github.com/stretchr/testify/assert" "regexp" "testing" + + "github.com/stretchr/testify/assert" ) const ( usernamePattern = `?()/\+=\s\n` passwordPattern = `^<>;.*&|£\n\s` - usernameRegex = `(?im)(?:user|usr)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:?()/\+=\s\n]{4,40})\b` - passwordRegex = `(?im)(?:pass|password)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:^<>;.*&|£\n\s]{4,40})` + usernameRegex = `(?im)(?:user|usr)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:?()/\+=\s\n'"\s]{4,40})` + passwordRegex = `(?im)(?:pass)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:^<>;.*&|£\n\s'"\s]{4,40})` ) func TestUsernameRegexCheck(t *testing.T) { @@ -43,10 +44,10 @@ func TestPasswordRegexCheck(t *testing.T) { assert.Equal(t, passwordRegexPat.compiledRegex, expectedRegexPattern) testString := `password = "johnsmith123$!" - password='johnsmith123$!' + password='johnsmith123$!' password:="johnsmith123$!" - password = johnsmith123$! - password=johnsmith123$! + password = johnsmith123$! + password=johnsmith123$! 
PasswordAuthenticator(username, "johnsmith123$!")` expectedStr := []string{"johnsmith123$!", "johnsmith123$!", "johnsmith123$!", "johnsmith123$!", "johnsmith123$!", diff --git a/pkg/detectors/postgres/postgres.go b/pkg/detectors/postgres/postgres.go index 7c3c2329f983..8465bc073a93 100644 --- a/pkg/detectors/postgres/postgres.go +++ b/pkg/detectors/postgres/postgres.go @@ -9,158 +9,165 @@ import ( "strings" "time" + _ "github.com/lib/pq" // PostgreSQL driver "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{} - -var _ detectors.Detector = (*Scanner)(nil) - -const DEFAULT_PORT = "5432" +const ( + defaultPort = "5432" + defaultHost = "localhost" +) var ( - // URI pattern for PostgreSQL connection string - uriPat = regexp.MustCompile(`\b(?i)postgresql://[\S]+\b`) - - // Separate patterns for username, password, and hostname - hostnamePat = regexp.MustCompile(`(?i)(?:host|server).{0,40}?(\b[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b)`) - - // You might want to customize these patterns based on common practices in your codebases + _ detectors.Detector = (*Scanner)(nil) + uriPattern = regexp.MustCompile(`\b(?i)postgresql://[\S]+\b`) + hostnamePattern = regexp.MustCompile(`(?i)(?:host|server|address).{0,40}?(\b[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\b)`) + portPattern = regexp.MustCompile(`(?i)(?:port|p).{0,40}?(\b[0-9]{1,5}\b)`) ) +type Scanner struct{} + func (s Scanner) Keywords() []string { return []string{"postgres", "psql", "pghost"} } -func verifyPostgres(pgURL *url.URL) error { - // Extract the necessary components - username := "" - password := "" - if pgURL.User != nil { - username = pgURL.User.Username() - password, _ = pgURL.User.Password() - } - hostname := pgURL.Hostname() - - // Handle custom port - 
port := pgURL.Port() - if port == "" { - port = DEFAULT_PORT - } - - // Handle SSL mode - sslmode := "disable" // Default to disable - queryParams := pgURL.Query() - if sslQuery, ok := queryParams["sslmode"]; ok && len(sslQuery) > 0 { - sslmode = sslQuery[0] - } - - // Construct the PostgreSQL connection string - connStr := fmt.Sprintf("user=%s password=%s host=%s port=%s sslmode=%s", username, password, hostname, port, sslmode) +func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) ([]detectors.Result, error) { + var results []detectors.Result + var pgURLs []url.URL + pgURLs = append(pgURLs, findUriMatches(string(data))) + pgURLs = append(pgURLs, findComponentMatches(string(data))...) - // Open a connection to the database - db, err := sql.Open("postgres", connStr) - if err != nil { - return err - } - defer db.Close() + for _, pgURL := range pgURLs { + if pgURL.User == nil { + continue + } + username := pgURL.User.Username() + password, _ := pgURL.User.Password() + hostport := pgURL.Host + result := detectors.Result{ + DetectorType: detectorspb.DetectorType_Postgres, + Raw: []byte(username + password), + RawV2: []byte(hostport + username + password), + } - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() + if verify { + isVerified, verificationErr := verifyPostgres(&pgURL) + result.Verified = isVerified + result.SetVerificationError(verificationErr, password) + } - // Try to establish a connection - err = db.PingContext(ctx) - if err != nil { - return err + if !result.Verified && detectors.IsKnownFalsePositive(password, detectors.DefaultFalsePositives, true) { + continue + } + results = append(results, result) } - // If we reach here, the credentials are valid - return nil + return results, nil } -func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { - dataStr := string(data) - - // Check for inline connection strings - uriMatches := 
uriPat.FindAllString(dataStr, -1) - for _, uri := range uriMatches { +func findUriMatches(dataStr string) url.URL { + var pgURL url.URL + for _, uri := range uriPattern.FindAllString(dataStr, -1) { pgURL, err := url.Parse(uri) if err != nil { continue } - - // PostgreSQL URLs might not always have the userinfo (username:password) part if pgURL.User != nil { - username := pgURL.User.Username() - password, _ := pgURL.User.Password() - hostname := pgURL.Hostname() - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_Postgres, - Raw: []byte(strings.Join([]string{hostname, username, password}, "\t")), - } - if verify { - verificationErr := verifyPostgres(pgURL) - s1.Verified = verificationErr == nil - } - results = append(results, s1) + return *pgURL } } + return pgURL +} - // Check for separate components - usernameRegexState := common.UsernameRegexCheck("") - usernameMatches := usernameRegexState.Matches(data) +func findComponentMatches(dataStr string) []url.URL { + usernameMatches := common.UsernameRegexCheck("").Matches([]byte(dataStr)) + passwordMatches := common.PasswordRegexCheck("").Matches([]byte(dataStr)) + hostnameMatches := hostnamePattern.FindAllStringSubmatch(dataStr, -1) + portMatches := portPattern.FindAllStringSubmatch(dataStr, -1) - passwordRegexState := common.PasswordRegexCheck("") // No explicit character exclusions by Snowflake for passwords - passwordMatches := passwordRegexState.Matches(data) - hostnameMatches := hostnamePat.FindAllStringSubmatch(dataStr, -1) + var pgURLs []url.URL - // Combine the separate components into potential credentials for _, username := range usernameMatches { if len(username) < 2 { continue } - for _, hostname := range hostnameMatches { - if len(hostname) < 2 { + for _, password := range passwordMatches { + if len(password) < 2 { continue } - result := false - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_Postgres, - } - for _, password := range passwordMatches { - if len(password) < 2 
{ + for _, hostname := range hostnameMatches { + if len(hostname) < 2 { continue } - - // Since we're combining these, we should probably also ensure that the total length does not exceed the 255 character limit for hostnames - combinedLength := len(username) + len(password) + len(hostname[1]) - if combinedLength > 255 { - continue // Skip if the combined length is too long + port := "" + for _, ports := range portMatches { + // this will only grab the last one if there are multiple + // TODO @0x1: enumerate found ports first + if len(ports) > 1 { + port = ports[1] + } + } + if combinedLength := len(username) + len(password) + len(hostname[1]); combinedLength > 255 { + continue } - s1.Raw = []byte(strings.Join([]string{hostname[1], username, password}, "\t")) - result = true postgresURL := url.URL{ Scheme: "postgresql", User: url.UserPassword(username, password), - Host: fmt.Sprintf("%s:%s", hostname[1], "5432"), + Host: fmt.Sprintf("%s:%s", hostname[1], port), } - if verify { - verificationErr := verifyPostgres(&postgresURL) - s1.Verified = verificationErr == nil - break - } - } - if result { - results = append(results, s1) + pgURLs = append(pgURLs, postgresURL) } } } + return pgURLs +} - // Verification could be done here if necessary +func verifyPostgres(pgURL *url.URL) (bool, error) { + if pgURL.User == nil { + return false, nil + } + username := pgURL.User.Username() + password, _ := pgURL.User.Password() - return results, nil + hostname, port := pgURL.Hostname(), pgURL.Port() + if hostname == "" { + hostname = defaultHost + } + if port == "" { + port = defaultPort + } + + sslmode := determineSSLMode(pgURL) + + connStr := fmt.Sprintf("user=%s password=%s host=%s port=%s sslmode=%s", username, password, hostname, port, sslmode) + db, err := sql.Open("postgres", connStr) + if err != nil { + return false, err + } + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + err = db.PingContext(ctx) + if err 
== nil { + return true, nil + } else if strings.Contains(err.Error(), "password authentication failed") { + // incorrect username or password + return false, nil + } + + return false, err +} + +func determineSSLMode(pgURL *url.URL) string { + sslmode := "disable" + if sslQuery, ok := pgURL.Query()["sslmode"]; ok && len(sslQuery) > 0 { + sslmode = sslQuery[0] + } + return sslmode } func (s Scanner) Type() detectorspb.DetectorType { diff --git a/pkg/detectors/postgres/postgres_test.go b/pkg/detectors/postgres/postgres_test.go new file mode 100644 index 000000000000..ec170527ce17 --- /dev/null +++ b/pkg/detectors/postgres/postgres_test.go @@ -0,0 +1,317 @@ +//go:build detectors +// +build detectors + +package postgres + +import ( + "bytes" + "context" + "errors" + "fmt" + "os/exec" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +var postgresDockerHash string + +const ( + postgresUser = "postgres" + postgresPass = "23201dabb56ca236f3dc6736c0f9afad" + postgresHost = "localhost" + postgresPort = "5433" + + inactiveUser = "inactive" + inactivePass = "inactive" + inactivePort = "61000" + inactiveHost = "192.0.2.0" +) + +func TestPostgres_FromChunk(t *testing.T) { + startPostgres() + defer stopPostgres() + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + { + name: "found with seperated credentials, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(` + POSTGRES_USER=%s + POSTGRES_PASSWORD=%s + POSTGRES_ADDRESS=%s + 
POSTGRES_PORT=%s + `, postgresUser, postgresPass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found with single line credentials, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(`postgresql://%s:%s@%s:%s/postgres`, postgresUser, postgresPass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found with json credentials, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf( + `DB_CONFIG={"user": "%s", "password": "%s", "host": "%s", "port": "%s", "database": "postgres"}`, postgresUser, postgresPass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found with seperated credentials, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(` + POSTGRES_USER=%s + POSTGRES_PASSWORD=%s + POSTGRES_ADDRESS=%s + POSTGRES_PORT=%s + `, postgresUser, inactivePass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found with single line credentials, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(`postgresql://%s:%s@%s:%s/postgres`, postgresUser, inactivePass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found with json credentials, unverified - inactive 
password", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf( + `DB_CONFIG={"user": "%s", "password": "%s", "host": "%s", "port": "%s", "database": "postgres"}`, postgresUser, inactivePass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found with json credentials, unverified - inactive user", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf( + `DB_CONFIG={"user": "%s", "password": "%s", "host": "%s", "port": "%s", "database": "postgres"}`, inactiveUser, postgresPass, postgresHost, postgresPort)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found, unverified due to error - inactive port", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(`postgresql://%s:%s@%s:%s/postgres`, postgresUser, postgresPass, postgresHost, inactivePort)), + verify: true, + }, + want: func() []detectors.Result { + r := detectors.Result{ + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + } + r.SetVerificationError(errors.New("connection refused")) + return []detectors.Result{r} + }(), + wantErr: false, + }, + // TODO: This test seems to take a long time to run (70s+) even with the timeout set to 1s. It's not clear why. 
+ { + name: "found, unverified due to error - inactive host", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(`postgresql://%s:%s@%s:%s/postgres`, postgresUser, postgresPass, inactiveHost, postgresPort)), + verify: true, + }, + want: func() []detectors.Result { + r := detectors.Result{ + DetectorType: detectorspb.DetectorType_Postgres, + Verified: false, + } + r.SetVerificationError(errors.New("operation timed out")) + return []detectors.Result{r} + }(), + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("postgres.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + gotErr := "" + if got[i].VerificationError() != nil { + gotErr = got[i].VerificationError().Error() + } + wantErr := "" + if tt.want[i].VerificationError() != nil { + wantErr = tt.want[i].VerificationError().Error() + } + if gotErr != wantErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.want[i].VerificationError(), got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Postgres.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func dockerLogLine(hash string, needle string) chan struct{} { + ch := make(chan struct{}, 1) + go func() { + for { + out, err := exec.Command("docker", "logs", hash).CombinedOutput() + if err != nil { + panic(err) + } + if strings.Contains(string(out), needle) { + ch <- struct{}{} + return + } + time.Sleep(1 * time.Second) + } + }() + return ch +} + +func startPostgres() error { + cmd := exec.Command( + "docker", "run", "--rm", "-p", 
postgresPort+":"+defaultPort, + "-e", "POSTGRES_PASSWORD="+postgresPass, + "-e", "POSTGRES_USER="+postgresUser, + "-d", "postgres", + ) + fmt.Println(cmd.String()) + out, err := cmd.Output() + if err != nil { + return err + } + postgresDockerHash = string(bytes.TrimSpace(out)) + select { + case <-dockerLogLine(postgresDockerHash, "PostgreSQL init process complete; ready for start up."): + return nil + case <-time.After(30 * time.Second): + stopPostgres() + return errors.New("timeout waiting for postgres database to be ready") + } +} + +func stopPostgres() { + exec.Command("docker", "kill", postgresDockerHash).Run() +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +}