From d51e3b6d83f39e96f2d0efe347a8484c5133ea66 Mon Sep 17 00:00:00 2001 From: ahrav Date: Sun, 20 Aug 2023 11:38:28 -0700 Subject: [PATCH 1/8] Only scan gist comments or repo comments. (#1646) --- pkg/sources/github/github.go | 69 +++++++++++++++++-------------- pkg/sources/github/github_test.go | 19 +++++++++ 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index bf7157e6e0de..26e615bb7104 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -988,44 +988,49 @@ func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan c trimmedURL := removeURLAndSplit(repoURL.String()) if repoURL.Host == "gist.github.com" && s.includeGistComments { - s.log.Info("scanning github gist comments", "repository", repoPath) - // GitHub Gist URL. - var gistId string - if len(trimmedURL) == 2 { - // https://gist.github.com/ - gistId = trimmedURL[1] - } else if len(trimmedURL) == 3 { - // https://gist.github.com// - gistId = trimmedURL[2] - } else { - return fmt.Errorf("failed to parse Gist URL: '%s'", repoURL.String()) - } + return s.processGistComments(ctx, repoPath, trimmedURL, repoURL, chunksChan) + } + return s.processRepoComments(ctx, repoPath, trimmedURL, repoURL, chunksChan) +} - options := &github.ListOptions{ - PerPage: defaultPagination, - Page: initialPage, +func (s *Source) processGistComments(ctx context.Context, repoPath string, trimmedURL []string, repoURL *url.URL, chunksChan chan *sources.Chunk) error { + s.log.Info("scanning github gist comments", "repository", repoPath) + // GitHub Gist URL. + gistID, err := extractGistID(trimmedURL) + if err != nil { + return err + } + + options := &github.ListOptions{ + PerPage: defaultPagination, + Page: initialPage, + } + for { + comments, resp, err := s.apiClient.Gists.ListComments(ctx, gistID, options) + if s.handleRateLimit(err, resp) { + break + } + if err != nil { + return err } - for { - comments, resp, err := s.apiClient.Gists.ListComments(ctx, gistId, options) - if s.handleRateLimit(err, resp) { - break - } - if err != nil { - return err - } - err = s.chunkGistComments(ctx, repoURL.String(), comments, chunksChan) - if err != nil { - return err - } + if err = s.chunkGistComments(ctx, repoURL.String(), comments, chunksChan); err != nil { + return err + } - options.Page++ - if len(comments) < options.PerPage { - break - } + options.Page++ + if len(comments) < options.PerPage { + break } } - return s.processRepoComments(ctx, repoPath, trimmedURL, repoURL, chunksChan) + return nil +} + +func extractGistID(url []string) (string, error) { + if len(url) < 2 || len(url) > 3 { + return "", fmt.Errorf("failed to parse Gist URL: length of trimmedURL should be 2 or 3") + } + return url[len(url)-1], nil } // Note: these can't be consts because the address is needed when using with the GitHub library. diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index 73890bb0467b..4f5b51294ea7 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -743,3 +743,22 @@ func TestProcessRepoComments(t *testing.T) { }) } } + +func TestGetGistID(t *testing.T) { + tests := []struct { + trimmedURL []string + expected string + err bool + }{ + {[]string{"https://gist.github.com", "12345"}, "12345", false}, + {[]string{"https://gist.github.com", "owner", "12345"}, "12345", false}, + {[]string{"https://gist.github.com"}, "", true}, + {[]string{"https://gist.github.com", "owner", "12345", "extra"}, "", true}, + } + + for _, tt := range tests { + got, err := extractGistID(tt.trimmedURL) + assert.Equal(t, tt.err, err != nil) + assert.Equal(t, tt.expected, got) + } +} From ed062178622c74167551eb8f2322c56a78d53a1d Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Mon, 21 Aug 2023 10:05:45 -0400 Subject: [PATCH 2/8] Add tri-state verification to sqlserver detector (#1624) This is a different detector than the general JDBC detector. --- .../jdbc/sqlserver_integration_test.go | 2 +- pkg/detectors/sqlserver/sqlserver.go | 11 +- .../sqlserver/sqlserver_integration_test.go | 209 ++++++++++++++++++ 3 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 pkg/detectors/sqlserver/sqlserver_integration_test.go diff --git a/pkg/detectors/jdbc/sqlserver_integration_test.go b/pkg/detectors/jdbc/sqlserver_integration_test.go index 0258b5adef25..a43d4df09b71 100644 --- a/pkg/detectors/jdbc/sqlserver_integration_test.go +++ b/pkg/detectors/jdbc/sqlserver_integration_test.go @@ -88,7 +88,7 @@ func startSqlServer() error { return nil case <-time.After(30 * time.Second): stopSqlServer() - return errors.New("timeout waiting for mysql database to be ready") + return errors.New("timeout waiting for sql server database to be ready") } } diff --git a/pkg/detectors/sqlserver/sqlserver.go b/pkg/detectors/sqlserver/sqlserver.go index ff5a3ab8a7fb..71798cc74069 100644 --- a/pkg/detectors/sqlserver/sqlserver.go +++ b/pkg/detectors/sqlserver/sqlserver.go @@ -5,6 +5,7 @@ import ( "database/sql" "regexp" + mssql "github.com/denisenkom/go-mssqldb" "github.com/denisenkom/go-mssqldb/msdsn" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -49,9 +50,15 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { verified, err := ping(paramsUnsafe) - if err != nil { + + detected.Verified = verified + + if mssqlErr, isMssqlErr := err.(mssql.Error); isMssqlErr && mssqlErr.Number == 18456 { + // Login failed + // Number taken from https://learn.microsoft.com/en-us/sql/relational-databases/errors-events/database-engine-events-and-errors?view=sql-server-ver16 + // Nothing to do; determinate failure to verify } else { - detected.Verified = verified + detected.VerificationError = err } } diff --git a/pkg/detectors/sqlserver/sqlserver_integration_test.go b/pkg/detectors/sqlserver/sqlserver_integration_test.go new file mode 100644 index 000000000000..67fd75de2323 --- /dev/null +++ b/pkg/detectors/sqlserver/sqlserver_integration_test.go @@ -0,0 +1,209 @@ +//go:build detectors && integration +// +build detectors,integration + +package sqlserver + +import ( + "bytes" + "context" + "errors" + "os/exec" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestSQLServerIntegration_FromChunk(t *testing.T) { + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("Server=localhost;Initial Catalog=master;User ID=sa;Password=P@ssw0rd!;Persist Security Info=true;MultipleActiveResultSets=true;"), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SQLServer, + Raw: []byte("P@ssw0rd!"), + RawV2: []byte("sqlserver://sa:P%40ssw0rd%21@localhost?database=master&disableRetry=false"), + Redacted: "sqlserver://sa:********@localhost?database=master&disableRetry=false", + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("Server=localhost;User ID=sa;Password=123"), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SQLServer, + Raw: []byte("123"), + RawV2: []byte("sqlserver://sa:123@localhost?disableRetry=false"), + Redacted: "sqlserver://sa:********@localhost?disableRetry=false", + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found, in XML, missing password param (pwd is not valid)", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(``), + verify: true, + }, + want: nil, + wantErr: false, + }, + { + name: "found, verified, in XML", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(``), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SQLServer, + Redacted: "sqlserver://sa:********@localhost?database=master&disableRetry=false", + Raw: []byte("P@ssw0rd!"), + RawV2: []byte("sqlserver://sa:P%40ssw0rd%21@localhost?database=master&disableRetry=false"), + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unreachable host", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("Server=unreachablehost;Initial Catalog=master;User ID=sa;Password=P@ssw0rd!;Persist Security Info=true;MultipleActiveResultSets=true;"), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SQLServer, + Raw: []byte("P@ssw0rd!"), + RawV2: []byte("sqlserver://sa:P%40ssw0rd%21@unreachablehost?database=master&disableRetry=false"), + Redacted: "sqlserver://sa:********@unreachablehost?database=master&disableRetry=false", + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + + if err := startSqlServer(); err != nil { + t.Fatalf("could not start sql server for integration testing: %v", err) + } + defer stopSqlServer() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("SQLServer.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "VerificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("SQLServer.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +var sqlServerDockerHash string + +func dockerLogLine(hash string, needle string) chan struct{} { + ch := make(chan struct{}, 1) + go func() { + for { + out, err := exec.Command("docker", "logs", hash).CombinedOutput() + if err != nil { + panic(err) + } + if strings.Contains(string(out), needle) { + ch <- struct{}{} + return + } + time.Sleep(1 * time.Second) + } + }() + return ch +} + +func startSqlServer() error { + cmd := exec.Command( + "docker", "run", "--rm", "-p", "1433:1433", + "-e", "ACCEPT_EULA=1", + "-e", "MSSQL_SA_PASSWORD=P@ssw0rd!", + "-d", "mcr.microsoft.com/azure-sql-edge", + ) + out, err := cmd.Output() + if err != nil { + return err + } + sqlServerDockerHash = string(bytes.TrimSpace(out)) + select { + case <-dockerLogLine(sqlServerDockerHash, "EdgeTelemetry starting up"): + return nil + case <-time.After(30 * time.Second): + stopSqlServer() + return errors.New("timeout waiting for sql server database to be ready") + } +} + +func stopSqlServer() { + exec.Command("docker", "kill", sqlServerDockerHash).Run() +} From dbb2c2e319505692f43421b8bf0353d4d3cd54e4 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Mon, 21 Aug 2023 12:36:36 -0400 Subject: [PATCH 3/8] wait before finishing s3 test (#1647) The S3 source test verifies that chunking has completed, but it didn't actually wait for completion first, leading to non-deterministic test failures. --- pkg/sources/s3/s3_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/sources/s3/s3_test.go b/pkg/sources/s3/s3_test.go index 5d8ce03e2694..62b6121da669 100644 --- a/pkg/sources/s3/s3_test.go +++ b/pkg/sources/s3/s3_test.go @@ -77,7 +77,10 @@ func TestSource_Chunks(t *testing.T) { return } chunksCh := make(chan *sources.Chunk) + var wg sync.WaitGroup + wg.Add(1) go func() { + defer wg.Done() err = s.Chunks(ctx, chunksCh) if (err != nil) != tt.wantErr { t.Errorf("Source.Chunks() error = %v, wantErr %v", err, tt.wantErr) @@ -90,6 +93,7 @@ func TestSource_Chunks(t *testing.T) { if diff := pretty.Compare(gotChunk.Data, wantData); diff != "" { t.Errorf("%s: Source.Chunks() diff: (-got +want)\n%s", tt.name, diff) } + wg.Wait() assert.Equal(t, "", s.GetProgress().EncodedResumeInfo) assert.Equal(t, int64(100), s.GetProgress().PercentComplete) }) From 9a13c74a358defadc9871b2765de518a6f266774 Mon Sep 17 00:00:00 2001 From: Zubair Khan Date: Tue, 22 Aug 2023 10:22:39 -0400 Subject: [PATCH 4/8] add thog CLI support for GitHub config validate (#1626) * add exportable validate function for github * update validator * use the context * gate to prevent panic * wrap error with context * wrap error with context for basic auth and unauth --- pkg/sources/github/github.go | 59 ++++++++++++++++++++++++++++++++++++ pkg/sources/sources.go | 2 +- pkg/sources/syslog/syslog.go | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 26e615bb7104..5bab8f08a63b 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -273,6 +273,65 @@ func (s *Source) Init(aCtx context.Context, name string, jobID, sourceID int64, return nil } +// Validate is used by enterprise CLI to validate the Github config file. +func (s *Source) Validate(ctx context.Context) []error { + var ( + errs []error + ghClient *github.Client + err error + ) + apiEndpoint := s.conn.Endpoint + + switch cred := s.conn.GetCredential().(type) { + case *sourcespb.GitHub_BasicAuth: + s.httpClient.Transport = &github.BasicAuthTransport{ + Username: cred.BasicAuth.Username, + Password: cred.BasicAuth.Password, + } + ghClient, err = createGitHubClient(s.httpClient, apiEndpoint) + if err != nil { + errs = append(errs, fmt.Errorf("error creating GitHub client: %+v", err)) + } + case *sourcespb.GitHub_Unauthenticated: + ghClient, err = createGitHubClient(s.httpClient, apiEndpoint) + if err != nil { + errs = append(errs, fmt.Errorf("error creating GitHub client: %+v", err)) + } + case *sourcespb.GitHub_Token: + s.githubToken = cred.Token + + ts := oauth2.StaticTokenSource( + &oauth2.Token{AccessToken: s.githubToken}, + ) + s.httpClient.Transport = &oauth2.Transport{ + Base: s.httpClient.Transport, + Source: oauth2.ReuseTokenSource(nil, ts), + } + + ghClient, err = createGitHubClient(s.httpClient, apiEndpoint) + if err != nil { + errs = append(errs, fmt.Errorf("error creating GitHub client: %+v", err)) + } + default: + errs = append(errs, errors.Errorf("Invalid configuration given for source. Name: %s, Type: %s", s.name, s.Type())) + } + + // Run a simple query to check if the client is actually valid + if ghClient != nil { + err = checkGitHubConnection(ctx, ghClient) + if err != nil { + errs = append(errs, err) + } + } + + return errs +} + +func checkGitHubConnection(ctx context.Context, client *github.Client) error { + _, _, err := client.Users.Get(ctx, "") + return err +} + func (s *Source) visibilityOf(ctx context.Context, repoURL string) (visibility source_metadatapb.Visibility) { s.mu.Lock() visibility, ok := s.publicMap[repoURL] diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index 17818d260451..474de24347aa 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -248,7 +248,7 @@ type Progress struct { // Validator is an interface for validating a source. Sources can optionally implement this interface to validate // their configuration. type Validator interface { - Validate() []error + Validate(ctx context.Context) []error } // SetProgressComplete sets job progress information for a running job based on the highest level objects in the source. diff --git a/pkg/sources/syslog/syslog.go b/pkg/sources/syslog/syslog.go index 5869f154ee36..b2ba64fa13a5 100644 --- a/pkg/sources/syslog/syslog.go +++ b/pkg/sources/syslog/syslog.go @@ -60,7 +60,7 @@ func NewSyslog(sourceType sourcespb.SourceType, jobID, sourceID int64, sourceNam } // Validate validates the configuration of the source. -func (s *Source) Validate() []error { +func (s *Source) Validate(ctx context.Context) []error { var errors []error if s.conn.TlsCert != nilString || s.conn.TlsKey != nilString { From 5cfbde783f23cce4c51a92ea0915846b9c532ecc Mon Sep 17 00:00:00 2001 From: Miccah Date: Tue, 22 Aug 2023 07:55:56 -0700 Subject: [PATCH 5/8] Fix reversed ordering of arguments (#1648) The source manager initialization function was defined as `sourceID` followed by `jobID`, while the source initialization function is the reverse. This is confusing and easy to mix up since the parameters are the same type. This commit adds a test to make sure the source manager initializes in the correct order, but it doesn't prevent the library user to make the same mistake. We may want to consider using different types. --- pkg/sources/source_manager.go | 4 +-- pkg/sources/source_manager_test.go | 39 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/pkg/sources/source_manager.go b/pkg/sources/source_manager.go index 923f04648c4b..c1c9c3a34144 100644 --- a/pkg/sources/source_manager.go +++ b/pkg/sources/source_manager.go @@ -19,7 +19,7 @@ type handle int64 // SourceInitFunc is a function that takes a source and job ID and returns an // initialized Source. -type SourceInitFunc func(ctx context.Context, sourceID int64, jobID int64) (Source, error) +type SourceInitFunc func(ctx context.Context, jobID, sourceID int64) (Source, error) // sourceInfo is an aggregate struct to store source information provided on // initialization. @@ -228,7 +228,7 @@ func (s *SourceManager) run(ctx context.Context, handle handle, jobID int64, rep report.ReportError(Fatal{err}) return Fatal{err} } - source, err := sourceInfo.initFunc(ctx, int64(handle), jobID) + source, err := sourceInfo.initFunc(ctx, jobID, int64(handle)) if err != nil { report.ReportError(Fatal{err}) return Fatal{err} diff --git a/pkg/sources/source_manager_test.go b/pkg/sources/source_manager_test.go index 266263343029..4d66dbc7c737 100644 --- a/pkg/sources/source_manager_test.go +++ b/pkg/sources/source_manager_test.go @@ -245,3 +245,42 @@ func TestSourceManagerContextCancelled(t *testing.T) { report := ref.Snapshot() assert.Error(t, report.FatalError()) } + +type DummyAPI struct { + registerSource func(context.Context, string, sourcespb.SourceType) (int64, error) + getJobID func(context.Context, int64) (int64, error) +} + +func (api DummyAPI) RegisterSource(ctx context.Context, name string, kind sourcespb.SourceType) (int64, error) { + return api.registerSource(ctx, name, kind) +} + +func (api DummyAPI) GetJobID(ctx context.Context, id int64) (int64, error) { + return api.getJobID(ctx, id) +} + +func TestSourceManagerJobAndSourceIDs(t *testing.T) { + mgr := NewManager(WithAPI(DummyAPI{ + registerSource: func(context.Context, string, sourcespb.SourceType) (int64, error) { + return 1337, nil + }, + getJobID: func(context.Context, int64) (int64, error) { + return 9001, nil + }, + })) + var ( + initializedJobID int64 + initializedSourceID int64 + ) + handle, err := mgr.Enroll(context.Background(), "dummy", 1337, + func(ctx context.Context, jobID, sourceID int64) (Source, error) { + initializedJobID = jobID + initializedSourceID = sourceID + return nil, fmt.Errorf("ignore") + }) + assert.NoError(t, err) + + _, _ = mgr.Run(context.Background(), handle) + assert.Equal(t, int64(1337), initializedSourceID) + assert.Equal(t, int64(9001), initializedJobID) +} From 059ea23a7205ab283e786b0d4543873d38ffab35 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Tue, 22 Aug 2023 12:43:38 -0400 Subject: [PATCH 6/8] update s3 test bucket (#1649) We're switching our S3 source test account over to a different one, which means we have to change the bucket name. --- pkg/sources/s3/s3_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sources/s3/s3_test.go b/pkg/sources/s3/s3_test.go index 62b6121da669..f8f039ecdb12 100644 --- a/pkg/sources/s3/s3_test.go +++ b/pkg/sources/s3/s3_test.go @@ -52,7 +52,7 @@ func TestSource_Chunks(t *testing.T) { Secret: s3secret, }, }, - Buckets: []string{"thog-tmp-test"}, + Buckets: []string{"truffletestbucket-s3-tests"}, }, }, wantErr: false, From fd00d2b30bd1f6a7b260e1e22861c67a183a120c Mon Sep 17 00:00:00 2001 From: Zubair Khan Date: Tue, 22 Aug 2023 15:01:59 -0400 Subject: [PATCH 7/8] add rate limit and consumption metrics for GitHub (#1651) * add rate limit and consumption metrics * incrment after each repo scanned * update repo scanned label name --- pkg/sources/github/github.go | 11 +++++++++ pkg/sources/github/metrics.go | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 pkg/sources/github/metrics.go diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 5bab8f08a63b..46ddaf58341c 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -416,6 +416,11 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err apiEndpoint = "https://api.github.com" } + // Reset consumption and rate limit metrics on each run. + githubNumRateLimitEncountered.WithLabelValues(s.name).Set(0) + githubSecondsSpentRateLimited.WithLabelValues(s.name).Set(0) + githubReposScanned.WithLabelValues(s.name).Set(0) + installationClient, err := s.enumerate(ctx, apiEndpoint) if err != nil { return err @@ -451,6 +456,7 @@ func (s *Source) enumerate(ctx context.Context, apiEndpoint string) (*github.Cli } s.repos = s.filteredRepoCache.Values() + githubReposEnumerated.WithLabelValues(s.name).Set(float64(len(s.repos))) s.log.Info("Completed enumeration", "num_repos", len(s.repos), "num_orgs", s.orgsCache.Count(), "num_members", len(s.memberCache)) // We must sort the repos so we can resume later if necessary. @@ -781,6 +787,8 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch return nil } + githubReposScanned.WithLabelValues(s.name).Inc() + if err = s.scanComments(ctx, repoURL, chunksChan); err != nil { scanErrs.Add(fmt.Errorf("error scanning comments in repo %s: %w", repoURL, err)) return nil @@ -810,6 +818,8 @@ func (s *Source) handleRateLimit(errIn error, res *github.Response) bool { return false } + githubNumRateLimitEncountered.WithLabelValues(s.name).Inc() + if res != nil { knownWait := true remaining, err := strconv.Atoi(res.Header.Get("x-ratelimit-remaining")) @@ -827,6 +837,7 @@ func (s *Source) handleRateLimit(errIn error, res *github.Response) bool { duration := time.Duration(waitTime+1) * time.Second s.log.V(2).Info("rate limited", "resumeTime", time.Now().Add(duration).String()) time.Sleep(duration) + githubSecondsSpentRateLimited.WithLabelValues(s.name).Add(duration.Seconds()) return true } } diff --git a/pkg/sources/github/metrics.go b/pkg/sources/github/metrics.go new file mode 100644 index 000000000000..e6a1711fb98d --- /dev/null +++ b/pkg/sources/github/metrics.go @@ -0,0 +1,42 @@ +package github + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" +) + +var ( + githubNumRateLimitEncountered = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "github_num_rate_limit_encountered", + Help: "Total number of times Github Rate Limit was encountered", + }, + []string{"source_name"}) + + githubSecondsSpentRateLimited = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "github_seconds_spent_rate_limited", + Help: "Total number of seconds spent idle due to GitHub rate limits.", + }, + []string{"source_name"}) + + githubReposEnumerated = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "github_repos_enumerated", + Help: "Total number of GitHub repositories enumerated.", + }, + []string{"source_name"}) + + githubReposScanned = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "github_repos_scanned", + Help: "Total number of GitHub repositories scanned.", + }, + []string{"source_name"}) +) From 9ae72308bed926be04748d9d0632a8770cf7b524 Mon Sep 17 00:00:00 2001 From: ahrav Date: Tue, 22 Aug 2023 14:00:27 -0700 Subject: [PATCH 8/8] Include the job ID in a chunk (#1652) * Include the job ID in a source's chunk. * address comments. * address comments. --- pkg/sources/source_manager.go | 7 ++++++- pkg/sources/sources.go | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/sources/source_manager.go b/pkg/sources/source_manager.go index c1c9c3a34144..a890b64fde93 100644 --- a/pkg/sources/source_manager.go +++ b/pkg/sources/source_manager.go @@ -6,10 +6,11 @@ import ( "sync/atomic" "time" + "golang.org/x/sync/errgroup" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" - "golang.org/x/sync/errgroup" ) // handle uniquely identifies a Source given to the manager to manage. If the @@ -255,6 +256,7 @@ func (s *SourceManager) runWithoutUnits(ctx context.Context, handle handle, sour go func() { defer wg.Done() for chunk := range ch { + chunk.JobID = source.JobID() report.ReportChunk(nil, chunk) s.outputChunks <- chunk } @@ -334,6 +336,9 @@ func (s *SourceManager) runWithUnits(ctx context.Context, handle handle, source defer wg.Done() defer func() { report.EndUnitChunking(unit, time.Now()) }() for chunk := range chunkReporter.chunkCh { + if src, ok := source.(Source); ok { + chunk.JobID = src.JobID() + } s.outputChunks <- chunk } }() diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index 474de24347aa..07da7ccaf521 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -17,6 +17,8 @@ type Chunk struct { SourceName string // SourceID is the ID of the source that the Chunk originated from. SourceID int64 + // JobID is the ID of the job that the Chunk originated from. + JobID int64 // SourceType is the type of Source that produced the chunk. SourceType sourcespb.SourceType // SourceMetadata holds the context of where the Chunk was found.