const (
	// initialPage is the page number used as the starting ListOptions.Page
	// value when listing paginated GitHub resources.
	initialPage = 1

	// allComments is passed as the issue / pull request number argument of the
	// ListComments calls. It is not a comment ID: a value of 0 requests the
	// comments of the whole repository instead of those of a single issue or
	// pull request. It is only ever passed by value, so it can be a constant.
	allComments = 0
)

// Note: these can't be consts because their addresses are taken when building
// github.IssueListCommentsOptions (its Sort and Direction fields are *string).
var (
	// sortType defines the criteria for sorting comments.
	// Comments are sorted by their creation date.
	sortType = "created"
	// directionType defines the direction of sorting.
	// "desc" means comments are sorted in descending order, latest first.
	directionType = "desc"
)

// repoInfo bundles the identifying pieces of a repository that the
// comment-scanning helpers need, so they can be passed around as one value.
type repoInfo struct {
	owner    string // second element of the split repository URL
	repo     string // third element of the split repository URL
	repoPath string // repository path/URL as originally supplied by the caller
}
if len(trimmedURL) < 3 { @@ -1034,84 +1055,93 @@ func (s *Source) processRepoComments(ctx context.Context, repoPath string, trimm owner := trimmedURL[1] repo := trimmedURL[2] - var ( - sortType = "created" - directionType = "desc" - allComments = 0 - ) + repoInfo := repoInfo{owner: owner, repo: repo, repoPath: repoPath} if s.includeIssueComments { - s.log.Info("scanning github issue comments", "repository", repoPath) - - issueOpts := &github.IssueListCommentsOptions{ - Sort: &sortType, - Direction: &directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, + if err := s.processIssueComments(ctx, repoInfo, chunksChan); err != nil { + return err } - for { - issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, owner, repo, allComments, issueOpts) - if s.handleRateLimit(err, resp) { - break - } + } - if err != nil { - return err - } + if !s.includePRComments { + return nil + } - if err = s.chunkIssueComments(ctx, repo, issueComments, chunksChan, repoPath); err != nil { - return err - } + return s.processPRComments(ctx, repoInfo, chunksChan) +} - issueOpts.ListOptions.Page++ +func (s *Source) processIssueComments(ctx context.Context, info repoInfo, chunksChan chan *sources.Chunk) error { + s.log.Info("scanning github issue comments", "repository", info.repoPath) - if len(issueComments) < defaultPagination { - break - } + issueOpts := &github.IssueListCommentsOptions{ + Sort: &sortType, + Direction: &directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: initialPage, + }, + } + + for { + issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, info.owner, info.repo, allComments, issueOpts) + if s.handleRateLimit(err, resp) { + break } - } + if err != nil { + return err + } - if s.includePRComments { - s.log.Info("scanning github pull request comments", "repository", repoPath) + if err = s.chunkIssueComments(ctx, info.repo, info.repoPath, issueComments, chunksChan); err != nil { + 
return err + } - prOpts := &github.PullRequestListCommentsOptions{ - Sort: sortType, - Direction: directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, + issueOpts.ListOptions.Page++ + + if len(issueComments) < defaultPagination { + break } + } + return nil +} - for { - prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, owner, repo, allComments, prOpts) - if s.handleRateLimit(err, resp) { - break - } +func (s *Source) processPRComments(ctx context.Context, info repoInfo, chunksChan chan *sources.Chunk) error { + s.log.Info("scanning github pull request comments", "repository", info.repoPath) - if err != nil { - return err - } + prOpts := &github.PullRequestListCommentsOptions{ + Sort: sortType, + Direction: directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: initialPage, + }, + } - if err = s.chunkPullRequestComments(ctx, repo, prComments, chunksChan, repoPath); err != nil { - return err - } + for { + prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, info.owner, info.repo, allComments, prOpts) + if s.handleRateLimit(err, resp) { + break + } - prOpts.ListOptions.Page++ + if err != nil { + return err + } - if len(prComments) < defaultPagination { - break - } + if err = s.chunkPullRequestComments(ctx, info.repo, prComments, chunksChan); err != nil { + return err + } + + prOpts.ListOptions.Page++ + + if len(prComments) < defaultPagination { + break } } return nil } -func (s *Source) chunkIssueComments(ctx context.Context, repo string, comments []*github.IssueComment, chunksChan chan *sources.Chunk, repoPath string) error { +func (s *Source) chunkIssueComments(ctx context.Context, repo, repoPath string, comments []*github.IssueComment, chunksChan chan *sources.Chunk) error { for _, comment := range comments { // Create chunk and send it to the channel. 
chunk := &sources.Chunk{ @@ -1143,7 +1173,7 @@ func (s *Source) chunkIssueComments(ctx context.Context, repo string, comments [ return nil } -func (s *Source) chunkPullRequestComments(ctx context.Context, repo string, comments []*github.PullRequestComment, chunksChan chan *sources.Chunk, repoPath string) error { +func (s *Source) chunkPullRequestComments(ctx context.Context, repo string, comments []*github.PullRequestComment, chunksChan chan *sources.Chunk) error { for _, comment := range comments { // Create chunk and send it to the channel. chunk := &sources.Chunk{ diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index ee106638b80a..73890bb0467b 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -8,8 +8,10 @@ import ( "encoding/pem" "fmt" "net/http" + "net/url" "reflect" "strconv" + "strings" "testing" "time" @@ -25,6 +27,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" ) func createTestSource(src *sourcespb.GitHub) (*Source, *anypb.Any) { @@ -709,3 +712,34 @@ func Test_scan_SetProgressComplete(t *testing.T) { }) } } + +func TestProcessRepoComments(t *testing.T) { + tests := []struct { + name string + trimmedURL []string + wantErr bool + }{ + { + name: "URL with missing owner and/or repo", + trimmedURL: []string{"https://github.com/"}, + wantErr: true, + }, + { + name: "URL with complete owner and repo", + trimmedURL: []string{"https://github.com/", "owner", "repo"}, + wantErr: false, + }, + // TODO: Add more test cases to cover other scenarios. 
+ } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &Source{} + repoURL, _ := url.Parse(strings.Join(tt.trimmedURL, "/")) + chunksChan := make(chan *sources.Chunk) + + err := s.processRepoComments(context.Background(), "repoPath", tt.trimmedURL, repoURL, chunksChan) + assert.Equal(t, tt.wantErr, err != nil) + }) + } +}