diff --git a/pkg/handlers/archive_test.go b/pkg/handlers/archive_test.go index 9b43582983e6..d0b0ed7ce69c 100644 --- a/pkg/handlers/archive_test.go +++ b/pkg/handlers/archive_test.go @@ -144,6 +144,28 @@ func TestExtractDebContent(t *testing.T) { assert.Equal(t, expectedLength, len(string(content))) } +func TestExtractTarContent(t *testing.T) { + file, err := os.Open("testdata/test.tgz") + assert.Nil(t, err) + defer file.Close() + + ctx := context.Background() + + chunkCh := make(chan *sources.Chunk) + go func() { + defer close(chunkCh) + ok := HandleFile(ctx, file, &sources.Chunk{}, chunkCh) + assert.True(t, ok) + }() + + wantCount := 4 + count := 0 + for range chunkCh { + count++ + } + assert.Equal(t, wantCount, count) +} + func TestExtractRPMContent(t *testing.T) { // Open the sample .rpm file from the testdata folder. file, err := os.Open("testdata/test.rpm") diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go index cb18f7dd8eb9..cce0fe1bf0d0 100644 --- a/pkg/handlers/handlers.go +++ b/pkg/handlers/handlers.go @@ -26,7 +26,7 @@ type SpecializedHandler interface { } type Handler interface { - FromFile(context.Context, io.Reader) chan ([]byte) + FromFile(context.Context, io.Reader) chan []byte IsFiletype(context.Context, io.Reader) (io.Reader, bool) New() } @@ -68,12 +68,18 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c aCtx.Logger().Error(err, "error resetting re-reader") return false } - reReader.Stop() - var isType bool - if file, isType = h.IsFiletype(aCtx, reReader); isType { - return handleChunks(aCtx, h.FromFile(ctx, file), chunkSkel, chunksChan) + if _, isType := h.IsFiletype(aCtx, reReader); !isType { + continue + } + + if err := reReader.Reset(); err != nil { + aCtx.Logger().Error(err, "error resetting re-reader") + return false } + reReader.Stop() + return handleChunks(aCtx, h.FromFile(ctx, reReader), chunkSkel, chunksChan) } + return false } diff --git a/pkg/handlers/testdata/test.tgz 
b/pkg/handlers/testdata/test.tgz new file mode 100644 index 000000000000..61717fb821ab Binary files /dev/null and b/pkg/handlers/testdata/test.tgz differ diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index dea631e5cb42..bf7157e6e0de 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -977,6 +977,8 @@ func (s *Source) setProgressCompleteWithRepo(index int, offset int, repoURL stri s.SetProgressComplete(index+offset, len(s.repos)+offset, fmt.Sprintf("Repo: %s", repoURL), encodedResumeInfo) } +const initialPage = 1 // page to start listing from + func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan chan *sources.Chunk) error { // Support ssh and https URLs repoURL, err := git.GitURLParse(repoPath) @@ -1001,7 +1003,7 @@ func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan c options := &github.ListOptions{ PerPage: defaultPagination, - Page: 1, + Page: initialPage, } for { comments, resp, err := s.apiClient.Gists.ListComments(ctx, gistId, options) @@ -1022,94 +1024,124 @@ func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan c break } } - } else { - // Normal repository URL (https://github.com/<owner>/<repo>). - owner := trimmedURL[1] - repo := trimmedURL[2] - - var ( - sortType = "created" - directionType = "desc" - allComments = 0 - ) + } + return s.processRepoComments(ctx, repoPath, trimmedURL, repoURL, chunksChan) +} - if s.includeIssueComments { +// Note: these can't be consts because the address is needed when using with the GitHub library. +var ( + // sortType defines the criteria for sorting comments. + // By default comments are sorted by their creation date. + sortType = "created" + // directionType defines the direction of sorting. + // "desc" means comments will be sorted in descending order, showing the latest comments first. + directionType = "desc" + // allComments is the issue or pull request number passed to the ListComments calls. 
+ // A value of 0 means that all comments will be listed. + allComments = 0 +) - s.log.Info("scanning github issue comments", "repository", repoPath) +type repoInfo struct { + owner string + repo string + repoPath string +} - issueOpts := &github.IssueListCommentsOptions{ - Sort: &sortType, - Direction: &directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, - } +func (s *Source) processRepoComments(ctx context.Context, repoPath string, trimmedURL []string, repoURL *url.URL, chunksChan chan *sources.Chunk) error { + // Normal repository URL (https://github.com/<owner>/<repo>). + if len(trimmedURL) < 3 { + return fmt.Errorf("url missing owner and/or repo: '%s'", repoURL.String()) + } + owner := trimmedURL[1] + repo := trimmedURL[2] - for { - issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, owner, repo, allComments, issueOpts) - if s.handleRateLimit(err, resp) { - break - } + repoInfo := repoInfo{owner: owner, repo: repo, repoPath: repoPath} - if err != nil { - return err - } + if s.includeIssueComments { + if err := s.processIssueComments(ctx, repoInfo, chunksChan); err != nil { + return err + } - err = s.chunkIssueComments(ctx, repo, issueComments, chunksChan, repoPath) - if err != nil { - return err - } + } - issueOpts.ListOptions.Page++ + if s.includePRComments { + return s.processPRComments(ctx, repoInfo, chunksChan) + } + return nil - if len(issueComments) < defaultPagination { - break - } - } +} + +func (s *Source) processIssueComments(ctx context.Context, info repoInfo, chunksChan chan *sources.Chunk) error { + s.log.Info("scanning github issue comments", "repository", info.repoPath) + + issueOpts := &github.IssueListCommentsOptions{ + Sort: &sortType, + Direction: &directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: initialPage, + }, + } + + for { + issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, info.owner, info.repo, allComments, issueOpts) + if 
s.handleRateLimit(err, resp) { + break + } + + if err != nil { + return err + } + if err = s.chunkIssueComments(ctx, info.repo, info.repoPath, issueComments, chunksChan); err != nil { + return err } - if s.includePRComments { - s.log.Info("scanning github pull request comments", "repository", repoPath) + issueOpts.ListOptions.Page++ - prOpts := &github.PullRequestListCommentsOptions{ - Sort: sortType, - Direction: directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, - } + if len(issueComments) < defaultPagination { + break + } + } + return nil +} - for { - prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, owner, repo, allComments, prOpts) - if s.handleRateLimit(err, resp) { - break - } +func (s *Source) processPRComments(ctx context.Context, info repoInfo, chunksChan chan *sources.Chunk) error { + s.log.Info("scanning github pull request comments", "repository", info.repoPath) - if err != nil { - return err - } + prOpts := &github.PullRequestListCommentsOptions{ + Sort: sortType, + Direction: directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: initialPage, + }, + } - err = s.chunkPullRequestComments(ctx, repo, prComments, chunksChan, repoPath) - if err != nil { - return err - } + for { + prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, info.owner, info.repo, allComments, prOpts) + if s.handleRateLimit(err, resp) { + break + } - prOpts.ListOptions.Page++ + if err != nil { + return err + } - if len(prComments) < defaultPagination { - break - } - } + if err = s.chunkPullRequestComments(ctx, info.repo, prComments, chunksChan); err != nil { + return err } - } + prOpts.ListOptions.Page++ + + if len(prComments) < defaultPagination { + break + } + } return nil } -func (s *Source) chunkIssueComments(ctx context.Context, repo string, comments []*github.IssueComment, chunksChan chan *sources.Chunk, repoPath string) error { +func (s *Source) 
chunkIssueComments(ctx context.Context, repo, repoPath string, comments []*github.IssueComment, chunksChan chan *sources.Chunk) error { for _, comment := range comments { // Create chunk and send it to the channel. chunk := &sources.Chunk{ @@ -1141,7 +1173,7 @@ func (s *Source) chunkIssueComments(ctx context.Context, repo string, comments [ return nil } -func (s *Source) chunkPullRequestComments(ctx context.Context, repo string, comments []*github.PullRequestComment, chunksChan chan *sources.Chunk, repoPath string) error { +func (s *Source) chunkPullRequestComments(ctx context.Context, repo string, comments []*github.PullRequestComment, chunksChan chan *sources.Chunk) error { for _, comment := range comments { // Create chunk and send it to the channel. chunk := &sources.Chunk{ @@ -1189,7 +1221,7 @@ func (s *Source) chunkGistComments(ctx context.Context, gistUrl string, comments Timestamp: sanitizer.UTF8(comment.GetCreatedAt().String()), // Fetching this information requires making an additional API call. // We may want to include this in the future. 
- //Visibility: s.visibilityOf(ctx, repoPath), + // Visibility: s.visibilityOf(ctx, repoPath), }, }, }, diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index 552e5cd9fc53..e30f47d8a2f8 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -81,7 +81,7 @@ func TestSource_Token(t *testing.T) { } func TestSource_ScanComments(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() secret, err := common.GetTestSecret(ctx) @@ -190,7 +190,7 @@ func TestSource_ScanComments(t *testing.T) { return } - chunksCh := make(chan *sources.Chunk, 5) + chunksCh := make(chan *sources.Chunk, 1) go func() { // Close the channel defer close(chunksCh) diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index ee106638b80a..73890bb0467b 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -8,8 +8,10 @@ import ( "encoding/pem" "fmt" "net/http" + "net/url" "reflect" "strconv" + "strings" "testing" "time" @@ -25,6 +27,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" ) func createTestSource(src *sourcespb.GitHub) (*Source, *anypb.Any) { @@ -709,3 +712,34 @@ func Test_scan_SetProgressComplete(t *testing.T) { }) } } + +func TestProcessRepoComments(t *testing.T) { + tests := []struct { + name string + trimmedURL []string + wantErr bool + }{ + { + name: "URL with missing owner and/or repo", + trimmedURL: []string{"https://github.com/"}, + wantErr: true, + }, + { + name: "URL with complete owner and repo", + trimmedURL: []string{"https://github.com/", "owner", "repo"}, + wantErr: false, + }, + // 
TODO: Add more test cases to cover other scenarios. + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &Source{} + repoURL, _ := url.Parse(strings.Join(tt.trimmedURL, "/")) + chunksChan := make(chan *sources.Chunk) + + err := s.processRepoComments(context.Background(), "repoPath", tt.trimmedURL, repoURL, chunksChan) + assert.Equal(t, tt.wantErr, err != nil) + }) + } +}