From 3f5e985f3bbd883e151e45d2ac2c34d24ea4b2b5 Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Wed, 26 Jul 2023 09:41:31 -0400 Subject: [PATCH] fix(github): fix runtime error from gist comments --- pkg/sources/github/github.go | 176 +++++++++++++++++++++++++---------- 1 file changed, 125 insertions(+), 51 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 5ccee2df2163..5762eda7bcbc 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -697,15 +697,16 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch logger.V(2).Info(fmt.Sprintf("scanned %d/%d repos", scanned, len(s.repos)), "repo_size", repoSize, "duration_seconds", time.Since(start).Seconds()) }(now) - if err = s.scanComments(ctx, repoURL, chunksChan); err != nil { - scanErrs.Add(fmt.Errorf("error scanning comments in repo %s: %w", repoURL, err)) + if err = s.git.ScanRepo(ctx, repo, path, s.scanOptions, chunksChan); err != nil { + scanErrs.Add(fmt.Errorf("error scanning repo %s: %w", repoURL, err)) return nil } - if err = s.git.ScanRepo(ctx, repo, path, s.scanOptions, chunksChan); err != nil { - scanErrs.Add(fmt.Errorf("error scanning repo %s: %w", repoURL, err)) + if err = s.scanComments(ctx, repoURL, chunksChan); err != nil { + scanErrs.Add(fmt.Errorf("error scanning comments in repo %s: %w", repoURL, err)) return nil } + atomic.AddUint64(&scanned, 1) return nil @@ -966,79 +967,118 @@ func (s *Source) scanComments(ctx context.Context, repoPath string, chunksChan c } trimmedURL := removeURLAndSplit(repoURL.String()) - owner := trimmedURL[1] - repo := trimmedURL[2] - - var ( - sortType = "created" - directionType = "desc" - allComments = 0 - ) - - if s.includeIssueComments { - - issueOpts := &github.IssueListCommentsOptions{ - Sort: &sortType, - Direction: &directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, + if repoURL.Host == "gist.github.com" { + // GitHub Gist URL. 
+ var gistId string + if len(trimmedURL) == 2 { + // https://gist.github.com/{gist_id} + gistId = trimmedURL[1] + } else if len(trimmedURL) == 3 { + // https://gist.github.com/{owner}/{gist_id} + gistId = trimmedURL[2] + } else { + return fmt.Errorf("failed to parse Gist URL: '%s'", repoURL.String()) } + options := &github.ListOptions{ + PerPage: defaultPagination, + Page: 1, + } for { - issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, owner, repo, allComments, issueOpts) + comments, resp, err := s.apiClient.Gists.ListComments(ctx, gistId, options) if s.handleRateLimit(err, resp) { break } - if err != nil { return err } - err = s.chunkIssueComments(ctx, repo, issueComments, chunksChan, repoPath) + err = s.chunkGistComments(ctx, repoURL.String(), comments, chunksChan) if err != nil { return err } - issueOpts.ListOptions.Page++ - - if len(issueComments) < defaultPagination { + options.Page++ + if len(comments) < options.PerPage { break } } + } else { + // Normal repository URL (https://github.com/{owner}/{repo}). 
+ owner := trimmedURL[1] + repo := trimmedURL[2] + + var ( + sortType = "created" + directionType = "desc" + allComments = 0 + ) - } - - if s.includePRComments { - prOpts := &github.PullRequestListCommentsOptions{ - Sort: sortType, - Direction: directionType, - ListOptions: github.ListOptions{ - PerPage: defaultPagination, - Page: 1, - }, - } + if s.includeIssueComments { - for { - prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, owner, repo, allComments, prOpts) - if s.handleRateLimit(err, resp) { - break + issueOpts := &github.IssueListCommentsOptions{ + Sort: &sortType, + Direction: &directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: 1, + }, } - if err != nil { - return err + for { + issueComments, resp, err := s.apiClient.Issues.ListComments(ctx, owner, repo, allComments, issueOpts) + if s.handleRateLimit(err, resp) { + break + } + + if err != nil { + return err + } + + err = s.chunkIssueComments(ctx, repo, issueComments, chunksChan, repoPath) + if err != nil { + return err + } + + issueOpts.ListOptions.Page++ + + if len(issueComments) < defaultPagination { + break + } } - err = s.chunkPullRequestComments(ctx, repo, prComments, chunksChan, repoPath) - if err != nil { - return err + } + + if s.includePRComments { + prOpts := &github.PullRequestListCommentsOptions{ + Sort: sortType, + Direction: directionType, + ListOptions: github.ListOptions{ + PerPage: defaultPagination, + Page: 1, + }, } - prOpts.ListOptions.Page++ + for { + prComments, resp, err := s.apiClient.PullRequests.ListComments(ctx, owner, repo, allComments, prOpts) + if s.handleRateLimit(err, resp) { + break + } - if len(prComments) < defaultPagination { - break + if err != nil { + return err + } + + err = s.chunkPullRequestComments(ctx, repo, prComments, chunksChan, repoPath) + if err != nil { + return err + } + + prOpts.ListOptions.Page++ + + if len(prComments) < defaultPagination { + break + } } } } @@ -1109,6 +1149,40 @@ func (s *Source) 
chunkPullRequestComments(ctx context.Context, repo string, comm return nil } +func (s *Source) chunkGistComments(ctx context.Context, gistUrl string, comments []*github.GistComment, chunksChan chan *sources.Chunk) error { + for _, comment := range comments { + // Create chunk and send it to the channel. + chunk := &sources.Chunk{ + SourceName: s.name, + SourceID: s.SourceID(), + SourceType: s.Type(), + SourceMetadata: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Github{ + Github: &source_metadatapb.Github{ + Link: sanitizer.UTF8(comment.GetURL()), + Username: sanitizer.UTF8(comment.GetUser().GetLogin()), + Email: sanitizer.UTF8(comment.GetUser().GetEmail()), + Repository: sanitizer.UTF8(gistUrl), + Timestamp: sanitizer.UTF8(comment.GetCreatedAt().String()), + // Fetching this information requires making an additional API call. + // We may want to include this in the future. + //Visibility: s.visibilityOf(ctx, repoPath), + }, + }, + }, + Data: []byte(sanitizer.UTF8(comment.GetBody())), + Verify: s.verify, + } + + select { + case <-ctx.Done(): + return ctx.Err() + case chunksChan <- chunk: + } + } + return nil +} + func removeURLAndSplit(url string) []string { trimmedURL := strings.TrimPrefix(url, "https://") trimmedURL = strings.TrimSuffix(trimmedURL, ".git")