From d334b3075eda60d33067c2942461c35f2c2a08db Mon Sep 17 00:00:00 2001 From: ahrav Date: Thu, 16 Nov 2023 13:59:53 -0800 Subject: [PATCH] move all Git setup into Init method (#2105) * add proto fields for git * add uri to proto * move all git setup into Init method * fix logic for when to use repoPath --- main.go | 36 +++++++------------------- pkg/engine/git.go | 41 ++++++----------------------- pkg/engine/git_test.go | 14 +++------- pkg/sources/git/git.go | 58 +++++++++++++++++++++++++++++------------- pkg/sources/sources.go | 16 +++++++----- 5 files changed, 69 insertions(+), 96 deletions(-) diff --git a/main.go b/main.go index 8a07708f2f0d..caad1101b971 100644 --- a/main.go +++ b/main.go @@ -30,7 +30,6 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/output" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" - "github.com/trufflesecurity/trufflehog/v3/pkg/sources/git" "github.com/trufflesecurity/trufflehog/v3/pkg/tui" "github.com/trufflesecurity/trufflehog/v3/pkg/updater" "github.com/trufflesecurity/trufflehog/v3/pkg/version" @@ -157,7 +156,7 @@ func init() { cli.Version("trufflehog " + version.BuildVersion) - //Support -h for help + // Support -h for help cli.HelpFlag.Short('h') if len(os.Args) <= 1 && isatty.IsTerminal(os.Stdout.Fd()) { @@ -416,34 +415,17 @@ func run(state overseer.State) { logFatal(err, "error initializing engine") } - var repoPath string - var remote bool switch cmd { case gitScan.FullCommand(): - filter, err := common.FilterFromFiles(*gitScanIncludePaths, *gitScanExcludePaths) - if err != nil { - logFatal(err, "could not create filter") - } - repoPath, remote, err = git.PrepareRepoSinceCommit(ctx, *gitScanURI, *gitScanSinceCommit) - if err != nil || repoPath == "" { - logFatal(err, "error preparing git repo for scanning") - } - if remote { - defer os.RemoveAll(repoPath) - } - excludedGlobs := []string{} - if *gitScanExcludeGlobs != "" { - excludedGlobs = 
strings.Split(*gitScanExcludeGlobs, ",") - } - cfg := sources.GitConfig{ - RepoPath: repoPath, - HeadRef: *gitScanBranch, - BaseRef: *gitScanSinceCommit, - MaxDepth: *gitScanMaxDepth, - Bare: *gitScanBare, - Filter: filter, - ExcludeGlobs: excludedGlobs, + URI: *gitScanURI, + IncludePathsFile: *gitScanIncludePaths, + ExcludePathsFile: *gitScanExcludePaths, + HeadRef: *gitScanBranch, + BaseRef: *gitScanSinceCommit, + MaxDepth: *gitScanMaxDepth, + Bare: *gitScanBare, + ExcludeGlobs: *gitScanExcludeGlobs, } if err = e.ScanGit(ctx, cfg); err != nil { logFatal(err, "Failed to scan Git.") diff --git a/pkg/engine/git.go b/pkg/engine/git.go index cf0c8b4d35d6..f484087a16ed 100644 --- a/pkg/engine/git.go +++ b/pkg/engine/git.go @@ -3,7 +3,6 @@ package engine import ( "runtime" - gogit "github.com/go-git/go-git/v5" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -15,35 +14,15 @@ import ( // ScanGit scans any git source. func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) error { - logOptions := &gogit.LogOptions{} - opts := []git.ScanOption{ - git.ScanOptionFilter(c.Filter), - git.ScanOptionLogOptions(logOptions), - } - - if c.MaxDepth != 0 { - opts = append(opts, git.ScanOptionMaxDepth(int64(c.MaxDepth))) - } - if c.BaseRef != "" { - opts = append(opts, git.ScanOptionBaseHash(c.BaseRef)) - } - if c.HeadRef != "" { - opts = append(opts, git.ScanOptionHeadCommit(c.HeadRef)) - } - if c.ExcludeGlobs != nil { - opts = append(opts, git.ScanOptionExcludeGlobs(c.ExcludeGlobs)) - } - if c.Bare { - opts = append(opts, git.ScanOptionBare(c.Bare)) - } - scanOptions := git.NewScanOptions(opts...) - connection := &sourcespb.Git{ - // Using Directories here allows us to not pass any - // authentication. Also by this point, the c.RepoPath should - // still have been prepared and downloaded to a temporary - // directory if it was a URL. 
- Directories: []string{c.RepoPath}, + Head: c.HeadRef, + Base: c.BaseRef, + Bare: c.Bare, + Uri: c.URI, + ExcludeGlobs: c.ExcludeGlobs, + IncludePathsFile: c.IncludePathsFile, + ExcludePathsFile: c.ExcludePathsFile, + MaxDepth: int64(c.MaxDepth), } var conn anypb.Any if err := anypb.MarshalFrom(&conn, connection, proto.MarshalOptions{}); err != nil { @@ -58,10 +37,6 @@ func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) error { if err := gitSource.Init(ctx, sourceName, jobID, sourceID, true, &conn, runtime.NumCPU()); err != nil { return err } - gitSource.WithScanOptions(scanOptions) - // Don't try to clean up the provided directory. That's handled by the - // caller of ScanGit. - gitSource.WithPreserveTempDirs(true) _, err := e.sourceManager.Run(ctx, sourceName, gitSource) return err diff --git a/pkg/engine/git_test.go b/pkg/engine/git_test.go index a44e7f797b8b..e6cbaaeac5e8 100644 --- a/pkg/engine/git_test.go +++ b/pkg/engine/git_test.go @@ -7,7 +7,6 @@ import ( "github.com/stretchr/testify/assert" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/decoders" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -45,7 +44,6 @@ func TestGitEngine(t *testing.T) { branch string base string maxDepth int - filter *common.Filter } for tName, tTest := range map[string]testProfile{ "all_secrets": { @@ -55,14 +53,12 @@ func TestGitEngine(t *testing.T) { "8afb0ecd4998b1179e428db5ebbcdc8221214432": {"369963c1434c377428ca8531fbc46c0c43d037a0", 3, false}, "27fbead3bf883cdb7de9d7825ed401f28f9398f1": {"ffc7e0f9400fb6300167009e42d2f842cd7956e2", 7, false}, }, - filter: common.FilterEmpty(), }, "base_commit": { expected: map[string]expResult{ "70001020fab32b1fcf2f1f0e5c66424eae649826": {"AKIAXYZDQCEN4B6JSJQI", 2, true}, }, - filter: common.FilterEmpty(), - base: "2f251b8c1e72135a375b659951097ec7749d4af9", + base: 
"2f251b8c1e72135a375b659951097ec7749d4af9", }, } { t.Run(tName, func(t *testing.T) { @@ -76,11 +72,10 @@ func TestGitEngine(t *testing.T) { assert.Nil(t, err) cfg := sources.GitConfig{ - RepoPath: path, + URI: path, HeadRef: tTest.branch, BaseRef: tTest.base, MaxDepth: tTest.maxDepth, - Filter: tTest.filter, } if err := e.ScanGit(ctx, cfg); err != nil { return @@ -141,10 +136,7 @@ func BenchmarkGitEngine(b *testing.B) { for i := 0; i < b.N; i++ { // TODO: this is measuring the time it takes to initialize the source // and not to do the full scan - cfg := sources.GitConfig{ - RepoPath: path, - Filter: common.FilterEmpty(), - } + cfg := sources.GitConfig{URI: path} if err := e.ScanGit(ctx, cfg); err != nil { return } diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index 8b04fe392abd..0c1e2b40b8b5 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -15,6 +15,7 @@ import ( "github.com/go-errors/errors" "github.com/go-git/go-git/v5" + gogit "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" "github.com/google/go-github/v42/github" @@ -25,6 +26,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" "github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/gitparse" "github.com/trufflesecurity/trufflehog/v3/pkg/handlers" @@ -45,9 +47,6 @@ type Source struct { sources.Progress conn *sourcespb.Git scanOptions *ScanOptions - // Kludge to preserve engine.ScanGit functionality which doesn't expect - // the scanning to clean up the directory. - preserveTempDirs bool } type Git struct { @@ -100,19 +99,11 @@ func (s *Source) JobID() sources.JobID { return s.jobId } -// WithScanOptions sets the scan options. -func (s *Source) WithScanOptions(scanOptions *ScanOptions) { +// withScanOptions sets the scan options. 
+func (s *Source) withScanOptions(scanOptions *ScanOptions) { s.scanOptions = scanOptions } -// WithPreserveTempDirs sets whether to preserve temp directories when scanning -// the provided list of s.conn.Directories. NOTE: This is *only* for -// s.conn.Directories, not all temp directories created. This is also a kludge -// and should be refactored away. -func (s *Source) WithPreserveTempDirs(preserve bool) { - s.preserveTempDirs = preserve -} - // Init returns an initialized GitHub source. func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error { s.name = name @@ -128,14 +119,45 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so return errors.WrapPrefix(err, "error unmarshalling connection", 0) } + if uri := conn.GetUri(); uri != "" { + repoPath, _, err := prepareRepoSinceCommit(aCtx, uri, conn.GetBase()) + if err != nil || repoPath == "" { + return fmt.Errorf("error preparing repo: %w", err) + } + conn.Directories = append(conn.Directories, repoPath) + } + + filter, err := common.FilterFromFiles(conn.IncludePathsFile, conn.ExcludePathsFile) + if err != nil { + return fmt.Errorf("error creating filter: %w", err) + } + opts := []ScanOption{ScanOptionFilter(filter), ScanOptionLogOptions(new(gogit.LogOptions))} + + if depth := conn.GetMaxDepth(); depth != 0 { + opts = append(opts, ScanOptionMaxDepth(depth)) + } + if base := conn.GetBase(); base != "" { + opts = append(opts, ScanOptionBaseHash(base)) + } + if head := conn.GetHead(); head != "" { + opts = append(opts, ScanOptionHeadCommit(head)) + } + if globs := conn.GetExcludeGlobs(); globs != "" { + excludedGlobs := strings.Split(globs, ",") + opts = append(opts, ScanOptionExcludeGlobs(excludedGlobs)) + } + if isBare := conn.GetBare(); isBare { + opts = append(opts, ScanOptionBare(isBare)) + } + s.withScanOptions(NewScanOptions(opts...)) + s.conn = &conn if concurrency == 0 { 
concurrency = runtime.NumCPU() } - err := GitCmdCheck() - if err != nil { + if err = GitCmdCheck(); err != nil { return err } @@ -261,7 +283,7 @@ func (s *Source) scanDir(ctx context.Context, gitDir string, reporter sources.Ch } err = func() error { - if !s.preserveTempDirs && strings.HasPrefix(gitDir, filepath.Join(os.TempDir(), "trufflehog")) { + if strings.HasPrefix(gitDir, filepath.Join(os.TempDir(), "trufflehog")) { defer os.RemoveAll(gitDir) } @@ -798,8 +820,8 @@ func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, e return nil, fmt.Errorf("no base refs succeeded for base: %q", base) } -// PrepareRepoSinceCommit clones a repo starting at the given commitHash and returns the cloned repo path. -func PrepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (string, bool, error) { +// prepareRepoSinceCommit clones a repo starting at the given commitHash and returns the cloned repo path. +func prepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (string, bool, error) { if commitHash == "" { return PrepareRepo(ctx, uriString) } diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index f113b41ac09a..70bc2f9fe538 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -154,21 +154,23 @@ type GCSConfig struct { // GitConfig defines the optional configuration for a git source. type GitConfig struct { - // RepoPath is the path to the repository to scan. - RepoPath, // HeadRef is the head reference to use to scan from. - HeadRef, + HeadRef string // BaseRef is the base reference to use to scan from. BaseRef string // MaxDepth is the maximum depth to scan the source. MaxDepth int // Bare is an indicator to handle bare repositories properly. Bare bool - // Filter is the filter to use to scan the source. - Filter *common.Filter - // ExcludeGlobs is a list of globs to exclude from the scan. + // URI is the URI of the repository to scan. file://, http://, https:// and ssh:// are supported. 
+ URI string + // IncludePathsFile is the path to a file containing a list of regexps to include in the scan. + IncludePathsFile string + // ExcludePathsFile is the path to a file containing a list of regexps to exclude from the scan. + ExcludePathsFile string + // ExcludeGlobs is a list of comma-separated globs to exclude from the scan. // This differs from the Filter exclusions as ExcludeGlobs is applied at the `git log -p` level - ExcludeGlobs []string + ExcludeGlobs string } // GithubConfig defines the optional configuration for a github source.