Skip to content

Commit

Permalink
move all Git setup into Init method (#2105)
Browse files Browse the repository at this point in the history
* add proto fields for git

* add uri to proto

* move all git setup into Init method

* fix logic for when to use repoPath
  • Loading branch information
ahrav authored Nov 16, 2023
1 parent fd33198 commit d334b30
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 96 deletions.
36 changes: 9 additions & 27 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/output"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui"
"github.com/trufflesecurity/trufflehog/v3/pkg/updater"
"github.com/trufflesecurity/trufflehog/v3/pkg/version"
Expand Down Expand Up @@ -157,7 +156,7 @@ func init() {

cli.Version("trufflehog " + version.BuildVersion)

//Support -h for help
// Support -h for help
cli.HelpFlag.Short('h')

if len(os.Args) <= 1 && isatty.IsTerminal(os.Stdout.Fd()) {
Expand Down Expand Up @@ -416,34 +415,17 @@ func run(state overseer.State) {
logFatal(err, "error initializing engine")
}

var repoPath string
var remote bool
switch cmd {
case gitScan.FullCommand():
filter, err := common.FilterFromFiles(*gitScanIncludePaths, *gitScanExcludePaths)
if err != nil {
logFatal(err, "could not create filter")
}
repoPath, remote, err = git.PrepareRepoSinceCommit(ctx, *gitScanURI, *gitScanSinceCommit)
if err != nil || repoPath == "" {
logFatal(err, "error preparing git repo for scanning")
}
if remote {
defer os.RemoveAll(repoPath)
}
excludedGlobs := []string{}
if *gitScanExcludeGlobs != "" {
excludedGlobs = strings.Split(*gitScanExcludeGlobs, ",")
}

cfg := sources.GitConfig{
RepoPath: repoPath,
HeadRef: *gitScanBranch,
BaseRef: *gitScanSinceCommit,
MaxDepth: *gitScanMaxDepth,
Bare: *gitScanBare,
Filter: filter,
ExcludeGlobs: excludedGlobs,
URI: *gitScanURI,
IncludePathsFile: *gitScanIncludePaths,
ExcludePathsFile: *gitScanExcludePaths,
HeadRef: *gitScanBranch,
BaseRef: *gitScanSinceCommit,
MaxDepth: *gitScanMaxDepth,
Bare: *gitScanBare,
ExcludeGlobs: *gitScanExcludeGlobs,
}
if err = e.ScanGit(ctx, cfg); err != nil {
logFatal(err, "Failed to scan Git.")
Expand Down
41 changes: 8 additions & 33 deletions pkg/engine/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package engine
import (
"runtime"

gogit "github.com/go-git/go-git/v5"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"

Expand All @@ -15,35 +14,15 @@ import (

// ScanGit scans any git source.
func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) error {
logOptions := &gogit.LogOptions{}
opts := []git.ScanOption{
git.ScanOptionFilter(c.Filter),
git.ScanOptionLogOptions(logOptions),
}

if c.MaxDepth != 0 {
opts = append(opts, git.ScanOptionMaxDepth(int64(c.MaxDepth)))
}
if c.BaseRef != "" {
opts = append(opts, git.ScanOptionBaseHash(c.BaseRef))
}
if c.HeadRef != "" {
opts = append(opts, git.ScanOptionHeadCommit(c.HeadRef))
}
if c.ExcludeGlobs != nil {
opts = append(opts, git.ScanOptionExcludeGlobs(c.ExcludeGlobs))
}
if c.Bare {
opts = append(opts, git.ScanOptionBare(c.Bare))
}
scanOptions := git.NewScanOptions(opts...)

connection := &sourcespb.Git{
// Using Directories here allows us to not pass any
// authentication. Also, by this point, c.RepoPath should
// already have been prepared and downloaded to a temporary
// directory if it was a URL.
Directories: []string{c.RepoPath},
Head: c.HeadRef,
Base: c.BaseRef,
Bare: c.Bare,
Uri: c.URI,
ExcludeGlobs: c.ExcludeGlobs,
IncludePathsFile: c.IncludePathsFile,
ExcludePathsFile: c.ExcludePathsFile,
MaxDepth: int64(c.MaxDepth),
}
var conn anypb.Any
if err := anypb.MarshalFrom(&conn, connection, proto.MarshalOptions{}); err != nil {
Expand All @@ -58,10 +37,6 @@ func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) error {
if err := gitSource.Init(ctx, sourceName, jobID, sourceID, true, &conn, runtime.NumCPU()); err != nil {
return err
}
gitSource.WithScanOptions(scanOptions)
// Don't try to clean up the provided directory. That's handled by the
// caller of ScanGit.
gitSource.WithPreserveTempDirs(true)

_, err := e.sourceManager.Run(ctx, sourceName, gitSource)
return err
Expand Down
14 changes: 3 additions & 11 deletions pkg/engine/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (

"github.com/stretchr/testify/assert"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
Expand Down Expand Up @@ -45,7 +44,6 @@ func TestGitEngine(t *testing.T) {
branch string
base string
maxDepth int
filter *common.Filter
}
for tName, tTest := range map[string]testProfile{
"all_secrets": {
Expand All @@ -55,14 +53,12 @@ func TestGitEngine(t *testing.T) {
"8afb0ecd4998b1179e428db5ebbcdc8221214432": {"369963c1434c377428ca8531fbc46c0c43d037a0", 3, false},
"27fbead3bf883cdb7de9d7825ed401f28f9398f1": {"ffc7e0f9400fb6300167009e42d2f842cd7956e2", 7, false},
},
filter: common.FilterEmpty(),
},
"base_commit": {
expected: map[string]expResult{
"70001020fab32b1fcf2f1f0e5c66424eae649826": {"AKIAXYZDQCEN4B6JSJQI", 2, true},
},
filter: common.FilterEmpty(),
base: "2f251b8c1e72135a375b659951097ec7749d4af9",
base: "2f251b8c1e72135a375b659951097ec7749d4af9",
},
} {
t.Run(tName, func(t *testing.T) {
Expand All @@ -76,11 +72,10 @@ func TestGitEngine(t *testing.T) {
assert.Nil(t, err)

cfg := sources.GitConfig{
RepoPath: path,
URI: path,
HeadRef: tTest.branch,
BaseRef: tTest.base,
MaxDepth: tTest.maxDepth,
Filter: tTest.filter,
}
if err := e.ScanGit(ctx, cfg); err != nil {
return
Expand Down Expand Up @@ -141,10 +136,7 @@ func BenchmarkGitEngine(b *testing.B) {
for i := 0; i < b.N; i++ {
// TODO: this is measuring the time it takes to initialize the source
// and not to do the full scan
cfg := sources.GitConfig{
RepoPath: path,
Filter: common.FilterEmpty(),
}
cfg := sources.GitConfig{URI: path}
if err := e.ScanGit(ctx, cfg); err != nil {
return
}
Expand Down
58 changes: 40 additions & 18 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/go-errors/errors"
"github.com/go-git/go-git/v5"
gogit "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/google/go-github/v42/github"
Expand All @@ -25,6 +26,7 @@ import (
"google.golang.org/protobuf/types/known/anypb"

"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/gitparse"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
Expand All @@ -45,9 +47,6 @@ type Source struct {
sources.Progress
conn *sourcespb.Git
scanOptions *ScanOptions
// Kludge to preserve engine.ScanGit functionality which doesn't expect
// the scanning to clean up the directory.
preserveTempDirs bool
}

type Git struct {
Expand Down Expand Up @@ -100,19 +99,11 @@ func (s *Source) JobID() sources.JobID {
return s.jobId
}

// WithScanOptions sets the scan options.
func (s *Source) WithScanOptions(scanOptions *ScanOptions) {
// withScanOptions sets the scan options.
func (s *Source) withScanOptions(scanOptions *ScanOptions) {
s.scanOptions = scanOptions
}

// WithPreserveTempDirs sets whether to preserve temp directories when scanning
// the provided list of s.conn.Directories. NOTE: This is *only* for
// s.conn.Directories, not all temp directories created. This is also a kludge
// and should be refactored away.
func (s *Source) WithPreserveTempDirs(preserve bool) {
s.preserveTempDirs = preserve
}

// Init initializes the Git source from the supplied connection configuration.
func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
s.name = name
Expand All @@ -128,14 +119,45 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so
return errors.WrapPrefix(err, "error unmarshalling connection", 0)
}

if uri := conn.GetUri(); uri != "" {
repoPath, _, err := prepareRepoSinceCommit(aCtx, uri, conn.GetBase())
if err != nil || repoPath == "" {
return fmt.Errorf("error preparing repo: %w", err)
}
conn.Directories = append(conn.Directories, repoPath)
}

filter, err := common.FilterFromFiles(conn.IncludePathsFile, conn.ExcludePathsFile)
if err != nil {
return fmt.Errorf("error creating filter: %w", err)
}
opts := []ScanOption{ScanOptionFilter(filter), ScanOptionLogOptions(new(gogit.LogOptions))}

if depth := conn.GetMaxDepth(); depth != 0 {
opts = append(opts, ScanOptionMaxDepth(depth))
}
if base := conn.GetBase(); base != "" {
opts = append(opts, ScanOptionBaseHash(base))
}
if head := conn.GetHead(); head != "" {
opts = append(opts, ScanOptionHeadCommit(head))
}
if globs := conn.GetExcludeGlobs(); globs != "" {
excludedGlobs := strings.Split(globs, ",")
opts = append(opts, ScanOptionExcludeGlobs(excludedGlobs))
}
if isBare := conn.GetBare(); isBare {
opts = append(opts, ScanOptionBare(isBare))
}
s.withScanOptions(NewScanOptions(opts...))

s.conn = &conn

if concurrency == 0 {
concurrency = runtime.NumCPU()
}

err := GitCmdCheck()
if err != nil {
if err = GitCmdCheck(); err != nil {
return err
}

Expand Down Expand Up @@ -261,7 +283,7 @@ func (s *Source) scanDir(ctx context.Context, gitDir string, reporter sources.Ch
}

err = func() error {
if !s.preserveTempDirs && strings.HasPrefix(gitDir, filepath.Join(os.TempDir(), "trufflehog")) {
if strings.HasPrefix(gitDir, filepath.Join(os.TempDir(), "trufflehog")) {
defer os.RemoveAll(gitDir)
}

Expand Down Expand Up @@ -798,8 +820,8 @@ func TryAdditionalBaseRefs(repo *git.Repository, base string) (*plumbing.Hash, e
return nil, fmt.Errorf("no base refs succeeded for base: %q", base)
}

// PrepareRepoSinceCommit clones a repo starting at the given commitHash and returns the cloned repo path.
func PrepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (string, bool, error) {
// prepareRepoSinceCommit clones a repo starting at the given commitHash and returns the cloned repo path.
func prepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (string, bool, error) {
if commitHash == "" {
return PrepareRepo(ctx, uriString)
}
Expand Down
16 changes: 9 additions & 7 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,21 +154,23 @@ type GCSConfig struct {

// GitConfig defines the optional configuration for a git source.
type GitConfig struct {
// RepoPath is the path to the repository to scan.
RepoPath,
// HeadRef is the head reference to use to scan from.
HeadRef,
HeadRef string
// BaseRef is the base reference to use to scan from.
BaseRef string
// MaxDepth is the maximum depth to scan the source.
MaxDepth int
// Bare is an indicator to handle bare repositories properly.
Bare bool
// Filter is the filter to use to scan the source.
Filter *common.Filter
// ExcludeGlobs is a list of globs to exclude from the scan.
// URI is the URI of the repository to scan. file://, http://, https:// and ssh:// are supported.
URI string
// IncludePathsFile is the path to a file containing a list of regexps to include in the scan.
IncludePathsFile string
// ExcludePathsFile is the path to a file containing a list of regexps to exclude from the scan.
ExcludePathsFile string
// ExcludeGlobs is a comma-separated list of globs to exclude from the scan.
// This differs from the path-filter exclusions (ExcludePathsFile), as ExcludeGlobs is applied at the `git log -p` level.
ExcludeGlobs []string
ExcludeGlobs string
}

// GithubConfig defines the optional configuration for a github source.
Expand Down

0 comments on commit d334b30

Please sign in to comment.