Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fixup] - correctly use the buffered file writer #2373

Merged
merged 10 commits into from
Feb 5, 2024
47 changes: 32 additions & 15 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
)

const (
Expand Down Expand Up @@ -100,21 +101,26 @@ func (b *buffer) String() (string, error) { return b.Buffer.String(), nil }
// The use of contentWriter enables the management of diff data either in memory or on disk,
// based on its size, optimizing resource usage and performance.
type Diff struct {
PathB string
LineStart int
PathB string
LineStart int
IsBinary bool

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was bugging me a bit, so I just reordered the struct to group the fields a bit differently.

contentWriter contentWriter
IsBinary bool
}

// diffOption is a functional option: a function that mutates a Diff in place.
// NewDiff applies each supplied diffOption to the Diff it constructs.
type diffOption func(*Diff)

// withPathB returns a diffOption that assigns pathB to the Diff's PathB field.
func withPathB(pathB string) diffOption {
	return func(d *Diff) {
		d.PathB = pathB
	}
}

// withCustomContentWriter returns a diffOption that sets the Diff's
// contentWriter to cr, so the Diff's content is written through that writer.
func withCustomContentWriter(cr contentWriter) diffOption {
	return func(d *Diff) {
		d.contentWriter = cr
	}
}

// NewDiff creates a new Diff and applies the given options to it.
func NewDiff(opts ...diffOption) *Diff {
diff := new(Diff)
diff.contentWriter = newBuffer()
for _, opt := range opts {
opt(diff)
}
Expand Down Expand Up @@ -203,7 +209,8 @@ type Parser struct {
maxDiffSize int
maxCommitSize int
dateFormat string
contentWriter contentWriter

useCustomContentWriter bool
}

type ParseState int
Expand Down Expand Up @@ -250,11 +257,9 @@ func (state ParseState) String() string {
}[state]
}

// WithContentWriter sets the ContentWriter for the Parser.
func WithContentWriter(writer contentWriter) Option {
return func(parser *Parser) {
parser.contentWriter = writer
}
// UseCustomContentWriter returns an Option that enables the Parser's
// useCustomContentWriter flag. When the flag is set, FromReader creates each
// Diff with a buffered file writer instead of the default in-memory buffer.
func UseCustomContentWriter() Option {
	return func(p *Parser) {
		p.useCustomContentWriter = true
	}
}

// WithMaxDiffSize sets maxDiffSize option. Diffs larger than maxDiffSize will
Expand Down Expand Up @@ -283,7 +288,6 @@ func NewParser(options ...Option) *Parser {
dateFormat: defaultDateFormat,
maxDiffSize: defaultMaxDiffSize,
maxCommitSize: defaultMaxCommitSize,
contentWriter: newBuffer(),
}
for _, option := range options {
option(parser)
Expand Down Expand Up @@ -387,7 +391,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
totalLogSize int
)
var latestState = Initial
currentDiff := NewDiff()

diff := func(opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(newBuffer()))
return NewDiff(opts...)
}
if c.useCustomContentWriter {
diff = func(opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(bufferedfilewriter.New()))
return NewDiff(opts...)
}
}
currentDiff := diff()

defer common.RecoverWithExit(ctx)
defer close(commitChan)
Expand Down Expand Up @@ -430,7 +445,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
totalLogSize += currentCommit.Size
}
// Create a new currentDiff and currentCommit
currentDiff = NewDiff()
currentDiff = diff()
// currentDiff = NewDiff(withCustomContentWriter(c.contentWriter()))
currentCommit = &Commit{Message: strings.Builder{}}
// Check that the commit line contains a hash and set it.
if len(line) >= 47 {
Expand Down Expand Up @@ -498,7 +514,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
currentCommit.Message.WriteString(oldCommit.Message.String())
}
}
currentDiff = NewDiff()
currentDiff = diff()
// currentDiff = NewDiff(withCustomContentWriter(c.contentWriter()))
case isModeLine(isStaged, latestState, line):
latestState = ModeLine
// NoOp
Expand Down Expand Up @@ -538,7 +555,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch
}
currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff)
}
currentDiff = NewDiff(withPathB(currentDiff.PathB))
currentDiff = diff(withPathB(currentDiff.PathB))

words := bytes.Split(line, []byte(" "))
if len(words) >= 3 {
Expand Down
5 changes: 2 additions & 3 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer"
)

const SourceType = sourcespb.SourceType_SOURCE_TYPE_GIT
Expand Down Expand Up @@ -99,7 +98,7 @@ type Config struct {
func NewGit(config *Config) *Git {
var parser *gitparse.Parser
if config.UseCustomContentWriter {
parser = gitparse.NewParser(gitparse.WithContentWriter(bufferedfilewriter.New()))
parser = gitparse.NewParser(gitparse.UseCustomContentWriter())
} else {
parser = gitparse.NewParser()
}
Expand Down Expand Up @@ -522,7 +521,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
repoCtx = context.WithValue(ctx, "repo", path)
}

commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
commitChan, err := s.parser.RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare)
if err != nil {
return err
}
Expand Down
Loading