Skip to content

Commit

Permalink
Add support for storing handler metadata
Browse files Browse the repository at this point in the history
Currently it's an unstructured map[string]string that gets copied into
a Result.ExtraData after detection, but we might want to revisit this
decision when we get more handlers.
  • Loading branch information
mcastorina committed Jul 26, 2023
1 parent f48a635 commit c659796
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 24 deletions.
9 changes: 8 additions & 1 deletion pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,15 @@ func (e *Engine) detectorWorker(ctx context.Context) {
continue
}
result.DecoderType = decoderType
if result.ExtraData == nil && chunk.HandleMetadata != nil {
result.ExtraData = chunk.HandleMetadata
} else {
for k, v := range chunk.HandleMetadata {
// TODO: Check key collisions.
result.ExtraData[k] = v
}
}
chunkResults = append(chunkResults, detectors.CopyMetadata(resultChunk, result))

}
if len(results) > 0 {
elapsed := time.Since(start)
Expand Down
35 changes: 20 additions & 15 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ import (
"errors"
"fmt"
"io"
"strings"
"time"

"github.com/mholt/archiver/v4"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

type ctxKey int
Expand Down Expand Up @@ -52,14 +54,14 @@ func SetArchiveMaxTimeout(timeout time.Duration) {
}

// FromFile extracts the files from an archive.
func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([]byte) {
archiveChan := make(chan ([]byte), 512)
func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ChunkOpt {
archiveChan := make(chan ChunkOpt, 512)
go func() {
ctx, cancel := context.WithTimeout(originalCtx, maxTimeout)
logger := logContext.AddLogger(ctx).Logger()
defer cancel()
defer close(archiveChan)
err := d.openArchive(ctx, 0, data, archiveChan)
err := d.openArchive(ctx, nil, data, archiveChan)
if err != nil {
if errors.Is(err, archiver.ErrNoMatch) {
return
Expand All @@ -71,18 +73,21 @@ func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([]
}

// openArchive takes a reader and extracts the contents up to the maximum depth.
func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader, archiveChan chan ([]byte)) error {
if depth >= maxDepth {
func (d *Archive) openArchive(ctx context.Context, depth []string, reader io.Reader, archiveChan chan ChunkOpt) error {
if len(depth) >= maxDepth {
return fmt.Errorf("max archive depth reached")
}
format, reader, err := archiver.Identify("", reader)
if err != nil {
if errors.Is(err, archiver.ErrNoMatch) && depth > 0 {
if errors.Is(err, archiver.ErrNoMatch) && len(depth) > 0 {
chunkSize := 10 * 1024
for {
chunk := make([]byte, chunkSize)
n, _ := reader.Read(chunk)
archiveChan <- chunk
data := make([]byte, chunkSize)
n, _ := reader.Read(data)
archiveChan <- func(chunk *sources.Chunk) {
chunk.Data = data
chunk.HandleMetadata = map[string]string{"archive path": strings.Join(depth, " > ")}
}
if n < chunkSize {
break
}
Expand All @@ -102,9 +107,9 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader,
return err
}
newReader := bytes.NewReader(fileBytes)
return d.openArchive(ctx, depth+1, newReader, archiveChan)
return d.openArchive(ctx, append(depth, "decompressed"), newReader, archiveChan)
case archiver.Extractor:
err := archive.Extract(context.WithValue(ctx, depthKey, depth+1), reader, nil, d.extractorHandler(archiveChan))
err := archive.Extract(context.WithValue(ctx, depthKey, depth), reader, nil, d.extractorHandler(archiveChan))
if err != nil {
return err
}
Expand All @@ -129,12 +134,12 @@ func (d *Archive) IsFiletype(ctx context.Context, reader io.Reader) (io.Reader,
}

// extractorHandler is applied to each file in an archiver.Extractor file.
func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Context, archiver.File) error {
func (d *Archive) extractorHandler(archiveChan chan ChunkOpt) func(context.Context, archiver.File) error {
return func(ctx context.Context, f archiver.File) error {
logger := logContext.AddLogger(ctx).Logger()
logger.V(5).Info("Handling extracted file.", "filename", f.Name())
depth := 0
if ctxDepth, ok := ctx.Value(depthKey).(int); ok {
var depth []string
if ctxDepth, ok := ctx.Value(depthKey).([]string); ok {
depth = ctxDepth
}

Expand All @@ -148,7 +153,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte
}
fileContent := bytes.NewReader(fileBytes)

err = d.openArchive(ctx, depth, fileContent, archiveChan)
err = d.openArchive(ctx, append(depth, f.Name()), fileContent, archiveChan)
if err != nil {
return err
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ func DefaultHandlers() []Handler {
}
}

type ChunkOpt func(*sources.Chunk)

type Handler interface {
FromFile(context.Context, io.Reader) chan ([]byte)
FromFile(context.Context, io.Reader) chan ChunkOpt
IsFiletype(context.Context, io.Reader) (io.Reader, bool)
New()
}
Expand All @@ -38,13 +40,13 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
handlerChan := handler.FromFile(ctx, file)
for {
select {
case data, open := <-handlerChan:
case opt, open := <-handlerChan:
if !open {
// We finished reading everything from handlerChan.
return true
}
chunk := *chunkSkel
chunk.Data = data
opt(&chunk)
// Send data on chunksChan.
select {
case chunksChan <- &chunk:
Expand Down
8 changes: 4 additions & 4 deletions pkg/output/plain.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ func structToMap(obj interface{}) (m map[string]map[string]interface{}, err erro
}

type outputFormat struct {
DetectorType,
DecoderType string
Verified bool
Raw string
DetectorType string
DecoderType string
Verified bool
Raw string
*source_metadatapb.MetaData
}
3 changes: 2 additions & 1 deletion pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ type Chunk struct {
SourceType sourcespb.SourceType
// SourceMetadata holds the context of where the Chunk was found.
SourceMetadata *source_metadatapb.MetaData

// HandleMetadata holds the metadata from a handler if one was used.
HandleMetadata map[string]string
// Data is the data to decode and scan.
Data []byte
// Verify specifies whether any secrets in the Chunk should be verified.
Expand Down

0 comments on commit c659796

Please sign in to comment.