From c6597969eb54613c1176e5432a2aba0d06b35511 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Wed, 26 Jul 2023 00:24:11 -0500 Subject: [PATCH] Add support for storing handler metadata Currently it's an unstructured map[string]string that gets copied into a Result.ExtraData after detection, but we might want to revisit this decision when we get more handlers. --- pkg/engine/engine.go | 9 ++++++++- pkg/handlers/archive.go | 35 ++++++++++++++++++++--------------- pkg/handlers/handlers.go | 8 +++++--- pkg/output/plain.go | 8 ++++---- pkg/sources/sources.go | 3 ++- 5 files changed, 39 insertions(+), 24 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index b7a03ce596de..cc80678e85fc 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -349,8 +349,15 @@ func (e *Engine) detectorWorker(ctx context.Context) { continue } result.DecoderType = decoderType + if result.ExtraData == nil && chunk.HandleMetadata != nil { + result.ExtraData = chunk.HandleMetadata + } else { + for k, v := range chunk.HandleMetadata { + // TODO: Check key collisions. + result.ExtraData[k] = v + } + } chunkResults = append(chunkResults, detectors.CopyMetadata(resultChunk, result)) - } if len(results) > 0 { elapsed := time.Since(start) diff --git a/pkg/handlers/archive.go b/pkg/handlers/archive.go index 8ad25cb4abeb..9502758cbfb3 100644 --- a/pkg/handlers/archive.go +++ b/pkg/handlers/archive.go @@ -6,12 +6,14 @@ import ( "errors" "fmt" "io" + "strings" "time" "github.com/mholt/archiver/v4" "github.com/trufflesecurity/trufflehog/v3/pkg/common" logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" ) type ctxKey int @@ -52,14 +54,14 @@ func SetArchiveMaxTimeout(timeout time.Duration) { } // FromFile extracts the files from an archive. 
-func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([]byte) { - archiveChan := make(chan ([]byte), 512) +func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ChunkOpt { + archiveChan := make(chan ChunkOpt, 512) go func() { ctx, cancel := context.WithTimeout(originalCtx, maxTimeout) logger := logContext.AddLogger(ctx).Logger() defer cancel() defer close(archiveChan) - err := d.openArchive(ctx, 0, data, archiveChan) + err := d.openArchive(ctx, nil, data, archiveChan) if err != nil { if errors.Is(err, archiver.ErrNoMatch) { return @@ -71,18 +73,21 @@ func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([] } // openArchive takes a reader and extracts the contents up to the maximum depth. -func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader, archiveChan chan ([]byte)) error { - if depth >= maxDepth { +func (d *Archive) openArchive(ctx context.Context, depth []string, reader io.Reader, archiveChan chan ChunkOpt) error { + if len(depth) >= maxDepth { return fmt.Errorf("max archive depth reached") } format, reader, err := archiver.Identify("", reader) if err != nil { - if errors.Is(err, archiver.ErrNoMatch) && depth > 0 { + if errors.Is(err, archiver.ErrNoMatch) && len(depth) > 0 { chunkSize := 10 * 1024 for { - chunk := make([]byte, chunkSize) - n, _ := reader.Read(chunk) - archiveChan <- chunk + data := make([]byte, chunkSize) + n, _ := reader.Read(data) + archiveChan <- func(chunk *sources.Chunk) { + chunk.Data = data + chunk.HandleMetadata = map[string]string{"archive path": strings.Join(depth, " > ")} + } if n < chunkSize { break } @@ -102,9 +107,9 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader, return err } newReader := bytes.NewReader(fileBytes) - return d.openArchive(ctx, depth+1, newReader, archiveChan) + return d.openArchive(ctx, append(depth, "decompressed"), newReader, archiveChan) case archiver.Extractor: - err := 
archive.Extract(context.WithValue(ctx, depthKey, depth+1), reader, nil, d.extractorHandler(archiveChan)) + err := archive.Extract(context.WithValue(ctx, depthKey, depth), reader, nil, d.extractorHandler(archiveChan)) if err != nil { return err } @@ -129,12 +134,12 @@ func (d *Archive) IsFiletype(ctx context.Context, reader io.Reader) (io.Reader, } // extractorHandler is applied to each file in an archiver.Extractor file. -func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Context, archiver.File) error { +func (d *Archive) extractorHandler(archiveChan chan ChunkOpt) func(context.Context, archiver.File) error { return func(ctx context.Context, f archiver.File) error { logger := logContext.AddLogger(ctx).Logger() logger.V(5).Info("Handling extracted file.", "filename", f.Name()) - depth := 0 - if ctxDepth, ok := ctx.Value(depthKey).(int); ok { + var depth []string + if ctxDepth, ok := ctx.Value(depthKey).([]string); ok { depth = ctxDepth } @@ -148,7 +153,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte } fileContent := bytes.NewReader(fileBytes) - err = d.openArchive(ctx, depth, fileContent, archiveChan) + err = d.openArchive(ctx, append(depth, f.Name()), fileContent, archiveChan) if err != nil { return err } diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go index 84eeadcd0331..ee1dd85d0bcf 100644 --- a/pkg/handlers/handlers.go +++ b/pkg/handlers/handlers.go @@ -13,8 +13,10 @@ func DefaultHandlers() []Handler { } } +type ChunkOpt func(*sources.Chunk) + type Handler interface { - FromFile(context.Context, io.Reader) chan ([]byte) + FromFile(context.Context, io.Reader) chan ChunkOpt IsFiletype(context.Context, io.Reader) (io.Reader, bool) New() } @@ -38,13 +40,13 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c handlerChan := handler.FromFile(ctx, file) for { select { - case data, open := <-handlerChan: + case opt, open := <-handlerChan: if !open { // We 
finished reading everything from handlerChan. return true } chunk := *chunkSkel - chunk.Data = data + opt(&chunk) // Send data on chunksChan. select { case chunksChan <- &chunk: diff --git a/pkg/output/plain.go b/pkg/output/plain.go index ac2ef1c60729..6d55fa64ebb7 100644 --- a/pkg/output/plain.go +++ b/pkg/output/plain.go @@ -96,9 +96,9 @@ func structToMap(obj interface{}) (m map[string]map[string]interface{}, err erro } type outputFormat struct { - DetectorType, - DecoderType string - Verified bool - Raw string + DetectorType string + DecoderType string + Verified bool + Raw string *source_metadatapb.MetaData } diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index 9d8c85edae04..c22adeb2650b 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -21,7 +21,8 @@ type Chunk struct { SourceType sourcespb.SourceType // SourceMetadata holds the context of where the Chunk was found. SourceMetadata *source_metadatapb.MetaData - + // HandleMetadata holds the metadata from a handler if one was used. + HandleMetadata map[string]string // Data is the data to decode and scan. Data []byte // Verify specifies whether any secrets in the Chunk should be verified.