From 51aadf96e18447d24e2f59834f7f78f61ddf570b Mon Sep 17 00:00:00 2001 From: Joe Leon Date: Thu, 24 Aug 2023 08:46:30 -0400 Subject: [PATCH] added ahrav's comments --- pkg/handlers/archive.go | 47 ++++++----- pkg/handlers/archive_integration_test.go | 101 +++++++++++++++++++++++ pkg/handlers/archive_test.go | 85 ------------------- pkg/handlers/handlers.go | 2 +- pkg/sources/docker/docker.go | 6 +- pkg/sources/sources.go | 4 + 6 files changed, 133 insertions(+), 112 deletions(-) create mode 100644 pkg/handlers/archive_integration_test.go diff --git a/pkg/handlers/archive.go b/pkg/handlers/archive.go index 2c6c1e0d89c6..482befcea4c1 100644 --- a/pkg/handlers/archive.go +++ b/pkg/handlers/archive.go @@ -16,6 +16,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/tarball" "github.com/h2non/filetype" "github.com/mholt/archiver/v4" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" ) @@ -35,12 +36,19 @@ var ( // Ensure the Archive satisfies the interfaces at compile time. var _ SpecializedHandler = (*Archive)(nil) +// tempEnv contains the temporary file and directory information for the archive. type tempEnv struct { tempFile *os.File tempFileName string extractPath string } +// dockerImageDetails contains the details of the Docker image and the temporary environment. +type dockerImageDetails struct { + image v1.Image + temp tempEnv +} + // Archive is a handler for extracting and decompressing archives. type Archive struct { size int @@ -283,20 +291,19 @@ func (a *Archive) HandleSpecialized(ctx logContext.Context, reader io.Reader) (i reader, err = a.extractRpmContent(ctx, reader) case tarMimeType: //check if tar is a docker image - isImg, img, tmpEnv, err := a.isDockerImage(ctx, reader) + isImg, details, err := a.isDockerImage(ctx, reader) if err != nil { return nil, false, err } - if isImg { - // Build dockerReader containing docker image - reader = DockerTarReader{ - Reader: reader, - img: img, - tmpEnv: tmpEnv, - } - } else { + if !isImg { return nil, false, nil } + // Build dockerReader containing docker image + reader = DockerTarReader{ + Reader: reader, + img: details.image, + tmpEnv: details.temp, + } default: return reader, false, nil } @@ -321,8 +328,8 @@ func (a *Archive) extractDebContent(ctx logContext.Context, file io.Reader) (io. if err != nil { return nil, err } - defer os.Remove(tmpEnv.tempFileName) defer os.RemoveAll(tmpEnv.extractPath) + defer os.Remove(tmpEnv.tempFileName) cmd := exec.Command("ar", "x", tmpEnv.tempFile.Name()) cmd.Dir = tmpEnv.extractPath @@ -359,8 +366,8 @@ func (a *Archive) extractRpmContent(ctx logContext.Context, file io.Reader) (io. if err != nil { return nil, err } - defer os.Remove(tmpEnv.tempFileName) defer os.RemoveAll(tmpEnv.extractPath) + defer os.Remove(tmpEnv.tempFileName) // Use rpm2cpio to convert the RPM file to a cpio archive and then extract it using cpio command. cmd := exec.Command("sh", "-c", "rpm2cpio "+tmpEnv.tempFile.Name()+" | cpio -id") @@ -504,22 +511,20 @@ func openDataArchive(extractPath string, dataArchiveName string) (io.ReadCloser, } // isDockerImage checks if a reader object is a docker image. -// returns true if the tar file is a docker image, a tempEnv struct, and err values +// Returns true if the tar file is a docker image, dockerImageDetails, and error. // Caller is responsible for removing temporary files and directories IF the file is a docker image. -// The docker scanner requires a file path to the image, so we must leave the temporary file on disk -// and then remove it after the scan is complete. -func (a *Archive) isDockerImage(ctx context.Context, file io.Reader) (isImg bool, img v1.Image, tmpEnv tempEnv, err error) { - - tmpEnv, err = a.createTempEnv(ctx, file) +// The docker scanner requires a file path to the image, so we must leave the temporary file on disk. +func (a *Archive) isDockerImage(ctx context.Context, file io.Reader) (isImg bool, details dockerImageDetails, err error) { + tmpEnv, err := a.createTempEnv(ctx, file) if err != nil { - return false, nil, tempEnv{}, err + return false, dockerImageDetails{}, err } - img, err = tarball.ImageFromPath(tmpEnv.tempFileName, nil) + img, err := tarball.ImageFromPath(tmpEnv.tempFileName, nil) if err != nil { os.Remove(tmpEnv.tempFileName) os.RemoveAll(tmpEnv.extractPath) - return false, nil, tempEnv{}, err + return false, dockerImageDetails{}, err } - return true, img, tmpEnv, nil + return true, dockerImageDetails{image: img, temp: tmpEnv}, nil } diff --git a/pkg/handlers/archive_integration_test.go b/pkg/handlers/archive_integration_test.go new file mode 100644 index 000000000000..39c1f0fc4f80 --- /dev/null +++ b/pkg/handlers/archive_integration_test.go @@ -0,0 +1,101 @@ +//go:build integration +// +build integration + +package handlers + +import ( + "context" + "os" + "testing" + + diskbufferreader "github.com/bill-rich/disk-buffer-reader" + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/google/go-containerregistry/pkg/v1/tarball" + "github.com/stretchr/testify/assert" + + logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb" + "github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" +) + +func TestFoundKeyInDockerTar(t *testing.T) { + // URI of the Docker image to clone + imageURI := "ghcr.io/joeleonjr/getting-started-app-with-canary-token:main" + + var imageName name.Reference + imageName, err := name.NewTag(imageURI) + if err != nil { + t.Errorf("%s: error creating image name: %s", imageURI, err) + } + + // Pull the image + img, err := remote.Image(imageName) + if err != nil { + t.Errorf("%s: error pulling image: %s", imageURI, err) + } + + tempFile, err := os.CreateTemp("", "archive_test_docker_img.tar") + if err != nil { + t.Errorf("%s: error creating temporary file: %s", imageURI, err) + return + } + + defer os.Remove(tempFile.Name()) // Clean up the temporary file + + // Save the image as a tar file + err = tarball.WriteToFile(tempFile.Name(), imageName, img) + if err != nil { + t.Errorf("%s: error saving image as tar file: %s", imageURI, err) + return + } + + ctx := logContext.AddLogger(context.Background()) + + inputFile, err := os.Open(tempFile.Name()) + if err != nil { + t.Errorf("%s: error opening tar file: %s", imageURI, err) + return + } + defer inputFile.Close() + + reReader, err := diskbufferreader.New(inputFile) + if err != nil { + t.Errorf("%s: error creating re-readable reader: %s", imageURI, err) + return + } + defer reReader.Close() + + chunkSkel := &sources.Chunk{ + SourceType: 1, + SourceName: "filesystem", + SourceID: 1, + SourceMetadata: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Filesystem{ + Filesystem: &source_metadatapb.Filesystem{ + File: sanitizer.UTF8(tempFile.Name()), + }, + }, + }, + Verify: true, + } + + chunksChan := make(chan *sources.Chunk, 1) + + go func() { + defer close(chunksChan) + HandleFile(ctx, reReader, chunkSkel, chunksChan) + }() + + secret := "AKIA2OGYBAH6Q2PQJUGN" + + // Read from the channel and validate the secrets. + foundSecret := "" + for chunkCh := range chunksChan { + foundSecret += string(chunkCh.Data) + } + + assert.Contains(t, foundSecret, secret) + +} diff --git a/pkg/handlers/archive_test.go b/pkg/handlers/archive_test.go index 4115592f9f2a..4f9270119699 100644 --- a/pkg/handlers/archive_test.go +++ b/pkg/handlers/archive_test.go @@ -12,10 +12,6 @@ import ( diskbufferreader "github.com/bill-rich/disk-buffer-reader" "github.com/stretchr/testify/assert" - "github.com/google/go-containerregistry/pkg/v1/remote" - - "github.com/google/go-containerregistry/pkg/name" - "github.com/google/go-containerregistry/pkg/v1/tarball" logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" ) @@ -188,84 +184,3 @@ func TestExtractRPMContent(t *testing.T) { expectedLength := 1822720 assert.Equal(t, expectedLength, len(string(content))) } - -func TestFoundKeyInDockerTar(t *testing.T) { - // URI of the Docker image to clone - imageURI := "ghcr.io/joeleonjr/getting-started-app-with-canary-token:main" - - var imageName name.Reference - imageName, err := name.NewTag(imageURI) - if err != nil { - t.Errorf("%s: error creating image name: %s", imageURI, err) - } - - // Pull the image - img, err := remote.Image(imageName) - if err != nil { - t.Errorf("%s: error pulling image: %s", imageURI, err) - } - - tempFile, err := os.CreateTemp("", "archive_test_docker_img.tar") - if err != nil { - t.Errorf("%s: error creating temporary file: %s", imageURI, err) - return - } - - defer os.Remove(tempFile.Name()) // Clean up the temporary file - - // Save the image as a tar file - err = tarball.WriteToFile(tempFile.Name(), imageName, img) - if err != nil { - t.Errorf("%s: error saving image as tar file: %s", imageURI, err) - return - } - - // ctx := logContext.AddLogger(context.Background()) - - // inputFile, err := os.Open(tempFile.Name()) - // if err != nil { - // t.Errorf("%s: error opening tar file: %s", imageURI, err) - // return - // } - // defer inputFile.Close() - - // reReader, err := diskbufferreader.New(inputFile) - // if err != nil { - // t.Errorf("%s: error creating re-readable reader: %s", imageURI, err) - // return - // } - // defer reReader.Close() - - // chunkSkel := &sources.Chunk{ - // SourceType: 1, - // SourceName: "filesystem", - // SourceID: 1, - // SourceMetadata: &source_metadatapb.MetaData{ - // Data: &source_metadatapb.MetaData_Filesystem{ - // Filesystem: &source_metadatapb.Filesystem{ - // File: sanitizer.UTF8(tempFile.Name()), - // }, - // }, - // }, - // Verify: true, - // } - - // chunksChan := make(chan *sources.Chunk, 1) - - // HandleFile(ctx, reReader, chunkSkel, chunksChan) - - // println("here") - // fmt.Printf("chunksChan: %v\n", chunksChan) - // fmt.Printf("chunksChan: %v\n", &chunksChan) - - // secret := "AKIA2OGYBAH6Q2PQJUGN" - - // // Read from the channel and validate the secrets. - // foundSecret := "" - // for chunkCh := range chunksChan { - // foundSecret += string(chunkCh.Data) - // } - - // assert.Contains(t, foundSecret, secret) - -} diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go index b8e76820b1c6..cf300d10f0e1 100644 --- a/pkg/handlers/handlers.go +++ b/pkg/handlers/handlers.go @@ -60,8 +60,8 @@ func HandleFile(ctx logContext.Context, file io.Reader, chunkSkel *sources.Chunk if isSpecial { if dockerTarReader, ok := file.(DockerTarReader); ok { // Clean up temporary files when done. - defer os.Remove(dockerTarReader.tmpEnv.tempFileName) defer os.RemoveAll(dockerTarReader.tmpEnv.extractPath) + defer os.Remove(dockerTarReader.tmpEnv.tempFileName) err = docker.ScanDockerImg(ctx, dockerTarReader.img, chunksChan, chunkSkel) return err == nil } diff --git a/pkg/sources/docker/docker.go b/pkg/sources/docker/docker.go index 2c0c838f39fb..c5aff0fc6b6f 100644 --- a/pkg/sources/docker/docker.go +++ b/pkg/sources/docker/docker.go @@ -140,20 +140,16 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err } func ScanDockerImg(ctx context.Context, img v1.Image, chunksChan chan *sources.Chunk, chunkSkel *sources.Chunk) error { - base, tag := chunkSkel.BaseAndTagForDockerImg() logger := ctx.Logger().WithValues("image", base, "tag", tag) logger.V(2).Info("scanning image") layers, err := img.Layers() - if err != nil { return err } - for _, layer := range layers { - digest, err := layer.Digest() if err != nil { return err @@ -229,7 +225,7 @@ func ScanDockerImg(ctx context.Context, img v1.Image, chunksChan chan *sources.C } dockerImagesScanned.WithLabelValues(chunkSkel.SourceName).Inc() - return err + return nil } func baseAndTagFromImage(image string) (base, tag string, hasDigest bool) { diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index bd2465b35432..716e74270ce8 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -30,6 +30,10 @@ type Chunk struct { Verify bool } +// BaseAndTagForDockerImg returns a base and tag value for a +// docker image regardless of the source type. If the source type +// is not a docker image, the base will be a filename/path from the source metadata. +// The default is the entire source metadata string. Update for new source types. func (c *Chunk) BaseAndTagForDockerImg() (base string, tag string) { switch c.SourceType { case sourcespb.SourceType_SOURCE_TYPE_DOCKER: