Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker Image Identification in Tar Files #1643

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 60 additions & 8 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"strings"
"time"

v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/tarball"
"github.com/h2non/filetype"
"github.com/mholt/archiver/v4"

Expand All @@ -34,12 +36,33 @@ var (
// Ensure the Archive satisfies the interfaces at compile time.
var _ SpecializedHandler = (*Archive)(nil)

// tempEnv contains the temporary file and directory information for the archive.
// Both resources live on disk and have to be removed by whoever ends up owning
// the value once they are no longer needed.
type tempEnv struct {
// tempFile is the handle of the temporary file.
tempFile *os.File
// tempFileName is the on-disk path of the temporary file; it is the value
// handed to path-based APIs and to os.Remove during cleanup.
tempFileName string
// extractPath is the temporary directory used as the working directory for
// extraction; it is cleaned up with os.RemoveAll.
extractPath string
}

// dockerImageDetails contains the details of the Docker image and the temporary environment.
// It is the result produced by isDockerImage when a tar archive is recognised
// as a Docker image.
type dockerImageDetails struct {
// image is the image loaded from the temporary tar file.
image v1.Image
// temp is the temporary environment backing image; it must outlive any use
// of image and be removed afterwards.
temp tempEnv
}

// Archive is a handler for extracting and decompressing archives.
type Archive struct {
// size is the running size counter; New resets it to 0.
size int
// currentDepth presumably tracks how deeply nested the archive currently
// being processed is — NOTE(review): its use is not visible here, confirm.
currentDepth int
}

// DockerTarReader is a wrapper for io.Reader that also stores the image and the
// temporary environment of a tar file that was identified as a Docker image.
// The temporary file path is kept because the go-containerregistry Image
// methods need the tar file to remain on disk while the image is in use.
type DockerTarReader struct {
// Reader is the original reader the tar file was read from.
io.Reader
// img is the Docker image loaded from the temporary tar file.
img v1.Image
// tmpEnv holds the temporary file and directory backing img; the consumer
// of the DockerTarReader is responsible for removing them.
tmpEnv tempEnv
}

// New sets a default maximum size and current size counter.
func (a *Archive) New() {
a.size = 0
Expand Down Expand Up @@ -212,6 +235,7 @@ func (a *Archive) ReadToMax(ctx context.Context, reader io.Reader) (data []byte,
// MIME types of the archive formats that receive specialized handling.
const (
// arMimeType is the MIME type of unix ar archives.
arMimeType = "application/x-unix-archive"
// rpmMimeType is the MIME type of RPM packages.
rpmMimeType = "application/x-rpm"
// tarMimeType is the MIME type of tar archives; tar files are additionally
// checked for being Docker images.
tarMimeType = "application/x-tar"
)

// Define a map of mime types to corresponding command-line tools
Expand Down Expand Up @@ -265,6 +289,21 @@ func (a *Archive) HandleSpecialized(ctx logContext.Context, reader io.Reader) (i
return nil, false, err
}
reader, err = a.extractRpmContent(ctx, reader)
case tarMimeType:
// Check whether the tar archive is a Docker image.
isImg, details, err := a.isDockerImage(ctx, reader)
if err != nil {
return nil, false, err
}
if !isImg {
return nil, false, nil
}
// Wrap the reader in a DockerTarReader that carries the Docker image and its temporary environment.
reader = DockerTarReader{
Reader: reader,
img: details.image,
tmpEnv: details.temp,
}
default:
return reader, false, nil
}
Expand All @@ -289,8 +328,8 @@ func (a *Archive) extractDebContent(ctx logContext.Context, file io.Reader) (io.
if err != nil {
return nil, err
}
defer os.Remove(tmpEnv.tempFileName)
defer os.RemoveAll(tmpEnv.extractPath)
defer os.Remove(tmpEnv.tempFileName)

cmd := exec.Command("ar", "x", tmpEnv.tempFile.Name())
cmd.Dir = tmpEnv.extractPath
Expand Down Expand Up @@ -327,8 +366,8 @@ func (a *Archive) extractRpmContent(ctx logContext.Context, file io.Reader) (io.
if err != nil {
return nil, err
}
defer os.Remove(tmpEnv.tempFileName)
defer os.RemoveAll(tmpEnv.extractPath)
defer os.Remove(tmpEnv.tempFileName)

// Use rpm2cpio to convert the RPM file to a cpio archive and then extract it using cpio command.
cmd := exec.Command("sh", "-c", "rpm2cpio "+tmpEnv.tempFile.Name()+" | cpio -id")
Expand Down Expand Up @@ -427,12 +466,6 @@ func (a *Archive) handleExtractedFiles(ctx logContext.Context, env tempEnv, hand
return dataArchiveName, nil
}

// tempEnv contains the temporary file and directory information for the archive.
type tempEnv struct {
// tempFile is the handle of the temporary file.
tempFile *os.File
// tempFileName is the on-disk path of the temporary file.
tempFileName string
// extractPath is the temporary directory used for extraction.
extractPath string
}

// createTempEnv creates a temporary file and a temporary directory for extracting archives.
// The caller is responsible for removing these temporary resources
// (both the file and directory) when they are no longer needed.
Expand Down Expand Up @@ -476,3 +509,22 @@ func openDataArchive(extractPath string, dataArchiveName string) (io.ReadCloser,
}
return dataFile, nil
}

// isDockerImage reports whether the tar archive read from file is a Docker image.
//
// The reader's contents are first materialised through createTempEnv, because
// tarball.ImageFromPath works on a file path rather than on a stream.
//
// Results:
//   - isImg is true only when tarball.ImageFromPath was able to load an image.
//   - details carries the loaded image and its temporary environment; it is
//     only meaningful when isImg is true.
//   - err is non-nil only when the temporary environment could not be created.
//
// A tar archive that cannot be loaded as an image is a normal negative answer,
// not a failure: it is reported as (false, zero value, nil) so that callers can
// tell an ordinary tar apart from a real error. In that case the temporary file
// and directory are removed before returning.
//
// When isImg is true the caller owns details.temp and is responsible for
// removing both the temporary file and the extraction directory. The docker
// scanner requires a file path to the image, so the temporary file must stay
// on disk until scanning has finished.
func (a *Archive) isDockerImage(ctx context.Context, file io.Reader) (isImg bool, details dockerImageDetails, err error) {
	tmpEnv, err := a.createTempEnv(ctx, file)
	if err != nil {
		return false, dockerImageDetails{}, err
	}

	img, err := tarball.ImageFromPath(tmpEnv.tempFileName, nil)
	if err != nil {
		// Not a loadable Docker image. Cleanup is best-effort: nothing useful
		// can be done if removing a temporary resource fails.
		// NOTE(review): createTempEnv is not visible from here; the handle is
		// closed defensively in case it is still open (closing an already
		// closed or nil *os.File only yields an error, which is ignored).
		_ = tmpEnv.tempFile.Close()
		_ = os.Remove(tmpEnv.tempFileName)
		_ = os.RemoveAll(tmpEnv.extractPath)
		return false, dockerImageDetails{}, nil
	}
	return true, dockerImageDetails{image: img, temp: tmpEnv}, nil
}
102 changes: 102 additions & 0 deletions pkg/handlers/archive_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//go:build integration
// +build integration

package handlers

import (
"context"
"os"
"testing"

diskbufferreader "github.com/bill-rich/disk-buffer-reader"
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/google/go-containerregistry/pkg/v1/tarball"
"github.com/stretchr/testify/assert"

logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

// TestFoundKeyInDockerTar pulls a public Docker image, saves it as a tar file,
// feeds that file through HandleFile and asserts that a known secret appears
// in the data of the emitted chunks.
//
// Every setup step aborts the test immediately on failure, since each later
// step depends on the result of the previous one.
func TestFoundKeyInDockerTar(t *testing.T) {
	// URI of the Docker image to clone
	// imageURI := "ghcr.io/joeleonjr/getting-started-app-with-canary-token:main"
	imageURI := "trufflesecurity/secrets"

	var imageName name.Reference
	imageName, err := name.NewTag(imageURI)
	if err != nil {
		t.Fatalf("%s: error creating image name: %s", imageURI, err)
	}

	// Pull the image
	img, err := remote.Image(imageName)
	if err != nil {
		t.Fatalf("%s: error pulling image: %s", imageURI, err)
	}

	tempFile, err := os.CreateTemp("", "archive_test_docker_img.tar")
	if err != nil {
		t.Fatalf("%s: error creating temporary file: %s", imageURI, err)
	}
	defer os.Remove(tempFile.Name()) // Clean up the temporary file

	// Only the path is needed from here on: the tarball is written and later
	// re-opened by name, so release the handle returned by CreateTemp.
	if err := tempFile.Close(); err != nil {
		t.Fatalf("%s: error closing temporary file: %s", imageURI, err)
	}

	// Save the image as a tar file
	if err := tarball.WriteToFile(tempFile.Name(), imageName, img); err != nil {
		t.Fatalf("%s: error saving image as tar file: %s", imageURI, err)
	}

	ctx := logContext.AddLogger(context.Background())

	inputFile, err := os.Open(tempFile.Name())
	if err != nil {
		t.Fatalf("%s: error opening tar file: %s", imageURI, err)
	}
	defer inputFile.Close()

	reReader, err := diskbufferreader.New(inputFile)
	if err != nil {
		t.Fatalf("%s: error creating re-readable reader: %s", imageURI, err)
	}
	defer reReader.Close()

	chunkSkel := &sources.Chunk{
		SourceType: 1,
		SourceName: "filesystem",
		SourceID:   1,
		SourceMetadata: &source_metadatapb.MetaData{
			Data: &source_metadatapb.MetaData_Filesystem{
				Filesystem: &source_metadatapb.Filesystem{
					File: sanitizer.UTF8(tempFile.Name()),
				},
			},
		},
		Verify: true,
	}

	chunksChan := make(chan *sources.Chunk, 1)

	// HandleFile produces chunks on chunksChan; closing the channel when it
	// returns lets the range loop below terminate.
	go func() {
		defer close(chunksChan)
		HandleFile(ctx, reReader, chunkSkel, chunksChan)
	}()

	secret := "AKIA2OGYBAH6Q2PQJUGN"

	// Read from the channel and validate the secrets.
	foundSecret := ""
	for chunkCh := range chunksChan {
		foundSecret += string(chunkCh.Data)
	}

	assert.Contains(t, foundSecret, secret)
}
7 changes: 4 additions & 3 deletions pkg/handlers/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ func TestHandleFile(t *testing.T) {
ch := make(chan *sources.Chunk, 2)

// Context cancels the operation.
canceledCtx, cancel := context.WithCancel(context.Background())
ctx := logContext.AddLogger(context.Background())
canceledCtx, cancel := logContext.WithCancel(ctx)
cancel()
assert.False(t, HandleFile(canceledCtx, strings.NewReader("file"), &sources.Chunk{}, ch))

Expand All @@ -122,7 +123,7 @@ func TestHandleFile(t *testing.T) {
assert.NoError(t, err)

assert.Equal(t, 0, len(ch))
assert.True(t, HandleFile(context.Background(), reader, &sources.Chunk{}, ch))
assert.True(t, HandleFile(ctx, reader, &sources.Chunk{}, ch))
assert.Equal(t, 1, len(ch))
}

Expand All @@ -149,7 +150,7 @@ func TestExtractTarContent(t *testing.T) {
assert.Nil(t, err)
defer file.Close()

ctx := context.Background()
ctx := logContext.AddLogger(context.Background())

chunkCh := make(chan *sources.Chunk)
go func() {
Expand Down
12 changes: 10 additions & 2 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package handlers
import (
"context"
"io"
"os"

diskbufferreader "github.com/bill-rich/disk-buffer-reader"

logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/docker"
)

func DefaultHandlers() []Handler {
Expand Down Expand Up @@ -37,7 +39,7 @@ type Handler interface {
// packages them in the provided chunk skeleton, and sends them to chunksChan.
// The function returns true if processing was successful and false otherwise.
// Context is used for cancellation, and the caller is responsible for canceling it if needed.
func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, chunksChan chan *sources.Chunk) bool {
func HandleFile(ctx logContext.Context, file io.Reader, chunkSkel *sources.Chunk, chunksChan chan *sources.Chunk) bool {
aCtx := logContext.AddLogger(ctx)
for _, h := range DefaultHandlers() {
h.New()
Expand All @@ -56,9 +58,15 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
if specialHandler, ok := h.(SpecializedHandler); ok {
file, isSpecial, err := specialHandler.HandleSpecialized(aCtx, reReader)
if isSpecial {
if dockerTarReader, ok := file.(DockerTarReader); ok {
// Clean up temporary files when done.
defer os.RemoveAll(dockerTarReader.tmpEnv.extractPath)
defer os.Remove(dockerTarReader.tmpEnv.tempFileName)
err = docker.ScanDockerImg(ctx, dockerTarReader.img, chunksChan, chunkSkel)
return err == nil
}
return handleChunks(aCtx, h.FromFile(ctx, file), chunkSkel, chunksChan)
}

if err != nil {
aCtx.Logger().Error(err, "error handling file")
}
Expand Down
Loading
Loading