diff --git a/pkg/fanal/analyzer/const.go b/pkg/fanal/analyzer/const.go index 197c0033296e..ea2108e89281 100644 --- a/pkg/fanal/analyzer/const.go +++ b/pkg/fanal/analyzer/const.go @@ -75,11 +75,10 @@ const ( TypeCondaEnv Type = "conda-environment" // Python - TypePythonPkg Type = "python-pkg" - TypePythonPkgEgg Type = "python-egg" - TypePip Type = "pip" - TypePipenv Type = "pipenv" - TypePoetry Type = "poetry" + TypePythonPkg Type = "python-pkg" + TypePip Type = "pip" + TypePipenv Type = "pipenv" + TypePoetry Type = "poetry" // Go TypeGoBinary Type = "gobinary" diff --git a/pkg/fanal/analyzer/language/python/packaging/egg.go b/pkg/fanal/analyzer/language/python/packaging/egg.go deleted file mode 100644 index 7dbe18b2c6db..000000000000 --- a/pkg/fanal/analyzer/language/python/packaging/egg.go +++ /dev/null @@ -1,186 +0,0 @@ -package packaging - -import ( - "archive/zip" - "bytes" - "context" - "io" - "io/fs" - "os" - "path" - "path/filepath" - "slices" - "strings" - - "github.com/samber/lo" - "golang.org/x/xerrors" - - "github.com/aquasecurity/trivy/pkg/dependency/parser/python/packaging" - "github.com/aquasecurity/trivy/pkg/fanal/analyzer" - "github.com/aquasecurity/trivy/pkg/fanal/analyzer/language" - "github.com/aquasecurity/trivy/pkg/fanal/types" - "github.com/aquasecurity/trivy/pkg/licensing" - "github.com/aquasecurity/trivy/pkg/log" - "github.com/aquasecurity/trivy/pkg/utils/fsutils" - xio "github.com/aquasecurity/trivy/pkg/x/io" -) - -func init() { - analyzer.RegisterPostAnalyzer(analyzer.TypePythonPkgEgg, newEggAnalyzer) -} - -const ( - eggAnalyzerVersion = 1 - eggExt = ".egg" -) - -func newEggAnalyzer(opts analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) { - return &eggAnalyzer{ - logger: log.WithPrefix("python"), - pkgParser: packaging.NewParser(), - licenseClassifierConfidenceLevel: opts.LicenseScannerOption.ClassifierConfidenceLevel, - }, nil -} - -type eggAnalyzer struct { - logger *log.Logger - pkgParser language.Parser - 
licenseClassifierConfidenceLevel float64 -} - -// PostAnalyze analyzes egg archive files -func (a eggAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) { - var apps []types.Application - - required := func(path string, _ fs.DirEntry) bool { - return a.Required(path, nil) || slices.Contains(input.FilePathsMatchedFromPatterns, path) - } - - err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error { - rsa, ok := r.(xio.ReadSeekerAt) - if !ok { - return xerrors.New("invalid reader") - } - - // .egg file is zip format and PKG-INFO needs to be extracted from the zip file. - info, err := d.Info() - if err != nil { - return xerrors.Errorf("egg file error: %w", err) - } - pkginfoInZip, err := a.findFileInZip(rsa, info.Size(), isEggFile) - if err != nil { - return xerrors.Errorf("egg analysis error: %w", err) - } - - // Egg archive may not contain required files, then we will get nil. Skip this archives - if pkginfoInZip == nil { - return nil - } - - app, err := language.ParsePackage(types.PythonPkg, path, pkginfoInZip, a.pkgParser, input.Options.FileChecksum) - if err != nil { - return xerrors.Errorf("parse error: %w", err) - } else if app == nil { - return nil - } - - if err = a.fillLicensesFromFile(rsa, info.Size(), app); err != nil { - a.logger.Warn("Unable to fill licenses", log.FilePath(path), log.Err(err)) - } - - apps = append(apps, *app) - return nil - }) - - if err != nil { - return nil, xerrors.Errorf("python package walk error: %w", err) - } - return &analyzer.AnalysisResult{ - Applications: apps, - }, nil -} - -func (a eggAnalyzer) findFileInZip(r xio.ReadSeekerAt, size int64, required func(filePath string) bool) (xio.ReadSeekerAt, error) { - if _, err := r.Seek(0, io.SeekStart); err != nil { - return nil, xerrors.Errorf("file seek error: %w", err) - } - - zr, err := zip.NewReader(r, size) - if err != nil { - return nil, xerrors.Errorf("zip reader error: %w", 
err) - } - - found, ok := lo.Find(zr.File, func(f *zip.File) bool { - return required(f.Name) - }) - if !ok { - return nil, nil - } - return a.open(found) -} - -// open reads the file content in the zip archive to make it seekable. -func (a eggAnalyzer) open(file *zip.File) (xio.ReadSeekerAt, error) { - f, err := file.Open() - if err != nil { - return nil, err - } - defer f.Close() - - b, err := io.ReadAll(f) - if err != nil { - return nil, xerrors.Errorf("file %s open error: %w", file.Name, err) - } - - return bytes.NewReader(b), nil -} - -func (a eggAnalyzer) fillLicensesFromFile(r xio.ReadSeekerAt, size int64, app *types.Application) error { - for i, pkg := range app.Packages { - var licenses []string - for _, license := range pkg.Licenses { - if !strings.HasPrefix(license, "file://") { - licenses = append(licenses, license) - continue - } - - required := func(filePath string) bool { - return path.Base(filePath) == path.Base(strings.TrimPrefix(license, "file://")) - } - f, err := a.findFileInZip(r, size, required) - if err != nil { - a.logger.Debug("unable to find license file in `*.egg` file", log.Err(err)) - continue - } else if f == nil { // zip doesn't contain license file - continue - } - - l, err := licensing.Classify("", f, a.licenseClassifierConfidenceLevel) - if err != nil { - return xerrors.Errorf("license classify error: %w", err) - } else if l == nil { - continue - } - - // License found - foundLicenses := lo.Map(l.Findings, func(finding types.LicenseFinding, _ int) string { - return finding.Name - }) - licenses = append(licenses, foundLicenses...) 
- } - app.Packages[i].Licenses = licenses - } - return nil -} - -func (a eggAnalyzer) Required(filePath string, _ os.FileInfo) bool { - return filepath.Ext(filePath) == eggExt -} - -func (a eggAnalyzer) Type() analyzer.Type { - return analyzer.TypePythonPkgEgg -} - -func (a eggAnalyzer) Version() int { - return eggAnalyzerVersion -} diff --git a/pkg/fanal/analyzer/language/python/packaging/egg_test.go b/pkg/fanal/analyzer/language/python/packaging/egg_test.go deleted file mode 100644 index ddbdf7fdc130..000000000000 --- a/pkg/fanal/analyzer/language/python/packaging/egg_test.go +++ /dev/null @@ -1,145 +0,0 @@ -package packaging - -import ( - "context" - "os" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/aquasecurity/trivy/pkg/fanal/analyzer" - "github.com/aquasecurity/trivy/pkg/fanal/types" -) - -func Test_eggAnalyzer_Analyze(t *testing.T) { - tests := []struct { - name string - dir string - includeChecksum bool - want *analyzer.AnalysisResult - wantErr string - }{ - { - name: "egg zip", - dir: "testdata/egg-zip", - want: &analyzer.AnalysisResult{ - Applications: []types.Application{ - { - Type: types.PythonPkg, - FilePath: "kitchen-1.2.6-py2.7.egg", - Packages: types.Packages{ - { - Name: "kitchen", - Version: "1.2.6", - Licenses: []string{ - "GNU Library or Lesser General Public License (LGPL)", - }, - FilePath: "kitchen-1.2.6-py2.7.egg", - }, - }, - }, - }, - }, - }, - { - name: "egg zip with checksum", - dir: "testdata/egg-zip", - includeChecksum: true, - want: &analyzer.AnalysisResult{ - Applications: []types.Application{ - { - Type: types.PythonPkg, - FilePath: "kitchen-1.2.6-py2.7.egg", - Packages: types.Packages{ - { - Name: "kitchen", - Version: "1.2.6", - Licenses: []string{ - "GNU Library or Lesser General Public License (LGPL)", - }, - FilePath: "kitchen-1.2.6-py2.7.egg", - Digest: "sha1:4e13b6e379966771e896ee43cf8e240bf6083dca", - }, - }, - }, - }, - }, - }, - { - name: "egg zip with 
license file", - dir: "testdata/egg-zip-with-license-file", - want: &analyzer.AnalysisResult{ - Applications: []types.Application{ - { - Type: types.PythonPkg, - FilePath: "sample_package.egg", - Packages: types.Packages{ - { - Name: "sample_package", - Version: "0.1", - Licenses: []string{ - "MIT", - }, - FilePath: "sample_package.egg", - }, - }, - }, - }, - }, - }, - { - name: "egg zip doesn't contain required files", - dir: "testdata/no-req-files", - want: &analyzer.AnalysisResult{}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - - a, err := newEggAnalyzer(analyzer.AnalyzerOptions{}) - require.NoError(t, err) - got, err := a.PostAnalyze(context.Background(), analyzer.PostAnalysisInput{ - FS: os.DirFS(tt.dir), - Options: analyzer.AnalysisOptions{ - FileChecksum: tt.includeChecksum, - }, - }) - - if tt.wantErr != "" { - require.Error(t, err) - assert.Contains(t, err.Error(), tt.wantErr) - return - } - require.NoError(t, err) - assert.Equal(t, tt.want, got) - }) - } - -} - -func Test_eggAnalyzer_Required(t *testing.T) { - tests := []struct { - name string - filePath string - want bool - }{ - { - name: "egg zip", - filePath: "python2.7/site-packages/cssutils-1.0-py2.7.egg", - want: true, - }, - { - name: "egg-info PKG-INFO", - filePath: "python3.8/site-packages/wrapt-1.12.1.egg-info/PKG-INFO", - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := eggAnalyzer{} - got := a.Required(tt.filePath, nil) - assert.Equal(t, tt.want, got) - }) - } -} diff --git a/pkg/fanal/analyzer/language/python/packaging/packaging.go b/pkg/fanal/analyzer/language/python/packaging/packaging.go index a030aff1a17f..05453cc636b5 100644 --- a/pkg/fanal/analyzer/language/python/packaging/packaging.go +++ b/pkg/fanal/analyzer/language/python/packaging/packaging.go @@ -1,6 +1,8 @@ package packaging import ( + "archive/zip" + "bytes" "context" "errors" "io" @@ -28,7 +30,7 @@ func init() { 
analyzer.RegisterPostAnalyzer(analyzer.TypePythonPkg, newPackagingAnalyzer) } -const version = 2 +const version = 1 func newPackagingAnalyzer(opt analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) { return &packagingAnalyzer{ @@ -42,7 +44,7 @@ var ( eggFiles = []string{ // .egg format // https://setuptools.readthedocs.io/en/latest/deprecated/python_eggs.html#eggs-and-their-formats - // ".egg" is zip format. We check it in `eggAnalyzer`. + ".egg", // zip format "EGG-INFO/PKG-INFO", // .egg-info format: .egg-info can be a file or directory @@ -73,6 +75,24 @@ func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAna return xerrors.New("invalid reader") } + // .egg file is zip format and PKG-INFO needs to be extracted from the zip file. + if strings.HasSuffix(path, ".egg") { + info, err := d.Info() + if err != nil { + return xerrors.Errorf("egg file error: %w", err) + } + pkginfoInZip, err := a.analyzeEggZip(rsa, info.Size()) + if err != nil { + return xerrors.Errorf("egg analysis error: %w", err) + } + + // Egg archive may not contain required files, then we will get nil. 
Skip this archive + if pkginfoInZip == nil { + return nil + } + rsa = pkginfoInZip + } + app, err := a.parse(path, rsa, input.Options.FileChecksum) if err != nil { return xerrors.Errorf("parse error: %w", err) @@ -80,7 +100,7 @@ func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAna return nil } - if err = a.fillAdditionalData(input.FS, app); err != nil { + if err := a.fillAdditionalData(input.FS, app); err != nil { a.logger.Warn("Unable to collect additional info", log.Err(err)) } @@ -153,6 +173,37 @@ func (a packagingAnalyzer) parse(filePath string, r xio.ReadSeekerAt, checksum b return language.ParsePackage(types.PythonPkg, filePath, r, a.pkgParser, checksum) } +func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (xio.ReadSeekerAt, error) { + zr, err := zip.NewReader(r, size) + if err != nil { + return nil, xerrors.Errorf("zip reader error: %w", err) + } + + found, ok := lo.Find(zr.File, func(f *zip.File) bool { + return isEggFile(f.Name) + }) + if !ok { + return nil, nil + } + return a.open(found) +} + +// open reads the file content in the zip archive to make it seekable. 
+func (a packagingAnalyzer) open(file *zip.File) (xio.ReadSeekerAt, error) { + f, err := file.Open() + if err != nil { + return nil, err + } + defer f.Close() + + b, err := io.ReadAll(f) + if err != nil { + return nil, xerrors.Errorf("file %s open error: %w", file.Name, err) + } + + return bytes.NewReader(b), nil +} + func (a packagingAnalyzer) Required(filePath string, _ os.FileInfo) bool { return strings.Contains(filePath, ".dist-info") || isEggFile(filePath) } diff --git a/pkg/fanal/analyzer/language/python/packaging/packaging_test.go b/pkg/fanal/analyzer/language/python/packaging/packaging_test.go index eb1a62093cff..c3a89ad0cd19 100644 --- a/pkg/fanal/analyzer/language/python/packaging/packaging_test.go +++ b/pkg/fanal/analyzer/language/python/packaging/packaging_test.go @@ -20,6 +20,28 @@ func Test_packagingAnalyzer_Analyze(t *testing.T) { want *analyzer.AnalysisResult wantErr string }{ + { + name: "egg zip", + dir: "testdata/egg-zip", + want: &analyzer.AnalysisResult{ + Applications: []types.Application{ + { + Type: types.PythonPkg, + FilePath: "kitchen-1.2.6-py2.7.egg", + Packages: types.Packages{ + { + Name: "kitchen", + Version: "1.2.6", + Licenses: []string{ + "GNU Library or Lesser General Public License (LGPL)", + }, + FilePath: "kitchen-1.2.6-py2.7.egg", + }, + }, + }, + }, + }, + }, { name: "egg-info", dir: "testdata/happy-egg", @@ -102,6 +124,11 @@ func Test_packagingAnalyzer_Analyze(t *testing.T) { }, }, }, + { + name: "egg zip doesn't contain required files", + dir: "testdata/no-req-files", + want: &analyzer.AnalysisResult{}, + }, { name: "license file in dist.info", dir: "testdata/license-file-dist", diff --git a/pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg b/pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg deleted file mode 100644 index 91d67dc5947b..000000000000 Binary files 
a/pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg and /dev/null differ