From 063e9da65dbf99135b520d207982570e9b49be11 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 27 Jul 2023 14:26:59 -0400 Subject: [PATCH] Guess unpinned versions in python requirements.txt (#1966) * feat: python requirements.txt parsing inclusive Signed-off-by: manifestori * refactor: parseVersion Signed-off-by: manifestori * add python config for optional requirements version constraint resolution Signed-off-by: Alex Goodman * fix tests Signed-off-by: Alex Goodman * allow for python requirements metadata to be optional Signed-off-by: Alex Goodman * restore cyclonedx dependency Signed-off-by: Alex Goodman --------- Signed-off-by: manifestori Signed-off-by: Alex Goodman Signed-off-by: Alex Goodman Co-authored-by: manifestori --- README.md | 7 + go.mod | 2 + go.sum | 8 + internal/config/application.go | 5 + internal/config/python.go | 13 + schema/json/schema-10.0.0.json | 5 +- syft/pkg/cataloger/cataloger.go | 16 +- syft/pkg/cataloger/config.go | 11 +- syft/pkg/cataloger/python/cataloger.go | 15 +- syft/pkg/cataloger/python/cataloger_test.go | 2 +- .../cataloger/python/parse_requirements.go | 259 +++++++++++------ .../python/parse_requirements_test.go | 268 +++++++++++++++--- .../test-fixtures/requires/requirements.txt | 4 +- syft/pkg/python_requirements_metadata.go | 6 +- 14 files changed, 474 insertions(+), 147 deletions(-) create mode 100644 internal/config/python.go diff --git a/README.md b/README.md index b35af21ecb8..064acf9422c 100644 --- a/README.md +++ b/README.md @@ -577,6 +577,13 @@ linux-kernel: # SYFT_LINUX_KERNEL_CATALOG_MODULES env var catalog-modules: true +python: + # when running across entries in requirements.txt that do not specify a specific version + # (e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could + # be based on the version requirements specified (e.g. "1.0.0"). 
When enabled the lowest expressible version + # when given an arbitrary constraint will be used (even if that version may not be available/published). + guess-unpinned-requirements: false + # cataloging file contents is exposed through the power-user subcommand file-contents: cataloger: diff --git a/go.mod b/go.mod index e7783644d30..910960fc103 100644 --- a/go.mod +++ b/go.mod @@ -55,6 +55,7 @@ require ( github.com/anchore/clio v0.0.0-20230602170917-e747e60c4aa0 github.com/anchore/go-logger v0.0.0-20230531193951-db5ae83e7dbe github.com/anchore/stereoscope v0.0.0-20230724160817-d515761c6ca2 + github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 github.com/charmbracelet/bubbletea v0.24.2 github.com/charmbracelet/lipgloss v0.7.1 github.com/dave/jennifer v1.6.1 @@ -90,6 +91,7 @@ require ( github.com/acomagu/bufpipe v1.0.4 // indirect github.com/anchore/fangs v0.0.0-20230531202914-48a718c6b4ba // indirect github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect + github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/becheran/wildmatch-go v1.0.0 // indirect github.com/charmbracelet/bubbles v0.16.1 // indirect diff --git a/go.sum b/go.sum index 191f4825b74..b56945b1db6 100644 --- a/go.sum +++ b/go.sum @@ -114,6 +114,10 @@ github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 h1:vmXNl+HDfqqXgr0uY1UgK1GAhps8nbAAtqHNBcgyf+4= +github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46/go.mod 
h1:olhPNdiiAAMiSujemd1O/sc6GcyePr23f/6uGKtthNg= +github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 h1:rcEG5HI490FF0a7zuvxOxen52ddygCfNVjP0XOCMl+M= +github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492/go.mod h1:9Beu8XsUNNfzml7WBf3QmyPToP1wm1Gj/Vc5UJKqTzU= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= @@ -171,6 +175,7 @@ github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSk github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -585,6 +590,7 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/saferwall/pe v1.4.4 h1:Ml++7/2/Z1iKwV4zCsd1nIqTEAdUQKAetwbbcCarhOg= @@ -602,6 +608,7 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -675,6 +682,7 @@ github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= +github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/vbatts/go-mtree v0.5.3 h1:S/jYlfG8rZ+a0bhZd+RANXejy7M4Js8fq9U+XoWTd5w= github.com/vbatts/go-mtree v0.5.3/go.mod h1:eXsdoPMdL2jcJx6HweWi9lYQxBsTp4lNhqqAjgkZUg8= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= diff --git a/internal/config/application.go b/internal/config/application.go index e7c726134d8..ea417aa1779 100644 --- a/internal/config/application.go +++ b/internal/config/application.go @@ -20,6 +20,7 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger" golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang" 
"github.com/anchore/syft/syft/pkg/cataloger/kernel" + pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python" ) var ( @@ -52,6 +53,7 @@ type Application struct { Package pkg `yaml:"package" json:"package" mapstructure:"package"` Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"` LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` + Python python `yaml:"python" json:"python" mapstructure:"python"` Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"` FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"` @@ -85,6 +87,9 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config { LinuxKernel: kernel.LinuxCatalogerConfig{ CatalogModules: cfg.LinuxKernel.CatalogModules, }, + Python: pythonCataloger.CatalogerConfig{ + GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, + }, } } diff --git a/internal/config/python.go b/internal/config/python.go new file mode 100644 index 00000000000..d86da39be6a --- /dev/null +++ b/internal/config/python.go @@ -0,0 +1,13 @@ +package config + +import ( + "github.com/spf13/viper" +) + +type python struct { + GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` +} + +func (cfg python) loadDefaultValues(v *viper.Viper) { + v.SetDefault("python.guess-unpinned-requirements", false) +} diff --git a/schema/json/schema-10.0.0.json b/schema/json/schema-10.0.0.json index 55147ec7e9b..73c3ac722ec 100644 --- a/schema/json/schema-10.0.0.json +++ b/schema/json/schema-10.0.0.json @@ -1626,10 +1626,7 @@ "type": "object", "required": [ "name", - "extras", - "versionConstraint", - "url", - "markers" + "versionConstraint" ] }, "RDescriptionFileMetadata": { diff --git a/syft/pkg/cataloger/cataloger.go 
b/syft/pkg/cataloger/cataloger.go index d7ff4cbdd38..2d358002403 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -46,7 +46,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger { binary.NewCataloger(), deb.NewDpkgdbCataloger(), dotnet.NewDotnetPortableExecutableCataloger(), - golang.NewGoModuleBinaryCataloger(cfg.Go()), + golang.NewGoModuleBinaryCataloger(cfg.Golang), java.NewJavaCataloger(cfg.Java()), java.NewNativeImageCataloger(), javascript.NewPackageCataloger(), @@ -74,8 +74,8 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger { dotnet.NewDotnetPortableExecutableCataloger(), elixir.NewMixLockCataloger(), erlang.NewRebarLockCataloger(), - golang.NewGoModFileCataloger(cfg.Go()), - golang.NewGoModuleBinaryCataloger(cfg.Go()), + golang.NewGoModFileCataloger(cfg.Golang), + golang.NewGoModuleBinaryCataloger(cfg.Golang), haskell.NewHackageCataloger(), java.NewJavaCataloger(cfg.Java()), java.NewJavaGradleLockfileCataloger(), @@ -85,7 +85,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger { nix.NewStoreCataloger(), php.NewComposerLockCataloger(), portage.NewPortageCataloger(), - python.NewPythonIndexCataloger(), + python.NewPythonIndexCataloger(cfg.Python), python.NewPythonPackageCataloger(), rpm.NewFileCataloger(), rpm.NewRpmDBCataloger(), @@ -110,8 +110,8 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { dotnet.NewDotnetPortableExecutableCataloger(), elixir.NewMixLockCataloger(), erlang.NewRebarLockCataloger(), - golang.NewGoModFileCataloger(cfg.Go()), - golang.NewGoModuleBinaryCataloger(cfg.Go()), + golang.NewGoModFileCataloger(cfg.Golang), + golang.NewGoModuleBinaryCataloger(cfg.Golang), haskell.NewHackageCataloger(), java.NewJavaCataloger(cfg.Java()), java.NewJavaGradleLockfileCataloger(), @@ -119,12 +119,12 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { java.NewNativeImageCataloger(), javascript.NewLockCataloger(), javascript.NewPackageCataloger(), - kernel.NewLinuxKernelCataloger(cfg.Kernel()), + 
kernel.NewLinuxKernelCataloger(cfg.LinuxKernel), nix.NewStoreCataloger(), php.NewComposerInstalledCataloger(), php.NewComposerLockCataloger(), portage.NewPortageCataloger(), - python.NewPythonIndexCataloger(), + python.NewPythonIndexCataloger(cfg.Python), python.NewPythonPackageCataloger(), r.NewPackageCataloger(), rpm.NewFileCataloger(), diff --git a/syft/pkg/cataloger/config.go b/syft/pkg/cataloger/config.go index 8a334a78ab7..29c1633c246 100644 --- a/syft/pkg/cataloger/config.go +++ b/syft/pkg/cataloger/config.go @@ -4,6 +4,7 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/pkg/cataloger/java" "github.com/anchore/syft/syft/pkg/cataloger/kernel" + "github.com/anchore/syft/syft/pkg/cataloger/python" ) // TODO: these field naming vs helper function naming schemes are inconsistent. @@ -12,6 +13,7 @@ type Config struct { Search SearchConfig Golang golang.GoCatalogerOpts LinuxKernel kernel.LinuxCatalogerConfig + Python python.CatalogerConfig Catalogers []string Parallelism int } @@ -21,6 +23,7 @@ func DefaultConfig() Config { Search: DefaultSearchConfig(), Parallelism: 1, LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), + Python: python.DefaultCatalogerConfig(), } } @@ -30,11 +33,3 @@ func (c Config) Java() java.Config { SearchIndexedArchives: c.Search.IncludeIndexedArchives, } } - -func (c Config) Go() golang.GoCatalogerOpts { - return c.Golang -} - -func (c Config) Kernel() kernel.LinuxCatalogerConfig { - return c.LinuxKernel -} diff --git a/syft/pkg/cataloger/python/cataloger.go b/syft/pkg/cataloger/python/cataloger.go index 1401c3a2f79..88868255e81 100644 --- a/syft/pkg/cataloger/python/cataloger.go +++ b/syft/pkg/cataloger/python/cataloger.go @@ -6,10 +6,21 @@ import ( const eggInfoGlob = "**/*.egg-info" +type CatalogerConfig struct { + GuessUnpinnedRequirements bool +} + +func DefaultCatalogerConfig() CatalogerConfig { + return CatalogerConfig{ + GuessUnpinnedRequirements: false, + } +} + // NewPythonIndexCataloger 
returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. -func NewPythonIndexCataloger() *generic.Cataloger { +func NewPythonIndexCataloger(cfg CatalogerConfig) *generic.Cataloger { + rqp := newRequirementsParser(cfg) return generic.NewCataloger("python-index-cataloger"). - WithParserByGlobs(parseRequirementsTxt, "**/*requirements*.txt"). + WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt"). WithParserByGlobs(parsePoetryLock, "**/poetry.lock"). WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock"). WithParserByGlobs(parseSetup, "**/setup.py") diff --git a/syft/pkg/cataloger/python/cataloger_test.go b/syft/pkg/cataloger/python/cataloger_test.go index da15f299313..b37f6724c4d 100644 --- a/syft/pkg/cataloger/python/cataloger_test.go +++ b/syft/pkg/cataloger/python/cataloger_test.go @@ -263,7 +263,7 @@ func Test_IndexCataloger_Globs(t *testing.T) { pkgtest.NewCatalogTester(). FromDirectory(t, test.fixture). ExpectsResolverContentQueries(test.expected). 
- TestCataloger(t, NewPythonIndexCataloger()) + TestCataloger(t, NewPythonIndexCataloger(DefaultCatalogerConfig())) }) } } diff --git a/syft/pkg/cataloger/python/parse_requirements.go b/syft/pkg/cataloger/python/parse_requirements.go index 205f8c61a92..ac310b10bb8 100644 --- a/syft/pkg/cataloger/python/parse_requirements.go +++ b/syft/pkg/cataloger/python/parse_requirements.go @@ -7,6 +7,10 @@ import ( "strings" "unicode" + pep440 "github.com/aquasecurity/go-pep440-version" + "github.com/mitchellh/mapstructure" + + "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -14,23 +18,99 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -var _ generic.Parser = parseRequirementsTxt +const ( + // given the example requirement: + // requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \ + // --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \ + // --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment + + // namePattern matches: requests[security] + namePattern = `(?P<name>\w[\w\[\],\s-_]+)` + + // versionConstraintPattern matches: == 2.8.* + versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?` + + // markersPattern matches: python_version < "2.7" and sys_platform == "linux" + markersPattern = `(;(?P<markers>.*))?` + + // hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 + hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?` + + // whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n) + whiteSpaceNoNewlinePattern = `[^\S\r\n]*` ) +var requirementPattern = 
regexp.MustCompile( + `^` + + whiteSpaceNoNewlinePattern + + namePattern + + whiteSpaceNoNewlinePattern + + versionConstraintPattern + + markersPattern + + hashesPattern, +) + +type unprocessedRequirement struct { + Name string `mapstructure:"name"` + VersionConstraint string `mapstructure:"versionConstraint"` + Markers string `mapstructure:"markers"` + URL string `mapstructure:"url"` + Hashes string `mapstructure:"hashes"` +} + +func newRequirement(raw string) *unprocessedRequirement { + var r unprocessedRequirement + + values := internal.MatchNamedCaptureGroups(requirementPattern, raw) + + if err := mapstructure.Decode(values, &r); err != nil { + return nil + } + + r.Name = strings.TrimSpace(r.Name) + r.VersionConstraint = strings.TrimSpace(r.VersionConstraint) + r.Markers = strings.TrimSpace(r.Markers) + r.URL = strings.TrimSpace(r.URL) + r.Hashes = strings.TrimSpace(r.Hashes) + + if r.Name == "" { + return nil + } + + return &r +} + +type requirementsParser struct { + guessUnpinnedRequirements bool +} + +func newRequirementsParser(cfg CatalogerConfig) requirementsParser { + return requirementsParser{ + guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, + } +} + // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // specific version. 
-func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (rp requirementsParser) parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var packages []pkg.Package scanner := bufio.NewScanner(reader) + var lastLine string for scanner.Scan() { - line := scanner.Text() - rawLineNoComments := removeTrailingComment(line) - line = trimRequirementsTxtLine(line) + line := trimRequirementsTxtLine(scanner.Text()) + + if lastLine != "" { + line = lastLine + line + lastLine = "" + } + + // remove line continuations... smashes the file into a single line + if strings.HasSuffix(line, "\\") { + // this line is a continuation of the previous line + lastLine += strings.TrimSuffix(line, "\\") + continue + } if line == "" { // nothing to parse on this line @@ -42,35 +122,20 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L continue } - if !strings.Contains(line, "==") { - // a package without a version, or a range (unpinned) which does not tell us - // exactly what will be installed. 
- continue - } - - // parse a new requirement - parts := strings.Split(line, "==") - if len(parts) < 2 { - // this should never happen, but just in case + req := newRequirement(line) + if req == nil { log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) continue } - // check if the version contains hash declarations on the same line - version, _ := parseVersionAndHashes(parts[1]) - - name := strings.TrimSpace(parts[0]) - version = strings.TrimFunc(version, func(r rune) bool { - return !unicode.IsLetter(r) && !unicode.IsNumber(r) - }) + name := removeExtras(req.Name) + version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) - // TODO: Update to support more than only == - versionConstraint := fmt.Sprintf("== %s", version) - - if name == "" || version == "" { - log.WithFields("path", reader.RealPath).Debugf("found empty package in requirements.txt line: %q", line) + if version == "" { + log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line) continue } + packages = append( packages, newPackageForRequirementsWithMetadata( @@ -78,10 +143,10 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L version, pkg.PythonRequirementsMetadata{ Name: name, - Extras: parseExtras(rawLineNoComments), - VersionConstraint: versionConstraint, - URL: parseURL(rawLineNoComments), - Markers: parseMarkers(rawLineNoComments), + Extras: parseExtras(req.Name), + VersionConstraint: req.VersionConstraint, + URL: parseURL(req.URL), + Markers: req.Markers, }, reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), ), @@ -95,13 +160,68 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L return packages, nil, nil } -func parseVersionAndHashes(version string) (string, []string) { - parts := strings.Split(version, "--hash=") - if len(parts) < 2 { - return version, nil +func 
parseVersion(version string, guessFromConstraint bool) string { + if isPinnedConstraint(version) { + return strings.TrimSpace(strings.ReplaceAll(version, "==", "")) + } + + if guessFromConstraint { + return guessVersion(version) } - return parts[0], parts[1:] + return "" +} + +func isPinnedConstraint(version string) bool { + return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!") +} + +func guessVersion(constraint string) string { + // handle "2.8.*" -> "2.8.0" + constraint = strings.ReplaceAll(constraint, "*", "0") + if isPinnedConstraint(constraint) { + return strings.TrimSpace(strings.ReplaceAll(constraint, "==", "")) + } + + constraints := strings.Split(constraint, ",") + filteredVersions := map[string]struct{}{} + for _, part := range constraints { + if strings.Contains(part, "!=") { + parts := strings.Split(part, "!=") + filteredVersions[strings.TrimSpace(parts[1])] = struct{}{} + } + } + + var closestVersion *pep440.Version + for _, part := range constraints { + // ignore any parts that do not have '=' in them, >,<,~ are not valid semver + parts := strings.SplitAfter(part, "=") + if len(parts) < 2 { + continue + } + version, err := pep440.Parse(strings.TrimSpace(parts[1])) + if err != nil { + // ignore any parts that are not valid semver + continue + } + if _, ok := filteredVersions[version.String()]; ok { + continue + } + + if strings.Contains(part, "==") { + parts := strings.Split(part, "==") + return strings.TrimSpace(parts[1]) + } + + if closestVersion == nil || version.GreaterThan(*closestVersion) { + closestVersion = &version + } + } + if closestVersion == nil { + return "" + } + + return closestVersion.String() } // trimRequirementsTxtLine removes content from the given requirements.txt line @@ -109,8 +229,6 @@ func parseVersionAndHashes(version string) (string, []string) { func trimRequirementsTxtLine(line string) string { line = strings.TrimSpace(line) line = removeTrailingComment(line) - line = 
removeEnvironmentMarkers(line) - line = checkForRegex(line) // remove extras and url from line if found return line } @@ -127,44 +245,29 @@ func removeTrailingComment(line string) string { return parts[0] } -// removeEnvironmentMarkers removes any instances of environment markers (delimited by ';') from the line. -// For more information, see https://www.python.org/dev/peps/pep-0508/#environment-markers. -func removeEnvironmentMarkers(line string) string { - parts := strings.SplitN(line, ";", 2) - if len(parts) < 2 { - // there aren't any environment markers - - return line +func removeExtras(packageName string) string { + start := strings.Index(packageName, "[") + if start == -1 { + return packageName } - return parts[0] + return strings.TrimSpace(packageName[:start]) } func parseExtras(packageName string) []string { - if extrasRegex.MatchString(packageName) { - // Remove square brackets - extras := strings.TrimFunc(extrasRegex.FindString(packageName), func(r rune) bool { - return !unicode.IsLetter(r) && !unicode.IsNumber(r) - }) + var extras []string - // Remove any additional whitespace - extras = strings.ReplaceAll(extras, " ", "") - - return strings.Split(extras, ",") + start := strings.Index(packageName, "[") + stop := strings.Index(packageName, "]") + if start == -1 || stop == -1 { + return extras } - return []string{} -} - -func parseMarkers(line string) string { - var markers string - parts := strings.SplitN(line, ";", 2) - - if len(parts) == 2 { - markers = strings.TrimSpace(parts[1]) + extraString := packageName[start+1 : stop] + for _, extra := range strings.Split(extraString, ",") { + extras = append(extras, strings.TrimSpace(extra)) } - - return markers + return extras } func parseURL(line string) string { @@ -191,19 +294,3 @@ func parseURL(line string) string { return "" } - -// function to check a string for all possilbe regex expressions, replacing it if found -func checkForRegex(stringToCheck string) string { - stringToReturn := stringToCheck - - 
for _, r := range []*regexp.Regexp{ - urlRegex, - extrasRegex, - } { - if r.MatchString(stringToCheck) { - stringToReturn = r.ReplaceAllString(stringToCheck, "") - } - } - - return stringToReturn -} diff --git a/syft/pkg/cataloger/python/parse_requirements_test.go b/syft/pkg/cataloger/python/parse_requirements_test.go index 14313a5993e..355b764e1b0 100644 --- a/syft/pkg/cataloger/python/parse_requirements_test.go +++ b/syft/pkg/cataloger/python/parse_requirements_test.go @@ -3,6 +3,8 @@ package python import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -12,7 +14,8 @@ import ( func TestParseRequirementsTxt(t *testing.T) { fixture := "test-fixtures/requires/requirements.txt" locations := file.NewLocationSet(file.NewLocation(fixture)) - expectedPkgs := []pkg.Package{ + + pinnedPkgs := []pkg.Package{ { Name: "flask", Version: "4.0.0", @@ -23,9 +26,7 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "flask", - Extras: []string{}, VersionConstraint: "== 4.0.0", - URL: "", }, }, { @@ -38,9 +39,7 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "foo", - Extras: []string{}, VersionConstraint: "== 1.0.0", - URL: "", }, }, { @@ -53,9 +52,7 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "SomeProject", - Extras: []string{}, - VersionConstraint: "== 5.4", - URL: "", + VersionConstraint: "==5.4", Markers: "python_version < '3.8'", }, }, @@ -69,9 +66,7 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "argh", - Extras: []string{}, - VersionConstraint: "== 0.26.2", - URL: "", + 
VersionConstraint: "==0.26.2", }, }, { @@ -84,9 +79,7 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "argh", - Extras: []string{}, - VersionConstraint: "== 0.26.3", - URL: "", + VersionConstraint: "==0.26.3", }, }, { @@ -101,23 +94,6 @@ func TestParseRequirementsTxt(t *testing.T) { Name: "celery", Extras: []string{"redis", "pytest"}, VersionConstraint: "== 4.4.7", - URL: "", - }, - }, - { - Name: "requests", - Version: "2.8", - PURL: "pkg:pypi/requests@2.8", - Locations: locations, - Language: pkg.Python, - Type: pkg.PythonPkg, - MetadataType: pkg.PythonRequirementsMetadataType, - Metadata: pkg.PythonRequirementsMetadata{ - Name: "requests", - Extras: []string{"security"}, - VersionConstraint: "== 2.8", - URL: "", - Markers: `python_version < "2.7" and sys_platform == "linux"`, }, }, { @@ -130,14 +106,238 @@ func TestParseRequirementsTxt(t *testing.T) { MetadataType: pkg.PythonRequirementsMetadataType, Metadata: pkg.PythonRequirementsMetadata{ Name: "GithubSampleProject", - Extras: []string{}, VersionConstraint: "== 3.7.1", URL: "git+https://github.com/owner/repo@releases/tag/v3.7.1", }, }, } - var expectedRelationships []artifact.Relationship + var testCases = []struct { + name string + fixture string + cfg CatalogerConfig + expectedPkgs []pkg.Package + expectedRelationships []artifact.Relationship + }{ + { + name: "pinned dependencies only", + fixture: fixture, + cfg: CatalogerConfig{ + GuessUnpinnedRequirements: false, + }, + expectedPkgs: pinnedPkgs, + }, + { + name: "guess unpinned requirements (lowest version)", + fixture: fixture, + cfg: CatalogerConfig{ + GuessUnpinnedRequirements: true, + }, + expectedPkgs: append([]pkg.Package{ + { + Name: "Mopidy-Dirble", + Version: "1.1", + PURL: "pkg:pypi/Mopidy-Dirble@1.1", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonRequirementsMetadataType, + Metadata: 
pkg.PythonRequirementsMetadata{ + Name: "Mopidy-Dirble", + VersionConstraint: "~= 1.1", + }, + }, + { + Name: "sqlalchemy", + Version: "2.0.0", + PURL: "pkg:pypi/sqlalchemy@2.0.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonRequirementsMetadataType, + Metadata: pkg.PythonRequirementsMetadata{ + Name: "sqlalchemy", + VersionConstraint: ">= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0", + }, + }, + { + Name: "bar", + Version: "2.0.0", + PURL: "pkg:pypi/bar@2.0.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonRequirementsMetadataType, + Metadata: pkg.PythonRequirementsMetadata{ + Name: "bar", + VersionConstraint: ">= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0", + }, + }, + { + Name: "numpy", + Version: "3.4.1", + PURL: "pkg:pypi/numpy@3.4.1", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonRequirementsMetadataType, + Metadata: pkg.PythonRequirementsMetadata{ + Name: "numpy", + VersionConstraint: ">= 3.4.1", + Markers: `sys_platform == 'win32'`, + }, + }, + { + Name: "requests", + Version: "2.8.0", + PURL: "pkg:pypi/requests@2.8.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonRequirementsMetadataType, + Metadata: pkg.PythonRequirementsMetadata{ + Name: "requests", + Extras: []string{"security"}, + VersionConstraint: "== 2.8.*", + Markers: `python_version < "2.7" and sys_platform == "linux"`, + }, + }, + }, pinnedPkgs...), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + parser := newRequirementsParser(tc.cfg) + pkgtest.TestFileParser(t, tc.fixture, parser.parseRequirementsTxt, tc.expectedPkgs, tc.expectedRelationships) + }) + } +} + +func Test_newRequirement(t *testing.T) { + + tests := []struct { + name string + raw string + want *unprocessedRequirement + }{ + { + name: "simple", + raw: "requests==2.8", + want: &unprocessedRequirement{ + Name: 
"requests", + VersionConstraint: "==2.8", + }, + }, + { + name: "comment + constraint", + raw: "Mopidy-Dirble ~= 1.1 # Compatible release. Same as >= 1.1, == 1.*", + want: &unprocessedRequirement{ + Name: "Mopidy-Dirble", + VersionConstraint: "~= 1.1", + }, + }, + { + name: "hashes", + raw: "argh==0.26.3 --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65", + want: &unprocessedRequirement{ + Name: "argh", + VersionConstraint: "==0.26.3", + Hashes: "--hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65", + }, + }, + { + name: "extras", + raw: "celery[redis, pytest] == 4.4.7 # should remove [redis, pytest]", + want: &unprocessedRequirement{ + Name: "celery[redis, pytest]", + VersionConstraint: "== 4.4.7", + }, + }, + { + name: "url", + raw: "GithubSampleProject == 3.7.1 @ git+https://github.com/owner/repo@releases/tag/v3.7.1", + want: &unprocessedRequirement{ + Name: "GithubSampleProject", + VersionConstraint: "== 3.7.1", + URL: "git+https://github.com/owner/repo@releases/tag/v3.7.1", + }, + }, + { + name: "markers", + raw: "numpy >= 3.4.1 ; sys_platform == 'win32'", + want: &unprocessedRequirement{ + Name: "numpy", + VersionConstraint: ">= 3.4.1", + Markers: "sys_platform == 'win32'", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, newRequirement(tt.raw)) + }) + } +} - pkgtest.TestFileParser(t, fixture, parseRequirementsTxt, expectedPkgs, expectedRelationships) +// checkout https://www.darius.page/pipdev/ for help here! 
(github.com/nok/pipdev) +func Test_parseVersion(t *testing.T) { + tests := []struct { + name string + version string + guess bool + want string + }{ + { + name: "exact", + version: "1.0.0", + want: "", // we can only parse constraints, not assume that a single version is a pin + }, + { + name: "exact constraint", + version: " == 1.0.0 ", + want: "1.0.0", + }, + { + name: "resolve lowest, simple constraint", + version: " >= 1.0.0 ", + guess: true, + want: "1.0.0", + }, + { + name: "resolve lowest, compound constraint", + version: " < 2.0.0, >= 1.0.0, != 1.1.0 ", + guess: true, + want: "1.0.0", + }, + { + name: "resolve lowest, handle asterisk", + version: "==2.8.*", + guess: true, + want: "2.8.0", + }, + { + name: "resolve lowest, handle exceptions", + version: " !=4.0.2,!=4.1.0,!=4.2.0,>=4.0.1,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0", + guess: true, + want: "4.0.1", + }, + { + name: "resolve lowest, compatible version constraint", + version: "~=0.6.10", // equates to >=0.6.10, ==0.6.* + guess: true, + want: "0.6.10", + }, + { + name: "resolve lowest, with character in version", + version: "~=1.2b,<=1.3a,!=1.1,!=1.2", + guess: true, + want: "1.3a0", // note: 1.3a == 1.3a0 + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, parseVersion(tt.version, tt.guess)) + }) + } } diff --git a/syft/pkg/cataloger/python/test-fixtures/requires/requirements.txt b/syft/pkg/cataloger/python/test-fixtures/requires/requirements.txt index df96fe539d8..188258bb7cc 100644 --- a/syft/pkg/cataloger/python/test-fixtures/requires/requirements.txt +++ b/syft/pkg/cataloger/python/test-fixtures/requires/requirements.txt @@ -1,7 +1,9 @@ flask == 4.0.0 # a line that is ignored -sqlalchemy >= 1.0.0 +sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0 foo == 1.0.0 # a comment that needs to be ignored +bar >= 1.0.0, <= 2.0.0, \ + != 3.0.0, <= 3.0.0 -e https://github.com/pecan/pecan.git -r other-requirements.txt --requirements super-secretrequirements.txt diff 
--git a/syft/pkg/python_requirements_metadata.go b/syft/pkg/python_requirements_metadata.go index 7189e957fc2..161669a446d 100644 --- a/syft/pkg/python_requirements_metadata.go +++ b/syft/pkg/python_requirements_metadata.go @@ -2,8 +2,8 @@ package pkg type PythonRequirementsMetadata struct { Name string `json:"name" mapstruct:"Name"` - Extras []string `json:"extras" mapstruct:"Extras"` + Extras []string `json:"extras,omitempty" mapstruct:"Extras"` VersionConstraint string `json:"versionConstraint" mapstruct:"VersionConstraint"` - URL string `json:"url" mapstruct:"URL"` - Markers string `json:"markers" mapstruct:"Markers"` + URL string `json:"url,omitempty" mapstruct:"URL"` + Markers string `json:"markers,omitempty" mapstruct:"Markers"` }