diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index b5334544e..79dc1e542 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -59,43 +59,6 @@ jobs: docker run --rm -v $(pwd)/sbom.json:/sbom.json --entrypoint "sh" cgr.dev/chainguard/wolfi-base -c "apk add spdx-tools-java && tools-java Verify /sbom.json" done - - name: Verify SBOM External Refs (git-checkout) - if: matrix.example == 'git-checkout.yaml' - run: | - set -euxo pipefail - tar -Oxf packages/x86_64/git-checkout*.apk var/lib/db/sbom > git-checkout.sbom.json - - # Verify APK ref - grep '"pkg:apk/unknown/git-checkout@v0.0.1-r0?arch=x86_64"' git-checkout.sbom.json - - # Verify github tag ref - grep '"pkg:github/puerco/hello.git@v0.0.1"' sbom.json git-checkout.sbom.json - - # Verify github sha ref - grep '"pkg:github/puerco/hello.git@a73c4feb284dc6ed1e5758740f717f99dcd4c9d7"' git-checkout.sbom.json - - # Verify generic git ref - grep '"pkg:generic/hello@v0.0.1?vcs_url=git%2Bhttps%3A%2F%2Fgitlab.com%2Fxnox%2Fhello.git%40a73c4feb284dc6ed1e5758740f717f99dcd4c9d7"' git-checkout.sbom.json - - # Verify ConfigFile ref - grep '"pkg:github/chainguard-dev/melange@${{github.sha}}#examples/git-checkout.yaml"' git-checkout.sbom.json - - - name: Verify SBOM External Refs (gnu-hello) - if: matrix.example == 'gnu-hello.yaml' - run: | - set -euxo pipefail - tar -Oxf packages/x86_64/hello-2*.apk var/lib/db/sbom > hello.sbom.json - - # Verify generic fetch ref - grep '"pkg:generic/hello@2.12?checksum=sha256%3Acf04af86dc085268c5f4470fbae49b18afbc221b78096aab842d934a76bad0ab\\u0026download_url=https%3A%2F%2Fftp.gnu.org%2Fgnu%2Fhello%2Fhello-2.12.tar.gz"' hello.sbom.json - - - name: Check packages can be installed with apk - run: | - set -euxo pipefail - for f in packages/x86_64/*.apk; do - docker run --rm -v $(pwd):/work cgr.dev/chainguard/wolfi-base apk add --allow-untrusted /work/$f - done - bootstrap: name: bootstrap package runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 7da0a340a..1a01ea5b9 100644 --- a/Makefile +++ b/Makefile @@ -145,12 +145,19 @@ log-%: lint: checkfmt setup-golangci-lint ## Run linters and checks like golangci-lint $(GOLANGCI_LINT_BIN) run --verbose --concurrency 4 --skip-dirs .modcache ./... -.PHONY: test -test: +.PHONY: unit +unit: go test ./... -race +.PHONY: integration +integration: + go test ./... -race -tags=integration + +.PHONY: test +test: integration + .PHONY: test-e2e -test-e2e: test generate +test-e2e: generate # This is invoked by a separate GHA workflow, so not combining it with the other test targets. go test -tags e2e ./... 
-race cd e2e-tests && ./run-tests diff --git a/docs/md/melange_build.md b/docs/md/melange_build.md index 6366437b5..0940bfc1a 100644 --- a/docs/md/melange_build.md +++ b/docs/md/melange_build.md @@ -34,6 +34,7 @@ melange build [flags] --build-option strings build options to enable --cache-dir string directory used for cached inputs (default "./melange-cache/") --cache-source string directory or bucket used for preloading the cache + --cleanup when enabled, the temp dir used for the guest will be cleaned up after completion (default true) --cpu string default CPU resources to use for builds --create-build-log creates a package.log file containing a list of packages that were built by the command --debug enables debug logging of build pipelines @@ -43,11 +44,14 @@ melange build [flags] --empty-workspace whether the build workspace should be empty --env-file string file to use for preloaded environment variables --generate-index whether to generate APKINDEX.tar.gz (default true) + --git-commit string commit hash of the git repository containing the build config file (defaults to detecting HEAD) + --git-repo-url string URL of the git repository containing the build config file (defaults to detecting from configured git remotes) --guest-dir string directory used for the build environment guest -h, --help help for build --ignore-signatures ignore repository signature verification -i, --interactive when enabled, attaches stdin with a tty to the pod on failure -k, --keyring-append strings path to extra keys to include in the build environment keyring + --license string license to use for the build config file itself (default "NOASSERTION") --lint-require strings linters that must pass (default [dev,infodir,tempdir,varempty]) --lint-warn strings linters that will generate warnings (default [object,opt,python/docs,python/multiple,python/test,setuidgid,srv,strip,usrlocal,worldwrite]) --memory string default memory resources to use for builds diff --git a/go.mod b/go.mod index 4d6c9ad19..ccc15d9aa 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,7 @@ require ( github.com/package-url/packageurl-go v0.1.3 github.com/pkg/errors v0.9.1 github.com/psanford/memfs v0.0.0-20230130182539-4dbf7e3e865e + github.com/spdx/tools-golang v0.5.5 github.com/spf13/cobra v1.8.1 github.com/stretchr/testify v1.9.0 github.com/yookoala/realpath v1.0.0 diff --git a/go.sum b/go.sum index 10529a8e5..bda0288f3 100644 --- a/go.sum +++ b/go.sum @@ -59,6 +59,7 @@ github.com/ProtonMail/go-crypto v1.0.0 h1:LRuvITjQWX+WIfr930YHG2HNfjR1uOfyf5vE0k github.com/ProtonMail/go-crypto v1.0.0/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0= github.com/adrg/xdg v0.5.0 h1:dDaZvhMXatArP1NPHhnfaQUqWBLBsmx1h1HXQdMoFCY= github.com/adrg/xdg v0.5.0/go.mod h1:dDdY4M4DF9Rjy4kHPeNL+ilVF+p2lK8IdM9/rTSGcI4= +github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= @@ -441,6 +442,9 @@ github.com/smallstep/assert v0.0.0-20200723003110-82e2b9b3b262 h1:unQFBIznI+VYD1 github.com/smallstep/assert v0.0.0-20200723003110-82e2b9b3b262/go.mod h1:MyOHs9Po2fbM1LHej6sBUT8ozbxmMOFG+E+rx/GSGuc= github.com/sosodev/duration v1.3.1 
h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4= github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg= +github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb/go.mod h1:uKWaldnbMnjsSAXRurWqqrdyZen1R7kxl8TkmWk2OyM= +github.com/spdx/tools-golang v0.5.5 h1:61c0KLfAcNqAjlg6UNMdkwpMernhw3zVRwDZ2x9XOmk= +github.com/spdx/tools-golang v0.5.5/go.mod h1:MVIsXx8ZZzaRWNQpUDhC4Dud34edUYJYecciXgrw5vE= github.com/spf13/afero v1.2.0/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= @@ -452,6 +456,7 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -460,6 +465,7 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/theupdateframework/go-tuf v0.7.0 h1:CqbQFrWo1ae3/I0UCblSbczevCCbS31Qvs5LdxRWqRI= @@ -744,3 +750,4 @@ sigs.k8s.io/release-utils v0.8.5 h1:FUtFqEAN621gSXv0L7kHyWruBeS7TUU9aWf76olX7uQ= sigs.k8s.io/release-utils v0.8.5/go.mod h1:qsm5bdxdgoHkD8HsXpgme2/c3mdsNaiV53Sz2HmKeJA= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/pkg/build/build.go b/pkg/build/build.go index 559317b16..852f82c52 100644 --- a/pkg/build/build.go +++ b/pkg/build/build.go @@ -18,6 +18,7 @@ import ( "archive/tar" "compress/gzip" "context" + "encoding/json" "errors" "fmt" "io" @@ -35,10 +36,9 @@ import ( apko_build "chainguard.dev/apko/pkg/build" apko_types "chainguard.dev/apko/pkg/build/types" "chainguard.dev/apko/pkg/options" + "chainguard.dev/apko/pkg/sbom/generator/spdx" "cloud.google.com/go/storage" "github.com/chainguard-dev/clog" - "github.com/go-git/go-git/v5" - "github.com/go-git/go-git/v5/storage/filesystem" purl "github.com/package-url/packageurl-go" "github.com/yookoala/realpath" "github.com/zealic/xignore" @@ -55,11 +55,27 @@ import ( "chainguard.dev/melange/pkg/sbom" ) +const melangeOutputDirName = "melange-out" + var ErrSkipThisArch = errors.New("error: skip this arch") type Build struct { - Configuration config.Configuration - ConfigFile string + Configuration config.Configuration + + // The name of the build 
configuration file, e.g. "crane.yaml". + ConfigFile string + + // The URL of the git repository where the build configuration file is stored, + // e.g. "https://github.com/wolfi-dev/os". + ConfigFileRepositoryURL string + + // The commit hash of the git repository corresponding to the current state of + // the build configuration file. + ConfigFileRepositoryCommit string + + // The SPDX license string to use for the build configuration file. + ConfigFileLicense string + SourceDateEpoch time.Time WorkspaceDir string WorkspaceIgnore string @@ -103,8 +119,10 @@ type Build struct { EnabledBuildOptions []string - // mutated by Compile - externalRefs []purl.PackageURL + // Initialized in New and mutated throughout the build process as we gain + // visibility into our packages' (including subpackages') composition. This is + // how we get "build-time" SBOMs! + SBOMGroup *SBOMGroup } func New(ctx context.Context, opts ...Option) (*Build, error) { @@ -164,6 +182,15 @@ func New(ctx context.Context, opts ...Option) (*Build, error) { if b.ConfigFile == "" { return nil, fmt.Errorf("melange.yaml is missing") } + if b.ConfigFileRepositoryURL == "" { + return nil, fmt.Errorf("config file repository URL was not set") + } + if b.ConfigFileRepositoryCommit == "" { + return nil, fmt.Errorf("config file repository commit was not set") + } + if b.Runner == nil { + return nil, fmt.Errorf("no runner was specified") + } parsedCfg, err := config.ParseConfiguration(ctx, b.ConfigFile, @@ -173,6 +200,7 @@ func New(ctx context.Context, opts ...Option) (*Build, error) { config.WithDefaultDisk(b.DefaultDisk), config.WithDefaultMemory(b.DefaultMemory), config.WithDefaultTimeout(b.DefaultTimeout), + config.WithCommit(b.ConfigFileRepositoryCommit), ) if err != nil { return nil, fmt.Errorf("failed to load configuration: %w", err) @@ -180,6 +208,10 @@ func New(ctx context.Context, opts ...Option) (*Build, error) { b.Configuration = *parsedCfg + // Now that we can find out the names of all the packages we'll be producing, we + // can start tracking SBOM data for each of them, using our SBOMGroup type. + b.SBOMGroup = NewSBOMGroup(slices.Collect(b.Configuration.AllPackageNames())...) + if len(b.Configuration.Package.TargetArchitecture) == 1 && b.Configuration.Package.TargetArchitecture[0] == "all" { log.Warnf("target-architecture: ['all'] is deprecated and will become an error; remove this field to build for all available archs") @@ -196,6 +228,7 @@ func New(ctx context.Context, opts ...Option) (*Build, error) { } b.SourceDateEpoch = t } + b.SBOMGroup.SetCreatedTime(b.SourceDateEpoch) // Check that we actually can run things in containers. if b.Runner != nil && !b.Runner.TestUsability(ctx) { @@ -494,69 +527,25 @@ func (b *Build) IsBuildLess() bool { return len(b.Configuration.Pipeline) == 0 } -// ConfigFileExternalRef calculates ExternalRef for the melange config -// file itself. -func (b *Build) ConfigFileExternalRef() (*purl.PackageURL, error) { - // TODO(luhring): This is now the second implementation of finding the commit - // for the config file (the first being the "detectedCommit" logic. We should - // unify these. - - // configFile must exist - configpath, err := filepath.Abs(b.ConfigFile) - if err != nil { - return nil, err - } - // If not a git repository, skip - opt := &git.PlainOpenOptions{DetectDotGit: true} - r, err := git.PlainOpenWithOptions(configpath, opt) - if err != nil { - return nil, nil +// getBuildConfigPURL determines the package URL for the melange config file +// itself. 
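
// buildConfigPURLSketch is a hypothetical illustration (not used by the build)
// of what getBuildConfigPURL produces. The repository URL, commit, and config
// path below are the same assumed values the integration test uses; the result
// renders as "pkg:github/wolfi-dev/os@c0ffee#testdata/build_configs/crane.yaml",
// matching the reference locator in the crane golden SBOM.
func buildConfigPURLSketch() (string, error) {
	b := Build{
		ConfigFile:                 "testdata/build_configs/crane.yaml",
		ConfigFileRepositoryURL:    "https://github.com/wolfi-dev/os",
		ConfigFileRepositoryCommit: "c0ffee",
	}
	u, err := b.getBuildConfigPURL()
	if err != nil {
		return "", err
	}
	return u.ToString(), nil
}
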
+func (b Build) getBuildConfigPURL() (*purl.PackageURL, error) { + namespace, name, found := strings.Cut(strings.TrimPrefix(b.ConfigFileRepositoryURL, "https://github.com/"), "/") + if !found { + return nil, fmt.Errorf("extracting namespace and name from %s", b.ConfigFileRepositoryURL) } - // TODO(luhring): This is brittle and assumes a specific git remote configuration. - // We should consider a more general approach, and this may be moot when we - // unify our git state detection mechanisms. - - // If no remote origin, skip (local git repo) - remote, err := r.Remote("origin") - if err != nil { - return nil, nil - } - repository := remote.Config().URLs[0] - // Only supports github-actions style https github checkouts - if !strings.HasPrefix(repository, "https://github.com/") { - return nil, nil - } - namespace, name, _ := strings.Cut(strings.TrimPrefix(repository, "https://github.com/"), "/") - - // Head must exist - ref, err := r.Head() - if err != nil { - return nil, err - } - version := ref.Hash() - - // Try to get configfile as subpath in the repository - s, ok := r.Storer.(*filesystem.Storage) - if !ok { - return nil, errors.New("Repository storage is not filesystem.Storage") - } - base := filepath.Dir(s.Filesystem().Root()) - subpath, err := filepath.Rel(base, configpath) - if err != nil { - return nil, err - } - newpurl := &purl.PackageURL{ - Type: "github", + u := &purl.PackageURL{ + Type: purl.TypeGithub, Namespace: namespace, Name: name, - Version: version.String(), - Subpath: subpath, + Version: b.ConfigFileRepositoryCommit, + Subpath: b.ConfigFile, } - if err := newpurl.Normalize(); err != nil { - return nil, err + if err := u.Normalize(); err != nil { + return nil, fmt.Errorf("normalizing PURL: %w", err) } - return newpurl, nil + return u, nil } func (b *Build) PopulateCache(ctx context.Context) error { @@ -688,6 +677,11 @@ func (b *Build) BuildPackage(ctx context.Context) error { b.Summarize(ctx) + namespace := b.Namespace + if namespace == "" { + namespace = "unknown" + } + if to := b.Configuration.Package.Timeout; to > 0 { tctx, cancel := context.WithTimeoutCause(ctx, to, fmt.Errorf("build exceeded its timeout of %s", to)) @@ -696,6 +690,38 @@ func (b *Build) BuildPackage(ctx context.Context) error { } pkg := &b.Configuration.Package + arch := b.Arch.ToAPK() + + // Add the APK package(s) to their respective SBOMs. We do this early in the + // build process so that we can later add more kinds of packages that relate to + // these packages, as we learn more during the build. 
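	// For orientation: in the crane golden SBOM used by the integration test
	// (testdata/goldenfiles/sboms/crane-0.20.2-r1.spdx.json), the document
	// describes the APK package ("pkg:apk/wolfi/crane@0.20.2-r1?arch=x86_64"),
	// which is in turn DESCRIBED_BY the build configuration file package and
	// GENERATED_FROM the upstream source package, both added further below.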
+ for _, sp := range b.Configuration.Subpackages { + sp := sp + spSBOM := b.SBOMGroup.Document(sp.Name) + + apkSubPkg := &sbom.Package{ + Name: sp.Name, + Version: pkg.FullVersion(), + Copyright: pkg.FullCopyright(), + LicenseDeclared: pkg.LicenseExpression(), + Namespace: namespace, + Arch: arch, + PURL: pkg.PackageURLForSubpackage(namespace, arch, sp.Name), + } + spSBOM.AddPackageAndSetDescribed(apkSubPkg) + } + + pSBOM := b.SBOMGroup.Document(pkg.Name) + apkPkg := &sbom.Package{ + Name: pkg.Name, + Version: pkg.FullVersion(), + Copyright: pkg.FullCopyright(), + LicenseDeclared: pkg.LicenseExpression(), + Namespace: namespace, + Arch: arch, + PURL: pkg.PackageURL(namespace, arch), + } + pSBOM.AddPackageAndSetDescribed(apkPkg) if b.GuestDir == "" { guestDir, err := os.MkdirTemp(b.Runner.TempDir(), "melange-guest-*") @@ -728,16 +754,8 @@ func (b *Build) BuildPackage(ctx context.Context) error { return !result }) - configFileRef, err := b.ConfigFileExternalRef() - if err != nil { - return fmt.Errorf("failed to create ExternalRef for configfile: %w", err) - } - - if configFileRef != nil { - // In SPDX v3 there is dedicate field for this - // https://spdx.github.io/spdx-spec/v3.0/model/Build/Properties/configSourceUri/ - log.Infof("adding external ref %s for ConfigFile", configFileRef) - b.externalRefs = append(b.externalRefs, *configFileRef) + if err := b.addSBOMPackageForBuildConfigFile(); err != nil { + return fmt.Errorf("adding SBOM package for build config file: %w", err) } pr := &pipelineRunner{ @@ -761,7 +779,7 @@ func (b *Build) BuildPackage(ctx context.Context) error { } } - if err := os.MkdirAll(filepath.Join(b.WorkspaceDir, "melange-out", b.Configuration.Package.Name), 0o755); err != nil { + if err := os.MkdirAll(filepath.Join(b.WorkspaceDir, melangeOutputDirName, b.Configuration.Package.Name), 0o755); err != nil { return err } @@ -808,10 +826,25 @@ func (b *Build) BuildPackage(ctx context.Context) error { // run the main pipeline log.Debug("running the main pipeline") - if err := pr.runPipelines(ctx, b.Configuration.Pipeline); err != nil { + pipelines := b.Configuration.Pipeline + if err := pr.runPipelines(ctx, pipelines); err != nil { return fmt.Errorf("unable to run package %s pipeline: %w", b.Configuration.Name(), err) } + for _, p := range pipelines { + pkg, err := p.SBOMPackageForUpstreamSource(b.Configuration.Package.LicenseExpression(), namespace) + if err != nil { + return fmt.Errorf("creating SBOM package for upstream source: %w", err) + } + + if pkg == nil { + // This particular pipeline step doesn't tell us about the upstream source code. 
+ continue + } + + b.SBOMGroup.AddUpstreamSourcePackage(pkg) + } + // add the main package to the linter queue lintTarget := linterTarget{ pkgName: b.Configuration.Package.Name, @@ -820,11 +853,6 @@ func (b *Build) BuildPackage(ctx context.Context) error { linterQueue = append(linterQueue, lintTarget) } - namespace := b.Namespace - if namespace == "" { - namespace = "unknown" - } - // run any pipelines for subpackages for _, sp := range b.Configuration.Subpackages { sp := sp @@ -838,7 +866,7 @@ func (b *Build) BuildPackage(ctx context.Context) error { } } - if err := os.MkdirAll(filepath.Join(b.WorkspaceDir, "melange-out", sp.Name), 0o755); err != nil { + if err := os.MkdirAll(filepath.Join(b.WorkspaceDir, melangeOutputDirName, sp.Name), 0o755); err != nil { return err } @@ -852,8 +880,8 @@ func (b *Build) BuildPackage(ctx context.Context) error { // Retrieve the post build workspace from the runner log.Infof("retrieving workspace from builder: %s", cfg.PodID) - fs := apkofs.DirFS(b.WorkspaceDir) - if err := b.RetrieveWorkspace(ctx, fs); err != nil { + fsys := apkofs.DirFS(b.WorkspaceDir) + if err := b.RetrieveWorkspace(ctx, fsys); err != nil { return fmt.Errorf("retrieving workspace: %w", err) } log.Infof("retrieved and wrote post-build workspace to: %s", b.WorkspaceDir) @@ -861,7 +889,7 @@ func (b *Build) BuildPackage(ctx context.Context) error { // perform package linting for _, lt := range linterQueue { log.Infof("running package linters for %s", lt.pkgName) - path := filepath.Join(b.WorkspaceDir, "melange-out", lt.pkgName) + path := filepath.Join(b.WorkspaceDir, melangeOutputDirName, lt.pkgName) // Downgrade disabled checks from required to warn require := slices.DeleteFunc(b.LintRequire, func(s string) bool { @@ -876,48 +904,29 @@ func (b *Build) BuildPackage(ctx context.Context) error { } } - licensinginfos, err := b.Configuration.Package.LicensingInfos(b.WorkspaceDir) + li, err := b.Configuration.Package.LicensingInfos(b.WorkspaceDir) if err != nil { - return err + return fmt.Errorf("gathering licensing infos: %w", err) } + b.SBOMGroup.SetLicensingInfos(li) - // generate SBOMs for subpackages - for _, sp := range b.Configuration.Subpackages { - sp := sp + // Convert the SBOMs we've been working on to their SPDX representation, and + // write them to disk. We'll handle any subpackages first, and then the main + // package, but the order doesn't really matter. 
- log.Infof("generating SBOM for subpackage %s", sp.Name) - - apkFSPath := filepath.Join(b.WorkspaceDir, "melange-out", sp.Name) - if err := sbom.GenerateAndWrite(ctx, apkFSPath, &sbom.Spec{ - PackageName: sp.Name, - PackageVersion: fmt.Sprintf("%s-r%d", b.Configuration.Package.Version, b.Configuration.Package.Epoch), - License: b.Configuration.Package.LicenseExpression(), - LicensingInfos: licensinginfos, - ExternalRefs: b.externalRefs, - Copyright: b.Configuration.Package.FullCopyright(), - Namespace: namespace, - Arch: b.Arch.ToAPK(), - SourceDateEpoch: b.SourceDateEpoch, - }); err != nil { - return fmt.Errorf("writing SBOMs: %w", err) + for _, sp := range b.Configuration.Subpackages { + spSBOM := b.SBOMGroup.Document(sp.Name) + spdxDoc := spSBOM.ToSPDX(ctx) + log.Infof("writing SBOM for subpackage %s", sp.Name) + if err := b.writeSBOM(sp.Name, &spdxDoc); err != nil { + return fmt.Errorf("writing SBOM for %s: %w", sp.Name, err) } } - log.Infof("generating SBOM for %s", b.Configuration.Package.Name) - - apkFSPath := filepath.Join(b.WorkspaceDir, "melange-out", b.Configuration.Package.Name) - if err := sbom.GenerateAndWrite(ctx, apkFSPath, &sbom.Spec{ - PackageName: b.Configuration.Package.Name, - PackageVersion: fmt.Sprintf("%s-r%d", b.Configuration.Package.Version, b.Configuration.Package.Epoch), - License: b.Configuration.Package.LicenseExpression(), - LicensingInfos: licensinginfos, - ExternalRefs: b.externalRefs, - Copyright: b.Configuration.Package.FullCopyright(), - Namespace: namespace, - Arch: b.Arch.ToAPK(), - SourceDateEpoch: b.SourceDateEpoch, - }); err != nil { - return fmt.Errorf("writing SBOMs: %w", err) + spdxDoc := pSBOM.ToSPDX(ctx) + log.Infof("writing SBOM for %s", pkg.Name) + if err := b.writeSBOM(pkg.Name, &spdxDoc); err != nil { + return fmt.Errorf("writing SBOM for %s: %w", pkg.Name, err) } // emit main package @@ -985,6 +994,59 @@ func (b *Build) BuildPackage(ctx context.Context) error { return nil } +// writeSBOM encodes the given SPDX document to JSON and writes it to the +// filesystem in the directory `/var/lib/db/sbom`. The pkgName parameter should +// be set to the name of the origin package or subpackage. 
+func (b Build) writeSBOM(pkgName string, doc *spdx.Document) error { + apkFSPath := filepath.Join(b.WorkspaceDir, melangeOutputDirName, pkgName) + sbomDirPath := filepath.Join(apkFSPath, "/var/lib/db/sbom") + if err := os.MkdirAll(sbomDirPath, os.FileMode(0755)); err != nil { + return fmt.Errorf("creating SBOM directory: %w", err) + } + + pkgVersion := b.Configuration.Package.FullVersion() + sbomPath := getPathForPackageSBOM(sbomDirPath, pkgName, pkgVersion) + f, err := os.Create(sbomPath) + if err != nil { + return fmt.Errorf("opening SBOM file for writing: %w", err) + } + + enc := json.NewEncoder(f) + enc.SetIndent("", " ") + enc.SetEscapeHTML(true) + + if err := enc.Encode(doc); err != nil { + return fmt.Errorf("encoding SPDX SBOM: %w", err) + } + + return nil +} + +func (b *Build) addSBOMPackageForBuildConfigFile() error { + buildConfigPURL, err := b.getBuildConfigPURL() + if err != nil { + return fmt.Errorf("getting PURL for build config: %w", err) + } + + b.SBOMGroup.AddBuildConfigurationPackage(&sbom.Package{ + Name: b.ConfigFile, + Version: b.ConfigFileRepositoryCommit, + LicenseDeclared: b.ConfigFileLicense, + Namespace: b.Namespace, + Arch: "", // This field doesn't make sense in this context + PURL: buildConfigPURL, + }) + + return nil +} + +func getPathForPackageSBOM(sbomDirPath, pkgName, pkgVersion string) string { + return filepath.Join( + sbomDirPath, + fmt.Sprintf("%s-%s.spdx.json", pkgName, pkgVersion), + ) +} + func (b *Build) SummarizePaths(ctx context.Context) { log := clog.FromContext(ctx) log.Infof(" workspace dir: %s", b.WorkspaceDir) diff --git a/pkg/build/build_integration_test.go b/pkg/build/build_integration_test.go new file mode 100644 index 000000000..c93bf1c69 --- /dev/null +++ b/pkg/build/build_integration_test.go @@ -0,0 +1,140 @@ +//go:build integration +// +build integration + +package build + +import ( + "archive/tar" + "compress/gzip" + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "io" + + "chainguard.dev/melange/pkg/container" + "chainguard.dev/melange/pkg/container/docker" + "github.com/google/go-cmp/cmp" +) + +func TestBuild_BuildPackage(t *testing.T) { + tests := []struct { + name string + expectedVersion string + }{ + { + name: "crane", + expectedVersion: "0.20.2-r1", + }, + } + + const arch = "x86_64" + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + p := filepath.Join("testdata", "build_configs", tt.name) + ".yaml" + + t.Run("builds successfully", func(t *testing.T) { + ctx := context.Background() + + // NOTE: Ideally we have one runner that works everywhere to make it easier to + // work on these tests. But until then, we'll try to use the most appropriate + // runner for the environment. 
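			// (This file is gated behind the "integration" build tag -- see the
			// //go:build directive at the top -- so it compiles only under the new
			// Makefile target, i.e. `go test ./... -race -tags=integration`, and is
			// skipped by the plain unit-test run.)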
+ r := getRunner(ctx, t) + + b, err := New( + ctx, + WithConfig(p), + WithOutDir(tempDir), + WithArch(arch), + WithConfigFileRepositoryURL("https://github.com/wolfi-dev/os"), + WithConfigFileRepositoryCommit("c0ffee"), + WithRunner(r), + WithNamespace("wolfi"), + WithExtraRepos([]string{"https://packages.wolfi.dev/os"}), + WithExtraKeys([]string{"https://packages.wolfi.dev/os/wolfi-signing.rsa.pub"}), + ) + if err != nil { + t.Fatalf("setting up build: %v", err) + } + + if err := b.BuildPackage(ctx); err != nil { + t.Fatalf("building package: %v", err) + } + + t.Run("sbom correctness", func(t *testing.T) { + apkPath := filepath.Join(tempDir, arch, fmt.Sprintf("%s-%s.apk", tt.name, tt.expectedVersion)) + apkFile, err := os.Open(apkPath) + if err != nil { + t.Fatalf("opening apk: %v", err) + } + defer apkFile.Close() + + gr, err := gzip.NewReader(apkFile) + if err != nil { + t.Fatalf("creating gzip reader: %v", err) + } + defer gr.Close() + + tr := tar.NewReader(gr) + var sbom io.Reader + sbomPath := fmt.Sprintf("var/lib/db/sbom/%s-%s.spdx.json", tt.name, tt.expectedVersion) + for { + hdr, err := tr.Next() + if err != nil { + t.Fatalf("reading tar header: %v", err) + } + if hdr.Name == sbomPath { + sbom = tr + break + } + } + if sbom == nil { + t.Fatalf("SBOM not found in apk: %s", sbomPath) + } + + expectedSBOMPath := filepath.Join("testdata", "goldenfiles", "sboms", fmt.Sprintf("%s-%s.spdx.json", tt.name, tt.expectedVersion)) + expectedSbomFile, err := os.Open(expectedSBOMPath) + if err != nil { + t.Fatalf("opening expected SBOM: %v", err) + } + + expected, err := io.ReadAll(expectedSbomFile) + if err != nil { + t.Fatalf("reading expected SBOM: %v", err) + } + actual, err := io.ReadAll(sbom) + if err != nil { + t.Fatalf("reading actual SBOM: %v", err) + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Fatalf("SBOMs differ: \n%s\n", diff) + } + }) + }) + }) + } +} + +func getRunner(ctx context.Context, t *testing.T) container.Runner { + t.Helper() + + if r := container.BubblewrapRunner(true); r.TestUsability(ctx) { + return r + } + + r, err := docker.NewRunner(ctx) + if err != nil { + t.Fatalf("creating docker runner: %v", err) + } + if r.TestUsability(ctx) { + return r + } + + t.Fatal("no usable runner found") + return nil +} diff --git a/pkg/build/compile.go b/pkg/build/compile.go index 517b4c5d8..9a2c6b83f 100644 --- a/pkg/build/compile.go +++ b/pkg/build/compile.go @@ -25,10 +25,11 @@ import ( "chainguard.dev/melange/pkg/config" "chainguard.dev/melange/pkg/util" "github.com/chainguard-dev/clog" - purl "github.com/package-url/packageurl-go" "gopkg.in/yaml.v3" ) +const unidentifiablePipeline = "???" + func (t *Test) Compile(ctx context.Context) error { cfg := t.Configuration @@ -180,16 +181,12 @@ func (b *Build) Compile(ctx context.Context) error { te.Packages = append(te.Packages, b.Configuration.Package.Name) } - b.externalRefs = c.ExternalRefs - return nil } type Compiled struct { PipelineDirs []string - Needs []string - ExternalRefs []purl.PackageURL } func (c *Compiled) CompilePipelines(ctx context.Context, sm *SubstitutionMap, pipelines []config.Pipeline) error { @@ -289,14 +286,6 @@ func (c *Compiled) compilePipeline(ctx context.Context, sm *SubstitutionMap, pip } } - // Compute external refs for this pipeline. - externalRefs, err := computeExternalRefs(uses, mutated) - if err != nil { - return fmt.Errorf("computing external refs: %w", err) - } - - c.ExternalRefs = append(c.ExternalRefs, externalRefs...) 
- for i := range pipeline.Pipeline { p := &pipeline.Pipeline[i] @@ -339,7 +328,8 @@ func identity(p *config.Pipeline) string { if p.Uses != "" { return p.Uses } - return "???" + + return unidentifiablePipeline } func (c *Compiled) gatherDeps(ctx context.Context, pipeline *config.Pipeline) error { diff --git a/pkg/build/options.go b/pkg/build/options.go index 7dea92f34..3902538a8 100644 --- a/pkg/build/options.go +++ b/pkg/build/options.go @@ -34,6 +34,27 @@ func WithConfig(configFile string) Option { } } +func WithConfigFileRepositoryURL(u string) Option { + return func(b *Build) error { + b.ConfigFileRepositoryURL = u + return nil + } +} + +func WithConfigFileRepositoryCommit(hash string) Option { + return func(b *Build) error { + b.ConfigFileRepositoryCommit = hash + return nil + } +} + +func WithConfigFileLicense(license string) Option { + return func(b *Build) error { + b.ConfigFileLicense = license + return nil + } +} + // WithLintRequire sets required linter checks. func WithLintRequire(linters []string) Option { return func(b *Build) error { diff --git a/pkg/build/package.go b/pkg/build/package.go index 1f793555c..2bb45ee61 100644 --- a/pkg/build/package.go +++ b/pkg/build/package.go @@ -141,7 +141,7 @@ func (pc *PackageBuild) Filename() string { } func (pc *PackageBuild) WorkspaceSubdir() string { - return filepath.Join(pc.Build.WorkspaceDir, "melange-out", pc.PackageName) + return filepath.Join(pc.Build.WorkspaceDir, melangeOutputDirName, pc.PackageName) } var controlTemplate = `# Generated by melange diff --git a/pkg/build/pipeline.go b/pkg/build/pipeline.go index 687501e21..b1b140c62 100644 --- a/pkg/build/pipeline.go +++ b/pkg/build/pipeline.go @@ -26,14 +26,12 @@ import ( "strconv" "strings" - "github.com/chainguard-dev/clog" - purl "github.com/package-url/packageurl-go" - apko_types "chainguard.dev/apko/pkg/build/types" "chainguard.dev/melange/pkg/cond" "chainguard.dev/melange/pkg/config" "chainguard.dev/melange/pkg/container" "chainguard.dev/melange/pkg/util" + "github.com/chainguard-dev/clog" ) func (sm *SubstitutionMap) MutateWith(with map[string]string) (map[string]string, error) { @@ -202,7 +200,7 @@ func (r *pipelineRunner) runPipeline(ctx context.Context, pipeline *config.Pipel defer stop() } - if id := identity(pipeline); id != "???" 
{ + if id := identity(pipeline); id != unidentifiablePipeline { log.Infof("running step %q", id) } @@ -315,81 +313,5 @@ func shouldRun(ifs string) (bool, error) { return result, nil } -// computeExternalRefs generates PURLs for subpipelines -func computeExternalRefs(uses string, with map[string]string) ([]purl.PackageURL, error) { - var purls []purl.PackageURL - var newpurl purl.PackageURL - - switch uses { - case "fetch": - args := make(map[string]string) - args["download_url"] = with["${{inputs.uri}}"] - if len(with["${{inputs.expected-sha256}}"]) > 0 { - args["checksum"] = "sha256:" + with["${{inputs.expected-sha256}}"] - } - if len(with["${{inputs.expected-sha512}}"]) > 0 { - args["checksum"] = "sha512:" + with["${{inputs.expected-sha512}}"] - } - newpurl = purl.PackageURL{ - Type: "generic", - Name: with["${{inputs.purl-name}}"], - Version: with["${{inputs.purl-version}}"], - Qualifiers: purl.QualifiersFromMap(args), - } - if err := newpurl.Normalize(); err != nil { - return nil, err - } - purls = append(purls, newpurl) - - case "git-checkout": - repository := with["${{inputs.repository}}"] - if strings.HasPrefix(repository, "https://github.com/") { - namespace, name, _ := strings.Cut(strings.TrimPrefix(repository, "https://github.com/"), "/") - versions := []string{ - with["${{inputs.tag}}"], - with["${{inputs.expected-commit}}"], - } - for _, version := range versions { - if version != "" { - newpurl = purl.PackageURL{ - Type: "github", - Namespace: namespace, - Name: name, - Version: version, - } - if err := newpurl.Normalize(); err != nil { - return nil, err - } - purls = append(purls, newpurl) - } - } - } else { - // Create nice looking package name, last component of uri, without .git - name := strings.TrimSuffix(filepath.Base(repository), ".git") - // Encode vcs_url with git+ prefix and @commit suffix - vcsUrl := "git+" + repository - if len(with["${{inputs.expected-commit}}"]) > 0 { - vcsUrl = vcsUrl + "@" + with["${{inputs.expected-commit}}"] - } - // Use tag as version - version := "" - if len(with["${{inputs.tag}}"]) > 0 { - version = with["${{inputs.tag}}"] - } - newpurl = purl.PackageURL{ - Type: "generic", - Name: name, - Version: version, - Qualifiers: purl.QualifiersFromMap(map[string]string{"vcs_url": vcsUrl}), - } - if err := newpurl.Normalize(); err != nil { - return nil, err - } - purls = append(purls, newpurl) - } - } - return purls, nil -} - //go:embed pipelines/* var f embed.FS diff --git a/pkg/build/sbom_group.go b/pkg/build/sbom_group.go new file mode 100644 index 000000000..f79df8d81 --- /dev/null +++ b/pkg/build/sbom_group.go @@ -0,0 +1,67 @@ +package build + +import ( + "time" + + "chainguard.dev/melange/pkg/sbom" + "github.com/spdx/tools-golang/spdx/v2/common" +) + +// An SBOMGroup stores SBOMs corresponding to each package (or subpackage) +// within a build group. Its purpose is to let the build process easily manage +// SBOMs for the 1-N number of packages it ends up emitting. +type SBOMGroup struct { + set map[string]*sbom.Document +} + +// NewSBOMGroup creates a new SBOMGroup, initializing SBOMs for each package and +// subpackage name provided. +func NewSBOMGroup(pkgNames ...string) *SBOMGroup { + sg := &SBOMGroup{ + set: make(map[string]*sbom.Document), + } + + for _, n := range pkgNames { + doc := sbom.NewDocument() + sg.set[n] = doc + } + + return sg +} + +// SetCreatedTime sets the creation time for all SBOMs in the group. 
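
// sbomGroupFlowSketch is a hypothetical illustration (not called anywhere) of
// how BuildPackage drives an SBOMGroup: one document per package name, a shared
// creation time, an APK package set as the described package of its document,
// and shared packages (such as the build configuration file) fanned out to
// every document. The package names and versions here are assumptions.
func sbomGroupFlowSketch(createdAt time.Time) {
	sg := NewSBOMGroup("crane", "crane-doc")
	sg.SetCreatedTime(createdAt)

	doc := sg.Document("crane")
	doc.AddPackageAndSetDescribed(&sbom.Package{
		Name:    "crane",
		Version: "0.20.2-r1",
	})

	sg.AddBuildConfigurationPackage(&sbom.Package{
		Name:    "crane.yaml",
		Version: "c0ffee",
	})
}
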
+func (sg *SBOMGroup) SetCreatedTime(t time.Time) { + for _, doc := range sg.set { + doc.CreatedTime = t + } +} + +// SetLicensingInfos sets the licensing information for all SBOMs in the group. +func (sg *SBOMGroup) SetLicensingInfos(li map[string]string) { + for _, doc := range sg.set { + doc.LicensingInfos = li + } +} + +// Document retrieves the SBOM for the given package or subpackage name. +func (sg *SBOMGroup) Document(name string) *sbom.Document { + return sg.set[name] +} + +// AddBuildConfigurationPackage adds a package serving as the "build +// configuration package" to all SBOMs in the group. +func (sg *SBOMGroup) AddBuildConfigurationPackage(p *sbom.Package) { + for _, doc := range sg.set { + doc.AddPackage(p) + doc.AddRelationship(doc.Describes, p, common.TypeRelationshipDescribeBy) + } +} + +// AddUpstreamSourcePackage adds a package serving as an "upstream source +// package" to all SBOMs in the group. +func (sg *SBOMGroup) AddUpstreamSourcePackage(p *sbom.Package) { + for _, doc := range sg.set { + doc.AddPackage(p) + doc.AddRelationship(doc.Describes, p, common.TypeRelationshipGeneratedFrom) + } +} diff --git a/pkg/build/sca_interface.go b/pkg/build/sca_interface.go index 969bd3c5e..66fb6c359 100644 --- a/pkg/build/sca_interface.go +++ b/pkg/build/sca_interface.go @@ -54,7 +54,7 @@ func (scabi *SCABuildInterface) Version() string { // FilesystemForRelative implements an abstract filesystem for any of the packages being // built. func (scabi *SCABuildInterface) FilesystemForRelative(pkgName string) (sca.SCAFS, error) { - pkgDir := filepath.Join(scabi.PackageBuild.Build.WorkspaceDir, "melange-out", pkgName) + pkgDir := filepath.Join(scabi.PackageBuild.Build.WorkspaceDir, melangeOutputDirName, pkgName) rlFS := readlinkFS(pkgDir) scaFS, ok := rlFS.(sca.SCAFS) if !ok { diff --git a/pkg/build/testdata/build_configs/crane.yaml b/pkg/build/testdata/build_configs/crane.yaml new file mode 100644 index 000000000..3eb59fcf1 --- /dev/null +++ b/pkg/build/testdata/build_configs/crane.yaml @@ -0,0 +1,64 @@ +package: + name: crane + version: 0.20.2 + epoch: 1 + description: Tool for interacting with remote images and registries. 
+ copyright: + - license: Apache-2.0 + dependencies: + runtime: + - ca-certificates-bundle + +environment: + contents: + packages: + - busybox + - ca-certificates-bundle + - go + environment: + CGO_ENABLED: "0" + +pipeline: + - uses: git-checkout + with: + repository: https://github.com/google/go-containerregistry + tag: v${{package.version}} + expected-commit: c195f151efe3369874c72662cd69ad43ee485128 + + - uses: go/build + with: + packages: ./cmd/crane + ldflags: -s -w -buildid= -X github.com/google/go-containerregistry/cmd/crane/cmd.Version=${{package.version}} -X github.com/google/go-containerregistry/pkg/v1/remote/transport.Version=${{package.version}} + output: crane + + - uses: strip + +update: + enabled: true + github: + identifier: google/go-containerregistry + strip-prefix: v + +test: + environment: + contents: + packages: + - jq + pipeline: + - name: Verify Crane installation + runs: | + crane version || exit 1 + crane --help + - name: Fetch and verify manifest + runs: | + crane manifest chainguard/static | jq '.schemaVersion' | grep '2' || exit 1 + - name: List tags for a public image + runs: | + crane ls chainguard/static | grep -E 'latest|v[0-9]+.[0-9]+.[0-9]+' || exit 1 + - name: Validate image existence + runs: | + crane digest chainguard/static:latest && echo "Image exists" || exit 1 + - name: Pull and save an image locally + runs: | + crane pull chainguard/static:latest static_latest.tar || exit 1 + [ -f static_latest.tar ] || exit 1 diff --git a/pkg/build/testdata/goldenfiles/sboms/crane-0.20.2-r1.spdx.json b/pkg/build/testdata/goldenfiles/sboms/crane-0.20.2-r1.spdx.json new file mode 100644 index 000000000..cd412d88c --- /dev/null +++ b/pkg/build/testdata/goldenfiles/sboms/crane-0.20.2-r1.spdx.json @@ -0,0 +1,87 @@ +{ + "SPDXID": "SPDXRef-DOCUMENT", + "name": "apk-crane-0.20.2-r1", + "spdxVersion": "SPDX-2.3", + "creationInfo": { + "created": "0001-01-01T00:00:00Z", + "creators": [ + "Tool: melange (devel)", + "Organization: Chainguard, Inc" + ], + "licenseListVersion": "3.22" + }, + "dataLicense": "CC0-1.0", + "documentNamespace": "https://spdx.org/spdxdocs/chainguard/melange/f5eb3a5b5887866fa76fe4eb2b7b5165f07c9505", + "documentDescribes": [ + "SPDXRef-Package-crane-0.20.2-r1" + ], + "packages": [ + { + "SPDXID": "SPDXRef-Package-crane-0.20.2-r1", + "name": "crane", + "versionInfo": "0.20.2-r1", + "filesAnalyzed": false, + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "Apache-2.0", + "downloadLocation": "NOASSERTION", + "originator": "Organization: Wolfi", + "supplier": "Organization: Wolfi", + "copyrightText": "\n", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceLocator": "pkg:apk/wolfi/crane@0.20.2-r1?arch=x86_64", + "referenceType": "purl" + } + ] + }, + { + "SPDXID": "SPDXRef-Package-testdata-buildC95configs-crane.yaml-c0ffee", + "name": "testdata/build_configs/crane.yaml", + "versionInfo": "c0ffee", + "filesAnalyzed": false, + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "NOASSERTION", + "downloadLocation": "NOASSERTION", + "originator": "Organization: Wolfi", + "supplier": "Organization: Wolfi", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceLocator": "pkg:github/wolfi-dev/os@c0ffee#testdata/build_configs/crane.yaml", + "referenceType": "purl" + } + ] + }, + { + "SPDXID": "SPDXRef-Package-github.com-google-go-containerregistry-v0.20.2-c195f151efe3369874c72662cd69ad43ee485128", + "name": "go-containerregistry", + "versionInfo": "v0.20.2", + "filesAnalyzed": false, + 
"licenseConcluded": "NOASSERTION", + "licenseDeclared": "Apache-2.0", + "downloadLocation": "NOASSERTION", + "originator": "Organization: Google", + "supplier": "Organization: Google", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceLocator": "pkg:github/google/go-containerregistry@v0.20.2", + "referenceType": "purl" + } + ] + } + ], + "relationships": [ + { + "spdxElementId": "SPDXRef-Package-crane-0.20.2-r1", + "relationshipType": "DESCRIBED_BY", + "relatedSpdxElement": "SPDXRef-Package-testdata-buildC95configs-crane.yaml-c0ffee" + }, + { + "spdxElementId": "SPDXRef-Package-crane-0.20.2-r1", + "relationshipType": "GENERATED_FROM", + "relatedSpdxElement": "SPDXRef-Package-github.com-google-go-containerregistry-v0.20.2-c195f151efe3369874c72662cd69ad43ee485128" + } + ] +} diff --git a/pkg/cli/build.go b/pkg/cli/build.go index 47f7111c4..6209619ba 100644 --- a/pkg/cli/build.go +++ b/pkg/cli/build.go @@ -22,6 +22,7 @@ import ( "os" "path/filepath" "runtime" + "slices" "strings" "time" @@ -32,6 +33,8 @@ import ( "chainguard.dev/melange/pkg/container/docker" "chainguard.dev/melange/pkg/linter" "github.com/chainguard-dev/clog" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/transport" "github.com/spf13/cobra" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" @@ -76,6 +79,10 @@ func buildCmd() *cobra.Command { var libc string var lintRequire, lintWarn []string var ignoreSignatures bool + var cleanup bool + var configFileGitCommit string + var configFileGitRepoURL string + var configFileLicense string var traceFile string @@ -87,6 +94,7 @@ func buildCmd() *cobra.Command { Args: cobra.MinimumNArgs(0), RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() + log := clog.FromContext(ctx) if traceFile != "" { w, err := os.Create(traceFile) @@ -103,7 +111,7 @@ func buildCmd() *cobra.Command { defer func() { if err := tp.Shutdown(context.WithoutCancel(ctx)); err != nil { - clog.FromContext(ctx).Errorf("shutting down trace provider: %v", err) + log.Errorf("shutting down trace provider: %v", err) } }() @@ -117,6 +125,25 @@ func buildCmd() *cobra.Command { return err } + // Favor explicit, user-provided information for the git provenance of the + // melange build definition. As a fallback, detect this from local git state. + // Git auto-detection should be "best effort" and not fail the build if it + // fails. + if configFileGitCommit == "" || configFileGitRepoURL == "" { + gs, err := detectGitState(ctx) + if err != nil { + log.Warnf("failed to auto-detect git information: %v", err) + } else { + if configFileGitCommit == "" { + configFileGitCommit = gs.commit + } + if configFileGitRepoURL == "" { + // To form e.g. 
"github.com/wolfi-dev/os" + configFileGitRepoURL = fmt.Sprintf("https://%s/%s/%s", gs.repoHost, gs.repoPathParent, gs.repoName) + } + } + } + archs := apko_types.ParseArchitectures(archstrs) options := []build.Option{ build.WithBuildDate(buildDate), @@ -157,6 +184,9 @@ func buildCmd() *cobra.Command { build.WithTimeout(timeout), build.WithLibcFlavorOverride(libc), build.WithIgnoreSignatures(ignoreSignatures), + build.WithConfigFileRepositoryCommit(configFileGitCommit), + build.WithConfigFileRepositoryURL(configFileGitRepoURL), + build.WithConfigFileLicense(configFileLicense), } if len(args) > 0 { @@ -224,6 +254,10 @@ func buildCmd() *cobra.Command { cmd.Flags().StringSliceVar(&lintRequire, "lint-require", linter.DefaultRequiredLinters(), "linters that must pass") cmd.Flags().StringSliceVar(&lintWarn, "lint-warn", linter.DefaultWarnLinters(), "linters that will generate warnings") cmd.Flags().BoolVar(&ignoreSignatures, "ignore-signatures", false, "ignore repository signature verification") + cmd.Flags().BoolVar(&cleanup, "cleanup", true, "when enabled, the temp dir used for the guest will be cleaned up after completion") + cmd.Flags().StringVar(&configFileGitCommit, "git-commit", "", "commit hash of the git repository containing the build config file (defaults to detecting HEAD)") + cmd.Flags().StringVar(&configFileGitRepoURL, "git-repo-url", "", "URL of the git repository containing the build config file (defaults to detecting from configured git remotes)") + cmd.Flags().StringVar(&configFileLicense, "license", "NOASSERTION", "license to use for the build config file itself") _ = cmd.Flags().Bool("fail-on-lint-warning", false, "DEPRECATED: DO NOT USE") _ = cmd.Flags().MarkDeprecated("fail-on-lint-warning", "use --lint-require and --lint-warn instead") @@ -231,6 +265,91 @@ func buildCmd() *cobra.Command { return cmd } +type gitState struct { + // The current git commit, from which we're building packages. e.g. "0c1f2c1f3fe13e8b01f4fccbc5880525739d74a2". + commit string + + // e.g. "github.com" (as in from "github.com/wolfi-dev/os") + repoHost string + + // e.g. "wolfi-dev" + repoPathParent string + + // e.g. "os" + repoName string +} + +// Detect the git state of the current directory +func detectGitState(_ context.Context) (*gitState, error) { + allowedRepoOwners := []string{ + "wolfi-dev", + "chainguard-dev", + } + + repo, err := git.PlainOpenWithOptions(".", &git.PlainOpenOptions{DetectDotGit: true}) + if err != nil { + return nil, fmt.Errorf("opening git repository: %w", err) + } + + head, err := repo.Head() + if err != nil { + return nil, fmt.Errorf("determining HEAD: %w", err) + } + commit := head.Hash().String() + + remotes, err := repo.Remotes() + if err != nil { + return nil, fmt.Errorf("getting remotes: %w", err) + } + for _, r := range remotes { + for _, u := range r.Config().URLs { + ep, err := transport.NewEndpoint(u) + if err != nil { + // This URL isn't usable for detection, but we should keep trying. 
+ continue + } + + host, repoPathParent, repoName := parseGitTransportEndpoint(ep) + if host != "github.com" { + continue + } + if !slices.Contains(allowedRepoOwners, repoPathParent) { + continue + } + if repoName == "" { + continue + } + + return &gitState{ + repoHost: host, + repoPathParent: repoPathParent, + repoName: repoName, + commit: commit, + }, nil + } + } + + return nil, errors.New("no usable git remote found") +} + +func parseGitTransportEndpoint(ep *transport.Endpoint) (host string, repoPathParent string, repoName string) { + if ep == nil { + return + } + + p := ep.Path + p = strings.TrimSuffix(p, ".git") + p = strings.TrimPrefix(p, "/") + parts := strings.Split(p, "/") + if len(parts) < 2 { + return + } + + host = ep.Host + repoPathParent, repoName = parts[0], parts[1] + return +} + func getRunner(ctx context.Context, runner string, remove bool) (container.Runner, error) { if runner != "" { switch runner { diff --git a/pkg/config/config.go b/pkg/config/config.go index 4ba13464f..1a94e39d9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -20,7 +20,9 @@ import ( "errors" "fmt" "io/fs" + "iter" "os" + "path" "path/filepath" "regexp" "sort" @@ -29,15 +31,18 @@ import ( "time" apko_types "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/melange/pkg/sbom" + purl "github.com/package-url/packageurl-go" "github.com/chainguard-dev/clog" - "github.com/go-git/go-git/v5" "github.com/joho/godotenv" "gopkg.in/yaml.v3" "chainguard.dev/melange/pkg/util" ) +const purlTypeAPK = "apk" + type Trigger struct { // Optional: The script to run Script string `json:"script,omitempty"` @@ -91,7 +96,7 @@ type Package struct { Version string `json:"version" yaml:"version"` // The monotone increasing epoch of the package Epoch uint64 `json:"epoch" yaml:"epoch"` - // A human readable description of the package + // A human-readable description of the package Description string `json:"description,omitempty" yaml:"description,omitempty"` // The URL to the package's homepage URL string `json:"url,omitempty" yaml:"url,omitempty"` @@ -123,18 +128,39 @@ type Resources struct { Disk string `json:"disk,omitempty" yaml:"disk,omitempty"` } -// PackageURL returns the package URL ("purl") for the package. For more -// information, see https://github.com/package-url/purl-spec#purl. -func (p Package) PackageURL(distro string) string { - const typ = "apk" - version := fmt.Sprintf("%s-r%d", p.Version, p.Epoch) +// PackageURL returns the package URL ("purl") for the APK (origin) package. +func (p Package) PackageURL(distro, arch string) *purl.PackageURL { + return newAPKPackageURL(distro, p.Name, p.FullVersion(), arch) +} - return fmt.Sprintf("pkg:%s/%s/%s@%s", - typ, - distro, - p.Name, - version, - ) +// PackageURLForSubpackage returns the package URL ("purl") for the APK +// subpackage. +func (p Package) PackageURLForSubpackage(distro, arch, subpackage string) *purl.PackageURL { + return newAPKPackageURL(distro, subpackage, p.FullVersion(), arch) +} + +func newAPKPackageURL(distro, name, version, arch string) *purl.PackageURL { + u := &purl.PackageURL{ + Type: purlTypeAPK, + Namespace: distro, + Name: name, + Version: version, + } + + if arch != "" { + u.Qualifiers = append(u.Qualifiers, purl.Qualifier{ + Key: "arch", + Value: arch, + }) + } + + return u +} + +// FullVersion returns the full version of the APK package produced by the +// build, including the epoch. 
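
// packageURLSketch is a hypothetical illustration (the "crane-doc" subpackage is
// made up; the rest mirrors the crane test data): a package named "crane" at
// version "0.20.2" with epoch 1 has FullVersion "0.20.2-r1", and for distro
// "wolfi" on arch "x86_64" its purls render as
// "pkg:apk/wolfi/crane@0.20.2-r1?arch=x86_64" and
// "pkg:apk/wolfi/crane-doc@0.20.2-r1?arch=x86_64".
func packageURLSketch() []string {
	p := Package{Name: "crane", Version: "0.20.2", Epoch: 1}
	return []string{
		p.FullVersion(),
		p.PackageURL("wolfi", "x86_64").ToString(),
		p.PackageURLForSubpackage("wolfi", "x86_64", "crane-doc").ToString(),
	}
}
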
+func (p Package) FullVersion() string { + return fmt.Sprintf("%s-r%d", p.Version, p.Epoch) } func (cfg *Configuration) applySubstitutionsForProvides() error { @@ -276,9 +302,9 @@ type Copyright struct { LicensePath string `json:"license-path,omitempty" yaml:"license-path,omitempty"` } -// LicenseExpression returns an SPDX license expression formed from the -// data in the copyright structs found in the conf. Its a simple OR for now. -func (p *Package) LicenseExpression() string { +// LicenseExpression returns an SPDX license expression formed from the data in +// the copyright structs found in the conf. It's a simple OR for now. +func (p Package) LicenseExpression() string { licenseExpression := "" if p.Copyright == nil { return licenseExpression @@ -292,9 +318,13 @@ func (p *Package) LicenseExpression() string { return licenseExpression } -// Returns array of ExtractedLicensingInfos formed from the data in -// the copyright structs found in the conf. -func (p *Package) LicensingInfos(WorkspaceDir string) (map[string]string, error) { +// LicensingInfos looks at the `Package.Copyright[].LicensePath` fields of the +// parsed build configuration for the package. If this value has been set, +// LicensingInfos opens the file at this path from the build's workspace +// directory, and reads in the license content. LicensingInfos then returns a +// map of the `Copyright.License` field to the string content of the file from +// `.LicensePath`. +func (p Package) LicensingInfos(WorkspaceDir string) (map[string]string, error) { licenseInfos := make(map[string]string) for _, cp := range p.Copyright { if cp.LicensePath != "" { @@ -310,7 +340,7 @@ func (p *Package) LicensingInfos(WorkspaceDir string) (map[string]string, error) // FullCopyright returns the concatenated copyright expressions defined // in the configuration file. -func (p *Package) FullCopyright() string { +func (p Package) FullCopyright() string { copyright := "" for _, cp := range p.Copyright { copyright += cp.Attestation + "\n" @@ -344,7 +374,7 @@ type Pipeline struct { Runs string `json:"runs,omitempty" yaml:"runs,omitempty"` // Optional: The list of pipelines to run. // - // Each pipeline runs in it's own context that is not shared between other + // Each pipeline runs in its own context that is not shared between other // pipelines. To share context between pipelines, nest a pipeline within an // existing pipeline. This can be useful when you wish to share common // configuration, such as an alternative `working-directory`. @@ -367,6 +397,158 @@ type Pipeline struct { Environment map[string]string `json:"environment,omitempty" yaml:"environment,omitempty"` } +// SBOMPackageForUpstreamSource returns an SBOM package for the upstream source +// of the package, if this Pipeline step was used to bring source code from an +// upstream project into the build. This function helps with generating SBOMs +// for the package being built. If the pipeline step is not a fetch or +// git-checkout step, this function returns nil and no error. +func (p Pipeline) SBOMPackageForUpstreamSource(licenseDeclared, supplier string) (*sbom.Package, error) { + // TODO: It'd be great to detect the license from the source code itself. Such a + // feature could even eliminate the need for the package's license field in the + // build configuration. 
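	// For orientation (the git-checkout values come from the crane test data; the
	// fetch case is an assumed example): a git-checkout of
	// https://github.com/google/go-containerregistry at tag v0.20.2 yields a
	// package whose purl is "pkg:github/google/go-containerregistry@v0.20.2",
	// while a fetch step yields a "generic" purl carrying download_url and
	// checksum qualifiers.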
+ + uses, with := p.Uses, p.With + + switch uses { + case "fetch": + args := make(map[string]string) + args["download_url"] = with["uri"] + + expectedSHA256 := with["expected-sha256"] + if len(expectedSHA256) > 0 { + args["checksum"] = "sha256:" + expectedSHA256 + } + expectedSHA512 := with["expected-sha512"] + if len(expectedSHA512) > 0 { + args["checksum"] = "sha512:" + expectedSHA512 + } + + // These get defaulted correctly from within the fetch pipeline definition + // (YAML) itself. + pkgName := with["purl-name"] + pkgVersion := with["purl-version"] + + pu := &purl.PackageURL{ + Type: "generic", + Name: pkgName, + Version: pkgVersion, + Qualifiers: purl.QualifiersFromMap(args), + } + if err := pu.Normalize(); err != nil { + return nil, err + } + + return &sbom.Package{ + Name: pkgName, + Version: pkgVersion, + Namespace: supplier, + PURL: pu, + }, nil + + case "git-checkout": + repo := with["repository"] + branch := with["branch"] + tag := with["tag"] + expectedCommit := with["expected-commit"] + + // We'll use all available data to ensure our SBOM's package ID is unique, even + // when the same repo is git-checked out multiple times. + var idComponents []string + repoCleaned := func() string { + s := strings.TrimPrefix(repo, "https://") + s = strings.TrimPrefix(s, "http://") + return s + }() + for _, component := range []string{repoCleaned, branch, tag, expectedCommit} { + if component != "" { + idComponents = append(idComponents, component) + } + } + + if strings.HasPrefix(repo, "https://github.com/") { + namespace, name, _ := strings.Cut(strings.TrimPrefix(repo, "https://github.com/"), "/") + + // Prefer tag to commit, but use only ONE of these. + + versions := []string{ + tag, + expectedCommit, + } + + for _, v := range versions { + if v == "" { + continue + } + + pu := &purl.PackageURL{ + Type: purl.TypeGithub, + Namespace: namespace, + Name: name, + Version: v, + } + if err := pu.Normalize(); err != nil { + return nil, err + } + + return &sbom.Package{ + IDComponents: idComponents, + Name: name, + Version: v, + LicenseDeclared: licenseDeclared, + Namespace: namespace, + PURL: pu, + }, nil + } + + // If we get here, we have a GitHub repo but no tag or commit. Without version + // information, we can't create a sensible SBOM package. + // + // TODO: Decide if this should be an error condition. + + return nil, nil + } + + // Create nice looking package name, last component of uri, without .git + name := strings.TrimSuffix(path.Base(repo), ".git") + + // Encode vcs_url with git+ prefix and @commit suffix + vcsUrl := "git+" + repo + + if len(expectedCommit) > 0 { + vcsUrl += "@" + expectedCommit + } + + // Use tag as version + version := "" + if len(tag) > 0 { + version = tag + } + + pu := purl.PackageURL{ + Type: "generic", + Name: name, + Version: version, + Qualifiers: purl.QualifiersFromMap(map[string]string{"vcs_url": vcsUrl}), + } + if err := pu.Normalize(); err != nil { + return nil, err + } + + return &sbom.Package{ + IDComponents: idComponents, + Name: name, + Version: version, + LicenseDeclared: licenseDeclared, + Namespace: supplier, + PURL: &pu, + }, nil + } + + // This is not a fetch or git-checkout step. + + return nil, nil +} + type Subpackage struct { // Optional: A conditional statement to evaluate for the subpackage If string `json:"if,omitempty" yaml:"if,omitempty"` @@ -393,26 +575,8 @@ type Subpackage struct { Test *Test `json:"test,omitempty" yaml:"test,omitempty"` } -// PackageURL returns the package URL ("purl") for the subpackage. 
For more -// information, see https://github.com/package-url/purl-spec#purl. -func (spkg Subpackage) PackageURL(distro, packageVersionWithRelease string) string { - const typ = "apk" - - return fmt.Sprintf("pkg:%s/%s/%s@%s", - typ, - distro, - spkg.Name, - packageVersionWithRelease, - ) -} - -type SBOM struct { - // Optional: The language of the generated SBOM - Language string `json:"language" yaml:"language"` -} - type Input struct { - // Optional: The human readable description of the input + // Optional: The human-readable description of the input Description string `json:"description,omitempty"` // Optional: The default value of the input. Required when the input is. Default string `json:"default,omitempty"` @@ -451,6 +615,22 @@ type Configuration struct { root *yaml.Node } +// AllPackageNames returns a sequence of all package names in the configuration, +// i.e. the origin package name and the names of all subpackages. +func (cfg Configuration) AllPackageNames() iter.Seq[string] { + return func(yield func(string) bool) { + if !yield(cfg.Package.Name) { + return + } + + for _, sp := range cfg.Subpackages { + if !yield(sp.Name) { + return + } + } + } +} + type Test struct { // Additional Environment necessary for test. // Environment.Contents.Packages automatically get @@ -462,7 +642,9 @@ type Test struct { Pipeline []Pipeline `json:"pipeline" yaml:"pipeline"` } -// Name returns a name for the configuration, using the package name. +// Name returns a name for the configuration, using the package name. This +// implements the configs.Configuration interface in wolfictl and is important +// to keep as long as that package is in use. func (cfg Configuration) Name() string { return cfg.Package.Name } @@ -695,6 +877,7 @@ type configOptions struct { envFilePath string cpu, memory, disk string timeout time.Duration + commit string varsFilePath string } @@ -740,6 +923,12 @@ func WithFS(filesystem fs.FS) ConfigurationParsingOption { } } +func WithCommit(hash string) ConfigurationParsingOption { + return func(options *configOptions) { + options.commit = hash + } +} + // WithEnvFileForParsing set the paths from which to read an environment file. func WithEnvFileForParsing(path string) ConfigurationParsingOption { return func(options *configOptions) { @@ -755,29 +944,6 @@ func WithVarsFileForParsing(path string) ConfigurationParsingOption { } } -func detectCommit(ctx context.Context, dirPath string) string { - // TODO(luhring): Heads up, a similar implementation was added after this one. - // We should unify these implementations. See Build.ConfigFileExternalRef. - - log := clog.FromContext(ctx) - // Best-effort detection of current commit, to be used when not specified in the config file - - // TODO: figure out how to use an abstract FS - repo, err := git.PlainOpen(dirPath) - if err != nil { - log.Debugf("unable to detect git commit for build configuration: %v", err) - return "" - } - - head, err := repo.Head() - if err != nil { - return "" - } - - commit := head.Hash().String() - return commit -} - // buildConfigMap builds a map used to prepare a replacer for variable substitution. func buildConfigMap(cfg *Configuration) map[string]string { out := map[string]string{ @@ -931,10 +1097,10 @@ func replaceScriptlets(r *strings.Replacer, in *Scriptlets) *Scriptlets { } } -// default to detectedCommit unless commit is explicitly specified -func replaceCommit(detectedCommit string, in string) string { +// default to value of in parameter unless commit is explicitly specified. 
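// In other words (hypothetical values): replaceCommit("c0ffee", "") returns
// "c0ffee", the commit supplied via WithCommit (now sourced from --git-commit or
// git auto-detection), while replaceCommit("c0ffee", "deadbeef") returns
// "deadbeef", because a commit set explicitly in the configuration wins.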
+func replaceCommit(commit string, in string) string { if in == "" { - return detectedCommit + return commit } return in } @@ -949,14 +1115,14 @@ func replaceDependencies(r *strings.Replacer, in Dependencies) Dependencies { } } -func replacePackage(r *strings.Replacer, detectedCommit string, in Package) Package { +func replacePackage(r *strings.Replacer, commit string, in Package) Package { return Package{ Name: r.Replace(in.Name), Version: r.Replace(in.Version), Epoch: in.Epoch, Description: r.Replace(in.Description), URL: r.Replace(in.URL), - Commit: replaceCommit(detectedCommit, in.Commit), + Commit: replaceCommit(commit, in.Commit), TargetArchitecture: replaceAll(r, in.TargetArchitecture), Copyright: in.Copyright, Dependencies: replaceDependencies(r, in.Dependencies), @@ -1057,7 +1223,7 @@ func (cfg *Configuration) propagatePipelines() { } // ParseConfiguration returns a decoded build Configuration using the parsing options provided. -func ParseConfiguration(ctx context.Context, configurationFilePath string, opts ...ConfigurationParsingOption) (*Configuration, error) { +func ParseConfiguration(_ context.Context, configurationFilePath string, opts ...ConfigurationParsingOption) (*Configuration, error) { options := &configOptions{} configurationDirPath := filepath.Dir(configurationFilePath) options.include(opts...) @@ -1132,9 +1298,7 @@ func ParseConfiguration(ctx context.Context, configurationFilePath string, opts replacer := replacerFromMap(configMap) - detectedCommit := detectCommit(ctx, configurationDirPath) - - cfg.Package = replacePackage(replacer, detectedCommit, cfg.Package) + cfg.Package = replacePackage(replacer, options.commit, cfg.Package) cfg.Pipeline = replacePipelines(replacer, cfg.Pipeline) @@ -1351,24 +1515,6 @@ func validateDependenciesPriorities(deps Dependencies) error { return nil } -// PackageURLs returns a list of package URLs ("purls") for the given -// configuration. The first PURL is always the origin package, and any subsequent -// items are the PURLs for the Configuration's subpackages. For more information -// on PURLs, see https://github.com/package-url/purl-spec#purl. -func (cfg Configuration) PackageURLs(distro string) []string { - var purls []string - - p := cfg.Package - purls = append(purls, p.PackageURL(distro)) - - for _, subpackage := range cfg.Subpackages { - version := fmt.Sprintf("%s-r%d", p.Version, p.Epoch) - purls = append(purls, subpackage.PackageURL(distro, version)) - } - - return purls -} - // Summarize lists the dependencies that are configured in a dependency set. func (dep *Dependencies) Summarize(ctx context.Context) { log := clog.FromContext(ctx) diff --git a/pkg/container/bubblewrap_runner.go b/pkg/container/bubblewrap_runner.go index 250bcd953..1e127875a 100644 --- a/pkg/container/bubblewrap_runner.go +++ b/pkg/container/bubblewrap_runner.go @@ -49,7 +49,7 @@ func (bw *bubblewrap) Close() error { return nil } -// Name name of the runner +// Name of the runner. func (bw *bubblewrap) Name() string { return BubblewrapName } diff --git a/pkg/sbom/bom.go b/pkg/sbom/bom.go deleted file mode 100644 index 1d634396d..000000000 --- a/pkg/sbom/bom.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2022 Chainguard, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package sbom captures the internal data model of the SBOMs melange produces -// into a private, generalized bill of materials model (with relationship data) -// designed to be transcoded to specific formats. -package sbom - -import ( - "fmt" - - purl "github.com/package-url/packageurl-go" -) - -type element interface { - ID() string -} - -type pkg struct { - FilesAnalyzed bool - id string - Name string - Version string - HomePage string - Supplier string - Originator string - Copyright string - LicenseDeclared string - LicenseConcluded string - Namespace string - Arch string - Checksums map[string]string - Relationships []relationship - ExternalRefs []purl.PackageURL -} - -func (p *pkg) ID() string { - if p.id != "" { - return fmt.Sprintf("SPDXRef-Package-%s", p.id) - } - return "SPDXRef-Package-" + p.Name -} - -type relationship struct { - Source element - Target element - Type string -} diff --git a/pkg/sbom/document.go b/pkg/sbom/document.go new file mode 100644 index 000000000..f5adda98d --- /dev/null +++ b/pkg/sbom/document.go @@ -0,0 +1,117 @@ +package sbom + +import ( + "context" + "crypto/sha1" + "encoding/hex" + "fmt" + "time" + + "chainguard.dev/apko/pkg/sbom/generator/spdx" + "sigs.k8s.io/release-utils/version" +) + +// Document is a representation of an SBOM information provided by the build +// process. It is later converted to an SPDX document. +type Document struct { + CreatedTime time.Time + Describes *Package + Packages []Package + + // Relationships is a list of relationships between elements in the SBOM. + // + // We're using the SPDX relationship type for now out of convenience, but we can + // decouple this from our internal SBOM types later if it becomes valuable. + Relationships []spdx.Relationship + + // LicensingInfos is a map of instances of the `Copyright.License` field in the + // described package's build configuration to the string content of the file + // from its corresponding `Copyright.LicensePath` field. It should be set by the + // consumer, using the value from calling `(config.Package).LicensingInfos` on + // the package being set as this document's described package. + LicensingInfos map[string]string +} + +// NewDocument creates a new Document. +func NewDocument() *Document { + return &Document{} +} + +// ToSPDX returns the Document converted to its SPDX representation. 
+func (d Document) ToSPDX(ctx context.Context) spdx.Document { + spdxPkgs := make([]spdx.Package, 0, len(d.Packages)) + for _, p := range d.Packages { + spdxPkgs = append(spdxPkgs, p.ToSPDX(ctx)) + } + + licensingInfos := make([]spdx.LicensingInfo, 0, len(d.LicensingInfos)) + for licenseID, extractedText := range d.LicensingInfos { + licensingInfos = append(licensingInfos, + spdx.LicensingInfo{ + LicenseID: licenseID, + ExtractedText: extractedText, + }, + ) + } + + doc := spdx.Document{ + ID: "SPDXRef-DOCUMENT", + Name: d.getSPDXName(), + Version: "SPDX-2.3", + CreationInfo: spdx.CreationInfo{ + Created: d.CreatedTime.Format(time.RFC3339), + Creators: []string{ + fmt.Sprintf("Tool: melange (%s)", version.GetVersionInfo().GitVersion), + "Organization: Chainguard, Inc", + }, + LicenseListVersion: "3.22", // https://spdx.org/licenses/ + }, + DataLicense: "CC0-1.0", + Namespace: d.getSPDXNamespace(), + DocumentDescribes: []string{ + d.Describes.ID(), + }, + Packages: spdxPkgs, + Relationships: d.Relationships, + ExternalDocumentRefs: []spdx.ExternalDocumentRef{}, + LicensingInfos: licensingInfos, + } + + return doc +} + +func (d Document) getSPDXName() string { + return fmt.Sprintf("apk-%s-%s", d.Describes.Name, d.Describes.Version) +} + +func (d Document) getSPDXNamespace() string { + h := sha1.New() + h.Write([]byte(fmt.Sprintf("apk-%s-%s", d.Describes.Namespace, d.Describes.Version))) + hexHash := hex.EncodeToString(h.Sum(nil)) + + return "https://spdx.org/spdxdocs/chainguard/melange/" + hexHash +} + +// AddPackageAndSetDescribed adds a package to the document and sets it as the +// document's described package. +func (d *Document) AddPackageAndSetDescribed(p *Package) { + d.AddPackage(p) + d.Describes = p +} + +// AddPackage adds a package to the document. +func (d *Document) AddPackage(p *Package) { + if p == nil { + return + } + d.Packages = append(d.Packages, *p) +} + +// AddRelationship adds a relationship between two elements in the SBOM. +func (d *Document) AddRelationship(a, b Element, typ string) { + d.Relationships = append(d.Relationships, spdx.Relationship{ + Element: a.ID(), + Related: b.ID(), + Type: typ, + }) +} diff --git a/pkg/sbom/element.go b/pkg/sbom/element.go new file mode 100644 index 000000000..7b96ab5db --- /dev/null +++ b/pkg/sbom/element.go @@ -0,0 +1,7 @@ +package sbom + +// Element represents any referenceable entity in an SBOM. +type Element interface { + // ID returns the unique identifier for this element. + ID() string +} diff --git a/pkg/sbom/generator.go b/pkg/sbom/generator.go deleted file mode 100644 index ff8e53df0..000000000 --- a/pkg/sbom/generator.go +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2022 Chainguard, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package sbom - -import ( - "context" - "fmt" - "time" - - "chainguard.dev/apko/pkg/sbom/generator/spdx" - "github.com/chainguard-dev/clog" - purl "github.com/package-url/packageurl-go" - "go.opentelemetry.io/otel" -) - -// Spec describes the metadata of an APK package for which an SBOM should be -// created. -type Spec struct { - PackageName string - PackageVersion string - License string // Full SPDX license expression - LicensingInfos map[string]string - ExternalRefs []purl.PackageURL - Copyright string - Namespace string - Arch string - SourceDateEpoch time.Time -} - -// GenerateAndWrite creates an SBOM for the APK package described by the given -// Spec and writes the SBOM to the APK's filesystem. -func GenerateAndWrite(ctx context.Context, apkFSPath string, spec *Spec) error { - _, span := otel.Tracer("melange").Start(ctx, "GenerateSBOM") - defer span.End() - log := clog.FromContext(ctx) - - if shouldRun, err := checkPathExists(apkFSPath); err != nil { - return fmt.Errorf("checking SBOM environment: %w", err) - } else if !shouldRun { - log.Warnf("working directory not found, apk is empty") - return nil - } - - document, err := GenerateSPDX(ctx, spec) - if err != nil { - return fmt.Errorf("generating SPDX document: %w", err) - } - - if err := writeSBOM(apkFSPath, spec.PackageName, spec.PackageVersion, document); err != nil { - return fmt.Errorf("writing sbom to disk: %w", err) - } - - return nil -} - -// GenerateSPDX creates an SPDX 2.3 document from the given Spec. -func GenerateSPDX(ctx context.Context, spec *Spec) (*spdx.Document, error) { - p, err := generateSBOMDataForAPKPackage(spec) - if err != nil { - return nil, fmt.Errorf("generating main APK package: %w", err) - } - - doc, err := newSPDXDocument(ctx, spec, p) - if err != nil { - return nil, fmt.Errorf("creating SPDX document: %w", err) - } - - return doc, nil -} diff --git a/pkg/sbom/implementation.go b/pkg/sbom/implementation.go deleted file mode 100644 index 3e426bcde..000000000 --- a/pkg/sbom/implementation.go +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright 2022 Chainguard, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Some of this code is based on the bom tool scan code originally -// found at https://github.com/kubernetes-sigs/bom/blob/main/pkg/spdx/implementation.go - -package sbom - -import ( - "context" - "crypto/sha1" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "os" - "path/filepath" - "regexp" - "sort" - "strconv" - "strings" - "time" - - "github.com/chainguard-dev/clog" - "github.com/github/go-spdx/v2/spdxexp" - purl "github.com/package-url/packageurl-go" - "golang.org/x/text/cases" - "golang.org/x/text/language" - "sigs.k8s.io/release-utils/version" - - "chainguard.dev/apko/pkg/sbom/generator/spdx" -) - -const extRefCatPackageManager = "PACKAGE-MANAGER" - -// invalidIDCharsRe is a regular expression that matches characters not -// considered valid in SPDX identifiers. 
-var invalidIDCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-.]+`) - -// stringToIdentifier converts a string to a valid SPDX identifier by replacing -// invalid characters. Colons and slashes are replaced by dashes, and all other -// invalid characters are replaced by their Unicode code point prefixed with -// "C". -// -// Examples: -// -// "foo:bar" -> "foo-bar" -// "foo/bar" -> "foo-bar" -// "foo bar" -> "fooC32bar" -func stringToIdentifier(in string) string { - in = strings.ReplaceAll(in, ":", "-") - in = strings.ReplaceAll(in, "/", "-") - - invalidCharReplacer := func(s string) string { - sb := strings.Builder{} - for _, r := range s { - sb.WriteString(encodeInvalidRune(r)) - } - return sb.String() - } - - return invalidIDCharsRe.ReplaceAllStringFunc(in, invalidCharReplacer) -} - -func encodeInvalidRune(r rune) string { - return "C" + strconv.Itoa(int(r)) -} - -// checkPathExists returns a bool indicating if the specified path exists. If -// the path does not exist, it returns false and a nil error. If an error occurs -// while checking the directory, it returns false and the error. -func checkPathExists(p string) (bool, error) { - dirPath, err := filepath.Abs(p) - if err != nil { - return false, fmt.Errorf("getting absolute path: %w", err) - } - - // Check if directory exists - if _, err := os.Stat(dirPath); err != nil { - if os.IsNotExist(err) { - return false, nil - } - return false, fmt.Errorf("stat: %w", err) - } - - return true, nil -} - -// generateSBOMDataForAPKPackage puts together the normalized SBOM -// representation for the APK package. -func generateSBOMDataForAPKPackage(spec *Spec) (*pkg, error) { - if spec.PackageName == "" { - return nil, errors.New("package name not specified") - } - - supplier := "Organization: " + cases.Title(language.English).String(spec.Namespace) - newPackage := &pkg{ - id: stringToIdentifier(fmt.Sprintf("%s-%s", spec.PackageName, spec.PackageVersion)), - FilesAnalyzed: false, - Name: spec.PackageName, - Version: spec.PackageVersion, - Relationships: []relationship{}, - LicenseDeclared: spdx.NOASSERTION, - LicenseConcluded: spdx.NOASSERTION, // remove when omitted upstream - ExternalRefs: spec.ExternalRefs, - Copyright: spec.Copyright, - Namespace: spec.Namespace, - Arch: spec.Arch, - Originator: supplier, - Supplier: supplier, - } - - if spec.License != "" { - newPackage.LicenseDeclared = spec.License - } - - return newPackage, nil -} - -// addPackage adds a package to the document -func addPackage(doc *spdx.Document, p *pkg) { - spdxPkg := spdx.Package{ - ID: p.ID(), - Name: p.Name, - Version: p.Version, - FilesAnalyzed: false, - LicenseConcluded: p.LicenseConcluded, - LicenseDeclared: p.LicenseDeclared, - DownloadLocation: spdx.NOASSERTION, - CopyrightText: p.Copyright, - Checksums: []spdx.Checksum{}, - ExternalRefs: []spdx.ExternalRef{}, - Originator: p.Originator, - Supplier: p.Supplier, - } - - algos := []string{} - for algo := range p.Checksums { - algos = append(algos, algo) - } - sort.Strings(algos) - for _, algo := range algos { - spdxPkg.Checksums = append(spdxPkg.Checksums, spdx.Checksum{ - Algorithm: algo, - Value: p.Checksums[algo], - }) - } - - // Add the purl to the package - if p.Namespace != "" { - var q purl.Qualifiers - if p.Arch != "" { - q = purl.QualifiersFromMap( - map[string]string{"arch": p.Arch}, - ) - } - spdxPkg.ExternalRefs = append(spdxPkg.ExternalRefs, spdx.ExternalRef{ - Category: extRefCatPackageManager, - Locator: purl.NewPackageURL( - "apk", p.Namespace, p.Name, p.Version, q, "", - ).ToString(), - Type: "purl", - 
}) - } - for _, purl := range p.ExternalRefs { - spdxPkg.ExternalRefs = append(spdxPkg.ExternalRefs, spdx.ExternalRef{ - Category: extRefCatPackageManager, - Locator: purl.ToString(), - Type: "purl", - }) - } - - doc.Packages = append(doc.Packages, spdxPkg) - - // Cycle the related objects and add them - for _, rel := range p.Relationships { - if sbomHasRelationship(doc, rel) { - continue - } - switch v := rel.Target.(type) { - case *pkg: - addPackage(doc, v) - } - doc.Relationships = append(doc.Relationships, spdx.Relationship{ - Element: rel.Source.ID(), - Type: rel.Type, - Related: rel.Target.ID(), - }) - } -} - -// sbomHasRelationship takes a relationship and an SPDX sbom and heck if -// it already has it in its rel catalog -func sbomHasRelationship(spdxDoc *spdx.Document, bomRel relationship) bool { - for _, spdxRel := range spdxDoc.Relationships { - if spdxRel.Element == bomRel.Source.ID() && spdxRel.Related == bomRel.Target.ID() && spdxRel.Type == bomRel.Type { - return true - } - } - return false -} - -// newSPDXDocument creates an SPDX 2.3 document from our generic representation. -func newSPDXDocument(ctx context.Context, spec *Spec, p *pkg) (*spdx.Document, error) { - log := clog.FromContext(ctx) - - h := sha1.New() - h.Write([]byte(fmt.Sprintf("apk-%s-%s", spec.PackageName, spec.PackageVersion))) - - spdxDoc := spdx.Document{ - ID: "SPDXRef-DOCUMENT", - Name: fmt.Sprintf("apk-%s-%s", spec.PackageName, spec.PackageVersion), - Version: "SPDX-2.3", - CreationInfo: spdx.CreationInfo{ - Created: spec.SourceDateEpoch.Format(time.RFC3339), - Creators: []string{ - fmt.Sprintf("Tool: melange (%s)", version.GetVersionInfo().GitVersion), - "Organization: Chainguard, Inc", - }, - LicenseListVersion: "3.22", // https://spdx.org/licenses/ - }, - DataLicense: "CC0-1.0", - Namespace: "https://spdx.org/spdxdocs/chainguard/melange/" + hex.EncodeToString(h.Sum(nil)), - DocumentDescribes: []string{ - stringToIdentifier(p.ID()), - }, - Packages: []spdx.Package{}, - Relationships: []spdx.Relationship{}, - ExternalDocumentRefs: []spdx.ExternalDocumentRef{}, - LicensingInfos: []spdx.LicensingInfo{}, - } - - for licenseID, extractedText := range spec.LicensingInfos { - spdxDoc.LicensingInfos = append(spdxDoc.LicensingInfos, - spdx.LicensingInfo{ - LicenseID: licenseID, - ExtractedText: extractedText, - }) - } - - if spec.License == "" { - log.Warnf("no license specified, defaulting to %s", spdx.NOASSERTION) - } else { - valid, bad := spdxexp.ValidateLicenses([]string{spec.License}) - if !valid { - log.Warnf("invalid license: %s", strings.Join(bad, ", ")) - } - } - - addPackage(&spdxDoc, p) - - return &spdxDoc, nil -} - -func getPathForPackageSBOM(sbomDirPath, pkgName, pkgVersion string) string { - return filepath.Join( - sbomDirPath, - fmt.Sprintf("%s-%s.spdx.json", pkgName, pkgVersion), - ) -} - -// writeSBOM encodes the given SPDX document to JSON and writes it to the -// filesystem in the directory `/var/lib/db/sbom`. 
-func writeSBOM(apkFSPath, pkgName, pkgVersion string, spdxDoc *spdx.Document) error { - sbomDirPath := filepath.Join(apkFSPath, "/var/lib/db/sbom") - if err := os.MkdirAll(sbomDirPath, os.FileMode(0755)); err != nil { - return fmt.Errorf("creating SBOM directory: %w", err) - } - - sbomPath := getPathForPackageSBOM(sbomDirPath, pkgName, pkgVersion) - f, err := os.Create(sbomPath) - if err != nil { - return fmt.Errorf("opening SBOM file for writing: %w", err) - } - - enc := json.NewEncoder(f) - enc.SetIndent("", " ") - enc.SetEscapeHTML(true) - - if err := enc.Encode(spdxDoc); err != nil { - return fmt.Errorf("encoding spdx sbom: %w", err) - } - - return nil -} diff --git a/pkg/sbom/package.go b/pkg/sbom/package.go new file mode 100644 index 000000000..c6a7dad57 --- /dev/null +++ b/pkg/sbom/package.go @@ -0,0 +1,208 @@ +// Copyright 2022 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sbom captures the internal data model of the SBOMs melange produces +// into a private, generalized bill of materials model (with relationship data) +// designed to be converted to specific formats — for now, just SPDX. +package sbom + +import ( + "context" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + + "chainguard.dev/apko/pkg/sbom/generator/spdx" + "github.com/chainguard-dev/clog" + "github.com/github/go-spdx/v2/spdxexp" + purl "github.com/package-url/packageurl-go" + "golang.org/x/text/cases" + "golang.org/x/text/language" +) + +// Package is a representation of an SBOM package specified by the build +// process. It is later converted to an SPDX package, but it doesn't expose +// fields that are invariant in the SPDX output. +type Package struct { + // IDComponents lets the consumer specify additional bits of data that should be + // included in the generation of the eventual SBOM package ID. By default, this + // slice has a length of zero, in which case only the package's name and version + // will be used. But sometimes it's necessary to include more bits of data to + // ensure package IDs remain unique. If this slice's length is non-zero, only + // these values will be used when producing the ID (via calling the ID method) + // (i.e. name and version would need to be added explicitly to this slice). + IDComponents []string + + // The name of the origin package, a subpackage, or any other kind of (e.g. + // non-APK) package for inclusion in the SBOM. + Name string + + // The version of the package. For APK packages, this should be the "full + // version" (including the epoch). + Version string + + // This is the copyright text in the SPDX package. It's usually left blank. + Copyright string + + // SPDX license expression. Leaving this empty will result in NOASSERTION being + // used as its value. + LicenseDeclared string + + // Name of the distro/organization that produced the package. E.g. "wolfi". 
+ // + // TODO: consider renaming this to avoid confusion from our other uses of + // "namespace", perhaps to "supplier" or "originator" (or have both), and signal + // that it's safe to leave this blank. + Namespace string + + // The architecture of the package. E.g. "aarch64". This field isn't always + // relevant, especially when describing material upstream of the built APK + // package (e.g. source code or language ecosystem dependencies). + Arch string + + // Checksums of the package. The keys are the checksum algorithms (e.g. "SHA-256"), + // and the values are the checksums. + // + // TODO: We're not currently using this field, consider removing it. + Checksums map[string]string + + // The Package URL for this package, if any. If set, it will be added as the + // only ExternalRef of type "purl" to the SPDX package. (A package + // should have only one PURL external ref.) + PURL *purl.PackageURL +} + +// ToSPDX returns the Package converted to its SPDX representation. +func (p Package) ToSPDX(ctx context.Context) spdx.Package { + log := clog.FromContext(ctx) + + if p.LicenseDeclared == "" { + log.Warnf("%s: no license specified, defaulting to %s", p.ID(), spdx.NOASSERTION) + p.LicenseDeclared = spdx.NOASSERTION + } else { + valid, bad := spdxexp.ValidateLicenses([]string{p.LicenseDeclared}) + if !valid { + log.Warnf("invalid license: %s", strings.Join(bad, ", ")) + } + } + + sp := spdx.Package{ + ID: p.ID(), + Name: p.Name, + Version: p.Version, + FilesAnalyzed: false, + LicenseConcluded: spdx.NOASSERTION, + LicenseDeclared: p.LicenseDeclared, + DownloadLocation: spdx.NOASSERTION, + CopyrightText: p.Copyright, + Checksums: p.getChecksums(), + ExternalRefs: p.getExternalRefs(), + Originator: p.getSupplier(), // yes, we use this value for both fields (for now) + Supplier: p.getSupplier(), + } + + return sp +} + +// ID returns the unique identifier for this package. It implements the Element +// interface. +func (p Package) ID() string { + if len(p.IDComponents) == 0 { + return stringToIdentifier( + fmt.Sprintf("SPDXRef-Package-%s-%s", p.Name, p.Version), + ) + } + + id := "SPDXRef-Package" + for _, component := range p.IDComponents { + id += "-" + component + } + return stringToIdentifier(id) +} + +func (p Package) getChecksums() []spdx.Checksum { + var algos []string + for algo := range p.Checksums { + algos = append(algos, algo) + } + sort.Strings(algos) + + var result []spdx.Checksum + for _, algo := range algos { + result = append(result, spdx.Checksum{ + Algorithm: algo, + Value: p.Checksums[algo], + }) + } + + // For JSON, we'll want an empty array, not `null`. + if len(result) == 0 { + return []spdx.Checksum{} + } + + return result +} + +func (p Package) getSupplier() string { + return "Organization: " + cases.Title(language.English).String(p.Namespace) +} + +func (p Package) getExternalRefs() []spdx.ExternalRef { + var result []spdx.ExternalRef + + if p.PURL != nil { + result = append(result, spdx.ExternalRef{ + Category: spdx.ExtRefPackageManager, + Locator: p.PURL.ToString(), + Type: spdx.ExtRefTypePurl, + }) + } + + return result +} + +// invalidIDCharsRe is a regular expression that matches characters not +// considered valid in SPDX identifiers. +var invalidIDCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-.]+`) + +// stringToIdentifier converts a string to a valid SPDX identifier by replacing +// invalid characters. Colons and slashes are replaced by dashes, and all other +// invalid characters are replaced by their Unicode code point prefixed with +// "C". 
+// +// Examples: +// +// "foo:bar" -> "foo-bar" +// "foo/bar" -> "foo-bar" +// "foo bar" -> "fooC32bar" +func stringToIdentifier(in string) string { + in = strings.ReplaceAll(in, ":", "-") + in = strings.ReplaceAll(in, "/", "-") + + invalidCharReplacer := func(s string) string { + sb := strings.Builder{} + for _, r := range s { + sb.WriteString(encodeInvalidRune(r)) + } + return sb.String() + } + + return invalidIDCharsRe.ReplaceAllStringFunc(in, invalidCharReplacer) +} + +func encodeInvalidRune(r rune) string { + return "C" + strconv.Itoa(int(r)) +}
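The fetch and git-checkout handling earlier in this change carries extra data (download URL, checksum, vcs_url) as qualifiers on a pkg:generic purl. A small sketch of what that construction produces, using packageurl-go as the change does; the URL, checksum, and package name are placeholders, and the rendered string in the comment is only an approximation of the encoding:

    package main

    import (
    	"fmt"

    	purl "github.com/package-url/packageurl-go"
    )

    func main() {
    	// Qualifiers mirror the "fetch" case: download_url plus a prefixed checksum.
    	args := map[string]string{
    		"download_url": "https://example.org/src/mypkg-1.2.3.tar.gz",
    		"checksum":     "sha256:0000000000000000000000000000000000000000000000000000000000000000",
    	}

    	pu := purl.PackageURL{
    		Type:       "generic",
    		Name:       "mypkg",
    		Version:    "1.2.3",
    		Qualifiers: purl.QualifiersFromMap(args),
    	}
    	if err := pu.Normalize(); err != nil {
    		panic(err)
    	}

    	// Prints something like (qualifier values are percent-encoded):
    	// pkg:generic/mypkg@1.2.3?checksum=sha256%3A0000...&download_url=https%3A%2F%2Fexample.org%2F...
    	fmt.Println(pu.ToString())
    }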
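The config changes above drop in-parser commit detection in favor of an explicit WithCommit option, and add the AllPackageNames iterator. A minimal sketch of how a caller might use both, assuming the types live in chainguard.dev/melange/pkg/config and that the config path and commit hash below are placeholders (range-over-func requires Go 1.23 or newer):

    package main

    import (
    	"context"
    	"fmt"
    	"log"

    	"chainguard.dev/melange/pkg/config"
    )

    func main() {
    	ctx := context.Background()

    	// The commit is passed in explicitly (e.g. from a --git-commit flag) rather
    	// than being detected inside ParseConfiguration.
    	cfg, err := config.ParseConfiguration(ctx, "melange.yaml",
    		config.WithCommit("0123456789abcdef0123456789abcdef01234567"),
    	)
    	if err != nil {
    		log.Fatal(err)
    	}

    	// AllPackageNames yields the origin package name first, then each subpackage.
    	for name := range cfg.AllPackageNames() {
    		fmt.Println(name)
    	}
    }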
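The new Document and Package types are meant to be assembled during the build and then converted to SPDX in one step via ToSPDX. A rough sketch of that flow, assuming the package is imported as chainguard.dev/melange/pkg/sbom; the package data and the GENERATED_FROM relationship type are illustrative placeholders:

    package main

    import (
    	"context"
    	"encoding/json"
    	"log"
    	"os"
    	"time"

    	"chainguard.dev/melange/pkg/sbom"
    )

    func main() {
    	ctx := context.Background()

    	// The APK package the SBOM describes (placeholder values).
    	apkPkg := &sbom.Package{
    		Name:            "mypkg",
    		Version:         "1.2.3-r0",
    		Namespace:       "wolfi",
    		Arch:            "x86_64",
    		LicenseDeclared: "Apache-2.0",
    	}

    	// An upstream source package, with extra ID components to keep its SPDX ID
    	// unique (placeholder repository and tag).
    	src := &sbom.Package{
    		IDComponents: []string{"example.org/mypkg", "v1.2.3"},
    		Name:         "mypkg",
    		Version:      "v1.2.3",
    	}

    	doc := sbom.NewDocument()
    	doc.CreatedTime = time.Now().UTC()
    	doc.AddPackageAndSetDescribed(apkPkg)
    	doc.AddPackage(src)
    	doc.AddRelationship(apkPkg, src, "GENERATED_FROM")

    	// Convert to SPDX and emit it as JSON.
    	if err := json.NewEncoder(os.Stdout).Encode(doc.ToSPDX(ctx)); err != nil {
    		log.Fatal(err)
    	}
    }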
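Package.ID determines the SPDX identifier: by default it combines the name and version, and when IDComponents is set only those components are used, with characters outside [a-zA-Z0-9-.] sanitized by stringToIdentifier. An illustrative sketch under the same import-path assumption, with placeholder names and the expected identifiers shown as comments:

    package main

    import (
    	"fmt"

    	"chainguard.dev/melange/pkg/sbom"
    )

    func main() {
    	// Default: the ID is derived from name and version only.
    	p := sbom.Package{Name: "mypkg", Version: "1.2.3-r0"}
    	fmt.Println(p.ID()) // SPDXRef-Package-mypkg-1.2.3-r0

    	// With IDComponents, only the listed components are used; slashes and
    	// colons become dashes. (Placeholder repository and tag.)
    	q := sbom.Package{
    		IDComponents: []string{"example.org/mypkg", "main", "v1.2.3"},
    		Name:         "mypkg",
    		Version:      "v1.2.3",
    	}
    	fmt.Println(q.ID()) // SPDXRef-Package-example.org-mypkg-main-v1.2.3
    }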