From 1fd9ac4e6268f44fa02e033a47e002ba9edd6f26 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 11 May 2023 03:47:37 +0000 Subject: [PATCH 01/14] pkg: support run oci image with erofs tarfs Add 'enable_tarfs' option in config.toml, to enable run oci image with erofs tarfs. Snapshotter will download all image layers, uncompress to tar files and generate tarfs boostraps. Finally mount erofs based on loop device as the image read only rootfs. Signed-off-by: Xin Yin --- config/config.go | 2 + go.mod | 2 + go.sum | 4 + pkg/filesystem/config.go | 11 + pkg/filesystem/fs.go | 53 ++-- pkg/filesystem/tarfs_adaptor.go | 68 ++++ pkg/label/label.go | 12 + pkg/tarfs/tarfs.go | 538 ++++++++++++++++++++++++++++++++ snapshot/process.go | 36 ++- snapshot/snapshot.go | 24 +- 10 files changed, 721 insertions(+), 29 deletions(-) create mode 100755 pkg/filesystem/tarfs_adaptor.go create mode 100755 pkg/tarfs/tarfs.go diff --git a/config/config.go b/config/config.go index 521497cf52..151e25be53 100644 --- a/config/config.go +++ b/config/config.go @@ -109,6 +109,8 @@ const ( type Experimental struct { EnableStargz bool `toml:"enable_stargz"` EnableReferrerDetect bool `toml:"enable_referrer_detect"` + EnableTarfs bool `toml:"enable_tarfs"` + TarfsHint bool `toml:"tarfs_hint"` } type CgroupConfig struct { diff --git a/go.mod b/go.mod index 6fd8462309..799362bcf9 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,8 @@ require ( k8s.io/cri-api v0.27.0-alpha.3 ) +require github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db // indirect + require ( github.com/cilium/ebpf v0.9.1 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect diff --git a/go.sum b/go.sum index 8756658ac4..37f0be0f7d 100644 --- a/go.sum +++ b/go.sum @@ -132,6 +132,10 @@ github.com/flowstack/go-jsonschema v0.1.1/go.mod h1:yL7fNggx1o8rm9RlgXv7hTBWxdBM github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.4 
h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db h1:StM6A9LvaVrFS2chAGcfRVDoBB6rHYPIGJ3GknpB25c= +github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db/go.mod h1:pwuQfHWn6j2Fpl2AWw/bPLlKfojHxIIEa5TeKIgDFW4= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= diff --git a/pkg/filesystem/config.go b/pkg/filesystem/config.go index 773af0bac6..6aecfdca77 100644 --- a/pkg/filesystem/config.go +++ b/pkg/filesystem/config.go @@ -14,6 +14,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/signature" "github.com/containerd/nydus-snapshotter/pkg/stargz" + "github.com/containerd/nydus-snapshotter/pkg/tarfs" "github.com/pkg/errors" ) @@ -66,6 +67,16 @@ func WithReferrerManager(rm *referrer.Manager) NewFSOpt { } } +func WithTarfsManager(tm *tarfs.Manager) NewFSOpt { + return func(fs *Filesystem) error { + if tm == nil { + return errors.New("tarfs manager cannot be nil") + } + fs.tarfsMgr = tm + return nil + } +} + func WithVerifier(verifier *signature.Verifier) NewFSOpt { return func(fs *Filesystem) error { fs.verifier = verifier diff --git a/pkg/filesystem/fs.go b/pkg/filesystem/fs.go index a13e78f380..fc380dd7d0 100644 --- a/pkg/filesystem/fs.go +++ b/pkg/filesystem/fs.go @@ -33,6 +33,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/signature" "github.com/containerd/nydus-snapshotter/pkg/stargz" + 
"github.com/containerd/nydus-snapshotter/pkg/tarfs" ) // TODO: refact `enabledManagers` and `xxxManager` into `ManagerCoordinator` @@ -46,6 +47,7 @@ type Filesystem struct { enabledManagers []*manager.Manager cacheMgr *cache.Manager referrerMgr *referrer.Manager + tarfsMgr *tarfs.Manager stargzResolver *stargz.Resolver verifier *signature.Verifier nydusImageBinaryPath string @@ -218,11 +220,9 @@ func (fs *Filesystem) WaitUntilReady(snapshotID string) error { // Mount will be called when containerd snapshotter prepare remote snapshotter // this method will fork nydus daemon and manage it in the internal store, and indexed by snapshotID // It must set up all necessary resources during Mount procedure and revoke any step if necessary. -func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err error) { - // TODO: support tarfs - isTarfsMode := false +func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, isTarfs bool) (err error) { fsDriver := config.GetFsDriver() - if isTarfsMode { + if isTarfs { fsDriver = config.FsDriverBlockdev } else if !fs.DaemonBacked() { fsDriver = config.FsDriverNodev @@ -266,16 +266,17 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err er }() fsManager, err := fs.getManager(fsDriver) - if err != nil { + if err != nil && fsDriver != config.FsDriverBlockdev { return errors.Wrapf(err, "get filesystem manager for snapshot %s", snapshotID) } - bootstrap, err := rafs.BootstrapFile() - if err != nil { - return errors.Wrapf(err, "find bootstrap file snapshot %s", snapshotID) - } var d *daemon.Daemon if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { + bootstrap, err := rafs.BootstrapFile() + if err != nil { + return errors.Wrapf(err, "find bootstrap file snapshot %s", snapshotID) + } + if useSharedDaemon { d, err = fs.getSharedDaemon(fsDriver) if err != nil { @@ -334,12 +335,12 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err 
er } d.AddInstance(rafs) - } - // if publicKey is not empty we should verify bootstrap file of image - err = fs.verifier.Verify(labels, bootstrap) - if err != nil { - return errors.Wrapf(err, "verify signature of daemon %s", d.ID()) + // if publicKey is not empty we should verify bootstrap file of image + err = fs.verifier.Verify(labels, bootstrap) + if err != nil { + return errors.Wrapf(err, "verify signature of daemon %s", d.ID()) + } } switch fsDriver { @@ -353,13 +354,20 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err er if err != nil { return errors.Wrapf(err, "mount file system by daemon %s, snapshot %s", d.ID(), snapshotID) } - // case config.FsDriverBlockdev: - // TODO: support tarfs + case config.FsDriverBlockdev: + mountPoint := path.Join(rafs.GetSnapshotDir(), "tarfs") + err = fs.tarfsMgr.MountTarErofs(snapshotID, mountPoint) + if err != nil { + return errors.Wrapf(err, "mount tarfs for snapshot %s", snapshotID) + } + rafs.SetMountpoint(mountPoint) } // Persist it after associate instance after all the states are calculated. 
- if err := fsManager.NewInstance(rafs); err != nil { - return errors.Wrapf(err, "create instance %s", snapshotID) + if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { + if err := fsManager.NewInstance(rafs); err != nil { + return errors.Wrapf(err, "create instance %s", snapshotID) + } } return nil @@ -374,7 +382,7 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { fsDriver := instance.GetFsDriver() fsManager, err := fs.getManager(fsDriver) - if err != nil { + if err != nil && fsDriver != config.FsDriverBlockdev { return errors.Wrapf(err, "get manager for filesystem instance %s", instance.DaemonID) } @@ -398,10 +406,11 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { return errors.Wrapf(err, "destroy daemon %s", daemon.ID()) } } - // } else if fsDriver == config.FsDriverBlockdev { - // TODO: support tarfs + } else if fsDriver == config.FsDriverBlockdev { + if err := fs.tarfsMgr.UmountTarErofs(snapshotID); err != nil { + return errors.Wrapf(err, "umount tar erofs on snapshot %s", snapshotID) + } } - return nil } diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go new file mode 100755 index 0000000000..f1daf33a14 --- /dev/null +++ b/pkg/filesystem/tarfs_adaptor.go @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package filesystem + +import ( + "context" + + "github.com/containerd/containerd/log" + snpkg "github.com/containerd/containerd/pkg/snapshotters" + "github.com/containerd/containerd/snapshots/storage" + "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +func (fs *Filesystem) TarfsEnabled() bool { + return fs.tarfsMgr != nil +} + +func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]string, snapshotID, storagePath string) error { + ref, ok := labels[snpkg.TargetRefLabel] + if !ok { + return errors.Errorf("not found image reference lable") + } + layerDigest := digest.Digest(labels[snpkg.TargetLayerDigestLabel]) + if layerDigest.Validate() != nil { + return errors.Errorf("not found manifest digest lable") + } + manifest := digest.Digest(labels[snpkg.TargetManifestDigestLabel]) + if manifest.Validate() != nil { + return errors.Errorf("not found manifest digest lable") + } + + ok, err := fs.tarfsMgr.CheckTarfsHintAnnotation(ctx, ref, manifest) + if err != nil { + return errors.Wrapf(err, "check tarfs hint annotaion") + } + if !ok { + return errors.Errorf("this image is not recommended for tarfs") + } + + go func() { + // TODO concurrency control + if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifest, layerDigest, storagePath); err != nil { + log.L.WithError(err).Errorf("async prepare Tarfs layer of snapshot ID %s", snapshotID) + } + }() + return nil +} + +func (fs *Filesystem) MergeTarfsLayers(s storage.Snapshot, storageLocater func(string) string) error { + return fs.tarfsMgr.MergeLayers(s, storageLocater) +} + +func (fs *Filesystem) DetachTarfsLayer(snapshotID string) error { + return fs.tarfsMgr.DetachLayer(snapshotID) +} + +func (fs *Filesystem) IsTarfsLayer(snapshotID string) bool { + return fs.tarfsMgr.CheckTarfsLayer(snapshotID, false) +} + +func (fs *Filesystem) IsMergedTarfsLayer(snapshotID string) bool { + return fs.tarfsMgr.CheckTarfsLayer(snapshotID, 
true) +} diff --git a/pkg/label/label.go b/pkg/label/label.go index af9417bce0..7621275d78 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -48,6 +48,10 @@ const ( // If this optional label of a snapshot is specified, when mounted to rootdir // this snapshot will include volatile option OverlayfsVolatileOpt = "containerd.io/snapshot/overlay.volatile" + + // A bool flag to mark it is recommended to run this image with tarfs mode, set by image builders. + // runtime can decide whether to rely on this annotation + TarfsHint = "containerd.io/snapshot/tarfs-hint" ) func IsNydusDataLayer(labels map[string]string) bool { @@ -62,3 +66,11 @@ func IsNydusMetaLayer(labels map[string]string) bool { _, ok := labels[NydusMetaLayer] return ok } + +func IsTarfsHint(labels map[string]string) bool { + if labels == nil { + return false + } + _, ok := labels[TarfsHint] + return ok +} diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go new file mode 100755 index 0000000000..ff55211d9e --- /dev/null +++ b/pkg/tarfs/tarfs.go @@ -0,0 +1,538 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tarfs + +import ( + "bytes" + "context" + "encoding/json" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "syscall" + + "github.com/containerd/containerd/archive/compression" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/pkg/auth" + "github.com/containerd/nydus-snapshotter/pkg/label" + "github.com/containerd/nydus-snapshotter/pkg/remote" + "github.com/containerd/nydus-snapshotter/pkg/remote/remotes" + losetup "github.com/freddierice/go-losetup" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "golang.org/x/sync/singleflight" + "golang.org/x/sys/unix" + "k8s.io/utils/lru" +) + +type Manager struct { + snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID + mutex sync.Mutex + insecure bool + checkTarfsHint bool // whether to rely on tarfs hint annotation + tarfsHintCache *lru.Cache // cache oci image raf and tarfs hint annotation + nydusImagePath string + diffIDCache *lru.Cache // cache oci blob digest and diffID + sg singleflight.Group +} + +const ( + TarfsStatusInit = 0 + TarfsStatusFormatting = 1 + TarfsStatusReady = 2 + TarfsStatusFailed = 3 +) + +const ( + TarfsBlobName = "blob.tar" + TarfsLayerBootstapName = "layer_bootstrap" + TarfsMeragedBootstapName = "merged_bootstrap" +) + +var ErrEmptyBlob = errors.New("empty blob") + +type snapshotStatus struct { + status int + layerBsLoopdev *losetup.Device + mergedBsLoopdev *losetup.Device + erofsMountPoint string + erofsMountOpts string + wg *sync.WaitGroup + cancel context.CancelFunc +} + +func NewManager(insecure, checkTarfsHint bool, nydusImagePath string) *Manager { + return &Manager{ + snapshotMap: map[string]*snapshotStatus{}, + nydusImagePath: nydusImagePath, + insecure: insecure, + checkTarfsHint: checkTarfsHint, + 
tarfsHintCache: lru.New(50), + diffIDCache: lru.New(1000), + sg: singleflight.Group{}, + } +} + +// fetch image form manifest and config, then cache them in lru +// FIXME need an update policy +func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifest digest.Digest) error { + // fetch the manifest find config digest and layers digest + rc, err := t.getBlobStream(ctx, remote, ref, manifest) + if err != nil { + return err + } + defer rc.Close() + var manifestOCI ocispec.Manifest + bytes, err := io.ReadAll(rc) + if err != nil { + return errors.Wrap(err, "read manifest") + } + if err := json.Unmarshal(bytes, &manifestOCI); err != nil { + return errors.Wrap(err, "unmarshal manifest") + } + if len(manifestOCI.Layers) < 1 { + return errors.Errorf("invalid manifest") + } + + // fetch the config and find diff ids + rc, err = t.getBlobStream(ctx, remote, ref, manifestOCI.Config.Digest) + if err != nil { + return errors.Wrap(err, "fetch referrers") + } + defer rc.Close() + var config ocispec.Image + bytes, err = io.ReadAll(rc) + if err != nil { + return errors.Wrap(err, "read config") + } + if err := json.Unmarshal(bytes, &config); err != nil { + return errors.Wrap(err, "unmarshal config") + } + if len(config.RootFS.DiffIDs) != len(manifestOCI.Layers) { + return errors.Errorf("number of diff ids unmatch manifest layers") + } + // cache ref & tarfs hint annotation + t.tarfsHintCache.Add(ref, label.IsTarfsHint(manifestOCI.Annotations)) + // cache OCI blob digest & diff id + for i := range manifestOCI.Layers { + t.diffIDCache.Add(manifestOCI.Layers[i].Digest, config.RootFS.DiffIDs[i]) + } + return nil +} + +func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref string, manifest, layerDigest digest.Digest) (digest.Digest, error) { + if diffid, ok := t.diffIDCache.Get(layerDigest); ok { + return diffid.(digest.Digest), nil + } + + if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { + err := 
t.fetchImageInfo(ctx, remote, ref, manifest) + return nil, err + }); err != nil { + return "", err + } + + if diffid, ok := t.diffIDCache.Get(layerDigest); ok { + return diffid.(digest.Digest), nil + } + + return "", errors.Errorf("get blob diff id failed") +} + +func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, layerDigest digest.Digest) (io.ReadCloser, error) { + fetcher, err := remote.Fetcher(ctx, ref) + if err != nil { + return nil, errors.Wrap(err, "get remote fetcher") + } + + fetcherByDigest, ok := fetcher.(remotes.FetcherByDigest) + if !ok { + return nil, errors.Errorf("fetcher %T does not implement remotes.FetcherByDigest", fetcher) + } + + rc, _, err := fetcherByDigest.FetchByDigest(ctx, layerDigest) + return rc, err +} + +// generate tar file and layer bootstrap, return if this blob is an empty blob +func (t *Manager) generateBootstrap(tarReader io.Reader, storagePath, snapshotID string) (emptyBlob bool, err error) { + layerBootstrap := filepath.Join(storagePath, TarfsLayerBootstapName) + blob := filepath.Join(storagePath, "layer_"+snapshotID+"_"+TarfsBlobName) + + tarFile, err := os.Create(blob) + if err != nil { + return false, err + } + defer tarFile.Close() + + fifoName := filepath.Join(storagePath, "layer_"+snapshotID+"_"+"tar.fifo") + if err = syscall.Mkfifo(fifoName, 0644); err != nil { + return false, err + } + defer os.Remove(fifoName) + + go func() { + fifoFile, err := os.OpenFile(fifoName, os.O_WRONLY, os.ModeNamedPipe) + if err != nil { + log.L.Warnf("can not open fifo file, err %v", err) + return + } + defer fifoFile.Close() + if _, err := io.Copy(fifoFile, io.TeeReader(tarReader, tarFile)); err != nil { + log.L.Warnf("tar stream copy err %v", err) + } + }() + + options := []string{ + "create", + "--type", "tar-tarfs", + "--bootstrap", layerBootstrap, + "--blob-id", "layer_" + snapshotID + "_" + TarfsBlobName, + "--blob-dir", storagePath, + fifoName, + } + cmd := exec.Command(t.nydusImagePath, 
options...) + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + log.L.Debugf("nydus image command %v", options) + err = cmd.Run() + if err != nil { + log.L.Warnf("nydus image exec failed, %s", errb.String()) + return false, errors.Wrap(err, "converting tarfs layer failed") + } + log.L.Debugf("nydus image output %s", outb.String()) + log.L.Debugf("nydus image err %s", errb.String()) + + // TODO need a more reliable way to check if this is an empty blob + if strings.Contains(outb.String(), "data blob size: 0x0") || + strings.Contains(errb.String(), "data blob size: 0x0") { + return true, nil + } + return false, nil +} + +func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { + // losetup.Attach() is not thread-safe hold lock here + t.mutex.Lock() + defer t.mutex.Unlock() + dev, err := losetup.Attach(blob, 0, false) + return &dev, err +} + +// download & uncompress oci blob, generate tarfs bootstrap +func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifest, layerDigest digest.Digest, storagePath string) (*losetup.Device, error) { + keyChain, err := auth.GetKeyChainByRef(ref, nil) + if err != nil { + return nil, err + } + remote := remote.New(keyChain, t.insecure) + + handle := func() (bool, error) { + diffID, err := t.getBlobDiffID(ctx, remote, ref, manifest, layerDigest) + if err != nil { + return false, err + } + + rc, err := t.getBlobStream(ctx, remote, ref, layerDigest) + if err != nil { + return false, err + } + defer rc.Close() + + ds, err := compression.DecompressStream(rc) + if err != nil { + return false, errors.Wrap(err, "unpack stream") + } + defer ds.Close() + + digester := digest.Canonical.Digester() + dr := io.TeeReader(ds, digester.Hash()) + + emptyBlob, err := t.generateBootstrap(dr, storagePath, snapshotID) + if err != nil { + return false, err + } + log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s, digest %s", snapshotID, digester.Digest()) + if digester.Digest() != 
diffID { + return false, errors.Errorf("diff id %s not match", diffID) + } + return emptyBlob, nil + } + + var emptyBlob bool + emptyBlob, err = handle() + if err != nil && remote.RetryWithPlainHTTP(ref, err) { + emptyBlob, err = handle() + } + if err != nil { + return nil, err + } + + // for empty blob do not need a loop device + if emptyBlob { + return nil, ErrEmptyBlob + } + + return t.attachLoopdev(filepath.Join(storagePath, "layer_"+snapshotID+"_"+TarfsBlobName)) +} + +func (t *Manager) PrepareLayer(snapshotID, ref string, manifest, layerDigest digest.Digest, storagePath string) error { + t.mutex.Lock() + if _, ok := t.snapshotMap[snapshotID]; ok { + t.mutex.Unlock() + return errors.Errorf("snapshot %s already prapared", snapshotID) + } + wg := &sync.WaitGroup{} + wg.Add(1) + defer wg.Done() + ctx, cancel := context.WithCancel(context.Background()) + + t.snapshotMap[snapshotID] = &snapshotStatus{ + status: TarfsStatusFormatting, + wg: wg, + cancel: cancel, + } + t.mutex.Unlock() + + loopdev, err := t.blobProcess(ctx, snapshotID, ref, manifest, layerDigest, storagePath) + + st, err1 := t.getSnapshotStatus(snapshotID) + if err1 != nil { + return errors.Errorf("can not found snapshot status after prepare") + } + if err != nil { + if errors.Is(err, ErrEmptyBlob) { + st.status = TarfsStatusReady + err = nil + } else { + st.status = TarfsStatusFailed + } + } else { + st.status = TarfsStatusReady + st.layerBsLoopdev = loopdev + } + return err +} + +func (t *Manager) MergeLayers(s storage.Snapshot, storageLocater func(string) string) error { + mergedBootstrap := filepath.Join(storageLocater(s.ParentIDs[0]), TarfsMeragedBootstapName) + if _, err := os.Stat(mergedBootstrap); err == nil { + log.L.Infof("tarfs snapshot %s already has merged bootstrap %s", s.ParentIDs[0], mergedBootstrap) + return nil + } + + var mountOpts string + bootstraps := []string{} + // When merging bootstrap, we need to arrange layer bootstrap in order from low to high + for idx := 
len(s.ParentIDs) - 1; idx >= 0; idx-- { + snapshotID := s.ParentIDs[idx] + err := t.waitLayerFormating(snapshotID) + if err != nil { + return errors.Wrapf(err, "wait layer formating err") + } + + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + return errors.Errorf("snapshot %s tarfs format error %d", snapshotID, st.status) + } + bootstraps = append(bootstraps, filepath.Join(storageLocater(snapshotID), TarfsLayerBootstapName)) + + // mount opt skip empty blob + if st.layerBsLoopdev != nil { + mountOpts += "device=" + st.layerBsLoopdev.Path() + "," + } + } + + options := []string{ + "merge", + "--bootstrap", mergedBootstrap, + } + options = append(options, bootstraps...) + cmd := exec.Command(t.nydusImagePath, options...) + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + log.L.Debugf("nydus image command %v", options) + err := cmd.Run() + if err != nil { + return errors.Wrap(err, "merging tarfs layers") + } + + loopdev, err := t.attachLoopdev(mergedBootstrap) + if err != nil { + return errors.Wrap(err, "attach bootstrap to loop error") + } + + st, err := t.getSnapshotStatus(s.ParentIDs[0]) + if err != nil { + return errors.Errorf("snapshot %s not found", s.ParentIDs[0]) + } + st.mergedBsLoopdev = loopdev + st.erofsMountOpts = mountOpts + + return nil +} + +func (t *Manager) MountTarErofs(snapshotID string, mountPoint string) error { + var devName string + + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return err + } + if len(st.erofsMountPoint) > 0 { + if st.erofsMountPoint == mountPoint { + log.L.Debugf("erofs tarfs %s already mounted", mountPoint) + return nil + } + return errors.Errorf("erofs snapshot %s already mounted at %s", snapshotID, st.erofsMountPoint) + } + + if err = os.MkdirAll(mountPoint, 0755); err != nil { + return errors.Wrapf(err, "failed to create tarfs mount dir %s", mountPoint) + } + + if st.mergedBsLoopdev != nil { + devName = st.mergedBsLoopdev.Path() + } 
else { + return errors.Errorf("snapshot %s not found boostrap loopdev error %d", snapshotID, st.status) + } + + err = unix.Mount(devName, mountPoint, "erofs", 0, st.erofsMountOpts) + if err != nil { + return errors.Wrapf(err, "mount erofs at %s with opts %s", mountPoint, st.erofsMountOpts) + } + st.erofsMountPoint = mountPoint + return nil +} + +func (t *Manager) UmountTarErofs(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return errors.Wrapf(err, "umount a tarfs snapshot %s which is already removed", snapshotID) + } + + if len(st.erofsMountPoint) > 0 { + err := unix.Unmount(st.erofsMountPoint, 0) + if err != nil { + return errors.Wrapf(err, "umount erofs tarfs %s failed", st.erofsMountPoint) + } + } + st.erofsMountPoint = "" + return nil +} + +func (t *Manager) waitLayerFormating(snapshotID string) error { + log.L.Debugf("wait for tarfs formating snapshot %s", snapshotID) + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return err + } + st.wg.Wait() + return nil +} + +// check if a snapshot is tarfs layer and if mounted a erofs tarfs +func (t *Manager) CheckTarfsLayer(snapshotID string, merged bool) bool { + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return false + } + if merged && len(st.erofsMountPoint) == 0 { + return false + } + return true +} + +func (t *Manager) DetachLayer(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID) + if err != nil { + return os.ErrNotExist + } + + if len(st.erofsMountPoint) > 0 { + err := unix.Unmount(st.erofsMountPoint, 0) + if err != nil { + return errors.Wrapf(err, "umount erofs tarfs %s failed", st.erofsMountPoint) + } + } + + if st.mergedBsLoopdev != nil { + err := st.mergedBsLoopdev.Detach() + if err != nil { + return errors.Wrapf(err, "detach merged bootstrap loopdev for tarfs snapshot %s failed", snapshotID) + } + } + + if st.layerBsLoopdev != nil { + err := st.layerBsLoopdev.Detach() + if err != nil { + return 
errors.Wrapf(err, "detach layer bootstrap loopdev for tarfs snapshot %s failed", snapshotID) + } + } + st.cancel() + + t.mutex.Lock() + defer t.mutex.Unlock() + delete(t.snapshotMap, snapshotID) + return nil +} + +func (t *Manager) getSnapshotStatus(snapshotID string) (*snapshotStatus, error) { + t.mutex.Lock() + defer t.mutex.Unlock() + st, ok := t.snapshotMap[snapshotID] + if ok { + return st, nil + } + return nil, errors.Errorf("not found snapshot %s", snapshotID) +} + +func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, manifest digest.Digest) (bool, error) { + if !t.checkTarfsHint { + return true, nil + } + + keyChain, err := auth.GetKeyChainByRef(ref, nil) + if err != nil { + return false, err + } + remote := remote.New(keyChain, t.insecure) + + handle := func() (bool, error) { + if tarfsHint, ok := t.tarfsHintCache.Get(ref); ok { + return tarfsHint.(bool), nil + } + + if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { + err := t.fetchImageInfo(ctx, remote, ref, manifest) + return nil, err + }); err != nil { + return false, err + } + + if tarfsHint, ok := t.tarfsHintCache.Get(ref); ok { + return tarfsHint.(bool), nil + } + + return false, errors.Errorf("get tarfs hint annotation failed") + } + + tarfsHint, err := handle() + if err != nil && remote.RetryWithPlainHTTP(ref, err) { + return handle() + } + return tarfsHint, err +} diff --git a/snapshot/process.go b/snapshot/process.go index 7d36941c77..d2d8a0dc43 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -40,10 +40,10 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, return true, nil, nil } - remoteHandler := func(id string, labels map[string]string) func() (bool, []mount.Mount, error) { + remoteHandler := func(id string, labels map[string]string, isTarfs bool) func() (bool, []mount.Mount, error) { return func() (bool, []mount.Mount, error) { logger.Debugf("Found nydus meta layer id %s", id) - if err := sn.prepareRemoteSnapshot(id, labels); err 
!= nil { + if err := sn.prepareRemoteSnapshot(id, labels, isTarfs); err != nil { return false, nil, err } @@ -84,6 +84,14 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, labels[label.StargzLayer] = "true" } } + case sn.fs.TarfsEnabled(): + err := sn.fs.PrepareTarfsLayer(ctx, labels, s.ID, sn.upperPath(s.ID)) + if err != nil { + logger.Debugf("snapshot ID %s can't prepare as tarfs fallback to containerd, err: %v", s.ID, err) + handler = defaultHandler + } else { + handler = skipHandler + } default: // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd handler = defaultHandler @@ -95,7 +103,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, // TODO: Trying find nydus meta layer will slow down setting up rootfs to OCI images if id, info, err := sn.findMetaLayer(ctx, key); err == nil { logger.Infof("Prepares active snapshot %s, nydusd should start afterwards", key) - handler = remoteHandler(id, info.Labels) + handler = remoteHandler(id, info.Labels, false) } if handler == nil && sn.fs.ReferrerDetectEnabled() { @@ -105,7 +113,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, if err := sn.fs.TryFetchMetadata(ctx, info.Labels, metaPath); err != nil { return nil, "", errors.Wrap(err, "try fetch metadata") } - handler = remoteHandler(id, info.Labels) + handler = remoteHandler(id, info.Labels, false) } } @@ -122,6 +130,26 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } } } + + if handler == nil && sn.fs.TarfsEnabled() { + // TODO may need check all parrents, in case share layers with other images which already prepared by overlay snapshotter + + // tarfs merged & mounted on the uppermost parent layer + id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) + if err == nil { + if sn.fs.IsTarfsLayer(id) { + err := sn.fs.MergeTarfsLayers(s, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "merge tarfs layers") + } 
+ handler = remoteHandler(id, pInfo.Labels, true) + } else { + logger.Warnf("Tarfs enable but Parent (%s) of snapshot %s is not a tarfs layer, is an untar oci or nydus snapshot?", id, s.ID) + } + } else { + logger.Warnf("Tarfs enable but can't get snapshot %s Parent, is an untar oci or nydus snapshot?", s.ID) + } + } } if handler == nil { diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 007ec95755..c7d62f4839 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -40,6 +40,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/pprof" "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/system" + "github.com/containerd/nydus-snapshotter/pkg/tarfs" "github.com/containerd/nydus-snapshotter/pkg/store" @@ -165,6 +166,13 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho opts = append(opts, filesystem.WithReferrerManager(referrerMgr)) } + if cfg.Experimental.EnableTarfs { + // FIXME: get the insecure option from nydusd config. + _, backendConfig := daemonConfig.StorageBackend() + tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, cfg.DaemonConfig.NydusImagePath) + opts = append(opts, filesystem.WithTarfsManager(tarfsMgr)) + } + nydusFs, err := filesystem.NewFileSystem(ctx, opts...) if err != nil { return nil, errors.Wrap(err, "initialize filesystem thin layer") @@ -334,6 +342,10 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er needRemoteMounts = true metaSnapshotID = pID } + if o.fs.TarfsEnabled() && o.fs.IsMergedTarfsLayer(pID) { + needRemoteMounts = true + metaSnapshotID = pID + } } else { return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } @@ -410,7 +422,7 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap // Nydusd might not be running. We should run nydusd to reflect the rootfs. 
if err = o.fs.WaitUntilReady(pID); err != nil { if errors.Is(err, errdefs.ErrNotFound) { - if err := o.fs.Mount(pID, pInfo.Labels); err != nil { + if err := o.fs.Mount(pID, pInfo.Labels, false); err != nil { return nil, errors.Wrapf(err, "mount rafs, instance id %s", pID) } @@ -717,8 +729,8 @@ func overlayMount(options []string) []mount.Mount { } } -func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string) error { - return o.fs.Mount(id, labels) +func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string, isTarfs bool) error { + return o.fs.Mount(id, labels, isTarfs) } // `s` is the upmost snapshot and `id` refers to the nydus meta snapshot @@ -948,6 +960,12 @@ func (o *snapshotter) cleanupSnapshotDirectory(ctx context.Context, dir string) log.G(ctx).WithError(err).WithField("dir", dir).Error("failed to unmount") } + if o.fs.TarfsEnabled() { + if err := o.fs.DetachTarfsLayer(snapshotID); err != nil && !os.IsNotExist(err) { + log.G(ctx).WithError(err).Error("detach tarfs layer") + } + } + if err := os.RemoveAll(dir); err != nil { return errors.Wrapf(err, "remove directory %q", dir) } From fba1c892d5854b1d9c3cc1734c3c09a8b00fb215 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Tue, 16 May 2023 06:16:43 +0000 Subject: [PATCH 02/14] pkg: add concurrent control for tarfs blob process Add 'tarfs_max_concurrent_proc' option to set number of concurrent goroutines for tarfs blob processing. 
Signed-off-by: Xin Yin --- config/config.go | 9 +++--- pkg/filesystem/tarfs_adaptor.go | 11 ++++++- pkg/tarfs/tarfs.go | 51 ++++++++++++++++++++++----------- snapshot/snapshot.go | 2 +- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/config/config.go b/config/config.go index 151e25be53..34d9d6bf23 100644 --- a/config/config.go +++ b/config/config.go @@ -107,10 +107,11 @@ const ( ) type Experimental struct { - EnableStargz bool `toml:"enable_stargz"` - EnableReferrerDetect bool `toml:"enable_referrer_detect"` - EnableTarfs bool `toml:"enable_tarfs"` - TarfsHint bool `toml:"tarfs_hint"` + EnableStargz bool `toml:"enable_stargz"` + EnableReferrerDetect bool `toml:"enable_referrer_detect"` + EnableTarfs bool `toml:"enable_tarfs"` + TarfsHint bool `toml:"tarfs_hint"` + TarfsMaxConcurrentProc int `toml:"tarfs_max_concurrent_proc"` } type CgroupConfig struct { diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go index f1daf33a14..6796fd2f2a 100755 --- a/pkg/filesystem/tarfs_adaptor.go +++ b/pkg/filesystem/tarfs_adaptor.go @@ -42,11 +42,20 @@ func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]s return errors.Errorf("this image is not recommended for tarfs") } + limiter := fs.tarfsMgr.GetConcurrentLimiter(ref) + if limiter != nil { + if err := limiter.Acquire(context.Background(), 1); err != nil { + return errors.Wrapf(err, "concurrent limiter acquire") + } + } + go func() { - // TODO concurrency control if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifest, layerDigest, storagePath); err != nil { log.L.WithError(err).Errorf("async prepare Tarfs layer of snapshot ID %s", snapshotID) } + if limiter != nil { + limiter.Release(1) + } }() return nil } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index ff55211d9e..8d2b19bcea 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -29,20 +29,23 @@ import ( "github.com/opencontainers/go-digest" ocispec 
"github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" + "golang.org/x/sync/semaphore" "golang.org/x/sync/singleflight" "golang.org/x/sys/unix" "k8s.io/utils/lru" ) type Manager struct { - snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID - mutex sync.Mutex - insecure bool - checkTarfsHint bool // whether to rely on tarfs hint annotation - tarfsHintCache *lru.Cache // cache oci image raf and tarfs hint annotation - nydusImagePath string - diffIDCache *lru.Cache // cache oci blob digest and diffID - sg singleflight.Group + snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID + mutex sync.Mutex + nydusImagePath string + insecure bool + checkTarfsHint bool // whether to rely on tarfs hint annotation + maxConcurrentProcess int64 + tarfsHintCache *lru.Cache // cache image ref and tarfs hint annotation + diffIDCache *lru.Cache // cache oci blob digest and diffID + processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes + sg singleflight.Group } const ( @@ -70,15 +73,17 @@ type snapshotStatus struct { cancel context.CancelFunc } -func NewManager(insecure, checkTarfsHint bool, nydusImagePath string) *Manager { +func NewManager(insecure, checkTarfsHint bool, nydusImagePath string, maxConcurrentProcess int64) *Manager { return &Manager{ - snapshotMap: map[string]*snapshotStatus{}, - nydusImagePath: nydusImagePath, - insecure: insecure, - checkTarfsHint: checkTarfsHint, - tarfsHintCache: lru.New(50), - diffIDCache: lru.New(1000), - sg: singleflight.Group{}, + snapshotMap: map[string]*snapshotStatus{}, + nydusImagePath: nydusImagePath, + insecure: insecure, + checkTarfsHint: checkTarfsHint, + maxConcurrentProcess: maxConcurrentProcess, + tarfsHintCache: lru.New(50), + processLimiterCache: lru.New(50), + diffIDCache: lru.New(1000), + sg: singleflight.Group{}, } } @@ -536,3 +541,17 @@ func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, 
ref string, mani } return tarfsHint, err } + +func (t *Manager) GetConcurrentLimiter(ref string) *semaphore.Weighted { + if t.maxConcurrentProcess == 0 { + return nil + } + + if limiter, ok := t.processLimiterCache.Get(ref); ok { + return limiter.(*semaphore.Weighted) + } + + limiter := semaphore.NewWeighted(t.maxConcurrentProcess) + t.processLimiterCache.Add(ref, limiter) + return limiter +} diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index c7d62f4839..0812409b35 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -169,7 +169,7 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho if cfg.Experimental.EnableTarfs { // FIXME: get the insecure option from nydusd config. _, backendConfig := daemonConfig.StorageBackend() - tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, cfg.DaemonConfig.NydusImagePath) + tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, cfg.DaemonConfig.NydusImagePath, int64(cfg.Experimental.TarfsMaxConcurrentProc)) opts = append(opts, filesystem.WithTarfsManager(tarfsMgr)) } From ace7d56c81ebf98e11da6b837a1c1ce1b771ed7e Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Mon, 22 May 2023 08:54:08 +0000 Subject: [PATCH 03/14] pkg: support VIEW for tarfs Support VIEW for tarfs snapshot. 
Signed-off-by: Xin Yin --- snapshot/snapshot.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 0812409b35..3139723ca5 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -440,12 +440,27 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap return nil, errors.New("only can view nydus topmost layer") } // Otherwise, it is OCI snapshots - base, s, err := o.createSnapshot(ctx, snapshots.KindView, key, parent, opts) if err != nil { return nil, err } + if o.fs.TarfsEnabled() { + if o.fs.IsTarfsLayer(pID) { + if !o.fs.IsMergedTarfsLayer(pID) { + if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { + return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) + } + + if err := o.fs.Mount(pID, pInfo.Labels, true); err != nil { + return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) + } + } + needRemoteMounts = true + metaSnapshotID = pID + } + } + log.L.Infof("[View] snapshot with key %s parent %s", key, parent) if needRemoteMounts { From 91fb5350804a3fe3c8867215a349d7d0ef95673c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 22 Jun 2023 16:11:46 +0800 Subject: [PATCH 04/14] tarfs: syntax only changes Syntax only changes: - fix some typos - rename manifest to manifestDigest Signed-off-by: Jiang Liu --- pkg/filesystem/tarfs_adaptor.go | 15 ++++---- pkg/tarfs/tarfs.go | 63 +++++++++++++++++---------------- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go index 6796fd2f2a..af1f4dcd0f 100755 --- a/pkg/filesystem/tarfs_adaptor.go +++ b/pkg/filesystem/tarfs_adaptor.go @@ -23,18 +23,18 @@ func (fs *Filesystem) TarfsEnabled() bool { func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]string, snapshotID, storagePath string) error { ref, ok := labels[snpkg.TargetRefLabel] if !ok { - return 
errors.Errorf("not found image reference lable") + return errors.Errorf("not found image reference label") } layerDigest := digest.Digest(labels[snpkg.TargetLayerDigestLabel]) if layerDigest.Validate() != nil { - return errors.Errorf("not found manifest digest lable") + return errors.Errorf("not found layer digest label") } - manifest := digest.Digest(labels[snpkg.TargetManifestDigestLabel]) - if manifest.Validate() != nil { - return errors.Errorf("not found manifest digest lable") + manifestDigest := digest.Digest(labels[snpkg.TargetManifestDigestLabel]) + if manifestDigest.Validate() != nil { + return errors.Errorf("not found manifest digest label") } - ok, err := fs.tarfsMgr.CheckTarfsHintAnnotation(ctx, ref, manifest) + ok, err := fs.tarfsMgr.CheckTarfsHintAnnotation(ctx, ref, manifestDigest) if err != nil { return errors.Wrapf(err, "check tarfs hint annotaion") } @@ -50,13 +50,14 @@ func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]s } go func() { - if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifest, layerDigest, storagePath); err != nil { + if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, storagePath); err != nil { log.L.WithError(err).Errorf("async prepare Tarfs layer of snapshot ID %s", snapshotID) } if limiter != nil { limiter.Release(1) } }() + return nil } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 8d2b19bcea..e95ed1c8a3 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -42,9 +42,9 @@ type Manager struct { insecure bool checkTarfsHint bool // whether to rely on tarfs hint annotation maxConcurrentProcess int64 - tarfsHintCache *lru.Cache // cache image ref and tarfs hint annotation - diffIDCache *lru.Cache // cache oci blob digest and diffID processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes + tarfsHintCache *lru.Cache // cache oci image ref and tarfs hint annotation + diffIDCache *lru.Cache // cache oci blob digest and 
diffID sg singleflight.Group } @@ -87,40 +87,43 @@ func NewManager(insecure, checkTarfsHint bool, nydusImagePath string, maxConcurr } } -// fetch image form manifest and config, then cache them in lru +// Fetch image manifest and config contents, cache frequently used information. // FIXME need an update policy -func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifest digest.Digest) error { - // fetch the manifest find config digest and layers digest - rc, err := t.getBlobStream(ctx, remote, ref, manifest) +func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) error { + // fetch image manifest content + rc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) if err != nil { return err } defer rc.Close() - var manifestOCI ocispec.Manifest bytes, err := io.ReadAll(rc) if err != nil { return errors.Wrap(err, "read manifest") } + + // TODO: support docker images + var manifestOCI ocispec.Manifest if err := json.Unmarshal(bytes, &manifestOCI); err != nil { - return errors.Wrap(err, "unmarshal manifest") + return errors.Wrap(err, "unmarshal OCI manifest") } if len(manifestOCI.Layers) < 1 { return errors.Errorf("invalid manifest") } - // fetch the config and find diff ids + // fetch image config content and extract diffIDs rc, err = t.getBlobStream(ctx, remote, ref, manifestOCI.Config.Digest) if err != nil { - return errors.Wrap(err, "fetch referrers") + return errors.Wrap(err, "fetch image config content") } defer rc.Close() - var config ocispec.Image bytes, err = io.ReadAll(rc) if err != nil { - return errors.Wrap(err, "read config") + return errors.Wrap(err, "read image config content") } + + var config ocispec.Image if err := json.Unmarshal(bytes, &config); err != nil { - return errors.Wrap(err, "unmarshal config") + return errors.Wrap(err, "unmarshal image config") } if len(config.RootFS.DiffIDs) != len(manifestOCI.Layers) { return errors.Errorf("number of 
diff ids unmatch manifest layers") @@ -134,13 +137,13 @@ func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref return nil } -func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref string, manifest, layerDigest digest.Digest) (digest.Digest, error) { +func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref string, manifestDigest, layerDigest digest.Digest) (digest.Digest, error) { if diffid, ok := t.diffIDCache.Get(layerDigest); ok { return diffid.(digest.Digest), nil } if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { - err := t.fetchImageInfo(ctx, remote, ref, manifest) + err := t.fetchImageInfo(ctx, remote, ref, manifestDigest) return nil, err }); err != nil { return "", err @@ -153,7 +156,7 @@ func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref return "", errors.Errorf("get blob diff id failed") } -func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, layerDigest digest.Digest) (io.ReadCloser, error) { +func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, contentDigest digest.Digest) (io.ReadCloser, error) { fetcher, err := remote.Fetcher(ctx, ref) if err != nil { return nil, errors.Wrap(err, "get remote fetcher") @@ -164,7 +167,7 @@ func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref return nil, errors.Errorf("fetcher %T does not implement remotes.FetcherByDigest", fetcher) } - rc, _, err := fetcherByDigest.FetchByDigest(ctx, layerDigest) + rc, _, err := fetcherByDigest.FetchByDigest(ctx, contentDigest) return rc, err } @@ -234,8 +237,8 @@ func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { return &dev, err } -// download & uncompress oci blob, generate tarfs bootstrap -func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifest, layerDigest digest.Digest, storagePath string) (*losetup.Device, error) { +// 
download & uncompress an oci/docker blob, and then generate the tarfs bootstrap +func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifestDigest, layerDigest digest.Digest, storagePath string) (*losetup.Device, error) { keyChain, err := auth.GetKeyChainByRef(ref, nil) if err != nil { return nil, err @@ -243,17 +246,11 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif remote := remote.New(keyChain, t.insecure) handle := func() (bool, error) { - diffID, err := t.getBlobDiffID(ctx, remote, ref, manifest, layerDigest) - if err != nil { - return false, err - } - rc, err := t.getBlobStream(ctx, remote, ref, layerDigest) if err != nil { return false, err } defer rc.Close() - ds, err := compression.DecompressStream(rc) if err != nil { return false, errors.Wrap(err, "unpack stream") @@ -262,12 +259,16 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif digester := digest.Canonical.Digester() dr := io.TeeReader(ds, digester.Hash()) - emptyBlob, err := t.generateBootstrap(dr, storagePath, snapshotID) if err != nil { return false, err } log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s, digest %s", snapshotID, digester.Digest()) + + diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) + if err != nil { + return false, err + } if digester.Digest() != diffID { return false, errors.Errorf("diff id %s not match", diffID) } @@ -291,7 +292,7 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif return t.attachLoopdev(filepath.Join(storagePath, "layer_"+snapshotID+"_"+TarfsBlobName)) } -func (t *Manager) PrepareLayer(snapshotID, ref string, manifest, layerDigest digest.Digest, storagePath string) error { +func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDigest digest.Digest, storagePath string) error { t.mutex.Lock() if _, ok := t.snapshotMap[snapshotID]; ok { t.mutex.Unlock() @@ -309,7 +310,7 @@ 
func (t *Manager) PrepareLayer(snapshotID, ref string, manifest, layerDigest dig } t.mutex.Unlock() - loopdev, err := t.blobProcess(ctx, snapshotID, ref, manifest, layerDigest, storagePath) + loopdev, err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, storagePath) st, err1 := t.getSnapshotStatus(snapshotID) if err1 != nil { @@ -505,7 +506,7 @@ func (t *Manager) getSnapshotStatus(snapshotID string) (*snapshotStatus, error) return nil, errors.Errorf("not found snapshot %s", snapshotID) } -func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, manifest digest.Digest) (bool, error) { +func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, manifestDigest digest.Digest) (bool, error) { if !t.checkTarfsHint { return true, nil } @@ -522,7 +523,7 @@ func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, mani } if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { - err := t.fetchImageInfo(ctx, remote, ref, manifest) + err := t.fetchImageInfo(ctx, remote, ref, manifestDigest) return nil, err }); err != nil { return false, err @@ -537,7 +538,7 @@ func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, mani tarfsHint, err := handle() if err != nil && remote.RetryWithPlainHTTP(ref, err) { - return handle() + tarfsHint, err = handle() } return tarfsHint, err } From 47372c14de7911b69f21e583fcd801fef110d548 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 22 Jun 2023 16:09:22 +0800 Subject: [PATCH 05/14] tarfs: limit maximum size of image manifest and config Limit maximum size of image manifest and config, to avoid DoS attack. 
Signed-off-by: Jiang Liu --- pkg/tarfs/tarfs.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index e95ed1c8a3..02cf60fbc1 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -56,6 +56,7 @@ const ( ) const ( + MaxManifestConfigSize = 0x100000 TarfsBlobName = "blob.tar" TarfsLayerBootstapName = "layer_bootstrap" TarfsMeragedBootstapName = "merged_bootstrap" @@ -91,11 +92,14 @@ func NewManager(insecure, checkTarfsHint bool, nydusImagePath string, maxConcurr // FIXME need an update policy func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) error { // fetch image manifest content - rc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) + rc, desc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) if err != nil { return err } defer rc.Close() + if desc.Size > MaxManifestConfigSize { + return errors.Errorf("image manifest content size %x is too big", desc.Size) + } bytes, err := io.ReadAll(rc) if err != nil { return errors.Wrap(err, "read manifest") @@ -111,11 +115,14 @@ func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref } // fetch image config content and extract diffIDs - rc, err = t.getBlobStream(ctx, remote, ref, manifestOCI.Config.Digest) + rc, desc, err = t.getBlobStream(ctx, remote, ref, manifestOCI.Config.Digest) if err != nil { return errors.Wrap(err, "fetch image config content") } defer rc.Close() + if desc.Size > MaxManifestConfigSize { + return errors.Errorf("image config content size %x is too big", desc.Size) + } bytes, err = io.ReadAll(rc) if err != nil { return errors.Wrap(err, "read image config content") @@ -156,19 +163,18 @@ func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref return "", errors.Errorf("get blob diff id failed") } -func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, 
contentDigest digest.Digest) (io.ReadCloser, error) { +func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, contentDigest digest.Digest) (io.ReadCloser, ocispec.Descriptor, error) { fetcher, err := remote.Fetcher(ctx, ref) if err != nil { - return nil, errors.Wrap(err, "get remote fetcher") + return nil, ocispec.Descriptor{}, errors.Wrap(err, "get remote fetcher") } fetcherByDigest, ok := fetcher.(remotes.FetcherByDigest) if !ok { - return nil, errors.Errorf("fetcher %T does not implement remotes.FetcherByDigest", fetcher) + return nil, ocispec.Descriptor{}, errors.Errorf("fetcher %T does not implement remotes.FetcherByDigest", fetcher) } - rc, _, err := fetcherByDigest.FetchByDigest(ctx, contentDigest) - return rc, err + return fetcherByDigest.FetchByDigest(ctx, contentDigest) } // generate tar file and layer bootstrap, return if this blob is an empty blob @@ -246,7 +252,7 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif remote := remote.New(keyChain, t.insecure) handle := func() (bool, error) { - rc, err := t.getBlobStream(ctx, remote, ref, layerDigest) + rc, _, err := t.getBlobStream(ctx, remote, ref, layerDigest) if err != nil { return false, err } From 1eb7bf23d5eb693ad42f4a04c930c542dadc5a16 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 22 Jun 2023 16:55:38 +0800 Subject: [PATCH 06/14] tarfs: only caches needed information Only caches needed information according to manager configuration. 
Signed-off-by: Jiang Liu --- pkg/tarfs/tarfs.go | 54 +++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 02cf60fbc1..5a9cb84201 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -40,6 +40,7 @@ type Manager struct { mutex sync.Mutex nydusImagePath string insecure bool + validateDiffID bool // whether to validate digest for uncompressed content checkTarfsHint bool // whether to rely on tarfs hint annotation maxConcurrentProcess int64 processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes @@ -79,6 +80,7 @@ func NewManager(insecure, checkTarfsHint bool, nydusImagePath string, maxConcurr snapshotMap: map[string]*snapshotStatus{}, nydusImagePath: nydusImagePath, insecure: insecure, + validateDiffID: true, checkTarfsHint: checkTarfsHint, maxConcurrentProcess: maxConcurrentProcess, tarfsHintCache: lru.New(50), @@ -135,11 +137,15 @@ func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref if len(config.RootFS.DiffIDs) != len(manifestOCI.Layers) { return errors.Errorf("number of diff ids unmatch manifest layers") } - // cache ref & tarfs hint annotation - t.tarfsHintCache.Add(ref, label.IsTarfsHint(manifestOCI.Annotations)) - // cache OCI blob digest & diff id - for i := range manifestOCI.Layers { - t.diffIDCache.Add(manifestOCI.Layers[i].Digest, config.RootFS.DiffIDs[i]) + if t.checkTarfsHint { + // cache ref & tarfs hint annotation + t.tarfsHintCache.Add(ref, label.IsTarfsHint(manifestOCI.Annotations)) + } + if t.validateDiffID { + // cache OCI blob digest & diff id + for i := range manifestOCI.Layers { + t.diffIDCache.Add(manifestOCI.Layers[i].Digest, config.RootFS.DiffIDs[i]) + } } return nil } @@ -263,22 +269,30 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif } defer ds.Close() - digester := digest.Canonical.Digester() - dr := io.TeeReader(ds, digester.Hash()) - 
emptyBlob, err := t.generateBootstrap(dr, storagePath, snapshotID) - if err != nil { - return false, err - } - log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s, digest %s", snapshotID, digester.Digest()) - - diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) - if err != nil { - return false, err - } - if digester.Digest() != diffID { - return false, errors.Errorf("diff id %s not match", diffID) + if t.validateDiffID { + diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) + if err != nil { + return false, err + } + digester := digest.Canonical.Digester() + dr := io.TeeReader(ds, digester.Hash()) + emptyBlob, err := t.generateBootstrap(dr, storagePath, snapshotID) + if err != nil { + return false, err + } + log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s, digest %s", snapshotID, digester.Digest()) + if digester.Digest() != diffID { + return false, errors.Errorf("diff id %s not match", diffID) + } + return emptyBlob, nil + } else { + emptyBlob, err := t.generateBootstrap(ds, storagePath, snapshotID) + if err != nil { + return false, err + } + log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s", snapshotID) + return emptyBlob, nil } - return emptyBlob, nil } var emptyBlob bool From 35b48e5472b1afea5ef00c3c7d750faa4846b25e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 22 Jun 2023 13:49:02 +0800 Subject: [PATCH 07/14] tarfs: refine tarfs overall design and implementation details 1) move loopdev setup from Prepare()/Merge() into Mount(), preparing for support of nodev mode in addition to blockdev. 2) store tar file in the global cache directory, mount tarfs at "mnt" instead of "tarfs", to keep consistence with fusedev/fscache drivers. 
3) use tempfile/rename mechanism to easy error recover 4) use lock to avoid some possible race conditions 5) use mode 0750 instead of 0755 for new directories 6) associate a Manager with filesystems managed by blockdev Signed-off-by: Jiang Liu --- pkg/filesystem/config.go | 20 +- pkg/filesystem/fs.go | 54 +++-- pkg/filesystem/tarfs_adaptor.go | 12 +- pkg/label/label.go | 15 +- pkg/manager/manager.go | 23 +-- pkg/metrics/serve.go | 4 +- pkg/tarfs/tarfs.go | 343 ++++++++++++++++++++------------ snapshot/process.go | 92 +++++---- snapshot/snapshot.go | 153 +++++++++----- 9 files changed, 440 insertions(+), 276 deletions(-) diff --git a/pkg/filesystem/config.go b/pkg/filesystem/config.go index 6aecfdca77..4f2fb668bd 100644 --- a/pkg/filesystem/config.go +++ b/pkg/filesystem/config.go @@ -29,18 +29,18 @@ func WithNydusImageBinaryPath(p string) NewFSOpt { func WithManager(pm *manager.Manager) NewFSOpt { return func(fs *Filesystem) error { - if pm == nil { - return errors.New("process manager cannot be nil") + if pm != nil { + switch pm.FsDriver { + case config.FsDriverBlockdev: + fs.blockdevManager = pm + case config.FsDriverFscache: + fs.fscacheManager = pm + case config.FsDriverFusedev: + fs.fusedevManager = pm + } + fs.enabledManagers = append(fs.enabledManagers, pm) } - if pm.FsDriver == config.FsDriverFusedev { - fs.fusedevManager = pm - } else if pm.FsDriver == config.FsDriverFscache { - fs.fscacheManager = pm - } - - fs.enabledManagers = append(fs.enabledManagers, pm) - return nil } } diff --git a/pkg/filesystem/fs.go b/pkg/filesystem/fs.go index fc380dd7d0..89a6c6eb28 100644 --- a/pkg/filesystem/fs.go +++ b/pkg/filesystem/fs.go @@ -15,20 +15,23 @@ import ( "os" "path" - "github.com/containerd/containerd/log" snpkg "github.com/containerd/containerd/pkg/snapshotters" - "github.com/containerd/containerd/snapshots" "github.com/mohae/deepcopy" "github.com/opencontainers/go-digest" "github.com/pkg/errors" "golang.org/x/sync/errgroup" + 
"github.com/containerd/containerd/log" + "github.com/containerd/containerd/snapshots" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/config/daemonconfig" "github.com/containerd/nydus-snapshotter/pkg/cache" "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/manager" "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/signature" @@ -220,21 +223,19 @@ func (fs *Filesystem) WaitUntilReady(snapshotID string) error { // Mount will be called when containerd snapshotter prepare remote snapshotter // this method will fork nydus daemon and manage it in the internal store, and indexed by snapshotID // It must set up all necessary resources during Mount procedure and revoke any step if necessary. -func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, isTarfs bool) (err error) { +func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, s *storage.Snapshot) (err error) { + // Do not create RAFS instance in case of nodev. + if !fs.DaemonBacked() { + return nil + } + fsDriver := config.GetFsDriver() - if isTarfs { + if label.IsTarfsDataLayer(labels) { fsDriver = config.FsDriverBlockdev - } else if !fs.DaemonBacked() { - fsDriver = config.FsDriverNodev } isSharedFusedev := fsDriver == config.FsDriverFusedev && config.GetDaemonMode() == config.DaemonModeShared useSharedDaemon := fsDriver == config.FsDriverFscache || isSharedFusedev - // Do not create RAFS instance in case of nodev. 
- if fsDriver == config.FsDriverNodev { - return nil - } - var imageID string imageID, ok := labels[snpkg.TargetRefLabel] if !ok { @@ -266,7 +267,7 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, isTarfs }() fsManager, err := fs.getManager(fsDriver) - if err != nil && fsDriver != config.FsDriverBlockdev { + if err != nil { return errors.Wrapf(err, "get filesystem manager for snapshot %s", snapshotID) } @@ -355,19 +356,17 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, isTarfs return errors.Wrapf(err, "mount file system by daemon %s, snapshot %s", d.ID(), snapshotID) } case config.FsDriverBlockdev: - mountPoint := path.Join(rafs.GetSnapshotDir(), "tarfs") - err = fs.tarfsMgr.MountTarErofs(snapshotID, mountPoint) + err = fs.tarfsMgr.MountTarErofs(snapshotID, s, rafs) if err != nil { return errors.Wrapf(err, "mount tarfs for snapshot %s", snapshotID) } - rafs.SetMountpoint(mountPoint) + default: + return errors.Errorf("unknown filesystem driver %s for snapshot %s", fsDriver, snapshotID) } // Persist it after associate instance after all the states are calculated. 
- if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { - if err := fsManager.NewInstance(rafs); err != nil { - return errors.Wrapf(err, "create instance %s", snapshotID) - } + if err := fsManager.NewInstance(rafs); err != nil { + return errors.Wrapf(err, "create instance %s", snapshotID) } return nil @@ -381,12 +380,18 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { } fsDriver := instance.GetFsDriver() + if fsDriver == config.FsDriverNodev { + return nil + } fsManager, err := fs.getManager(fsDriver) - if err != nil && fsDriver != config.FsDriverBlockdev { + if err != nil { return errors.Wrapf(err, "get manager for filesystem instance %s", instance.DaemonID) } - if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { + switch fsDriver { + case config.FsDriverFscache: + fallthrough + case config.FsDriverFusedev: daemon, err := fs.getDaemonByRafs(instance) if err != nil { log.L.Debugf("snapshot %s has no associated nydusd", snapshotID) @@ -406,10 +411,15 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { return errors.Wrapf(err, "destroy daemon %s", daemon.ID()) } } - } else if fsDriver == config.FsDriverBlockdev { + case config.FsDriverBlockdev: if err := fs.tarfsMgr.UmountTarErofs(snapshotID); err != nil { return errors.Wrapf(err, "umount tar erofs on snapshot %s", snapshotID) } + if err := fsManager.RemoveInstance(snapshotID); err != nil { + return errors.Wrapf(err, "remove snapshot %s", snapshotID) + } + default: + return errors.Errorf("unknown filesystem driver %s for snapshot %s", fsDriver, snapshotID) } return nil } diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go index af1f4dcd0f..86824e408f 100755 --- a/pkg/filesystem/tarfs_adaptor.go +++ b/pkg/filesystem/tarfs_adaptor.go @@ -20,7 +20,7 @@ func (fs *Filesystem) TarfsEnabled() bool { return fs.tarfsMgr != nil } -func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, 
labels map[string]string, snapshotID, storagePath string) error { +func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]string, snapshotID, upperDirPath string) error { ref, ok := labels[snpkg.TargetRefLabel] if !ok { return errors.Errorf("not found image reference label") @@ -50,8 +50,8 @@ func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]s } go func() { - if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, storagePath); err != nil { - log.L.WithError(err).Errorf("async prepare Tarfs layer of snapshot ID %s", snapshotID) + if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, upperDirPath); err != nil { + log.L.WithError(err).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) } if limiter != nil { limiter.Release(1) @@ -70,9 +70,9 @@ func (fs *Filesystem) DetachTarfsLayer(snapshotID string) error { } func (fs *Filesystem) IsTarfsLayer(snapshotID string) bool { - return fs.tarfsMgr.CheckTarfsLayer(snapshotID, false) + return fs.tarfsMgr.IsTarfsLayer(snapshotID) } -func (fs *Filesystem) IsMergedTarfsLayer(snapshotID string) bool { - return fs.tarfsMgr.CheckTarfsLayer(snapshotID, true) +func (fs *Filesystem) IsMountedTarfsLayer(snapshotID string) bool { + return fs.tarfsMgr.IsMountedTarfsLayer(snapshotID) } diff --git a/pkg/label/label.go b/pkg/label/label.go index 7621275d78..820cd26bbe 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -34,6 +34,8 @@ const ( NydusMetaLayer = "containerd.io/snapshot/nydus-bootstrap" // The referenced blob sha256 in format of `sha256:xxx`, set by image builders. NydusRefLayer = "containerd.io/snapshot/nydus-ref" + // A bool flag to mark the layer as a nydus tarfs, set by the snapshotter + NydusTarfsLayer = "containerd.io/snapshot/nydus-tarfs" // Annotation containing secret to pull images from registry, set by the snapshotter. 
NydusImagePullSecret = "containerd.io/snapshot/pullsecret" // Annotation containing username to pull images from registry, set by the snapshotter. @@ -60,17 +62,16 @@ func IsNydusDataLayer(labels map[string]string) bool { } func IsNydusMetaLayer(labels map[string]string) bool { - if labels == nil { - return false - } _, ok := labels[NydusMetaLayer] return ok } -func IsTarfsHint(labels map[string]string) bool { - if labels == nil { - return false - } +func IsTarfsDataLayer(labels map[string]string) bool { + _, ok := labels[NydusTarfsLayer] + return ok +} + +func HasTarfsHint(labels map[string]string) bool { _, ok := labels[TarfsHint] return ok } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 2ace2fb296..2c7d63e331 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -127,9 +127,8 @@ type Manager struct { // supposed to refilled when nydus-snapshotter restarting. daemonStates *DaemonStates - monitor LivenessMonitor - // TODO: Close me - LivenessNotifier chan deathEvent + monitor LivenessMonitor + LivenessNotifier chan deathEvent // TODO: Close me RecoverPolicy config.DaemonRecoverPolicy SupervisorSet *supervisor.SupervisorsSet @@ -151,12 +150,10 @@ type Opt struct { Database *store.Database CacheDir string RecoverPolicy config.DaemonRecoverPolicy - // Nydus-snapshotter work directory - RootDir string - DaemonConfig daemonconfig.DaemonConfig - CgroupMgr *cgroup.Manager - // In order to validate daemon fs driver is consistent with the latest snapshotter boot - FsDriver string + RootDir string // Nydus-snapshotter work directory + DaemonConfig daemonconfig.DaemonConfig + CgroupMgr *cgroup.Manager + FsDriver string // In order to validate daemon fs driver is consistent with the latest snapshotter boot } func (m *Manager) doDaemonFailover(d *daemon.Daemon) { @@ -328,6 +325,10 @@ func (m *Manager) NewInstance(r *daemon.Rafs) error { return m.store.AddInstance(r) } +func (m *Manager) RemoveInstance(snapshotID string) error { + return 
m.store.DeleteInstance(snapshotID) +} + func (m *Manager) Lock() { m.mu.Lock() } @@ -353,10 +354,6 @@ func (m *Manager) UnsubscribeDaemonEvent(d *daemon.Daemon) error { return nil } -func (m *Manager) RemoveInstance(snapshotID string) error { - return m.store.DeleteInstance(snapshotID) -} - func (m *Manager) UpdateDaemon(daemon *daemon.Daemon) error { m.mu.Lock() defer m.mu.Unlock() diff --git a/pkg/metrics/serve.go b/pkg/metrics/serve.go index 6544270d7b..73b68511af 100644 --- a/pkg/metrics/serve.go +++ b/pkg/metrics/serve.go @@ -36,7 +36,9 @@ type Server struct { func WithProcessManager(pm *manager.Manager) ServerOpt { return func(s *Server) error { - s.managers = append(s.managers, pm) + if pm != nil { + s.managers = append(s.managers, pm) + } return nil } } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 5a9cb84201..ddbf2ff6ef 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -13,6 +13,7 @@ import ( "io" "os" "os/exec" + "path" "path/filepath" "strings" "sync" @@ -22,6 +23,7 @@ import ( "github.com/containerd/containerd/log" "github.com/containerd/containerd/snapshots/storage" "github.com/containerd/nydus-snapshotter/pkg/auth" + "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/remote" "github.com/containerd/nydus-snapshotter/pkg/remote/remotes" @@ -38,6 +40,8 @@ import ( type Manager struct { snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID mutex sync.Mutex + mutexLoopDev sync.Mutex + cacheDirPath string nydusImagePath string insecure bool validateDiffID bool // whether to validate digest for uncompressed content @@ -50,34 +54,37 @@ type Manager struct { } const ( - TarfsStatusInit = 0 - TarfsStatusFormatting = 1 - TarfsStatusReady = 2 - TarfsStatusFailed = 3 + TarfsStatusInit = 0 + TarfsStatusPrepare = 1 + TarfsStatusReady = 2 + TarfsStatusFailed = 3 ) const ( MaxManifestConfigSize = 0x100000 - 
TarfsBlobName = "blob.tar" - TarfsLayerBootstapName = "layer_bootstrap" - TarfsMeragedBootstapName = "merged_bootstrap" + TarfsLayerBootstapName = "layer.boot" + TarfsMeragedBootstapName = "image.boot" ) var ErrEmptyBlob = errors.New("empty blob") type snapshotStatus struct { + mutex sync.Mutex status int - layerBsLoopdev *losetup.Device - mergedBsLoopdev *losetup.Device + isEmptyBlob bool + blobID string + blobTarFilePath string erofsMountPoint string - erofsMountOpts string + dataLoopdev *losetup.Device + metaLoopdev *losetup.Device wg *sync.WaitGroup cancel context.CancelFunc } -func NewManager(insecure, checkTarfsHint bool, nydusImagePath string, maxConcurrentProcess int64) *Manager { +func NewManager(insecure, checkTarfsHint bool, cacheDirPath, nydusImagePath string, maxConcurrentProcess int64) *Manager { return &Manager{ snapshotMap: map[string]*snapshotStatus{}, + cacheDirPath: cacheDirPath, nydusImagePath: nydusImagePath, insecure: insecure, validateDiffID: true, @@ -104,16 +111,16 @@ func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref } bytes, err := io.ReadAll(rc) if err != nil { - return errors.Wrap(err, "read manifest") + return errors.Wrap(err, "read image manifest content") } - // TODO: support docker images + // TODO: support docker v2 images var manifestOCI ocispec.Manifest if err := json.Unmarshal(bytes, &manifestOCI); err != nil { - return errors.Wrap(err, "unmarshal OCI manifest") + return errors.Wrap(err, "unmarshal OCI image manifest") } if len(manifestOCI.Layers) < 1 { - return errors.Errorf("invalid manifest") + return errors.Errorf("invalid OCI image manifest without any layer") } // fetch image config content and extract diffIDs @@ -135,11 +142,12 @@ func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref return errors.Wrap(err, "unmarshal image config") } if len(config.RootFS.DiffIDs) != len(manifestOCI.Layers) { - return errors.Errorf("number of diff ids unmatch manifest layers") + 
return errors.Errorf("number of diffIDs does not match manifest layers") } + if t.checkTarfsHint { // cache ref & tarfs hint annotation - t.tarfsHintCache.Add(ref, label.IsTarfsHint(manifestOCI.Annotations)) + t.tarfsHintCache.Add(ref, label.HasTarfsHint(manifestOCI.Annotations)) } if t.validateDiffID { // cache OCI blob digest & diff id @@ -184,17 +192,28 @@ func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref } // generate tar file and layer bootstrap, return if this blob is an empty blob -func (t *Manager) generateBootstrap(tarReader io.Reader, storagePath, snapshotID string) (emptyBlob bool, err error) { - layerBootstrap := filepath.Join(storagePath, TarfsLayerBootstapName) - blob := filepath.Join(storagePath, "layer_"+snapshotID+"_"+TarfsBlobName) +func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID, upperDirPath string) (emptyBlob bool, err error) { + snapshotImageDir := filepath.Join(upperDirPath, "image") + if err := os.MkdirAll(snapshotImageDir, 0750); err != nil { + return false, errors.Wrapf(err, "create data dir %s for tarfs snapshot", snapshotImageDir) + } + layerMetaFile := t.layerMetaFilePath(upperDirPath) + if _, err := os.Stat(layerMetaFile); err == nil { + return false, errors.Errorf("tarfs bootstrap file %s for snapshot %s already exists", layerMetaFile, snapshotID) + } + layerMetaFileTmp := layerMetaFile + ".tarfs.tmp" + defer os.Remove(layerMetaFileTmp) - tarFile, err := os.Create(blob) + layerTarFile := t.layerTarFilePath(layerBlobID) + layerTarFileTmp := layerTarFile + ".tarfs.tmp" + tarFile, err := os.Create(layerTarFileTmp) if err != nil { - return false, err + return false, errors.Wrap(err, "create temporary file to store tar stream") } defer tarFile.Close() + defer os.Remove(layerTarFileTmp) - fifoName := filepath.Join(storagePath, "layer_"+snapshotID+"_"+"tar.fifo") + fifoName := filepath.Join(upperDirPath, "layer_"+snapshotID+"_"+"tar.fifo") if err = syscall.Mkfifo(fifoName, 0644); 
err != nil { return false, err } @@ -215,9 +234,9 @@ func (t *Manager) generateBootstrap(tarReader io.Reader, storagePath, snapshotID options := []string{ "create", "--type", "tar-tarfs", - "--bootstrap", layerBootstrap, - "--blob-id", "layer_" + snapshotID + "_" + TarfsBlobName, - "--blob-dir", storagePath, + "--bootstrap", layerMetaFileTmp, + "--blob-id", layerBlobID, + "--blob-dir", t.cacheDirPath, fifoName, } cmd := exec.Command(t.nydusImagePath, options...) @@ -228,32 +247,32 @@ func (t *Manager) generateBootstrap(tarReader io.Reader, storagePath, snapshotID err = cmd.Run() if err != nil { log.L.Warnf("nydus image exec failed, %s", errb.String()) - return false, errors.Wrap(err, "converting tarfs layer failed") + return false, errors.Wrap(err, "converting OCIv1 layer blob to tarfs") } log.L.Debugf("nydus image output %s", outb.String()) log.L.Debugf("nydus image err %s", errb.String()) + if err := os.Rename(layerTarFileTmp, layerTarFile); err != nil { + return false, errors.Wrapf(err, "rename file %s to %s", layerTarFileTmp, layerTarFile) + } + if err := os.Rename(layerMetaFileTmp, layerMetaFile); err != nil { + return false, errors.Wrapf(err, "rename file %s to %s", layerMetaFileTmp, layerMetaFile) + } + // TODO need a more reliable way to check if this is an empty blob - if strings.Contains(outb.String(), "data blob size: 0x0") || - strings.Contains(errb.String(), "data blob size: 0x0") { + if strings.Contains(outb.String(), "data blob size: 0x0\n") || + strings.Contains(errb.String(), "data blob size: 0x0\n") { return true, nil } return false, nil } -func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { - // losetup.Attach() is not thread-safe hold lock here - t.mutex.Lock() - defer t.mutex.Unlock() - dev, err := losetup.Attach(blob, 0, false) - return &dev, err -} - // download & uncompress an oci/docker blob, and then generate the tarfs bootstrap -func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifestDigest, 
layerDigest digest.Digest, storagePath string) (*losetup.Device, error) { +func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifestDigest, layerDigest digest.Digest, + layerBlobID, upperDirPath string) error { keyChain, err := auth.GetKeyChainByRef(ref, nil) if err != nil { - return nil, err + return err } remote := remote.New(keyChain, t.insecure) @@ -265,34 +284,34 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif defer rc.Close() ds, err := compression.DecompressStream(rc) if err != nil { - return false, errors.Wrap(err, "unpack stream") + return false, errors.Wrap(err, "unpack layer blob stream for tarfs") } defer ds.Close() + var emptyBlob bool if t.validateDiffID { diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) if err != nil { - return false, err + return false, errors.Wrap(err, "get image layer diffID") } digester := digest.Canonical.Digester() dr := io.TeeReader(ds, digester.Hash()) - emptyBlob, err := t.generateBootstrap(dr, storagePath, snapshotID) + emptyBlob, err = t.generateBootstrap(dr, snapshotID, layerBlobID, upperDirPath) if err != nil { - return false, err + return false, errors.Wrap(err, "generate tarfs data from image layer blob") } - log.L.Infof("prepare tarfs Layer generateBootstrap done layer %s, digest %s", snapshotID, digester.Digest()) if digester.Digest() != diffID { - return false, errors.Errorf("diff id %s not match", diffID) + return false, errors.Errorf("image layer diffID %s for tarfs does not match", diffID) } - return emptyBlob, nil + log.L.Infof("tarfs data for layer %s is ready, digest %s", snapshotID, digester.Digest()) } else { - emptyBlob, err := t.generateBootstrap(ds, storagePath, snapshotID) + emptyBlob, err = t.generateBootstrap(ds, snapshotID, layerBlobID, upperDirPath) if err != nil { - return false, err + return false, errors.Wrap(err, "generate tarfs data from image layer blob") } - log.L.Infof("prepare tarfs Layer 
generateBootstrap done layer %s", snapshotID) - return emptyBlob, nil + log.L.Infof("tarfs data for layer %s is ready", snapshotID) } + return emptyBlob, nil } var emptyBlob bool @@ -301,22 +320,20 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif emptyBlob, err = handle() } if err != nil { - return nil, err + return err } - - // for empty blob do not need a loop device if emptyBlob { - return nil, ErrEmptyBlob + return ErrEmptyBlob } - - return t.attachLoopdev(filepath.Join(storagePath, "layer_"+snapshotID+"_"+TarfsBlobName)) + return nil } -func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDigest digest.Digest, storagePath string) error { +func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDigest digest.Digest, + upperDirPath string) error { t.mutex.Lock() if _, ok := t.snapshotMap[snapshotID]; ok { t.mutex.Unlock() - return errors.Errorf("snapshot %s already prapared", snapshotID) + return errors.Errorf("snapshot %s has already been prapared", snapshotID) } wg := &sync.WaitGroup{} wg.Add(1) @@ -324,145 +341,200 @@ func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDige ctx, cancel := context.WithCancel(context.Background()) t.snapshotMap[snapshotID] = &snapshotStatus{ - status: TarfsStatusFormatting, + status: TarfsStatusPrepare, wg: wg, cancel: cancel, } t.mutex.Unlock() - loopdev, err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, storagePath) + layerBlobID := layerDigest.Hex() + err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, layerBlobID, upperDirPath) - st, err1 := t.getSnapshotStatus(snapshotID) + st, err1 := t.getSnapshotStatus(snapshotID, true) if err1 != nil { - return errors.Errorf("can not found snapshot status after prepare") + return errors.Errorf("can not found status object for snapshot %s after prepare", snapshotID) } + defer st.mutex.Unlock() + + st.blobID = layerBlobID + st.blobTarFilePath = 
t.layerTarFilePath(layerBlobID) if err != nil { if errors.Is(err, ErrEmptyBlob) { + st.isEmptyBlob = true st.status = TarfsStatusReady err = nil } else { st.status = TarfsStatusFailed } } else { + st.isEmptyBlob = false st.status = TarfsStatusReady - st.layerBsLoopdev = loopdev } + log.L.Debugf("finish converting snapshot %s to tarfs, status %d, empty blob %v", snapshotID, st.status, st.isEmptyBlob) + return err } func (t *Manager) MergeLayers(s storage.Snapshot, storageLocater func(string) string) error { - mergedBootstrap := filepath.Join(storageLocater(s.ParentIDs[0]), TarfsMeragedBootstapName) + mergedBootstrap := t.mergedMetaFilePath(storageLocater(s.ParentIDs[0])) if _, err := os.Stat(mergedBootstrap); err == nil { - log.L.Infof("tarfs snapshot %s already has merged bootstrap %s", s.ParentIDs[0], mergedBootstrap) + log.L.Debugf("tarfs snapshot %s already has merged bootstrap %s", s.ParentIDs[0], mergedBootstrap) return nil } - var mountOpts string bootstraps := []string{} // When merging bootstrap, we need to arrange layer bootstrap in order from low to high for idx := len(s.ParentIDs) - 1; idx >= 0; idx-- { snapshotID := s.ParentIDs[idx] - err := t.waitLayerFormating(snapshotID) + err := t.waitLayerReady(snapshotID) if err != nil { - return errors.Wrapf(err, "wait layer formating err") + return errors.Wrapf(err, "wait for tarfs snapshot %s to get ready", snapshotID) } - st, err := t.getSnapshotStatus(snapshotID) + st, err := t.getSnapshotStatus(snapshotID, false) if err != nil { return err } if st.status != TarfsStatusReady { - return errors.Errorf("snapshot %s tarfs format error %d", snapshotID, st.status) + return errors.Errorf("tarfs snapshot %s is not ready, %d", snapshotID, st.status) } - bootstraps = append(bootstraps, filepath.Join(storageLocater(snapshotID), TarfsLayerBootstapName)) - // mount opt skip empty blob - if st.layerBsLoopdev != nil { - mountOpts += "device=" + st.layerBsLoopdev.Path() + "," - } + metaFilePath := 
t.layerMetaFilePath(storageLocater(snapshotID)) + bootstraps = append(bootstraps, metaFilePath) } + mergedBootstrapTmp := mergedBootstrap + ".tarfs.tmp" + defer os.Remove(mergedBootstrapTmp) + options := []string{ "merge", - "--bootstrap", mergedBootstrap, + "--bootstrap", mergedBootstrapTmp, } options = append(options, bootstraps...) cmd := exec.Command(t.nydusImagePath, options...) - cmd.Stderr = os.Stderr - cmd.Stdout = os.Stdout + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb log.L.Debugf("nydus image command %v", options) err := cmd.Run() if err != nil { - return errors.Wrap(err, "merging tarfs layers") + return errors.Wrap(err, "merge tarfs image layers") } - loopdev, err := t.attachLoopdev(mergedBootstrap) + err = os.Rename(mergedBootstrapTmp, mergedBootstrap) if err != nil { - return errors.Wrap(err, "attach bootstrap to loop error") + return errors.Wrap(err, "rename merged bootstrap file") } - st, err := t.getSnapshotStatus(s.ParentIDs[0]) - if err != nil { - return errors.Errorf("snapshot %s not found", s.ParentIDs[0]) - } - st.mergedBsLoopdev = loopdev - st.erofsMountOpts = mountOpts - return nil } -func (t *Manager) MountTarErofs(snapshotID string, mountPoint string) error { - var devName string +func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { + // losetup.Attach() is not thread-safe hold lock here + t.mutexLoopDev.Lock() + defer t.mutexLoopDev.Unlock() + dev, err := losetup.Attach(blob, 0, false) + return &dev, err +} - st, err := t.getSnapshotStatus(snapshotID) +func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *daemon.Rafs) error { + if s == nil { + return errors.New("snapshot object for MountTarErofs() is nil") + } + + var devices []string + // When merging bootstrap, we need to arrange layer bootstrap in order from low to high + for idx := len(s.ParentIDs) - 1; idx >= 0; idx-- { + snapshotID := s.ParentIDs[idx] + err := t.waitLayerReady(snapshotID) + if err != nil { + return 
errors.Wrapf(err, "wait for tarfs conversion task") + } + + st, err := t.getSnapshotStatus(snapshotID, true) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + st.mutex.Unlock() + return errors.Errorf("snapshot %s tarfs format error %d", snapshotID, st.status) + } + + if !st.isEmptyBlob { + if st.dataLoopdev == nil { + loopdev, err := t.attachLoopdev(st.blobTarFilePath) + if err != nil { + st.mutex.Unlock() + return errors.Wrapf(err, "attach layer tar file %s to loopdev", st.blobTarFilePath) + } + st.dataLoopdev = loopdev + } + devices = append(devices, "device="+st.dataLoopdev.Path()) + } + + st.mutex.Unlock() + } + mountOpts := strings.Join(devices, ",") + + st, err := t.getSnapshotStatus(snapshotID, true) if err != nil { return err } + defer st.mutex.Unlock() + + mountPoint := path.Join(rafs.GetSnapshotDir(), "mnt") if len(st.erofsMountPoint) > 0 { if st.erofsMountPoint == mountPoint { - log.L.Debugf("erofs tarfs %s already mounted", mountPoint) + log.L.Debugf("tarfs for snapshot %s has already been mounted at %s", snapshotID, mountPoint) return nil } - return errors.Errorf("erofs snapshot %s already mounted at %s", snapshotID, st.erofsMountPoint) + return errors.Errorf("tarfs for snapshot %s has already been mounted at %s", snapshotID, st.erofsMountPoint) } - if err = os.MkdirAll(mountPoint, 0755); err != nil { - return errors.Wrapf(err, "failed to create tarfs mount dir %s", mountPoint) + if st.metaLoopdev == nil { + upperDirPath := path.Join(rafs.GetSnapshotDir(), "fs") + mergedBootstrap := t.mergedMetaFilePath(upperDirPath) + loopdev, err := t.attachLoopdev(mergedBootstrap) + if err != nil { + return errors.Wrapf(err, "attach merged bootstrap %s to loopdev", mergedBootstrap) + } + st.metaLoopdev = loopdev } + devName := st.metaLoopdev.Path() - if st.mergedBsLoopdev != nil { - devName = st.mergedBsLoopdev.Path() - } else { - return errors.Errorf("snapshot %s not found boostrap loopdev error %d", snapshotID, st.status) + if err = 
os.MkdirAll(mountPoint, 0750); err != nil { + return errors.Wrapf(err, "create tarfs mount dir %s", mountPoint) } - err = unix.Mount(devName, mountPoint, "erofs", 0, st.erofsMountOpts) + err = unix.Mount(devName, mountPoint, "erofs", 0, mountOpts) if err != nil { - return errors.Wrapf(err, "mount erofs at %s with opts %s", mountPoint, st.erofsMountOpts) + return errors.Wrapf(err, "mount erofs at %s with opts %s", mountPoint, mountOpts) } st.erofsMountPoint = mountPoint + rafs.SetMountpoint(mountPoint) return nil } func (t *Manager) UmountTarErofs(snapshotID string) error { - st, err := t.getSnapshotStatus(snapshotID) + st, err := t.getSnapshotStatus(snapshotID, true) if err != nil { return errors.Wrapf(err, "umount a tarfs snapshot %s which is already removed", snapshotID) } + defer st.mutex.Unlock() if len(st.erofsMountPoint) > 0 { err := unix.Unmount(st.erofsMountPoint, 0) if err != nil { - return errors.Wrapf(err, "umount erofs tarfs %s failed", st.erofsMountPoint) + return errors.Wrapf(err, "umount erofs tarfs %s", st.erofsMountPoint) } } st.erofsMountPoint = "" return nil } -func (t *Manager) waitLayerFormating(snapshotID string) error { - log.L.Debugf("wait for tarfs formating snapshot %s", snapshotID) - st, err := t.getSnapshotStatus(snapshotID) +func (t *Manager) waitLayerReady(snapshotID string) error { + log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) + st, err := t.getSnapshotStatus(snapshotID, false) if err != nil { return err } @@ -470,20 +542,24 @@ func (t *Manager) waitLayerFormating(snapshotID string) error { return nil } +func (t *Manager) IsTarfsLayer(snapshotID string) bool { + _, err := t.getSnapshotStatus(snapshotID, false) + return err == nil +} + // check if a snapshot is tarfs layer and if mounted a erofs tarfs -func (t *Manager) CheckTarfsLayer(snapshotID string, merged bool) bool { - st, err := t.getSnapshotStatus(snapshotID) +func (t *Manager) IsMountedTarfsLayer(snapshotID string) bool { + st, err := 
t.getSnapshotStatus(snapshotID, true) if err != nil { return false } - if merged && len(st.erofsMountPoint) == 0 { - return false - } - return true + defer st.mutex.Unlock() + + return len(st.erofsMountPoint) != 0 } func (t *Manager) DetachLayer(snapshotID string) error { - st, err := t.getSnapshotStatus(snapshotID) + st, err := t.getSnapshotStatus(snapshotID, true) if err != nil { return os.ErrNotExist } @@ -491,36 +567,47 @@ func (t *Manager) DetachLayer(snapshotID string) error { if len(st.erofsMountPoint) > 0 { err := unix.Unmount(st.erofsMountPoint, 0) if err != nil { - return errors.Wrapf(err, "umount erofs tarfs %s failed", st.erofsMountPoint) + st.mutex.Unlock() + return errors.Wrapf(err, "umount erofs tarfs %s", st.erofsMountPoint) } } - if st.mergedBsLoopdev != nil { - err := st.mergedBsLoopdev.Detach() + if st.metaLoopdev != nil { + err := st.metaLoopdev.Detach() if err != nil { - return errors.Wrapf(err, "detach merged bootstrap loopdev for tarfs snapshot %s failed", snapshotID) + st.mutex.Unlock() + return errors.Wrapf(err, "detach merged bootstrap loopdev for tarfs snapshot %s", snapshotID) } + st.metaLoopdev = nil } - if st.layerBsLoopdev != nil { - err := st.layerBsLoopdev.Detach() + if st.dataLoopdev != nil { + err := st.dataLoopdev.Detach() if err != nil { - return errors.Wrapf(err, "detach layer bootstrap loopdev for tarfs snapshot %s failed", snapshotID) + st.mutex.Unlock() + return errors.Wrapf(err, "detach layer bootstrap loopdev for tarfs snapshot %s", snapshotID) } + st.dataLoopdev = nil } + + st.mutex.Unlock() + // TODO: check order st.cancel() t.mutex.Lock() - defer t.mutex.Unlock() delete(t.snapshotMap, snapshotID) + t.mutex.Unlock() return nil } -func (t *Manager) getSnapshotStatus(snapshotID string) (*snapshotStatus, error) { +func (t *Manager) getSnapshotStatus(snapshotID string, lock bool) (*snapshotStatus, error) { t.mutex.Lock() defer t.mutex.Unlock() st, ok := t.snapshotMap[snapshotID] if ok { + if lock { + st.mutex.Lock() + } 
return st, nil } return nil, errors.Errorf("not found snapshot %s", snapshotID) @@ -576,3 +663,15 @@ func (t *Manager) GetConcurrentLimiter(ref string) *semaphore.Weighted { t.processLimiterCache.Add(ref, limiter) return limiter } + +func (t *Manager) layerTarFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID) +} + +func (t *Manager) layerMetaFilePath(upperDirPath string) string { + return filepath.Join(upperDirPath, "image", TarfsLayerBootstapName) +} + +func (t *Manager) mergedMetaFilePath(upperDirPath string) string { + return filepath.Join(upperDirPath, "image", TarfsMeragedBootstapName) +} diff --git a/snapshot/process.go b/snapshot/process.go index d2d8a0dc43..bade268a3d 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -13,7 +13,6 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" - "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" snpkg "github.com/containerd/containerd/pkg/snapshotters" "github.com/containerd/containerd/snapshots/storage" @@ -24,9 +23,8 @@ import ( // `storageLocater` provides a local storage for each handler to save their intermediates. // Different actions for different layer types func chooseProcessor(ctx context.Context, logger *logrus.Entry, - sn *snapshotter, s storage.Snapshot, - key, parent string, labels map[string]string, storageLocater func() string) (_ func() (bool, []mount.Mount, error), target string, err error) { - + sn *snapshotter, s storage.Snapshot, key, parent string, labels map[string]string, + storageLocater func() string) (_ func() (bool, []mount.Mount, error), target string, err error) { var handler func() (bool, []mount.Mount, error) // Handler to prepare a directory for containerd to download and unpacking layer. 
@@ -40,10 +38,10 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, return true, nil, nil } - remoteHandler := func(id string, labels map[string]string, isTarfs bool) func() (bool, []mount.Mount, error) { + remoteHandler := func(id string, labels map[string]string) func() (bool, []mount.Mount, error) { return func() (bool, []mount.Mount, error) { - logger.Debugf("Found nydus meta layer id %s", id) - if err := sn.prepareRemoteSnapshot(id, labels, isTarfs); err != nil { + logger.Debugf("Prepare remote snapshot %s", id) + if err := sn.prepareRemoteSnapshot(id, labels, s); err != nil { return false, nil, err } @@ -52,12 +50,13 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, return false, nil, err } - log.L.Infof("Nydus remote snapshot %s is ready", id) + logger.Infof("Nydus remote snapshot %s is ready", id) mounts, err := sn.remoteMounts(ctx, s, id) return false, mounts, err } } + // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd target, remote := labels[label.TargetSnapshotRef] if remote { @@ -72,54 +71,58 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, case sn.fs.CheckReferrer(ctx, labels): logger.Debugf("found referenced nydus manifest") handler = skipHandler - case sn.fs.StargzEnabled(): - // Check if the blob is format of estargz - if ok, blob := sn.fs.IsStargzDataLayer(labels); ok { - err := sn.fs.PrepareStargzMetaLayer(blob, storageLocater(), labels) + default: + if sn.fs.StargzEnabled() { + // Check if the blob is format of estargz + if ok, blob := sn.fs.IsStargzDataLayer(labels); ok { + err := sn.fs.PrepareStargzMetaLayer(blob, storageLocater(), labels) + if err != nil { + logger.Errorf("prepare stargz layer of snapshot ID %s, err: %v", s.ID, err) + } else { + logger.Debugf("found estargz data layer") + // Mark this snapshot as stargz layer since estargz image format does not + // has special annotation or media type. 
+ labels[label.StargzLayer] = "true" + handler = skipHandler + } + } + } + if handler == nil && sn.fs.TarfsEnabled() { + err := sn.fs.PrepareTarfsLayer(ctx, labels, s.ID, sn.upperPath(s.ID)) if err != nil { - logger.Errorf("prepare stargz layer of snapshot ID %s, err: %v", s.ID, err) + logger.Warnf("snapshot ID %s can't be converted into tarfs, fallback to containerd, err: %v", s.ID, err) } else { - // Mark this snapshot as stargz layer since estargz image format does not - // has special annotation or media type. - labels[label.StargzLayer] = "true" + logger.Debugf("convert OCIv1 layer to tarfs") + labels[label.NydusTarfsLayer] = "true" + handler = skipHandler } } - case sn.fs.TarfsEnabled(): - err := sn.fs.PrepareTarfsLayer(ctx, labels, s.ID, sn.upperPath(s.ID)) - if err != nil { - logger.Debugf("snapshot ID %s can't prepare as tarfs fallback to containerd, err: %v", s.ID, err) - handler = defaultHandler - } else { - handler = skipHandler - } - default: - // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd - handler = defaultHandler } } else { - // Container writable layer comes into this branch. It can't be committed within this Prepare + // Container writable layer comes into this branch. + // It should not be committed during this Prepare() operation. 
// Hope to find bootstrap layer and prepares to start nydusd // TODO: Trying find nydus meta layer will slow down setting up rootfs to OCI images if id, info, err := sn.findMetaLayer(ctx, key); err == nil { - logger.Infof("Prepares active snapshot %s, nydusd should start afterwards", key) - handler = remoteHandler(id, info.Labels, false) + logger.Infof("Prepare active Nydus snapshot %s", key) + handler = remoteHandler(id, info.Labels) } if handler == nil && sn.fs.ReferrerDetectEnabled() { if id, info, err := sn.findReferrerLayer(ctx, key); err == nil { - logger.Infof("found referenced nydus manifest for image: %s", info.Labels[snpkg.TargetRefLabel]) + logger.Infof("Found referenced nydus manifest for image: %s", info.Labels[snpkg.TargetRefLabel]) metaPath := path.Join(sn.snapshotDir(id), "fs", "image.boot") if err := sn.fs.TryFetchMetadata(ctx, info.Labels, metaPath); err != nil { return nil, "", errors.Wrap(err, "try fetch metadata") } - handler = remoteHandler(id, info.Labels, false) + handler = remoteHandler(id, info.Labels) } } if handler == nil && sn.fs.StargzEnabled() { // `pInfo` must be the uppermost parent layer - _, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) + id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) if err != nil { return nil, "", errors.Wrap(err, "get parent snapshot info") } @@ -128,6 +131,8 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, if err := sn.fs.MergeStargzMetaLayer(ctx, s); err != nil { return nil, "", errors.Wrap(err, "merge stargz meta layers") } + handler = remoteHandler(id, pInfo.Labels) + logger.Infof("Generated estargz merged meta for %s", key) } } @@ -136,18 +141,17 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, // tarfs merged & mounted on the uppermost parent layer id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) - if err == nil { - if sn.fs.IsTarfsLayer(id) { - err := sn.fs.MergeTarfsLayers(s, func(id string) string { return 
sn.upperPath(id) }) - if err != nil { - return nil, "", errors.Wrap(err, "merge tarfs layers") - } - handler = remoteHandler(id, pInfo.Labels, true) - } else { - logger.Warnf("Tarfs enable but Parent (%s) of snapshot %s is not a tarfs layer, is an untar oci or nydus snapshot?", id, s.ID) - } - } else { + switch { + case err != nil: logger.Warnf("Tarfs enable but can't get snapshot %s Parent, is an untar oci or nydus snapshot?", s.ID) + case !label.IsTarfsDataLayer(pInfo.Labels): + logger.Debugf("Tarfs enable but Parent (%s) of snapshot %s is not a tarfs layer", id, s.ID) + default: + err := sn.fs.MergeTarfsLayers(s, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "merge tarfs layers") + } + handler = remoteHandler(id, pInfo.Labels) } } } diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 3139723ca5..9205efdea2 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -53,12 +53,11 @@ import ( var _ snapshots.Snapshotter = &snapshotter{} type snapshotter struct { - root string - nydusdPath string - // Storing snapshots' state, parentage and other metadata - ms *storage.MetaStore + root string + nydusdPath string + ms *storage.MetaStore // Storing snapshots' state, parentage and other metadata fs *filesystem.Filesystem - manager *mgr.Manager + cgroupManager *cgroup.Manager enableNydusOverlayFS bool syncRemove bool cleanupOnClose bool @@ -102,23 +101,61 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } } - manager, err := mgr.NewManager(mgr.Opt{ - NydusdBinaryPath: cfg.DaemonConfig.NydusdPath, + blockdevManager, err := mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: "", Database: db, CacheDir: cfg.CacheManagerConfig.CacheDir, RootDir: cfg.Root, RecoverPolicy: rp, - FsDriver: config.GetFsDriver(), + FsDriver: config.FsDriverBlockdev, DaemonConfig: daemonConfig, CgroupMgr: cgroupMgr, }) if err != nil { - return nil, errors.Wrap(err, "create daemons manager") + 
return nil, errors.Wrap(err, "create blockdevice manager") + } + + var fscacheManager *mgr.Manager + if config.GetFsDriver() == config.FsDriverFscache { + mgr, err := mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: cfg.DaemonConfig.NydusdPath, + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + RecoverPolicy: rp, + FsDriver: config.FsDriverFscache, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create fscache manager") + } + fscacheManager = mgr + } + + var fusedevManager *mgr.Manager + if config.GetFsDriver() == config.FsDriverFusedev { + mgr, err := mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: cfg.DaemonConfig.NydusdPath, + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + RecoverPolicy: rp, + FsDriver: config.FsDriverFusedev, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create fusedev manager") + } + fusedevManager = mgr } metricServer, err := metrics.NewServer( ctx, - metrics.WithProcessManager(manager), + metrics.WithProcessManager(blockdevManager), + metrics.WithProcessManager(fscacheManager), + metrics.WithProcessManager(fusedevManager), ) if err != nil { return nil, errors.Wrap(err, "create metrics server") @@ -139,7 +176,9 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } opts := []filesystem.NewFSOpt{ - filesystem.WithManager(manager), + filesystem.WithManager(blockdevManager), + filesystem.WithManager(fscacheManager), + filesystem.WithManager(fusedevManager), filesystem.WithNydusImageBinaryPath(cfg.DaemonConfig.NydusdPath), filesystem.WithVerifier(verifier), filesystem.WithRootMountpoint(config.GetRootMountpoint()), @@ -169,7 +208,9 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho if cfg.Experimental.EnableTarfs { // FIXME: get the insecure option from nydusd config. 
_, backendConfig := daemonConfig.StorageBackend() - tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, cfg.DaemonConfig.NydusImagePath, int64(cfg.Experimental.TarfsMaxConcurrentProc)) + tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, + cacheConfig.CacheDir, cfg.DaemonConfig.NydusImagePath, + int64(cfg.Experimental.TarfsMaxConcurrentProc)) opts = append(opts, filesystem.WithTarfsManager(tarfsMgr)) } @@ -179,7 +220,16 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } if config.IsSystemControllerEnabled() { - managers := []*mgr.Manager{manager} + managers := []*mgr.Manager{} + if blockdevManager != nil { + managers = append(managers, blockdevManager) + } + if fscacheManager != nil { + managers = append(managers, fscacheManager) + } + if fusedevManager != nil { + managers = append(managers, fusedevManager) + } systemController, err := system.NewSystemController(nydusFs, managers, config.SystemControllerAddress()) if err != nil { return nil, errors.Wrap(err, "create system controller") @@ -232,7 +282,7 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho ms: ms, syncRemove: syncRemove, fs: nydusFs, - manager: manager, + cgroupManager: cgroupMgr, enableNydusOverlayFS: cfg.SnapshotsConfig.EnableNydusOverlayFS, cleanupOnClose: cfg.CleanupOnClose, }, nil @@ -283,15 +333,17 @@ func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, e usage = snapshots.Usage(du) } - // Blob layers are all committed snapshots - if info.Kind == snapshots.KindCommitted && label.IsNydusDataLayer(info.Labels) { - blobDigest := info.Labels[snpkg.TargetLayerDigestLabel] - // Try to get nydus meta layer/snapshot disk usage - cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) - if err != nil { - return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + // Caculate disk space usage under cacheDir of committed 
snapshots. + if info.Kind == snapshots.KindCommitted && + (label.IsNydusDataLayer(info.Labels) || label.IsTarfsDataLayer(info.Labels)) { + if blobDigest, ok := info.Labels[snpkg.TargetLayerDigestLabel]; ok { + // Try to get nydus meta layer/snapshot disk usage + cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + } + usage.Add(cacheUsage) } - usage.Add(cacheUsage) } return usage, nil @@ -330,6 +382,9 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er needRemoteMounts = true metaSnapshotID = id } + } else if label.IsTarfsDataLayer(info.Labels) { + needRemoteMounts = true + metaSnapshotID = id } if info.Kind == snapshots.KindActive && info.Parent != "" { @@ -341,8 +396,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } needRemoteMounts = true metaSnapshotID = pID - } - if o.fs.TarfsEnabled() && o.fs.IsMergedTarfsLayer(pID) { + } else if o.fs.TarfsEnabled() && o.fs.IsMountedTarfsLayer(pID) { needRemoteMounts = true metaSnapshotID = pID } @@ -351,7 +405,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } } - if o.fs.ReferrerDetectEnabled() { + if o.fs.ReferrerDetectEnabled() && !needRemoteMounts { if id, _, err := o.findReferrerLayer(ctx, key); err == nil { needRemoteMounts = true metaSnapshotID = id @@ -422,7 +476,7 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap // Nydusd might not be running. We should run nydusd to reflect the rootfs. 
if err = o.fs.WaitUntilReady(pID); err != nil { if errors.Is(err, errdefs.ErrNotFound) { - if err := o.fs.Mount(pID, pInfo.Labels, false); err != nil { + if err := o.fs.Mount(pID, pInfo.Labels, nil); err != nil { return nil, errors.Wrapf(err, "mount rafs, instance id %s", pID) } @@ -440,25 +494,23 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap return nil, errors.New("only can view nydus topmost layer") } // Otherwise, it is OCI snapshots + base, s, err := o.createSnapshot(ctx, snapshots.KindView, key, parent, opts) if err != nil { return nil, err } - if o.fs.TarfsEnabled() { - if o.fs.IsTarfsLayer(pID) { - if !o.fs.IsMergedTarfsLayer(pID) { - if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { - return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) - } - - if err := o.fs.Mount(pID, pInfo.Labels, true); err != nil { - return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) - } + if o.fs.TarfsEnabled() && label.IsTarfsDataLayer(pInfo.Labels) { + if !o.fs.IsMountedTarfsLayer(pID) { + if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { + return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) + } + if err := o.fs.Mount(pID, pInfo.Labels, &s); err != nil { + return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) } - needRemoteMounts = true - metaSnapshotID = pID } + needRemoteMounts = true + metaSnapshotID = pID } log.L.Infof("[View] snapshot with key %s parent %s", key, parent) @@ -487,21 +539,18 @@ func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snap }() // grab the existing id - id, info, _, err := storage.GetInfo(ctx, key) + id, _, _, err := storage.GetInfo(ctx, key) if err != nil { return err } log.L.Infof("[Commit] snapshot with key %q snapshot id %s", key, id) - var usage fs.Usage - // For OCI compatibility, we calculate disk usage and commit the usage to DB. 
- // Nydus disk usage calculation will be delayed until containerd queries. - if !label.IsNydusMetaLayer(info.Labels) && !label.IsNydusDataLayer(info.Labels) { - usage, err = fs.DiskUsage(ctx, o.upperPath(id)) - if err != nil { - return err - } + // For OCI compatibility, we calculate disk usage of the snapshotDir and commit the usage to DB. + // Nydus disk usage under the cacheDir will be delayed until containerd queries. + usage, err := fs.DiskUsage(ctx, o.upperPath(id)) + if err != nil { + return err } if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil { @@ -544,6 +593,8 @@ func (o *snapshotter) Remove(ctx context.Context, key string) error { if label.IsNydusMetaLayer(info.Labels) { log.L.Infof("[Remove] nydus meta snapshot with key %s snapshot id %s", key, id) + } else if label.IsTarfsDataLayer(info.Labels) { + log.L.Infof("[Remove] nydus tarfs snapshot with key %s snapshot id %s", key, id) } if info.Kind == snapshots.KindCommitted { @@ -608,8 +659,8 @@ func (o *snapshotter) Close() error { o.fs.TryStopSharedDaemon() - if o.manager.CgroupMgr != nil { - if err := o.manager.CgroupMgr.Delete(); err != nil { + if o.cgroupManager != nil { + if err := o.cgroupManager.Delete(); err != nil { log.L.Errorf("failed to destroy cgroup, err %v", err) } } @@ -744,8 +795,8 @@ func overlayMount(options []string) []mount.Mount { } } -func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string, isTarfs bool) error { - return o.fs.Mount(id, labels, isTarfs) +func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string, s storage.Snapshot) error { + return o.fs.Mount(id, labels, &s) } // `s` is the upmost snapshot and `id` refers to the nydus meta snapshot From ea6f182249fa24cba98bca5b6c2aa703af425b8c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 25 Jun 2023 12:21:34 +0800 Subject: [PATCH 08/14] snapshot: remove function prepareRemoteSnapshot() Remove function prepareRemoteSnapshot() to simplify 
the code and improve readability. Signed-off-by: Jiang Liu --- snapshot/process.go | 4 ++-- snapshot/snapshot.go | 10 +++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/snapshot/process.go b/snapshot/process.go index bade268a3d..60c76c3325 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -41,7 +41,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, remoteHandler := func(id string, labels map[string]string) func() (bool, []mount.Mount, error) { return func() (bool, []mount.Mount, error) { logger.Debugf("Prepare remote snapshot %s", id) - if err := sn.prepareRemoteSnapshot(id, labels, s); err != nil { + if err := sn.fs.Mount(id, labels, &s); err != nil { return false, nil, err } @@ -51,7 +51,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } logger.Infof("Nydus remote snapshot %s is ready", id) - mounts, err := sn.remoteMounts(ctx, s, id) + mounts, err := sn.remoteMounts(ctx, labels, s, id) return false, mounts, err } } diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 9205efdea2..ee94cb3c3e 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -418,7 +418,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } if needRemoteMounts { - return o.remoteMounts(ctx, *snap, metaSnapshotID) + return o.remoteMounts(ctx, info.Labels, *snap, metaSnapshotID) } return o.mounts(ctx, info.Labels, *snap) @@ -516,7 +516,7 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap log.L.Infof("[View] snapshot with key %s parent %s", key, parent) if needRemoteMounts { - return o.remoteMounts(ctx, s, metaSnapshotID) + return o.remoteMounts(ctx, base.Labels, s, metaSnapshotID) } return o.mounts(ctx, base.Labels, s) @@ -795,13 +795,9 @@ func overlayMount(options []string) []mount.Mount { } } -func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string, s storage.Snapshot) error { - return o.fs.Mount(id, labels, &s) -} 
- // `s` is the upmost snapshot and `id` refers to the nydus meta snapshot // `s` and `id` can represent a different layer, it's useful when View an image -func (o *snapshotter) remoteMounts(ctx context.Context, s storage.Snapshot, id string) ([]mount.Mount, error) { +func (o *snapshotter) remoteMounts(ctx context.Context, labels map[string]string, s storage.Snapshot, id string) ([]mount.Mount, error) { var overlayOptions []string lowerPaths := make([]string, 0, 8) if s.Kind == snapshots.KindActive { From d90b73db02f06e72218ad5e9acd3db125d9cef91 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 25 Jun 2023 12:32:37 +0800 Subject: [PATCH 09/14] snapshot: support overlay volatile option for nydus The overlay volatile option is only supported for OCIv1 images, so enhance nydus remote images to support it too. Signed-off-by: Jiang Liu --- snapshot/snapshot.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index ee94cb3c3e..9a4a81f087 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -799,16 +799,19 @@ func overlayMount(options []string) []mount.Mount { // `s` and `id` can represent a different layer, it's useful when View an image func (o *snapshotter) remoteMounts(ctx context.Context, labels map[string]string, s storage.Snapshot, id string) ([]mount.Mount, error) { var overlayOptions []string - lowerPaths := make([]string, 0, 8) if s.Kind == snapshots.KindActive { overlayOptions = append(overlayOptions, fmt.Sprintf("workdir=%s", o.workPath(s.ID)), fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)), ) + if _, ok := labels[label.OverlayfsVolatileOpt]; ok { + overlayOptions = append(overlayOptions, "volatile") + } } else if len(s.ParentIDs) == 1 { return bindMount(o.upperPath(s.ParentIDs[0]), "ro"), nil } + lowerPaths := make([]string, 0, 8) lowerPathNydus, err := o.lowerPath(id) if err != nil { return nil, errors.Wrapf(err, "failed to locate overlay lowerdir") From 
2543daa761ace045a0716d2eb8cc9ffd499afc57 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 25 Jun 2023 15:24:34 +0800 Subject: [PATCH 10/14] snapshot: change if else to switch Change if else to switch according to golint suggestions. Signed-off-by: Jiang Liu --- pkg/snapshot/storage.go | 2 - pkg/tarfs/tarfs.go | 6 ++- snapshot/snapshot.go | 91 +++++++++++++++++++++-------------------- 3 files changed, 51 insertions(+), 48 deletions(-) diff --git a/pkg/snapshot/storage.go b/pkg/snapshot/storage.go index 0da112bef2..ea03b9aa58 100644 --- a/pkg/snapshot/storage.go +++ b/pkg/snapshot/storage.go @@ -83,8 +83,6 @@ func IterateParentSnapshots(ctx context.Context, ms *storage.MetaStore, key stri return id, info, nil } - log.L.Debugf("continue to check snapshot %s parent", id) - cKey = info.Parent } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index ddbf2ff6ef..4e95d98868 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -533,11 +533,13 @@ func (t *Manager) UmountTarErofs(snapshotID string) error { } func (t *Manager) waitLayerReady(snapshotID string) error { - log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) st, err := t.getSnapshotStatus(snapshotID, false) if err != nil { return err } + if st.status != TarfsStatusReady { + log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) + } st.wg.Wait() return nil } @@ -651,7 +653,7 @@ func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, mani } func (t *Manager) GetConcurrentLimiter(ref string) *semaphore.Weighted { - if t.maxConcurrentProcess == 0 { + if t.maxConcurrentProcess <= 0 { return nil } diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 9a4a81f087..90b73b6ace 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -324,25 +324,25 @@ func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, e return snapshots.Usage{}, err } - if info.Kind == snapshots.KindActive { + switch info.Kind { + case 
snapshots.KindActive: upperPath := o.upperPath(id) du, err := fs.DiskUsage(ctx, upperPath) if err != nil { return snapshots.Usage{}, err } usage = snapshots.Usage(du) - } - - // Caculate disk space usage under cacheDir of committed snapshots. - if info.Kind == snapshots.KindCommitted && - (label.IsNydusDataLayer(info.Labels) || label.IsTarfsDataLayer(info.Labels)) { - if blobDigest, ok := info.Labels[snpkg.TargetLayerDigestLabel]; ok { - // Try to get nydus meta layer/snapshot disk usage - cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) - if err != nil { - return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + case snapshots.KindCommitted: + // Caculate disk space usage under cacheDir of committed snapshots. + if label.IsNydusDataLayer(info.Labels) || label.IsTarfsDataLayer(info.Labels) { + if blobDigest, ok := info.Labels[snpkg.TargetLayerDigestLabel]; ok { + // Try to get nydus meta layer/snapshot disk usage + cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + } + usage.Add(cacheUsage) } - usage.Add(cacheUsage) } } @@ -365,43 +365,46 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } log.L.Infof("[Mounts] snapshot %s ID %s Kind %s", key, id, info.Kind) - if label.IsNydusMetaLayer(info.Labels) { - err = o.fs.WaitUntilReady(id) - if err != nil { - // Skip waiting if clients is unpacking nydus artifacts to `mounts` - // For example, nydus-snapshotter's client like Buildkit is calling snapshotter in below workflow: - // 1. [Prepare] snapshot for the uppermost layer - bootstrap - // 2. [Mounts] - // 3. Unpacking by applying the mounts, then we get bootstrap in its path position. - // In above steps, no container write layer is called to set up from nydus-snapshotter. So it has no - // chance to start nydusd, during which the Rafs instance is created. 
- if !errors.Is(err, errdefs.ErrNotFound) { - return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", id, err) + switch info.Kind { + case snapshots.KindView: + if label.IsNydusMetaLayer(info.Labels) { + err = o.fs.WaitUntilReady(id) + if err != nil { + // Skip waiting if clients is unpacking nydus artifacts to `mounts` + // For example, nydus-snapshotter's client like Buildkit is calling snapshotter in below workflow: + // 1. [Prepare] snapshot for the uppermost layer - bootstrap + // 2. [Mounts] + // 3. Unpacking by applying the mounts, then we get bootstrap in its path position. + // In above steps, no container write layer is called to set up from nydus-snapshotter. So it has no + // chance to start nydusd, during which the Rafs instance is created. + if !errors.Is(err, errdefs.ErrNotFound) { + return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", id, err) + } + } else { + needRemoteMounts = true + metaSnapshotID = id } - } else { + } else if label.IsTarfsDataLayer(info.Labels) { needRemoteMounts = true metaSnapshotID = id } - } else if label.IsTarfsDataLayer(info.Labels) { - needRemoteMounts = true - metaSnapshotID = id - } - - if info.Kind == snapshots.KindActive && info.Parent != "" { - pKey := info.Parent - if pID, info, _, err := snapshot.GetSnapshotInfo(ctx, o.ms, pKey); err == nil { - if label.IsNydusMetaLayer(info.Labels) { - if err = o.fs.WaitUntilReady(pID); err != nil { - return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", pID, err) + case snapshots.KindActive: + if info.Parent != "" { + pKey := info.Parent + if pID, info, _, err := snapshot.GetSnapshotInfo(ctx, o.ms, pKey); err == nil { + if label.IsNydusMetaLayer(info.Labels) { + if err = o.fs.WaitUntilReady(pID); err != nil { + return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", pID, err) + } + needRemoteMounts = true + metaSnapshotID = pID + } else if o.fs.TarfsEnabled() && o.fs.IsMountedTarfsLayer(pID) { + 
needRemoteMounts = true + metaSnapshotID = pID } - needRemoteMounts = true - metaSnapshotID = pID - } else if o.fs.TarfsEnabled() && o.fs.IsMountedTarfsLayer(pID) { - needRemoteMounts = true - metaSnapshotID = pID + } else { + return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } - } else { - return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } } @@ -1027,7 +1030,7 @@ func (o *snapshotter) cleanupSnapshotDirectory(ctx context.Context, dir string) if o.fs.TarfsEnabled() { if err := o.fs.DetachTarfsLayer(snapshotID); err != nil && !os.IsNotExist(err) { - log.G(ctx).WithError(err).Error("detach tarfs layer") + log.G(ctx).WithError(err).Errorf("failed to detach tarfs layer for snapshot %s", snapshotID) } } From 472624d93c8e0c671d61b787549877e2b5335703 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 26 Jun 2023 09:58:31 +0800 Subject: [PATCH 11/14] tarfs: export nydus tarfs image as block with verity Enhance tarfs implementation to support following operations: - export an tarfs image as a block device - export an tarfs image as a block device with verity - export an tarfs layer as a block device - export an tarfs layer as a block device with verity Signed-off-by: Jiang Liu --- config/config.go | 15 ++- config/global.go | 43 ++++++++ misc/snapshotter/config.toml | 18 ++++ pkg/cache/manager.go | 14 ++- pkg/filesystem/tarfs_adaptor.go | 19 ++-- pkg/label/label.go | 4 + pkg/tarfs/tarfs.go | 178 ++++++++++++++++++++++++++------ snapshot/process.go | 13 +++ snapshot/snapshot.go | 49 ++++++--- 9 files changed, 291 insertions(+), 62 deletions(-) diff --git a/config/config.go b/config/config.go index 34d9d6bf23..932bafe3a8 100644 --- a/config/config.go +++ b/config/config.go @@ -107,11 +107,16 @@ const ( ) type Experimental struct { - EnableStargz bool `toml:"enable_stargz"` - EnableReferrerDetect bool `toml:"enable_referrer_detect"` - EnableTarfs bool `toml:"enable_tarfs"` - TarfsHint bool `toml:"tarfs_hint"` - 
TarfsMaxConcurrentProc int `toml:"tarfs_max_concurrent_proc"` + EnableStargz bool `toml:"enable_stargz"` + EnableReferrerDetect bool `toml:"enable_referrer_detect"` + TarfsConfig TarfsConfig `toml:"tarfs"` +} + +type TarfsConfig struct { + EnableTarfs bool `toml:"enable_tarfs"` + TarfsHint bool `toml:"tarfs_hint"` + MaxConcurrentProc int `toml:"max_concurrent_proc"` + ExportMode string `toml:"export_mode"` +} type CgroupConfig struct { diff --git a/config/global.go b/config/global.go index 5272a20bdc..a50de69f7b 100644 --- a/config/global.go +++ b/config/global.go @@ -112,6 +112,49 @@ func GetDaemonProfileCPUDuration() int64 { return globalConfig.origin.SystemControllerConfig.DebugConfig.ProfileDuration } +const ( + TarfsLayerVerityOnly string = "layer_verity_only" + TarfsImageVerityOnly string = "image_verity_only" + TarfsLayerBlockDevice string = "layer_block" + TarfsImageBlockDevice string = "image_block" + TarfsLayerBlockWithVerity string = "layer_block_with_verity" + TarfsImageBlockWithVerity string = "image_block_with_verity" +) + +func GetTarfsExportEnabled() bool { + switch globalConfig.origin.Experimental.TarfsConfig.ExportMode { + case TarfsLayerVerityOnly, TarfsLayerBlockDevice, TarfsLayerBlockWithVerity: + return true + case TarfsImageVerityOnly, TarfsImageBlockDevice, TarfsImageBlockWithVerity: + return true + default: + return false + } +} + +// Returns (wholeImage, generateBlockImage, withVerityInfo) +// wholeImage: generate tarfs for the whole image instead of a specific layer. +// generateBlockImage: generate a block image file. +// withVerityInfo: generate disk verity information.
+func GetTarfsExportFlags() (bool, bool, bool) { + switch globalConfig.origin.Experimental.TarfsConfig.ExportMode { + case "layer_verity_only": + return false, false, true + case "image_verity_only": + return true, false, true + case "layer_block": + return false, true, false + case "image_block": + return true, true, false + case "layer_block_with_verity": + return false, true, true + case "image_block_with_verity": + return true, true, true + default: + return false, false, false + } +} + func ProcessConfigurations(c *SnapshotterConfig) error { if c.LoggingConfig.LogDir == "" { c.LoggingConfig.LogDir = filepath.Join(c.Root, logging.DefaultLogDirName) diff --git a/misc/snapshotter/config.toml b/misc/snapshotter/config.toml index dbc9728830..dbdf7ba4f6 100644 --- a/misc/snapshotter/config.toml +++ b/misc/snapshotter/config.toml @@ -104,3 +104,21 @@ enable_stargz = false # The option enables trying to fetch the Nydus image associated with the OCI image and run it. # Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers enable_referrer_detect = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum concurrency for converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" diff --git a/pkg/cache/manager.go b/pkg/cache/manager.go index ca055bfdd8..33b221a296 100644 --- a/pkg/cache/manager.go +++ b/pkg/cache/manager.go @@ -21,8 +21,10 @@ import ( ) const ( - chunkMapFileSuffix = ".chunk_map" - metaFileSuffix = ".blob.meta" + imageDiskFileSuffix = ".image.disk" + layerDiskFileSuffix = ".layer.disk" + chunkMapFileSuffix = ".chunk_map" + metaFileSuffix = ".blob.meta" // Blob cache is suffixed after nydus v2.1 dataFileSuffix = ".blob.data" ) @@ -72,8 +74,10 @@ func (m *Manager) CacheUsage(ctx context.Context, blobID string) (snapshots.Usag blobCachePath := path.Join(m.cacheDir, blobID+dataFileSuffix) blobCacheSuffixedPath := path.Join(m.cacheDir, blobID+dataFileSuffix) blobChunkMap := path.Join(m.cacheDir, blobID+chunkMapFileSuffix) blobMeta := path.Join(m.cacheDir, blobID+metaFileSuffix) + imageDisk := path.Join(m.cacheDir, blobID+imageDiskFileSuffix) + layerDisk := path.Join(m.cacheDir, blobID+layerDiskFileSuffix) - stuffs := []string{blobCachePath, blobCacheSuffixedPath, 
blobChunkMap, blobMeta, imageDisk, layerDisk} for _, f := range stuffs { du, err := fs.DiskUsage(ctx, f) @@ -95,9 +99,11 @@ func (m *Manager) RemoveBlobCache(blobID string) error { blobCacheSuffixedPath := path.Join(m.cacheDir, blobID+dataFileSuffix) blobChunkMap := path.Join(m.cacheDir, blobID+chunkMapFileSuffix) blobMeta := path.Join(m.cacheDir, blobID+metaFileSuffix) + imageDisk := path.Join(m.cacheDir, blobID+imageDiskFileSuffix) + layerDisk := path.Join(m.cacheDir, blobID+layerDiskFileSuffix) // NOTE: Delete chunk bitmap file before data blob - stuffs := []string{blobChunkMap, blobMeta, blobCachePath, blobCacheSuffixedPath} + stuffs := []string{blobChunkMap, blobMeta, blobCachePath, blobCacheSuffixedPath, imageDisk, layerDisk} for _, f := range stuffs { err := os.Remove(f) diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go index 86824e408f..45f3a328f9 100755 --- a/pkg/filesystem/tarfs_adaptor.go +++ b/pkg/filesystem/tarfs_adaptor.go @@ -49,18 +49,21 @@ func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]s } } - go func() { - if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, upperDirPath); err != nil { - log.L.WithError(err).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) - } - if limiter != nil { - limiter.Release(1) - } - }() + if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, upperDirPath); err != nil { + log.L.WithError(err).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) + } + if limiter != nil { + limiter.Release(1) + } return nil } +func (fs *Filesystem) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, + storageLocater func(string) string) ([]string, error) { + return fs.tarfsMgr.ExportBlockData(s, perLayer, labels, storageLocater) +} + func (fs *Filesystem) MergeTarfsLayers(s storage.Snapshot, storageLocater func(string) string) error { return fs.tarfsMgr.MergeLayers(s, 
storageLocater) } diff --git a/pkg/label/label.go b/pkg/label/label.go index 820cd26bbe..948abe6da9 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -42,6 +42,10 @@ const ( NydusImagePullUsername = "containerd.io/snapshot/pullusername" // A bool flag to enable integrity verification of meta data blob NydusSignature = "containerd.io/snapshot/nydus-signature" + // Information for image block device + NydusImageBlockInfo = "containerd.io/snapshot/nydus-image-block" + // Information for layer block device + NydusLayerBlockInfo = "containerd.io/snapshot/nydus-layer-block" // A bool flag to mark the blob as a estargz data blob, set by the snapshotter. StargzLayer = "containerd.io/snapshot/stargz" diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 4e95d98868..f42a87b805 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -10,11 +10,13 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "os" "os/exec" "path" "path/filepath" + "strconv" "strings" "sync" "syscall" @@ -22,6 +24,7 @@ import ( "github.com/containerd/containerd/archive/compression" "github.com/containerd/containerd/log" "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/pkg/auth" "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/label" @@ -61,9 +64,11 @@ const ( ) const ( - MaxManifestConfigSize = 0x100000 - TarfsLayerBootstapName = "layer.boot" - TarfsMeragedBootstapName = "image.boot" + MaxManifestConfigSize = 0x100000 + TarfsLayerBootstrapName = "layer.boot" + TarfsImageBootstrapName = "image.boot" + TarfsLayerDiskName = "layer.disk" + TarfsImageDiskName = "image.disk" ) var ErrEmptyBlob = errors.New("empty blob") @@ -337,7 +342,6 @@ func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDige } wg := &sync.WaitGroup{} wg.Add(1) - defer wg.Done() ctx, cancel := context.WithCancel(context.Background()) 
t.snapshotMap[snapshotID] = &snapshotStatus{ @@ -347,36 +351,43 @@ func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDige } t.mutex.Unlock() - layerBlobID := layerDigest.Hex() - err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, layerBlobID, upperDirPath) + go func() { + defer wg.Done() - st, err1 := t.getSnapshotStatus(snapshotID, true) - if err1 != nil { - return errors.Errorf("can not found status object for snapshot %s after prepare", snapshotID) - } - defer st.mutex.Unlock() + layerBlobID := layerDigest.Hex() + err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, layerBlobID, upperDirPath) - st.blobID = layerBlobID - st.blobTarFilePath = t.layerTarFilePath(layerBlobID) - if err != nil { - if errors.Is(err, ErrEmptyBlob) { - st.isEmptyBlob = true - st.status = TarfsStatusReady - err = nil + st, err1 := t.getSnapshotStatus(snapshotID, true) + if err1 != nil { + // return errors.Errorf("can not found status object for snapshot %s after prepare", snapshotID) + err1 = errors.Wrapf(err1, "can not found status object for snapshot %s after prepare", snapshotID) + log.L.WithError(err1).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) + return + } + defer st.mutex.Unlock() + + st.blobID = layerBlobID + st.blobTarFilePath = t.layerTarFilePath(layerBlobID) + if err != nil { + if errors.Is(err, ErrEmptyBlob) { + st.isEmptyBlob = true + st.status = TarfsStatusReady + } else { + log.L.WithError(err).Errorf("failed to convert OCI image to tarfs") + st.status = TarfsStatusFailed + } } else { - st.status = TarfsStatusFailed + st.isEmptyBlob = false + st.status = TarfsStatusReady } - } else { - st.isEmptyBlob = false - st.status = TarfsStatusReady - } - log.L.Debugf("finish converting snapshot %s to tarfs, status %d, empty blob %v", snapshotID, st.status, st.isEmptyBlob) + log.L.Debugf("finish converting snapshot %s to tarfs, status %d, empty blob %v", snapshotID, st.status, st.isEmptyBlob) + }() 
- return err + return nil } func (t *Manager) MergeLayers(s storage.Snapshot, storageLocater func(string) string) error { - mergedBootstrap := t.mergedMetaFilePath(storageLocater(s.ParentIDs[0])) + mergedBootstrap := t.imageMetaFilePath(storageLocater(s.ParentIDs[0])) if _, err := os.Stat(mergedBootstrap); err == nil { log.L.Debugf("tarfs snapshot %s already has merged bootstrap %s", s.ParentIDs[0], mergedBootstrap) return nil @@ -429,6 +440,102 @@ func (t *Manager) MergeLayers(s storage.Snapshot, storageLocater func(string) st return nil } +func (t *Manager) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, storageLocater func(string) string) ([]string, error) { + updateFields := []string{} + + wholeImage, exportDisk, withVerity := config.GetTarfsExportFlags() + // Nothing to do for this case, all needed datum are ready. + if !exportDisk && !withVerity { + return updateFields, nil + } else if !wholeImage != perLayer { + return updateFields, nil + } + + var snapshotID string + if perLayer { + snapshotID = s.ID + } else { + if len(s.ParentIDs) == 0 { + return updateFields, errors.Errorf("snapshot %s has parent", s.ID) + } + snapshotID = s.ParentIDs[0] + } + err := t.waitLayerReady(snapshotID) + if err != nil { + return updateFields, errors.Wrapf(err, "wait for tarfs snapshot %s to get ready", snapshotID) + } + st, err := t.getSnapshotStatus(snapshotID, false) + if err != nil { + return updateFields, err + } + if st.status != TarfsStatusReady { + return updateFields, errors.Errorf("tarfs snapshot %s is not ready, %d", snapshotID, st.status) + } + + var metaFileName, diskFileName string + if wholeImage { + metaFileName = t.imageMetaFilePath(storageLocater(snapshotID)) + diskFileName = t.imageDiskFilePath(st.blobID) + } else { + metaFileName = t.layerMetaFilePath(storageLocater(snapshotID)) + diskFileName = t.layerDiskFilePath(st.blobID) + } + + // Do not regenerate if the disk image already exists. 
+ if _, err := os.Stat(diskFileName); err == nil { + return updateFields, nil + } + diskFileNameTmp := diskFileName + ".tarfs.tmp" + defer os.Remove(diskFileNameTmp) + + options := []string{ + "export", + "--block", + "--localfs-dir", t.cacheDirPath, + "--bootstrap", metaFileName, + "--output", diskFileNameTmp, + } + if withVerity { + options = append(options, "--verity") + } + log.L.Warnf("nydus image command %v", options) + cmd := exec.Command(t.nydusImagePath, options...) + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + err = cmd.Run() + if err != nil { + return updateFields, errors.Wrap(err, "merge tarfs image layers") + } + log.L.Debugf("nydus image export command, stdout: %s, stderr: %s", &outb, &errb) + + if withVerity { + pattern := "dm-verity options: --no-superblock --format=1 -s \"\" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks %d --hash-offset %d %s\n" + var dataBlobks, hashOffset uint64 + var rootHash string + if count, err := fmt.Sscanf(outb.String(), pattern, &dataBlobks, &hashOffset, &rootHash); err != nil || count != 3 { + return updateFields, errors.Errorf("failed to parse dm-verity options from nydus image output: %s", outb.String()) + } + + blockInfo := strconv.FormatUint(dataBlobks, 10) + "," + strconv.FormatUint(hashOffset, 10) + "," + "sha256:" + rootHash + if wholeImage { + labels[label.NydusImageBlockInfo] = blockInfo + updateFields = append(updateFields, "labels."+label.NydusImageBlockInfo) + } else { + labels[label.NydusLayerBlockInfo] = blockInfo + updateFields = append(updateFields, "labels."+label.NydusLayerBlockInfo) + } + log.L.Warnf("export block labels %v", labels) + } + + err = os.Rename(diskFileNameTmp, diskFileName) + if err != nil { + return updateFields, errors.Wrap(err, "rename disk image file") + } + + return updateFields, nil +} + func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { // losetup.Attach() is not thread-safe hold lock here 
t.mutexLoopDev.Lock() @@ -493,7 +600,7 @@ func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *da if st.metaLoopdev == nil { upperDirPath := path.Join(rafs.GetSnapshotDir(), "fs") - mergedBootstrap := t.mergedMetaFilePath(upperDirPath) + mergedBootstrap := t.imageMetaFilePath(upperDirPath) loopdev, err := t.attachLoopdev(mergedBootstrap) if err != nil { return errors.Wrapf(err, "attach merged bootstrap %s to loopdev", mergedBootstrap) @@ -541,6 +648,9 @@ func (t *Manager) waitLayerReady(snapshotID string) error { log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) } st.wg.Wait() + if st.status != TarfsStatusReady { + return errors.Errorf("snapshot %s is in state %d instead of ready state", snapshotID, st.status) + } return nil } @@ -670,10 +780,18 @@ func (t *Manager) layerTarFilePath(blobID string) string { return filepath.Join(t.cacheDirPath, blobID) } +func (t *Manager) layerDiskFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID+"."+TarfsLayerDiskName) +} + +func (t *Manager) imageDiskFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID+"."+TarfsImageDiskName) +} + func (t *Manager) layerMetaFilePath(upperDirPath string) string { - return filepath.Join(upperDirPath, "image", TarfsLayerBootstapName) + return filepath.Join(upperDirPath, "image", TarfsLayerBootstrapName) } -func (t *Manager) mergedMetaFilePath(upperDirPath string) string { - return filepath.Join(upperDirPath, "image", TarfsMeragedBootstapName) +func (t *Manager) imageMetaFilePath(upperDirPath string) string { + return filepath.Join(upperDirPath, "image", TarfsImageBootstrapName) } diff --git a/snapshot/process.go b/snapshot/process.go index 60c76c3325..56075d0143 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -16,6 +16,7 @@ import ( "github.com/containerd/containerd/mount" snpkg "github.com/containerd/containerd/pkg/snapshotters" "github.com/containerd/containerd/snapshots/storage" + 
"github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/snapshot" ) @@ -93,6 +94,12 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, logger.Warnf("snapshot ID %s can't be converted into tarfs, fallback to containerd, err: %v", s.ID, err) } else { logger.Debugf("convert OCIv1 layer to tarfs") + if config.GetTarfsExportEnabled() { + _, err = sn.fs.ExportBlockData(s, true, labels, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "export layer as tarfs block device") + } + } labels[label.NydusTarfsLayer] = "true" handler = skipHandler } @@ -151,6 +158,12 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, if err != nil { return nil, "", errors.Wrap(err, "merge tarfs layers") } + if config.GetTarfsExportEnabled() { + _, err = sn.fs.ExportBlockData(s, false, labels, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "export image as tarfs block device") + } + } handler = remoteHandler(id, pInfo.Labels) } } diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 90b73b6ace..097d1b498f 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -101,18 +101,21 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } } - blockdevManager, err := mgr.NewManager(mgr.Opt{ - NydusdBinaryPath: "", - Database: db, - CacheDir: cfg.CacheManagerConfig.CacheDir, - RootDir: cfg.Root, - RecoverPolicy: rp, - FsDriver: config.FsDriverBlockdev, - DaemonConfig: daemonConfig, - CgroupMgr: cgroupMgr, - }) - if err != nil { - return nil, errors.Wrap(err, "create blockdevice manager") + var blockdevManager *mgr.Manager + if cfg.Experimental.TarfsConfig.EnableTarfs { + blockdevManager, err = mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: "", + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + 
RecoverPolicy: rp, + FsDriver: config.FsDriverBlockdev, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create blockdevice manager") + } } var fscacheManager *mgr.Manager @@ -205,12 +208,12 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho opts = append(opts, filesystem.WithReferrerManager(referrerMgr)) } - if cfg.Experimental.EnableTarfs { + if cfg.Experimental.TarfsConfig.EnableTarfs { // FIXME: get the insecure option from nydusd config. _, backendConfig := daemonConfig.StorageBackend() - tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsHint, + tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsConfig.TarfsHint, cacheConfig.CacheDir, cfg.DaemonConfig.NydusImagePath, - int64(cfg.Experimental.TarfsMaxConcurrentProc)) + int64(cfg.Experimental.TarfsConfig.MaxConcurrentProc)) opts = append(opts, filesystem.WithTarfsManager(tarfsMgr)) } @@ -344,6 +347,8 @@ func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, e usage.Add(cacheUsage) } } + case snapshots.KindUnknown: + case snapshots.KindView: } return usage, nil @@ -406,6 +411,8 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } } + case snapshots.KindCommitted: + case snapshots.KindUnknown: } if o.fs.ReferrerDetectEnabled() && !needRemoteMounts { @@ -508,6 +515,18 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) } + if config.GetTarfsExportEnabled() { + updateFields, err := o.fs.ExportBlockData(s, false, pInfo.Labels, func(id string) string { return o.upperPath(id) }) + if err != nil { + return nil, errors.Wrap(err, "export tarfs as block 
image") + } + if len(updateFields) > 0 { + _, err = o.Update(ctx, pInfo, updateFields...) + if err != nil { + return nil, errors.Wrapf(err, "update snapshot label information") + } + } + } if err := o.fs.Mount(pID, pInfo.Labels, &s); err != nil { return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) } From d9c0b38f253630542088806589469a08cd82e5ae Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 30 Aug 2023 18:12:12 +0800 Subject: [PATCH 12/14] tarfs: simplify tarfs implementation Simplify tarfs implementation by: 1) simplify the way to detect images without data blob 2) do not regenerate tarfs data when it already exists Signed-off-by: Jiang Liu --- pkg/filesystem/fs.go | 2 +- pkg/filesystem/tarfs_adaptor.go | 14 +- pkg/tarfs/tarfs.go | 280 +++++++++++++++++--------- snapshot/process.go | 16 +- snapshot/snapshot.go | 32 ++-- 5 files changed, 174 insertions(+), 170 deletions(-) diff --git a/pkg/filesystem/fs.go b/pkg/filesystem/fs.go index 89a6c6eb28..375a36caa4 100644 --- a/pkg/filesystem/fs.go +++ b/pkg/filesystem/fs.go @@ -600,7 +600,6 @@ func (fs *Filesystem) initSharedDaemon(fsManager *manager.Manager) (err error) { } // createDaemon create new nydus daemon by snapshotID and imageID -// For fscache driver, no need to provide mountpoint to nydusd daemon. func (fs *Filesystem) createDaemon(fsManager *manager.Manager, daemonMode config.DaemonMode, mountpoint string, ref int32) (d *daemon.Daemon, err error) { opts := []daemon.NewDaemonOpt{ @@ -616,6 +615,7 @@ func (fs *Filesystem) createDaemon(fsManager *manager.Manager, daemonMode config daemon.WithDaemonMode(daemonMode), } + // For fscache driver, no need to provide mountpoint to nydusd daemon. 
if mountpoint != "" { opts = append(opts, daemon.WithMountpoint(mountpoint)) } diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go index 45f3a328f9..09b3aeaab1 100755 --- a/pkg/filesystem/tarfs_adaptor.go +++ b/pkg/filesystem/tarfs_adaptor.go @@ -59,11 +59,6 @@ func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]s return nil } -func (fs *Filesystem) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, - storageLocater func(string) string) ([]string, error) { - return fs.tarfsMgr.ExportBlockData(s, perLayer, labels, storageLocater) -} - func (fs *Filesystem) MergeTarfsLayers(s storage.Snapshot, storageLocater func(string) string) error { return fs.tarfsMgr.MergeLayers(s, storageLocater) } @@ -72,10 +67,7 @@ func (fs *Filesystem) DetachTarfsLayer(snapshotID string) error { return fs.tarfsMgr.DetachLayer(snapshotID) } -func (fs *Filesystem) IsTarfsLayer(snapshotID string) bool { - return fs.tarfsMgr.IsTarfsLayer(snapshotID) -} - -func (fs *Filesystem) IsMountedTarfsLayer(snapshotID string) bool { - return fs.tarfsMgr.IsMountedTarfsLayer(snapshotID) +func (fs *Filesystem) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, + storageLocater func(string) string) ([]string, error) { + return fs.tarfsMgr.ExportBlockData(s, perLayer, labels, storageLocater) } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index f42a87b805..229f6bc05b 100755 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -27,6 +27,7 @@ import ( "github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/pkg/auth" "github.com/containerd/nydus-snapshotter/pkg/daemon" + "github.com/containerd/nydus-snapshotter/pkg/errdefs" "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/remote" "github.com/containerd/nydus-snapshotter/pkg/remote/remotes" @@ -40,22 +41,6 @@ import ( "k8s.io/utils/lru" ) -type Manager struct { - 
snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID - mutex sync.Mutex - mutexLoopDev sync.Mutex - cacheDirPath string - nydusImagePath string - insecure bool - validateDiffID bool // whether to validate digest for uncompressed content - checkTarfsHint bool // whether to rely on tarfs hint annotation - maxConcurrentProcess int64 - processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes - tarfsHintCache *lru.Cache // cache oci image ref and tarfs hint annotation - diffIDCache *lru.Cache // cache oci blob digest and diffID - sg singleflight.Group -} - const ( TarfsStatusInit = 0 TarfsStatusPrepare = 1 @@ -71,12 +56,25 @@ const ( TarfsImageDiskName = "image.disk" ) -var ErrEmptyBlob = errors.New("empty blob") +type Manager struct { + snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID + mutex sync.Mutex + mutexLoopDev sync.Mutex + cacheDirPath string + nydusImagePath string + insecure bool + validateDiffID bool // whether to validate digest for uncompressed content + checkTarfsHint bool // whether to rely on tarfs hint annotation + maxConcurrentProcess int64 + processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes + tarfsHintCache *lru.Cache // cache oci image ref and tarfs hint annotation + diffIDCache *lru.Cache // cache oci blob digest and diffID + sg singleflight.Group +} type snapshotStatus struct { mutex sync.Mutex status int - isEmptyBlob bool blobID string blobTarFilePath string erofsMountPoint string @@ -105,62 +103,78 @@ func NewManager(insecure, checkTarfsHint bool, cacheDirPath, nydusImagePath stri // Fetch image manifest and config contents, cache frequently used information. 
// FIXME need an update policy func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) error { - // fetch image manifest content - rc, desc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) + manifest, err := t.fetchImageManifest(ctx, remote, ref, manifestDigest) + if err != nil { + return err + } + config, err := t.fetchImageConfig(ctx, remote, ref, &manifest) if err != nil { return err } + + if t.checkTarfsHint { + // cache ref & tarfs hint annotation + t.tarfsHintCache.Add(ref, label.HasTarfsHint(manifest.Annotations)) + } + if t.validateDiffID { + // cache OCI blob digest & diff id + for i := range manifest.Layers { + t.diffIDCache.Add(manifest.Layers[i].Digest, config.RootFS.DiffIDs[i]) + } + } + + return nil +} + +func (t *Manager) fetchImageManifest(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) (ocispec.Manifest, error) { + rc, desc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) + if err != nil { + return ocispec.Manifest{}, err + } defer rc.Close() if desc.Size > MaxManifestConfigSize { - return errors.Errorf("image manifest content size %x is too big", desc.Size) + return ocispec.Manifest{}, errors.Errorf("image manifest content size %x is too big", desc.Size) } bytes, err := io.ReadAll(rc) if err != nil { - return errors.Wrap(err, "read image manifest content") + return ocispec.Manifest{}, errors.Wrap(err, "read image manifest content") } - // TODO: support docker v2 images var manifestOCI ocispec.Manifest if err := json.Unmarshal(bytes, &manifestOCI); err != nil { - return errors.Wrap(err, "unmarshal OCI image manifest") + return ocispec.Manifest{}, errors.Wrap(err, "unmarshal OCI image manifest") } if len(manifestOCI.Layers) < 1 { - return errors.Errorf("invalid OCI image manifest without any layer") + return ocispec.Manifest{}, errors.Errorf("invalid OCI image manifest without any layer") } + return manifestOCI, nil +} + +func (t 
*Manager) fetchImageConfig(ctx context.Context, remote *remote.Remote, ref string, manifest *ocispec.Manifest) (ocispec.Image, error) { // fetch image config content and extract diffIDs - rc, desc, err = t.getBlobStream(ctx, remote, ref, manifestOCI.Config.Digest) + rc, desc, err := t.getBlobStream(ctx, remote, ref, manifest.Config.Digest) if err != nil { - return errors.Wrap(err, "fetch image config content") + return ocispec.Image{}, errors.Wrap(err, "fetch image config content") } defer rc.Close() if desc.Size > MaxManifestConfigSize { - return errors.Errorf("image config content size %x is too big", desc.Size) + return ocispec.Image{}, errors.Errorf("image config content size %x is too big", desc.Size) } - bytes, err = io.ReadAll(rc) + bytes, err := io.ReadAll(rc) if err != nil { - return errors.Wrap(err, "read image config content") + return ocispec.Image{}, errors.Wrap(err, "read image config content") } var config ocispec.Image if err := json.Unmarshal(bytes, &config); err != nil { - return errors.Wrap(err, "unmarshal image config") + return ocispec.Image{}, errors.Wrap(err, "unmarshal image config") } - if len(config.RootFS.DiffIDs) != len(manifestOCI.Layers) { - return errors.Errorf("number of diffIDs does not match manifest layers") + if len(config.RootFS.DiffIDs) != len(manifest.Layers) { + return ocispec.Image{}, errors.Errorf("number of diffIDs does not match manifest layers") } - if t.checkTarfsHint { - // cache ref & tarfs hint annotation - t.tarfsHintCache.Add(ref, label.HasTarfsHint(manifestOCI.Annotations)) - } - if t.validateDiffID { - // cache OCI blob digest & diff id - for i := range manifestOCI.Layers { - t.diffIDCache.Add(manifestOCI.Layers[i].Digest, config.RootFS.DiffIDs[i]) - } - } - return nil + return config, nil } func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref string, manifestDigest, layerDigest digest.Digest) (digest.Digest, error) { @@ -197,14 +211,14 @@ func (t *Manager) getBlobStream(ctx 
context.Context, remote *remote.Remote, ref } // generate tar file and layer bootstrap, return if this blob is an empty blob -func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID, upperDirPath string) (emptyBlob bool, err error) { +func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID, upperDirPath string) (err error) { snapshotImageDir := filepath.Join(upperDirPath, "image") if err := os.MkdirAll(snapshotImageDir, 0750); err != nil { - return false, errors.Wrapf(err, "create data dir %s for tarfs snapshot", snapshotImageDir) + return errors.Wrapf(err, "create data dir %s for tarfs snapshot", snapshotImageDir) } layerMetaFile := t.layerMetaFilePath(upperDirPath) if _, err := os.Stat(layerMetaFile); err == nil { - return false, errors.Errorf("tarfs bootstrap file %s for snapshot %s already exists", layerMetaFile, snapshotID) + return errdefs.ErrAlreadyExists } layerMetaFileTmp := layerMetaFile + ".tarfs.tmp" defer os.Remove(layerMetaFileTmp) @@ -213,14 +227,14 @@ func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID layerTarFileTmp := layerTarFile + ".tarfs.tmp" tarFile, err := os.Create(layerTarFileTmp) if err != nil { - return false, errors.Wrap(err, "create temporary file to store tar stream") + return errors.Wrap(err, "create temporary file to store tar stream") } defer tarFile.Close() defer os.Remove(layerTarFileTmp) fifoName := filepath.Join(upperDirPath, "layer_"+snapshotID+"_"+"tar.fifo") if err = syscall.Mkfifo(fifoName, 0644); err != nil { - return false, err + return err } defer os.Remove(fifoName) @@ -252,24 +266,43 @@ func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID err = cmd.Run() if err != nil { log.L.Warnf("nydus image exec failed, %s", errb.String()) - return false, errors.Wrap(err, "converting OCIv1 layer blob to tarfs") + return errors.Wrap(err, "converting OCIv1 layer blob to tarfs") } log.L.Debugf("nydus image output %s", 
outb.String()) log.L.Debugf("nydus image err %s", errb.String()) if err := os.Rename(layerTarFileTmp, layerTarFile); err != nil { - return false, errors.Wrapf(err, "rename file %s to %s", layerTarFileTmp, layerTarFile) + return errors.Wrapf(err, "rename file %s to %s", layerTarFileTmp, layerTarFile) } if err := os.Rename(layerMetaFileTmp, layerMetaFile); err != nil { - return false, errors.Wrapf(err, "rename file %s to %s", layerMetaFileTmp, layerMetaFile) + return errors.Wrapf(err, "rename file %s to %s", layerMetaFileTmp, layerMetaFile) } - // TODO need a more reliable way to check if this is an empty blob - if strings.Contains(outb.String(), "data blob size: 0x0\n") || - strings.Contains(errb.String(), "data blob size: 0x0\n") { - return true, nil + return nil +} + +func (t *Manager) getImageBlobInfo(metaFilePath string) (string, error) { + if _, err := os.Stat(metaFilePath); err != nil { + return "", err } - return false, nil + + options := []string{ + "inspect", + "-R blobs", + metaFilePath, + } + cmd := exec.Command(t.nydusImagePath, options...) 
+ var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + log.L.Debugf("nydus image command %v", options) + err := cmd.Run() + if err != nil { + log.L.Warnf("nydus image exec failed, %s", errb.String()) + return "", errors.Wrap(err, "converting OCIv1 layer blob to tarfs") + } + + return string(outb.Bytes()), nil } // download & uncompress an oci/docker blob, and then generate the tarfs bootstrap @@ -281,56 +314,51 @@ func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manif } remote := remote.New(keyChain, t.insecure) - handle := func() (bool, error) { + handle := func() error { rc, _, err := t.getBlobStream(ctx, remote, ref, layerDigest) if err != nil { - return false, err + return err } defer rc.Close() ds, err := compression.DecompressStream(rc) if err != nil { - return false, errors.Wrap(err, "unpack layer blob stream for tarfs") + return errors.Wrap(err, "unpack layer blob stream for tarfs") } defer ds.Close() - var emptyBlob bool if t.validateDiffID { diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) if err != nil { - return false, errors.Wrap(err, "get image layer diffID") + return errors.Wrap(err, "get image layer diffID") } digester := digest.Canonical.Digester() dr := io.TeeReader(ds, digester.Hash()) - emptyBlob, err = t.generateBootstrap(dr, snapshotID, layerBlobID, upperDirPath) - if err != nil { - return false, errors.Wrap(err, "generate tarfs data from image layer blob") + err = t.generateBootstrap(dr, snapshotID, layerBlobID, upperDirPath) + if err != nil && !errdefs.IsAlreadyExists(err) { + return errors.Wrap(err, "generate tarfs data from image layer blob") } - if digester.Digest() != diffID { - return false, errors.Errorf("image layer diffID %s for tarfs does not match", diffID) + if err == nil { + if digester.Digest() != diffID { + return errors.Errorf("image layer diffID %s for tarfs does not match", diffID) + } + log.L.Infof("tarfs data for layer %s is ready, digest %s", 
snapshotID, digester.Digest()) } - log.L.Infof("tarfs data for layer %s is ready, digest %s", snapshotID, digester.Digest()) } else { - emptyBlob, err = t.generateBootstrap(ds, snapshotID, layerBlobID, upperDirPath) - if err != nil { - return false, errors.Wrap(err, "generate tarfs data from image layer blob") + err = t.generateBootstrap(ds, snapshotID, layerBlobID, upperDirPath) + if err != nil && !errdefs.IsAlreadyExists(err) { + return errors.Wrap(err, "generate tarfs data from image layer blob") } log.L.Infof("tarfs data for layer %s is ready", snapshotID) } - return emptyBlob, nil + return nil } - var emptyBlob bool - emptyBlob, err = handle() + err = handle() if err != nil && remote.RetryWithPlainHTTP(ref, err) { - emptyBlob, err = handle() - } - if err != nil { - return err - } - if emptyBlob { - return ErrEmptyBlob + err = handle() } - return nil + + return err } func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDigest digest.Digest, @@ -369,18 +397,12 @@ func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDige st.blobID = layerBlobID st.blobTarFilePath = t.layerTarFilePath(layerBlobID) if err != nil { - if errors.Is(err, ErrEmptyBlob) { - st.isEmptyBlob = true - st.status = TarfsStatusReady - } else { - log.L.WithError(err).Errorf("failed to convert OCI image to tarfs") - st.status = TarfsStatusFailed - } + log.L.WithError(err).Errorf("failed to convert OCI image to tarfs") + st.status = TarfsStatusFailed } else { - st.isEmptyBlob = false st.status = TarfsStatusReady } - log.L.Debugf("finish converting snapshot %s to tarfs, status %d, empty blob %v", snapshotID, st.status, st.isEmptyBlob) + log.L.Debugf("finish converting snapshot %s to tarfs, status %d", snapshotID, st.status) }() return nil @@ -456,7 +478,7 @@ func (t *Manager) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[ snapshotID = s.ID } else { if len(s.ParentIDs) == 0 { - return updateFields, errors.Errorf("snapshot %s has parent", 
s.ID) + return updateFields, errors.Errorf("snapshot %s has no parent", s.ID) } snapshotID = s.ParentIDs[0] } @@ -536,19 +558,18 @@ func (t *Manager) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[ return updateFields, nil } -func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { - // losetup.Attach() is not thread-safe hold lock here - t.mutexLoopDev.Lock() - defer t.mutexLoopDev.Unlock() - dev, err := losetup.Attach(blob, 0, false) - return &dev, err -} - func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *daemon.Rafs) error { if s == nil { return errors.New("snapshot object for MountTarErofs() is nil") } + upperDirPath := path.Join(rafs.GetSnapshotDir(), "fs") + mergedBootstrap := t.imageMetaFilePath(upperDirPath) + blobInfo, err := t.getImageBlobInfo(mergedBootstrap) + if err != nil { + return errors.Wrapf(err, "get image blob info") + } + var devices []string // When merging bootstrap, we need to arrange layer bootstrap in order from low to high for idx := len(s.ParentIDs) - 1; idx >= 0; idx-- { @@ -567,7 +588,8 @@ func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *da return errors.Errorf("snapshot %s tarfs format error %d", snapshotID, st.status) } - if !st.isEmptyBlob { + var blobMarker = "\"blob_id\":\"" + st.blobID + "\"" + if strings.Contains(blobInfo, blobMarker) { if st.dataLoopdev == nil { loopdev, err := t.attachLoopdev(st.blobTarFilePath) if err != nil { @@ -599,8 +621,6 @@ func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *da } if st.metaLoopdev == nil { - upperDirPath := path.Join(rafs.GetSnapshotDir(), "fs") - mergedBootstrap := t.imageMetaFilePath(upperDirPath) loopdev, err := t.attachLoopdev(mergedBootstrap) if err != nil { return errors.Wrapf(err, "attach merged bootstrap %s to loopdev", mergedBootstrap) @@ -639,37 +659,6 @@ func (t *Manager) UmountTarErofs(snapshotID string) error { return nil } -func (t *Manager) 
waitLayerReady(snapshotID string) error { - st, err := t.getSnapshotStatus(snapshotID, false) - if err != nil { - return err - } - if st.status != TarfsStatusReady { - log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) - } - st.wg.Wait() - if st.status != TarfsStatusReady { - return errors.Errorf("snapshot %s is in state %d instead of ready state", snapshotID, st.status) - } - return nil -} - -func (t *Manager) IsTarfsLayer(snapshotID string) bool { - _, err := t.getSnapshotStatus(snapshotID, false) - return err == nil -} - -// check if a snapshot is tarfs layer and if mounted a erofs tarfs -func (t *Manager) IsMountedTarfsLayer(snapshotID string) bool { - st, err := t.getSnapshotStatus(snapshotID, true) - if err != nil { - return false - } - defer st.mutex.Unlock() - - return len(st.erofsMountPoint) != 0 -} - func (t *Manager) DetachLayer(snapshotID string) error { st, err := t.getSnapshotStatus(snapshotID, true) if err != nil { @@ -725,6 +714,29 @@ func (t *Manager) getSnapshotStatus(snapshotID string, lock bool) (*snapshotStat return nil, errors.Errorf("not found snapshot %s", snapshotID) } +func (t *Manager) waitLayerReady(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID, false) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) + } + st.wg.Wait() + if st.status != TarfsStatusReady { + return errors.Errorf("snapshot %s is in state %d instead of ready state", snapshotID, st.status) + } + return nil +} + +func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { + // losetup.Attach() is not thread-safe hold lock here + t.mutexLoopDev.Lock() + defer t.mutexLoopDev.Unlock() + dev, err := losetup.Attach(blob, 0, false) + return &dev, err +} + func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, manifestDigest digest.Digest) (bool, error) { if !t.checkTarfsHint { return true, nil diff --git 
a/snapshot/process.go b/snapshot/process.go index 56075d0143..779d5b2707 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -58,9 +58,9 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd - target, remote := labels[label.TargetSnapshotRef] + target, is_ro_layer := labels[label.TargetSnapshotRef] - if remote { + if is_ro_layer { // Containerd won't consume mount slice for below snapshots switch { case label.IsNydusMetaLayer(labels): @@ -88,6 +88,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } } } + if handler == nil && sn.fs.TarfsEnabled() { err := sn.fs.PrepareTarfsLayer(ctx, labels, s.ID, sn.upperPath(s.ID)) if err != nil { @@ -144,16 +145,17 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } if handler == nil && sn.fs.TarfsEnabled() { - // TODO may need check all parrents, in case share layers with other images which already prepared by overlay snapshotter - - // tarfs merged & mounted on the uppermost parent layer + // Merge and mount tarfs on the uppermost parent layer. 
id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) switch { case err != nil: - logger.Warnf("Tarfs enable but can't get snapshot %s Parent, is an untar oci or nydus snapshot?", s.ID) + logger.Warnf("Tarfs enabled but can't get parent of snapshot %s", s.ID) case !label.IsTarfsDataLayer(pInfo.Labels): - logger.Debugf("Tarfs enable but Parent (%s) of snapshot %s is not a tarfs layer", id, s.ID) + logger.Debugf("Tarfs enabled but Parent (%s) of snapshot %s is not a tarfs layer", id, s.ID) default: + // TODO may need to check all parent layers, in case layers are shared with other images + // which have already been prepared by overlay snapshotter + err := sn.fs.MergeTarfsLayers(s, func(id string) string { return sn.upperPath(id) }) if err != nil { return nil, "", errors.Wrap(err, "merge tarfs layers") diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 097d1b498f..ec0ed00502 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -275,7 +275,7 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho syncRemove := cfg.SnapshotsConfig.SyncRemove if config.GetFsDriver() == config.FsDriverFscache { - log.L.Infof("for fscache mode enable syncRemove") + log.L.Infof("enable syncRemove for fscache mode") syncRemove = true } @@ -403,7 +403,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } needRemoteMounts = true metaSnapshotID = pID - } else if o.fs.TarfsEnabled() && o.fs.IsMountedTarfsLayer(pID) { + } else if o.fs.TarfsEnabled() && label.IsTarfsDataLayer(info.Labels) { needRemoteMounts = true metaSnapshotID = pID } @@ -511,25 +511,23 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap } if o.fs.TarfsEnabled() && label.IsTarfsDataLayer(pInfo.Labels) { - if !o.fs.IsMountedTarfsLayer(pID) { - if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { - return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) + if 
err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { + return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) + } + if config.GetTarfsExportEnabled() { + updateFields, err := o.fs.ExportBlockData(s, false, pInfo.Labels, func(id string) string { return o.upperPath(id) }) + if err != nil { + return nil, errors.Wrap(err, "export tarfs as block image") } - if config.GetTarfsExportEnabled() { - updateFields, err := o.fs.ExportBlockData(s, false, pInfo.Labels, func(id string) string { return o.upperPath(id) }) + if len(updateFields) > 0 { + _, err = o.Update(ctx, pInfo, updateFields...) if err != nil { - return nil, errors.Wrap(err, "export tarfs as block image") - } - if len(updateFields) > 0 { - _, err = o.Update(ctx, pInfo, updateFields...) - if err != nil { - return nil, errors.Wrapf(err, "update snapshot label information") - } + return nil, errors.Wrapf(err, "update snapshot label information") } } - if err := o.fs.Mount(pID, pInfo.Labels, &s); err != nil { - return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) - } + } + if err := o.fs.Mount(pID, pInfo.Labels, &s); err != nil { + return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) } needRemoteMounts = true metaSnapshotID = pID From 14d2417b374459e55fb37da2a5a43f4d3e390e27 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 30 Aug 2023 22:31:57 +0800 Subject: [PATCH 13/14] tarfs: add documentation for tarfs Add documentation for tarfs. 
Signed-off-by: Jiang Liu --- docs/tarfs.md | 175 ++++++++++++++++++++++++++++++++++++++++++++ pkg/tarfs/tarfs.go | 2 +- snapshot/process.go | 4 +- 3 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 docs/tarfs.md diff --git a/docs/tarfs.md b/docs/tarfs.md new file mode 100644 index 0000000000..da7c0a568f --- /dev/null +++ b/docs/tarfs.md @@ -0,0 +1,175 @@ +# Nydus Tarfs Mode + +`Nydus Tarfs Mode` or `Tarfs` is a working mode for Nydus Image, which uses tar files as Nydus data blobs instead of generating native Nydus data blobs. + +### Enable Tarfs +`Nydus Tarfs Mode` is still an experimental feature, please edit the snapshotter configuration file to enable the feature: +``` +[experimental.tarfs] +enable_tarfs = true +``` + +### Generate Raw Disk Image for Each Layer of a Container Image +`Tarfs` supports generating a raw disk image for each layer of a container image, which can be directly mounted as EROFS filesystem through loopdev. Please edit the snapshotter configuration file to enable this submode: +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "layer_block" +``` + +This is an example to generate and verify raw disk image for each layer of a container image: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Show mounted rootfs of a container +$ mount +/dev/loop17 on /var/lib/containerd-nydus/snapshots/7/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) + +# Show loop devices used to mount layers and bootstrap for a container image +$ losetup +NAME SIZELIMIT OFFSET AUTOCLEAR RO BACK-FILE DIO LOG-SEC +/dev/loop11 0 0 0 0 /var/lib/containerd-nydus/cache/fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa 0 512 +/dev/loop12 0 0 0 0 /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 0 512 +/dev/loop13 0 0 0 0 
/var/lib/containerd-nydus/cache/96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd 0 512 +/dev/loop14 0 0 0 0 /var/lib/containerd-nydus/cache/a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 0 512 +/dev/loop15 0 0 0 0 /var/lib/containerd-nydus/cache/e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 0 512 +/dev/loop16 0 0 0 0 /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 0 512 +/dev/loop17 0 0 0 0 /var/lib/containerd-nydus/snapshots/7/fs/image/image.boot 0 512 + +# Files without suffix are tar files, files with suffix `layer.disk` are raw disk image for container image layers +$ ls -l /var/lib/containerd-nydus/cache/ +total 376800 +-rw-r--r-- 1 root root 3584 Aug 30 23:18 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 527872 Aug 30 23:18 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3.layer.disk +-rw-r--r-- 1 root root 77814784 Aug 30 23:18 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 78863360 Aug 30 23:18 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5.layer.disk +-rw-r--r-- 1 root root 4608 Aug 30 23:18 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 528896 Aug 30 23:18 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd.layer.disk +-rw-r--r-- 1 root root 2560 Aug 30 23:18 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 526848 Aug 30 23:18 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4.layer.disk +-rw-r--r-- 1 root root 7168 Aug 30 23:18 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 531456 Aug 30 23:18 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.layer.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:18 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 
root root 529408 Aug 30 23:18 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665.layer.disk +-rw-r--r-- 1 root root 112968704 Aug 30 23:18 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa +-rw-r--r-- 1 root root 113492992 Aug 30 23:18 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa.layer.disk +$ file /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +/var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3: POSIX tar archive + +# Mount the raw disk image for a container image layer +$ losetup /dev/loop100 /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3.layer.disk +$ mount -t erofs /dev/loop100 ./mnt/ +$ mount +tmpfs on /run/user/0 type tmpfs (rw,nosuid,nodev,relatime,size=1544836k,nr_inodes=386209,mode=700,inode64) +/dev/loop17 on /var/lib/containerd-nydus/snapshots/7/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) +/dev/loop100 on /root/ws/nydus-snapshotter.git/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) + +``` + +### Generate Raw Disk Image for a Container Image +`Tarfs` supports generating a raw disk image for a container image, which can be directly mounted as EROFS filesystem through loopdev. 
Please edit the snapshotter configuration file to enable this submode: +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "image_block" +``` + +This is an example to generate and verify raw disk image for a container image: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Files without suffix are tar files, files with suffix `image.disk` are raw disk image for a container image +$ ls -l /var/lib/containerd-nydus/cache/ +total 376320 +-rw-r--r-- 1 root root 3584 Aug 30 23:35 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 77814784 Aug 30 23:35 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 4608 Aug 30 23:35 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 2560 Aug 30 23:35 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 7168 Aug 30 23:35 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 194518016 Aug 30 23:36 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:35 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 root root 112968704 Aug 30 23:36 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa + +``` + +### Generate Raw Disk Image with dm-verity Information +`Tarfs` supports generating raw disk images with dm-verity information, to enable runtime data integrity validation. Please change `export_mode` in snapshotter configuration file to `layer_block_with_verity` or `image_block_with_verity`. 
+ +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "image_block_with_verity" +``` + +This is an example to generate and verify raw disk image for a container image with dm-verity information: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Files without suffix are tar files, files with suffix `image.disk` are raw disk image for a container image +$ ls -l /var/lib/containerd-nydus/cache/ +total 388296 +-rw-r--r-- 1 root root 3584 Aug 30 23:45 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 77814784 Aug 30 23:46 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 4608 Aug 30 23:45 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 2560 Aug 30 23:45 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 7168 Aug 30 23:45 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 206782464 Aug 30 23:46 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:45 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 root root 112968704 Aug 30 23:46 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa + +$ losetup /dev/loop100 /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +$ veritysetup open --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 379918 --hash-offset 194519040 /dev/loop100 image1 /dev/loop100 8113799aaf9a5d14feca1eadc3b7e6ea98bdaf61e3a2e4a8ef8c24e26a551efd +$ lsblk +loop100 7:100 0 197.2M 0 loop +└─dm-0 252:0 0 185.5M 1 crypt + +$ veritysetup status dm-0 +/dev/mapper/dm-0 is active and is in use. 
+ type: VERITY + status: verified + hash type: 1 + data block: 512 + hash block: 4096 + hash name: sha256 + salt: - + data device: /dev/loop100 + data loop: /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk + size: 379918 sectors + mode: readonly + hash device: /dev/loop100 + hash loop: /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk + hash offset: 379920 sectors + root hash: 8113799aaf9a5d14feca1eadc3b7e6ea98bdaf61e3a2e4a8ef8c24e26a551efd + +$ mount -t erofs /dev/dm-0 ./mnt/ +mount: /root/ws/nydus-snapshotter.git/mnt: WARNING: source write-protected, mounted read-only. +$ ls -l mnt/ +total 14 +lrwxrwxrwx 1 root root 7 Aug 14 08:00 bin -> usr/bin +drwxr-xr-x 2 root root 27 Jul 15 00:00 boot +drwxr-xr-x 2 root root 27 Aug 14 08:00 dev +drwxr-xr-x 2 root root 184 Aug 16 17:50 docker-entrypoint.d +-rwxrwxr-x 1 root root 1620 Aug 16 17:50 docker-entrypoint.sh +drwxr-xr-x 34 root root 1524 Aug 16 17:50 etc +drwxr-xr-x 2 root root 27 Jul 15 00:00 home +lrwxrwxrwx 1 root root 7 Aug 14 08:00 lib -> usr/lib +lrwxrwxrwx 1 root root 9 Aug 14 08:00 lib32 -> usr/lib32 +lrwxrwxrwx 1 root root 9 Aug 14 08:00 lib64 -> usr/lib64 +lrwxrwxrwx 1 root root 10 Aug 14 08:00 libx32 -> usr/libx32 +drwxr-xr-x 2 root root 27 Aug 14 08:00 media +drwxr-xr-x 2 root root 27 Aug 14 08:00 mnt +drwxr-xr-x 2 root root 27 Aug 14 08:00 opt +drwxr-xr-x 2 root root 27 Jul 15 00:00 proc +drwx------ 2 root root 66 Aug 14 08:00 root +drwxr-xr-x 3 root root 43 Aug 14 08:00 run +lrwxrwxrwx 1 root root 8 Aug 14 08:00 sbin -> usr/sbin +drwxr-xr-x 2 root root 27 Aug 14 08:00 srv +drwxr-xr-x 2 root root 27 Jul 15 00:00 sys +drwxrwxrwt 2 root root 27 Aug 16 17:50 tmp +drwxr-xr-x 14 root root 229 Aug 14 08:00 usr +drwxr-xr-x 11 root root 204 Aug 14 08:00 var + +``` \ No newline at end of file diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 229f6bc05b..53082f5702 100755 --- 
a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -302,7 +302,7 @@ func (t *Manager) getImageBlobInfo(metaFilePath string) (string, error) { return "", errors.Wrap(err, "converting OCIv1 layer blob to tarfs") } - return string(outb.Bytes()), nil + return outb.String(), nil } // download & uncompress an oci/docker blob, and then generate the tarfs bootstrap diff --git a/snapshot/process.go b/snapshot/process.go index 779d5b2707..93e4387736 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -58,9 +58,9 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, } // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd - target, is_ro_layer := labels[label.TargetSnapshotRef] + target, isRoLayer := labels[label.TargetSnapshotRef] - if is_ro_layer { + if isRoLayer { // Containerd won't consume mount slice for below snapshots switch { case label.IsNydusMetaLayer(labels): From f3c15419097d57658bfcda03df373085e3346a48 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 27 Jun 2023 10:35:13 +0800 Subject: [PATCH 14/14] snapshot: extend the ExtraOption struct Move ExtraOption to dedicated file and extend it for more usage cases. Signed-off-by: Jiang Liu --- snapshot/mount_option.go | 99 ++++++++++++++++++++++++++++++++++++++++ snapshot/snapshot.go | 80 -------------------------------- 2 files changed, 99 insertions(+), 80 deletions(-) create mode 100644 snapshot/mount_option.go diff --git a/snapshot/mount_option.go b/snapshot/mount_option.go new file mode 100644 index 0000000000..59784fe696 --- /dev/null +++ b/snapshot/mount_option.go @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package snapshot + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config/daemonconfig" + "github.com/containerd/nydus-snapshotter/pkg/daemon" + "github.com/containerd/nydus-snapshotter/pkg/layout" + "github.com/pkg/errors" +) + +type ExtraOption struct { + Source string `json:"source"` + Config string `json:"config"` + Snapshotdir string `json:"snapshotdir"` + Version string `json:"fs_version"` +} + +func (o *snapshotter) remoteMountWithExtraOptions(ctx context.Context, s storage.Snapshot, id string, overlayOptions []string) ([]mount.Mount, error) { + source, err := o.fs.BootstrapFile(id) + if err != nil { + return nil, err + } + + instance := daemon.RafsSet.Get(id) + daemon, err := o.fs.GetDaemonByID(instance.DaemonID) + if err != nil { + return nil, errors.Wrapf(err, "get daemon with ID %s", instance.DaemonID) + } + + var c daemonconfig.DaemonConfig + if daemon.IsSharedDaemon() { + c, err = daemonconfig.NewDaemonConfig(daemon.States.FsDriver, daemon.ConfigFile(instance.SnapshotID)) + if err != nil { + return nil, errors.Wrapf(err, "Failed to load instance configuration %s", + daemon.ConfigFile(instance.SnapshotID)) + } + } else { + c = daemon.Config + } + configContent, err := c.DumpString() + if err != nil { + return nil, errors.Wrapf(err, "remoteMounts: failed to marshal config") + } + + // get version from bootstrap + f, err := os.Open(source) + if err != nil { + return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to open bootstrap") + } + defer f.Close() + header := make([]byte, 4096) + sz, err := f.Read(header) + if err != nil { + return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to read bootstrap") + } + version, err := 
layout.DetectFsVersion(header[0:sz]) + if err != nil { + return nil, errors.Wrapf(err, "remoteMounts: failed to detect filesystem version") + } + + // when enable nydus-overlayfs, return unified mount slice for runc and kata + extraOption := &ExtraOption{ + Source: source, + Config: configContent, + Snapshotdir: o.snapshotDir(s.ID), + Version: version, + } + no, err := json.Marshal(extraOption) + if err != nil { + return nil, errors.Wrapf(err, "remoteMounts: failed to marshal NydusOption") + } + // XXX: Log options without extraoptions as it might contain secrets. + log.G(ctx).Debugf("fuse.nydus-overlayfs mount options %v", overlayOptions) + // base64 to filter easily in `nydus-overlayfs` + opt := fmt.Sprintf("extraoption=%s", base64.StdEncoding.EncodeToString(no)) + overlayOptions = append(overlayOptions, opt) + + return []mount.Mount{ + { + Type: "fuse.nydus-overlayfs", + Source: "overlay", + Options: overlayOptions, + }, + }, nil +} diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index ec0ed00502..ee384f2b65 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -9,8 +9,6 @@ package snapshot import ( "context" - "encoding/base64" - "encoding/json" "fmt" "os" "path/filepath" @@ -31,9 +29,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/cache" "github.com/containerd/nydus-snapshotter/pkg/cgroup" v2 "github.com/containerd/nydus-snapshotter/pkg/cgroup/v2" - "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/errdefs" - "github.com/containerd/nydus-snapshotter/pkg/layout" mgr "github.com/containerd/nydus-snapshotter/pkg/manager" "github.com/containerd/nydus-snapshotter/pkg/metrics" "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" @@ -858,82 +854,6 @@ func (o *snapshotter) remoteMounts(ctx context.Context, labels map[string]string return overlayMount(overlayOptions), nil } -type ExtraOption struct { - Source string `json:"source"` - Config string `json:"config"` - Snapshotdir 
string `json:"snapshotdir"` - Version string `json:"fs_version"` -} - -func (o *snapshotter) remoteMountWithExtraOptions(ctx context.Context, s storage.Snapshot, id string, overlayOptions []string) ([]mount.Mount, error) { - source, err := o.fs.BootstrapFile(id) - if err != nil { - return nil, err - } - - instance := daemon.RafsSet.Get(id) - daemon, err := o.fs.GetDaemonByID(instance.DaemonID) - if err != nil { - return nil, errors.Wrapf(err, "get daemon with ID %s", instance.DaemonID) - } - - var c daemonconfig.DaemonConfig - if daemon.IsSharedDaemon() { - c, err = daemonconfig.NewDaemonConfig(daemon.States.FsDriver, daemon.ConfigFile(instance.SnapshotID)) - if err != nil { - return nil, errors.Wrapf(err, "Failed to load instance configuration %s", - daemon.ConfigFile(instance.SnapshotID)) - } - } else { - c = daemon.Config - } - configContent, err := c.DumpString() - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to marshal config") - } - - // get version from bootstrap - f, err := os.Open(source) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to open bootstrap") - } - defer f.Close() - header := make([]byte, 4096) - sz, err := f.Read(header) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to read bootstrap") - } - version, err := layout.DetectFsVersion(header[0:sz]) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to detect filesystem version") - } - - // when enable nydus-overlayfs, return unified mount slice for runc and kata - extraOption := &ExtraOption{ - Source: source, - Config: configContent, - Snapshotdir: o.snapshotDir(s.ID), - Version: version, - } - no, err := json.Marshal(extraOption) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to marshal NydusOption") - } - // XXX: Log options without extraoptions as it might contain secrets. 
- log.G(ctx).Debugf("fuse.nydus-overlayfs mount options %v", overlayOptions) - // base64 to filter easily in `nydus-overlayfs` - opt := fmt.Sprintf("extraoption=%s", base64.StdEncoding.EncodeToString(no)) - overlayOptions = append(overlayOptions, opt) - - return []mount.Mount{ - { - Type: "fuse.nydus-overlayfs", - Source: "overlay", - Options: overlayOptions, - }, - }, nil -} - func (o *snapshotter) mounts(ctx context.Context, labels map[string]string, s storage.Snapshot) ([]mount.Mount, error) { if len(s.ParentIDs) == 0 { // if we only have one layer/no parents then just return a bind mount as overlay will not work