diff --git a/config/config.go b/config/config.go index 521497cf52..932bafe3a8 100644 --- a/config/config.go +++ b/config/config.go @@ -107,8 +107,16 @@ const ( ) type Experimental struct { - EnableStargz bool `toml:"enable_stargz"` - EnableReferrerDetect bool `toml:"enable_referrer_detect"` + EnableStargz bool `toml:"enable_stargz"` + EnableReferrerDetect bool `toml:"enable_referrer_detect"` + TarfsConfig TarfsConfig `toml:"tarfs"` +} + +type TarfsConfig struct { + EnableTarfs bool `toml:"enable_tarfs"` + TarfsHint bool `toml:"tarfs_hint"` + MaxConcurrentProc int `toml:"max_concurrent_proc"` + ExportMode string `toml:"export_mode"` } type CgroupConfig struct { diff --git a/config/global.go b/config/global.go index 5272a20bdc..a50de69f7b 100644 --- a/config/global.go +++ b/config/global.go @@ -112,6 +112,49 @@ func GetDaemonProfileCPUDuration() int64 { return globalConfig.origin.SystemControllerConfig.DebugConfig.ProfileDuration } +const ( + TarfsLayerVerityOnly string = "layer_verity_only" + TarfsImageVerityOnly string = "image_verity_only" + TarfsLayerBlockDevice string = "layer_block" + TarfsImageBlockDevice string = "image_block" + TarfsLayerBlockWithVerity string = "layer_block_with_verity" + TarfsImageBlockWithVerity string = "image_block_with_verity" +) + +func GetTarfsExportEnabled() bool { + switch globalConfig.origin.Experimental.TarfsConfig.ExportMode { + case TarfsLayerVerityOnly, TarfsLayerBlockDevice, TarfsLayerBlockWithVerity: + return true + case TarfsImageVerityOnly, TarfsImageBlockDevice, TarfsImageBlockWithVerity: + return true + default: + return false + } +} + +// Returns (wholeImage, generateBlockImage, withVerityInfo) +// wholeImage: generate tarfs for the whole image instead of a specific layer. +// generateBlockImage: generate a block image file. +// withVerityInfo: generate disk verity information. +func GetTarfsExportFlags() (bool, bool, bool) { + switch globalConfig.origin.Experimental.TarfsConfig.ExportMode { + case "layer_verity_only": + return false, false, true + case "image_verity_only": + return true, false, true + case "layer_block": + return false, true, false + case "image_block": + return true, true, false + case "layer_block_with_verity": + return false, true, true + case "image_block_with_verity": + return true, true, true + default: + return false, false, false + } +} + func ProcessConfigurations(c *SnapshotterConfig) error { if c.LoggingConfig.LogDir == "" { c.LoggingConfig.LogDir = filepath.Join(c.Root, logging.DefaultLogDirName) diff --git a/docs/tarfs.md b/docs/tarfs.md new file mode 100644 index 0000000000..da7c0a568f --- /dev/null +++ b/docs/tarfs.md @@ -0,0 +1,175 @@ +# Nydus Tarfs Mode + +`Nydus Tarfs Mode` or `Tarfs` is a working mode for Nydus Image, which uses tar files as Nydus data blobs instead of generating native Nydus data blobs. + +### Enable Tarfs +`Nydus Tarfs Mode` is still an experimental feature. Please edit the snapshotter configuration file to enable the feature: +``` +[experimental.tarfs] +enable_tarfs = true +``` + +### Generate Raw Disk Image for Each Layer of a Container Image +`Tarfs` supports generating a raw disk image for each layer of a container image, which can be directly mounted as an EROFS filesystem through loopdev.
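+
+Mounting these raw disk images requires EROFS support in the running kernel; tarfs relies on the EROFS filesystem driver available since Linux 6.4 and on Nydus Image Service release v2.3 (see the comments in the snapshotter configuration). A rough, optional sanity check on the host before enabling the submode (the exact commands are illustrative and distribution-dependent):
+```
+# verify the kernel version and that the erofs filesystem driver is registered
+$ uname -r
+$ grep -w erofs /proc/filesystems || modprobe erofs
+```
+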
Please edit the snapshotter configuration file to enable this submode: +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "layer_block" +``` + +This is an example to generate and verify raw disk image for each layer of a container image: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Show mounted rootfs a container +$ mount +/dev/loop17 on /var/lib/containerd-nydus/snapshots/7/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) + +# Show loop devices used to mount layers and bootstrap for a container image +$ losetup +NAME SIZELIMIT OFFSET AUTOCLEAR RO BACK-FILE DIO LOG-SEC +/dev/loop11 0 0 0 0 /var/lib/containerd-nydus/cache/fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa 0 512 +/dev/loop12 0 0 0 0 /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 0 512 +/dev/loop13 0 0 0 0 /var/lib/containerd-nydus/cache/96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd 0 512 +/dev/loop14 0 0 0 0 /var/lib/containerd-nydus/cache/a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 0 512 +/dev/loop15 0 0 0 0 /var/lib/containerd-nydus/cache/e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 0 512 +/dev/loop16 0 0 0 0 /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 0 512 +/dev/loop17 0 0 0 0 /var/lib/containerd-nydus/snapshots/7/fs/image/image.boot 0 512 + +# Files without suffix are tar files, files with suffix `layer.disk` are raw disk image for container image layers +$ ls -l /var/lib/containerd-nydus/cache/ +total 376800 +-rw-r--r-- 1 root root 3584 Aug 30 23:18 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 527872 Aug 30 23:18 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3.layer.disk +-rw-r--r-- 1 root root 77814784 Aug 30 23:18 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 78863360 Aug 30 23:18 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5.layer.disk +-rw-r--r-- 1 root root 4608 Aug 30 23:18 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 528896 Aug 30 23:18 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd.layer.disk +-rw-r--r-- 1 root root 2560 Aug 30 23:18 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 526848 Aug 30 23:18 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4.layer.disk +-rw-r--r-- 1 root root 7168 Aug 30 23:18 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 531456 Aug 30 23:18 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.layer.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:18 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 root root 529408 Aug 30 23:18 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665.layer.disk +-rw-r--r-- 1 root root 112968704 Aug 30 23:18 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa +-rw-r--r-- 1 root root 113492992 Aug 30 23:18 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa.layer.disk +$ file /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +/var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3: POSIX tar archive + +# Mount the raw disk image for a container 
image layer +$ losetup /dev/loop100 /var/lib/containerd-nydus/cache/055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3.layer.disk +$ mount -t erofs /dev/loop100 ./mnt/ +$ mount +tmpfs on /run/user/0 type tmpfs (rw,nosuid,nodev,relatime,size=1544836k,nr_inodes=386209,mode=700,inode64) +/dev/loop17 on /var/lib/containerd-nydus/snapshots/7/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) +/dev/loop100 on /root/ws/nydus-snapshotter.git/mnt type erofs (ro,relatime,user_xattr,acl,cache_strategy=readaround) + +``` + +### Generate Raw Disk Image for a Container Image +`Tarfs` supports generating a raw disk image a container image, which can be directly mounted as EROFS filesystem through loopdev. Please edit the snapshotter configuration file to enable this submode: +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "image_block" +``` + +This is an example to generate and verify raw disk image for a container image: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Files without suffix are tar files, files with suffix `image.disk` are raw disk image for a container image +$ ls -l /var/lib/containerd-nydus/cache/ +total 376320 +-rw-r--r-- 1 root root 3584 Aug 30 23:35 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 77814784 Aug 30 23:35 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 4608 Aug 30 23:35 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 2560 Aug 30 23:35 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 7168 Aug 30 23:35 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 194518016 Aug 30 23:36 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:35 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 root root 112968704 Aug 30 23:36 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa + +``` + +### Generate Raw Disk Image with dm-verity Information +`Tarfs` supports generating raw disk images with dm-verity information, to enable runtime data integrity validation. Please change `export_mode` in snapshotter configuration file to `layer_block_with_verity` or `image_block_with_verity`. 
+ +``` +[experimental.tarfs] +enable_tarfs = true +export_mode = "image_block_with_verity" +``` + +This is an example to generate and verify raw disk image for a container image with dm-verity information: +``` +$ containerd-nydus-grpc --config /etc/nydus/config.toml & +$ nerdctl run --snapshotter nydus --rm nginx + +# Files without suffix are tar files, files with suffix `image.disk` are raw disk image for a container image +$ ls -l /var/lib/containerd-nydus/cache/ +total 388296 +-rw-r--r-- 1 root root 3584 Aug 30 23:45 055fa98b43638b67d10c58d41094d99c8696cc34b7a960c7a0cc5d9d152d12b3 +-rw-r--r-- 1 root root 77814784 Aug 30 23:46 52d2b7f179e32b4cbd579ee3c4958027988f9a8274850ab0c7c24661e3adaac5 +-rw-r--r-- 1 root root 4608 Aug 30 23:45 96576293dd2954ff84251aa0455687c8643358ba1b190ea1818f56b41884bdbd +-rw-r--r-- 1 root root 2560 Aug 30 23:45 a7c4092be9044bd4eef78f27c95785ef3a9f345d01fd4512bc94ddaaefc359f4 +-rw-r--r-- 1 root root 7168 Aug 30 23:45 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75 +-rw-r--r-- 1 root root 206782464 Aug 30 23:46 da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +-rw-r--r-- 1 root root 5120 Aug 30 23:45 e3b6889c89547ec9ba653ab44ed32a99370940d51df956968c0d578dd61ab665 +-rw-r--r-- 1 root root 112968704 Aug 30 23:46 fd9f026c631046113bd492f69761c3ba6042c791c35a60e7c7f3b8f254592daa + +$ losetup /dev/loop100 /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk +$ veritysetup open --no-superblock --format=1 -s "" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks 379918 --hash-offset 194519040 /dev/loop100 image1 /dev/loop100 8113799aaf9a5d14feca1eadc3b7e6ea98bdaf61e3a2e4a8ef8c24e26a551efd +$ lsblk +loop100 7:100 0 197.2M 0 loop +└─dm-0 252:0 0 185.5M 1 crypt + +$ veritysetup status dm-0 +/dev/mapper/dm-0 is active and is in use. + type: VERITY + status: verified + hash type: 1 + data block: 512 + hash block: 4096 + hash name: sha256 + salt: - + data device: /dev/loop100 + data loop: /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk + size: 379918 sectors + mode: readonly + hash device: /dev/loop100 + hash loop: /var/lib/containerd-nydus/cache/da761d9a302b21dc50767b67d46f737f5072fb4490c525b4a7ae6f18e1dbbf75.image.disk + hash offset: 379920 sectors + root hash: 8113799aaf9a5d14feca1eadc3b7e6ea98bdaf61e3a2e4a8ef8c24e26a551efd + +$ mount -t erofs /dev/dm-0 ./mnt/ +mount: /root/ws/nydus-snapshotter.git/mnt: WARNING: source write-protected, mounted read-only. 
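+# Note: once the verity device is set up, every read from /dev/mapper/dm-0 is checked
+# against the dm-verity hash tree, so tampering with the disk image shows up as read
+# errors instead of silently corrupted data.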
+$ ls -l mnt/ +total 14 +lrwxrwxrwx 1 root root 7 Aug 14 08:00 bin -> usr/bin +drwxr-xr-x 2 root root 27 Jul 15 00:00 boot +drwxr-xr-x 2 root root 27 Aug 14 08:00 dev +drwxr-xr-x 2 root root 184 Aug 16 17:50 docker-entrypoint.d +-rwxrwxr-x 1 root root 1620 Aug 16 17:50 docker-entrypoint.sh +drwxr-xr-x 34 root root 1524 Aug 16 17:50 etc +drwxr-xr-x 2 root root 27 Jul 15 00:00 home +lrwxrwxrwx 1 root root 7 Aug 14 08:00 lib -> usr/lib +lrwxrwxrwx 1 root root 9 Aug 14 08:00 lib32 -> usr/lib32 +lrwxrwxrwx 1 root root 9 Aug 14 08:00 lib64 -> usr/lib64 +lrwxrwxrwx 1 root root 10 Aug 14 08:00 libx32 -> usr/libx32 +drwxr-xr-x 2 root root 27 Aug 14 08:00 media +drwxr-xr-x 2 root root 27 Aug 14 08:00 mnt +drwxr-xr-x 2 root root 27 Aug 14 08:00 opt +drwxr-xr-x 2 root root 27 Jul 15 00:00 proc +drwx------ 2 root root 66 Aug 14 08:00 root +drwxr-xr-x 3 root root 43 Aug 14 08:00 run +lrwxrwxrwx 1 root root 8 Aug 14 08:00 sbin -> usr/sbin +drwxr-xr-x 2 root root 27 Aug 14 08:00 srv +drwxr-xr-x 2 root root 27 Jul 15 00:00 sys +drwxrwxrwt 2 root root 27 Aug 16 17:50 tmp +drwxr-xr-x 14 root root 229 Aug 14 08:00 usr +drwxr-xr-x 11 root root 204 Aug 14 08:00 var + +``` \ No newline at end of file diff --git a/go.mod b/go.mod index 6fd8462309..799362bcf9 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,8 @@ require ( k8s.io/cri-api v0.27.0-alpha.3 ) +require github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db // indirect + require ( github.com/cilium/ebpf v0.9.1 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect diff --git a/go.sum b/go.sum index 8756658ac4..37f0be0f7d 100644 --- a/go.sum +++ b/go.sum @@ -132,6 +132,10 @@ github.com/flowstack/go-jsonschema v0.1.1/go.mod h1:yL7fNggx1o8rm9RlgXv7hTBWxdBM github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db h1:StM6A9LvaVrFS2chAGcfRVDoBB6rHYPIGJ3GknpB25c= +github.com/freddierice/go-losetup v0.0.0-20220711213114-2a14873012db/go.mod h1:pwuQfHWn6j2Fpl2AWw/bPLlKfojHxIIEa5TeKIgDFW4= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= diff --git a/misc/snapshotter/config.toml b/misc/snapshotter/config.toml index dbc9728830..dbdf7ba4f6 100644 --- a/misc/snapshotter/config.toml +++ b/misc/snapshotter/config.toml @@ -104,3 +104,21 @@ enable_stargz = false # The option enables trying to fetch the Nydus image associated with the OCI image and run it. # Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers enable_referrer_detect = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# When true, only enable nydus tarfs mode for images carrying the `tarfs hint` annotation +tarfs_hint = false +# Maximum number of concurrent processes converting OCIv1 images to tarfs, 0 means the default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" diff --git a/pkg/cache/manager.go b/pkg/cache/manager.go index ca055bfdd8..33b221a296 100644 --- a/pkg/cache/manager.go +++ b/pkg/cache/manager.go @@ -21,8 +21,10 @@ import ( ) const ( - chunkMapFileSuffix = ".chunk_map" - metaFileSuffix = ".blob.meta" + imageDiskFileSuffix = ".image.disk" + layerDiskFileSuffix = ".layer.disk" + chunkMapFileSuffix = ".chunk_map" + metaFileSuffix = ".blob.meta" // Blob cache is suffixed after nydus v2.1 dataFileSuffix = ".blob.data" ) @@ -72,8 +74,10 @@ func (m *Manager) CacheUsage(ctx context.Context, blobID string) (snapshots.Usag blobCacheSuffixedPath := path.Join(m.cacheDir, blobID+dataFileSuffix) blobChunkMap := path.Join(m.cacheDir, blobID+chunkMapFileSuffix) blobMeta := path.Join(m.cacheDir, blobID+metaFileSuffix) + imageDisk := path.Join(m.cacheDir, blobID+imageDiskFileSuffix) + layerDisk := path.Join(m.cacheDir, blobID+layerDiskFileSuffix) - stuffs := []string{blobCachePath, blobCacheSuffixedPath, blobChunkMap, blobMeta} + stuffs := []string{blobCachePath, blobCacheSuffixedPath, blobChunkMap, blobMeta, imageDisk, layerDisk} for _, f := range stuffs { du, err := fs.DiskUsage(ctx, f) @@ -95,9 +99,11 @@ func (m *Manager) RemoveBlobCache(blobID string) error { blobCacheSuffixedPath := path.Join(m.cacheDir, blobID+dataFileSuffix) blobChunkMap := path.Join(m.cacheDir, blobID+chunkMapFileSuffix) blobMeta := path.Join(m.cacheDir, blobID+metaFileSuffix) + imageDisk := path.Join(m.cacheDir, blobID+imageDiskFileSuffix) + layerDisk := path.Join(m.cacheDir, blobID+layerDiskFileSuffix) // NOTE: Delete chunk bitmap file before data blob - stuffs := []string{blobChunkMap, blobMeta, blobCachePath, blobCacheSuffixedPath} + stuffs := []string{blobChunkMap, blobMeta, blobCachePath, blobCacheSuffixedPath, imageDisk, layerDisk} for _, f := range stuffs { err := os.Remove(f) diff --git a/pkg/filesystem/config.go b/pkg/filesystem/config.go index 773af0bac6..4f2fb668bd 100644 --- a/pkg/filesystem/config.go +++ b/pkg/filesystem/config.go @@ -14,6 +14,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/signature" "github.com/containerd/nydus-snapshotter/pkg/stargz" + "github.com/containerd/nydus-snapshotter/pkg/tarfs" "github.com/pkg/errors" ) @@ -28,18 +29,18 @@ func WithNydusImageBinaryPath(p string) NewFSOpt { func WithManager(pm *manager.Manager) NewFSOpt { return func(fs *Filesystem) error { - if pm == nil { - return errors.New("process manager cannot be nil") + if pm != nil { + switch pm.FsDriver { + case config.FsDriverBlockdev: + fs.blockdevManager = pm + case 
config.FsDriverFscache: + fs.fscacheManager = pm + case config.FsDriverFusedev: + fs.fusedevManager = pm + } + fs.enabledManagers = append(fs.enabledManagers, pm) } - if pm.FsDriver == config.FsDriverFusedev { - fs.fusedevManager = pm - } else if pm.FsDriver == config.FsDriverFscache { - fs.fscacheManager = pm - } - - fs.enabledManagers = append(fs.enabledManagers, pm) - return nil } } @@ -66,6 +67,16 @@ func WithReferrerManager(rm *referrer.Manager) NewFSOpt { } } +func WithTarfsManager(tm *tarfs.Manager) NewFSOpt { + return func(fs *Filesystem) error { + if tm == nil { + return errors.New("tarfs manager cannot be nil") + } + fs.tarfsMgr = tm + return nil + } +} + func WithVerifier(verifier *signature.Verifier) NewFSOpt { return func(fs *Filesystem) error { fs.verifier = verifier diff --git a/pkg/filesystem/fs.go b/pkg/filesystem/fs.go index a13e78f380..375a36caa4 100644 --- a/pkg/filesystem/fs.go +++ b/pkg/filesystem/fs.go @@ -15,24 +15,28 @@ import ( "os" "path" - "github.com/containerd/containerd/log" snpkg "github.com/containerd/containerd/pkg/snapshotters" - "github.com/containerd/containerd/snapshots" "github.com/mohae/deepcopy" "github.com/opencontainers/go-digest" "github.com/pkg/errors" "golang.org/x/sync/errgroup" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/snapshots" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/config/daemonconfig" "github.com/containerd/nydus-snapshotter/pkg/cache" "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/manager" "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/signature" "github.com/containerd/nydus-snapshotter/pkg/stargz" + "github.com/containerd/nydus-snapshotter/pkg/tarfs" ) // TODO: refact `enabledManagers` and `xxxManager` into `ManagerCoordinator` @@ -46,6 +50,7 @@ type Filesystem struct { enabledManagers []*manager.Manager cacheMgr *cache.Manager referrerMgr *referrer.Manager + tarfsMgr *tarfs.Manager stargzResolver *stargz.Resolver verifier *signature.Verifier nydusImageBinaryPath string @@ -218,23 +223,19 @@ func (fs *Filesystem) WaitUntilReady(snapshotID string) error { // Mount will be called when containerd snapshotter prepare remote snapshotter // this method will fork nydus daemon and manage it in the internal store, and indexed by snapshotID // It must set up all necessary resources during Mount procedure and revoke any step if necessary. -func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err error) { - // TODO: support tarfs - isTarfsMode := false +func (fs *Filesystem) Mount(snapshotID string, labels map[string]string, s *storage.Snapshot) (err error) { + // Do not create RAFS instance in case of nodev. + if !fs.DaemonBacked() { + return nil + } + fsDriver := config.GetFsDriver() - if isTarfsMode { + if label.IsTarfsDataLayer(labels) { fsDriver = config.FsDriverBlockdev - } else if !fs.DaemonBacked() { - fsDriver = config.FsDriverNodev } isSharedFusedev := fsDriver == config.FsDriverFusedev && config.GetDaemonMode() == config.DaemonModeShared useSharedDaemon := fsDriver == config.FsDriverFscache || isSharedFusedev - // Do not create RAFS instance in case of nodev. 
- if fsDriver == config.FsDriverNodev { - return nil - } - var imageID string imageID, ok := labels[snpkg.TargetRefLabel] if !ok { @@ -269,13 +270,14 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err er if err != nil { return errors.Wrapf(err, "get filesystem manager for snapshot %s", snapshotID) } - bootstrap, err := rafs.BootstrapFile() - if err != nil { - return errors.Wrapf(err, "find bootstrap file snapshot %s", snapshotID) - } var d *daemon.Daemon if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { + bootstrap, err := rafs.BootstrapFile() + if err != nil { + return errors.Wrapf(err, "find bootstrap file snapshot %s", snapshotID) + } + if useSharedDaemon { d, err = fs.getSharedDaemon(fsDriver) if err != nil { @@ -334,12 +336,12 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err er } d.AddInstance(rafs) - } - // if publicKey is not empty we should verify bootstrap file of image - err = fs.verifier.Verify(labels, bootstrap) - if err != nil { - return errors.Wrapf(err, "verify signature of daemon %s", d.ID()) + // if publicKey is not empty we should verify bootstrap file of image + err = fs.verifier.Verify(labels, bootstrap) + if err != nil { + return errors.Wrapf(err, "verify signature of daemon %s", d.ID()) + } } switch fsDriver { @@ -353,8 +355,13 @@ func (fs *Filesystem) Mount(snapshotID string, labels map[string]string) (err er if err != nil { return errors.Wrapf(err, "mount file system by daemon %s, snapshot %s", d.ID(), snapshotID) } - // case config.FsDriverBlockdev: - // TODO: support tarfs + case config.FsDriverBlockdev: + err = fs.tarfsMgr.MountTarErofs(snapshotID, s, rafs) + if err != nil { + return errors.Wrapf(err, "mount tarfs for snapshot %s", snapshotID) + } + default: + return errors.Errorf("unknown filesystem driver %s for snapshot %s", fsDriver, snapshotID) } // Persist it after associate instance after all the states are calculated. @@ -373,12 +380,18 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { } fsDriver := instance.GetFsDriver() + if fsDriver == config.FsDriverNodev { + return nil + } fsManager, err := fs.getManager(fsDriver) if err != nil { return errors.Wrapf(err, "get manager for filesystem instance %s", instance.DaemonID) } - if fsDriver == config.FsDriverFscache || fsDriver == config.FsDriverFusedev { + switch fsDriver { + case config.FsDriverFscache: + fallthrough + case config.FsDriverFusedev: daemon, err := fs.getDaemonByRafs(instance) if err != nil { log.L.Debugf("snapshot %s has no associated nydusd", snapshotID) @@ -398,10 +411,16 @@ func (fs *Filesystem) Umount(ctx context.Context, snapshotID string) error { return errors.Wrapf(err, "destroy daemon %s", daemon.ID()) } } - // } else if fsDriver == config.FsDriverBlockdev { - // TODO: support tarfs + case config.FsDriverBlockdev: + if err := fs.tarfsMgr.UmountTarErofs(snapshotID); err != nil { + return errors.Wrapf(err, "umount tar erofs on snapshot %s", snapshotID) + } + if err := fsManager.RemoveInstance(snapshotID); err != nil { + return errors.Wrapf(err, "remove snapshot %s", snapshotID) + } + default: + return errors.Errorf("unknown filesystem driver %s for snapshot %s", fsDriver, snapshotID) } - return nil } @@ -581,7 +600,6 @@ func (fs *Filesystem) initSharedDaemon(fsManager *manager.Manager) (err error) { } // createDaemon create new nydus daemon by snapshotID and imageID -// For fscache driver, no need to provide mountpoint to nydusd daemon. 
func (fs *Filesystem) createDaemon(fsManager *manager.Manager, daemonMode config.DaemonMode, mountpoint string, ref int32) (d *daemon.Daemon, err error) { opts := []daemon.NewDaemonOpt{ @@ -597,6 +615,7 @@ func (fs *Filesystem) createDaemon(fsManager *manager.Manager, daemonMode config daemon.WithDaemonMode(daemonMode), } + // For fscache driver, no need to provide mountpoint to nydusd daemon. if mountpoint != "" { opts = append(opts, daemon.WithMountpoint(mountpoint)) } diff --git a/pkg/filesystem/tarfs_adaptor.go b/pkg/filesystem/tarfs_adaptor.go new file mode 100755 index 0000000000..09b3aeaab1 --- /dev/null +++ b/pkg/filesystem/tarfs_adaptor.go @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package filesystem + +import ( + "context" + + "github.com/containerd/containerd/log" + snpkg "github.com/containerd/containerd/pkg/snapshotters" + "github.com/containerd/containerd/snapshots/storage" + "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +func (fs *Filesystem) TarfsEnabled() bool { + return fs.tarfsMgr != nil +} + +func (fs *Filesystem) PrepareTarfsLayer(ctx context.Context, labels map[string]string, snapshotID, upperDirPath string) error { + ref, ok := labels[snpkg.TargetRefLabel] + if !ok { + return errors.Errorf("not found image reference label") + } + layerDigest := digest.Digest(labels[snpkg.TargetLayerDigestLabel]) + if layerDigest.Validate() != nil { + return errors.Errorf("not found layer digest label") + } + manifestDigest := digest.Digest(labels[snpkg.TargetManifestDigestLabel]) + if manifestDigest.Validate() != nil { + return errors.Errorf("not found manifest digest label") + } + + ok, err := fs.tarfsMgr.CheckTarfsHintAnnotation(ctx, ref, manifestDigest) + if err != nil { + return errors.Wrapf(err, "check tarfs hint annotaion") + } + if !ok { + return errors.Errorf("this image is not recommended for tarfs") + } + + limiter := fs.tarfsMgr.GetConcurrentLimiter(ref) + if limiter != nil { + if err := limiter.Acquire(context.Background(), 1); err != nil { + return errors.Wrapf(err, "concurrent limiter acquire") + } + } + + if err := fs.tarfsMgr.PrepareLayer(snapshotID, ref, manifestDigest, layerDigest, upperDirPath); err != nil { + log.L.WithError(err).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) + } + if limiter != nil { + limiter.Release(1) + } + + return nil +} + +func (fs *Filesystem) MergeTarfsLayers(s storage.Snapshot, storageLocater func(string) string) error { + return fs.tarfsMgr.MergeLayers(s, storageLocater) +} + +func (fs *Filesystem) DetachTarfsLayer(snapshotID string) error { + return fs.tarfsMgr.DetachLayer(snapshotID) +} + +func (fs *Filesystem) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, + storageLocater func(string) string) ([]string, error) { + return fs.tarfsMgr.ExportBlockData(s, perLayer, labels, storageLocater) +} diff --git a/pkg/label/label.go b/pkg/label/label.go index af9417bce0..948abe6da9 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -34,12 +34,18 @@ const ( NydusMetaLayer = "containerd.io/snapshot/nydus-bootstrap" // The referenced blob sha256 in format of `sha256:xxx`, set by image builders. NydusRefLayer = "containerd.io/snapshot/nydus-ref" + // A bool flag to mark the layer as a nydus tarfs, set by the snapshotter + NydusTarfsLayer = "containerd.io/snapshot/nydus-tarfs" // Annotation containing secret to pull images from registry, set by the snapshotter. 
NydusImagePullSecret = "containerd.io/snapshot/pullsecret" // Annotation containing username to pull images from registry, set by the snapshotter. NydusImagePullUsername = "containerd.io/snapshot/pullusername" // A bool flag to enable integrity verification of meta data blob NydusSignature = "containerd.io/snapshot/nydus-signature" + // Information for image block device + NydusImageBlockInfo = "containerd.io/snapshot/nydus-image-block" + // Information for layer block device + NydusLayerBlockInfo = "containerd.io/snapshot/nydus-layer-block" // A bool flag to mark the blob as a estargz data blob, set by the snapshotter. StargzLayer = "containerd.io/snapshot/stargz" @@ -48,6 +54,10 @@ const ( // If this optional label of a snapshot is specified, when mounted to rootdir // this snapshot will include volatile option OverlayfsVolatileOpt = "containerd.io/snapshot/overlay.volatile" + + // A bool flag to mark it is recommended to run this image with tarfs mode, set by image builders. + // runtime can decide whether to rely on this annotation + TarfsHint = "containerd.io/snapshot/tarfs-hint" ) func IsNydusDataLayer(labels map[string]string) bool { @@ -56,9 +66,16 @@ func IsNydusDataLayer(labels map[string]string) bool { } func IsNydusMetaLayer(labels map[string]string) bool { - if labels == nil { - return false - } _, ok := labels[NydusMetaLayer] return ok } + +func IsTarfsDataLayer(labels map[string]string) bool { + _, ok := labels[NydusTarfsLayer] + return ok +} + +func HasTarfsHint(labels map[string]string) bool { + _, ok := labels[TarfsHint] + return ok +} diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 2ace2fb296..2c7d63e331 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -127,9 +127,8 @@ type Manager struct { // supposed to refilled when nydus-snapshotter restarting. 
daemonStates *DaemonStates - monitor LivenessMonitor - // TODO: Close me - LivenessNotifier chan deathEvent + monitor LivenessMonitor + LivenessNotifier chan deathEvent // TODO: Close me RecoverPolicy config.DaemonRecoverPolicy SupervisorSet *supervisor.SupervisorsSet @@ -151,12 +150,10 @@ type Opt struct { Database *store.Database CacheDir string RecoverPolicy config.DaemonRecoverPolicy - // Nydus-snapshotter work directory - RootDir string - DaemonConfig daemonconfig.DaemonConfig - CgroupMgr *cgroup.Manager - // In order to validate daemon fs driver is consistent with the latest snapshotter boot - FsDriver string + RootDir string // Nydus-snapshotter work directory + DaemonConfig daemonconfig.DaemonConfig + CgroupMgr *cgroup.Manager + FsDriver string // In order to validate daemon fs driver is consistent with the latest snapshotter boot } func (m *Manager) doDaemonFailover(d *daemon.Daemon) { @@ -328,6 +325,10 @@ func (m *Manager) NewInstance(r *daemon.Rafs) error { return m.store.AddInstance(r) } +func (m *Manager) RemoveInstance(snapshotID string) error { + return m.store.DeleteInstance(snapshotID) +} + func (m *Manager) Lock() { m.mu.Lock() } @@ -353,10 +354,6 @@ func (m *Manager) UnsubscribeDaemonEvent(d *daemon.Daemon) error { return nil } -func (m *Manager) RemoveInstance(snapshotID string) error { - return m.store.DeleteInstance(snapshotID) -} - func (m *Manager) UpdateDaemon(daemon *daemon.Daemon) error { m.mu.Lock() defer m.mu.Unlock() diff --git a/pkg/metrics/serve.go b/pkg/metrics/serve.go index 6544270d7b..73b68511af 100644 --- a/pkg/metrics/serve.go +++ b/pkg/metrics/serve.go @@ -36,7 +36,9 @@ type Server struct { func WithProcessManager(pm *manager.Manager) ServerOpt { return func(s *Server) error { - s.managers = append(s.managers, pm) + if pm != nil { + s.managers = append(s.managers, pm) + } return nil } } diff --git a/pkg/snapshot/storage.go b/pkg/snapshot/storage.go index 0da112bef2..ea03b9aa58 100644 --- a/pkg/snapshot/storage.go +++ b/pkg/snapshot/storage.go @@ -83,8 +83,6 @@ func IterateParentSnapshots(ctx context.Context, ms *storage.MetaStore, key stri return id, info, nil } - log.L.Debugf("continue to check snapshot %s parent", id) - cKey = info.Parent } diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go new file mode 100755 index 0000000000..53082f5702 --- /dev/null +++ b/pkg/tarfs/tarfs.go @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tarfs + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + + "github.com/containerd/containerd/archive/compression" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config" + "github.com/containerd/nydus-snapshotter/pkg/auth" + "github.com/containerd/nydus-snapshotter/pkg/daemon" + "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/label" + "github.com/containerd/nydus-snapshotter/pkg/remote" + "github.com/containerd/nydus-snapshotter/pkg/remote/remotes" + losetup "github.com/freddierice/go-losetup" + "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "golang.org/x/sync/semaphore" + "golang.org/x/sync/singleflight" + "golang.org/x/sys/unix" + "k8s.io/utils/lru" +) + +const ( + TarfsStatusInit = 0 + TarfsStatusPrepare = 1 + TarfsStatusReady = 2 + TarfsStatusFailed = 3 +) + +const ( + MaxManifestConfigSize = 0x100000 + TarfsLayerBootstrapName = "layer.boot" + TarfsImageBootstrapName = "image.boot" + TarfsLayerDiskName = "layer.disk" + TarfsImageDiskName = "image.disk" +) + +type Manager struct { + snapshotMap map[string]*snapshotStatus // tarfs snapshots status, indexed by snapshot ID + mutex sync.Mutex + mutexLoopDev sync.Mutex + cacheDirPath string + nydusImagePath string + insecure bool + validateDiffID bool // whether to validate digest for uncompressed content + checkTarfsHint bool // whether to rely on tarfs hint annotation + maxConcurrentProcess int64 + processLimiterCache *lru.Cache // cache image ref and concurrent limiter for blob processes + tarfsHintCache *lru.Cache // cache oci image ref and tarfs hint annotation + diffIDCache *lru.Cache // cache oci blob digest and diffID + sg singleflight.Group +} + +type snapshotStatus struct { + mutex sync.Mutex + status int + blobID string + blobTarFilePath string + erofsMountPoint string + dataLoopdev *losetup.Device + metaLoopdev *losetup.Device + wg *sync.WaitGroup + cancel context.CancelFunc +} + +func NewManager(insecure, checkTarfsHint bool, cacheDirPath, nydusImagePath string, maxConcurrentProcess int64) *Manager { + return &Manager{ + snapshotMap: map[string]*snapshotStatus{}, + cacheDirPath: cacheDirPath, + nydusImagePath: nydusImagePath, + insecure: insecure, + validateDiffID: true, + checkTarfsHint: checkTarfsHint, + maxConcurrentProcess: maxConcurrentProcess, + tarfsHintCache: lru.New(50), + processLimiterCache: lru.New(50), + diffIDCache: lru.New(1000), + sg: singleflight.Group{}, + } +} + +// Fetch image manifest and config contents, cache frequently used information. 
+// FIXME need an update policy +func (t *Manager) fetchImageInfo(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) error { + manifest, err := t.fetchImageManifest(ctx, remote, ref, manifestDigest) + if err != nil { + return err + } + config, err := t.fetchImageConfig(ctx, remote, ref, &manifest) + if err != nil { + return err + } + + if t.checkTarfsHint { + // cache ref & tarfs hint annotation + t.tarfsHintCache.Add(ref, label.HasTarfsHint(manifest.Annotations)) + } + if t.validateDiffID { + // cache OCI blob digest & diff id + for i := range manifest.Layers { + t.diffIDCache.Add(manifest.Layers[i].Digest, config.RootFS.DiffIDs[i]) + } + } + + return nil +} + +func (t *Manager) fetchImageManifest(ctx context.Context, remote *remote.Remote, ref string, manifestDigest digest.Digest) (ocispec.Manifest, error) { + rc, desc, err := t.getBlobStream(ctx, remote, ref, manifestDigest) + if err != nil { + return ocispec.Manifest{}, err + } + defer rc.Close() + if desc.Size > MaxManifestConfigSize { + return ocispec.Manifest{}, errors.Errorf("image manifest content size %x is too big", desc.Size) + } + bytes, err := io.ReadAll(rc) + if err != nil { + return ocispec.Manifest{}, errors.Wrap(err, "read image manifest content") + } + + var manifestOCI ocispec.Manifest + if err := json.Unmarshal(bytes, &manifestOCI); err != nil { + return ocispec.Manifest{}, errors.Wrap(err, "unmarshal OCI image manifest") + } + if len(manifestOCI.Layers) < 1 { + return ocispec.Manifest{}, errors.Errorf("invalid OCI image manifest without any layer") + } + + return manifestOCI, nil +} + +func (t *Manager) fetchImageConfig(ctx context.Context, remote *remote.Remote, ref string, manifest *ocispec.Manifest) (ocispec.Image, error) { + // fetch image config content and extract diffIDs + rc, desc, err := t.getBlobStream(ctx, remote, ref, manifest.Config.Digest) + if err != nil { + return ocispec.Image{}, errors.Wrap(err, "fetch image config content") + } + defer rc.Close() + if desc.Size > MaxManifestConfigSize { + return ocispec.Image{}, errors.Errorf("image config content size %x is too big", desc.Size) + } + bytes, err := io.ReadAll(rc) + if err != nil { + return ocispec.Image{}, errors.Wrap(err, "read image config content") + } + + var config ocispec.Image + if err := json.Unmarshal(bytes, &config); err != nil { + return ocispec.Image{}, errors.Wrap(err, "unmarshal image config") + } + if len(config.RootFS.DiffIDs) != len(manifest.Layers) { + return ocispec.Image{}, errors.Errorf("number of diffIDs does not match manifest layers") + } + + return config, nil +} + +func (t *Manager) getBlobDiffID(ctx context.Context, remote *remote.Remote, ref string, manifestDigest, layerDigest digest.Digest) (digest.Digest, error) { + if diffid, ok := t.diffIDCache.Get(layerDigest); ok { + return diffid.(digest.Digest), nil + } + + if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { + err := t.fetchImageInfo(ctx, remote, ref, manifestDigest) + return nil, err + }); err != nil { + return "", err + } + + if diffid, ok := t.diffIDCache.Get(layerDigest); ok { + return diffid.(digest.Digest), nil + } + + return "", errors.Errorf("get blob diff id failed") +} + +func (t *Manager) getBlobStream(ctx context.Context, remote *remote.Remote, ref string, contentDigest digest.Digest) (io.ReadCloser, ocispec.Descriptor, error) { + fetcher, err := remote.Fetcher(ctx, ref) + if err != nil { + return nil, ocispec.Descriptor{}, errors.Wrap(err, "get remote fetcher") + } + + fetcherByDigest, ok := 
fetcher.(remotes.FetcherByDigest) + if !ok { + return nil, ocispec.Descriptor{}, errors.Errorf("fetcher %T does not implement remotes.FetcherByDigest", fetcher) + } + + return fetcherByDigest.FetchByDigest(ctx, contentDigest) +} + +// generate tar file and layer bootstrap, return if this blob is an empty blob +func (t *Manager) generateBootstrap(tarReader io.Reader, snapshotID, layerBlobID, upperDirPath string) (err error) { + snapshotImageDir := filepath.Join(upperDirPath, "image") + if err := os.MkdirAll(snapshotImageDir, 0750); err != nil { + return errors.Wrapf(err, "create data dir %s for tarfs snapshot", snapshotImageDir) + } + layerMetaFile := t.layerMetaFilePath(upperDirPath) + if _, err := os.Stat(layerMetaFile); err == nil { + return errdefs.ErrAlreadyExists + } + layerMetaFileTmp := layerMetaFile + ".tarfs.tmp" + defer os.Remove(layerMetaFileTmp) + + layerTarFile := t.layerTarFilePath(layerBlobID) + layerTarFileTmp := layerTarFile + ".tarfs.tmp" + tarFile, err := os.Create(layerTarFileTmp) + if err != nil { + return errors.Wrap(err, "create temporary file to store tar stream") + } + defer tarFile.Close() + defer os.Remove(layerTarFileTmp) + + fifoName := filepath.Join(upperDirPath, "layer_"+snapshotID+"_"+"tar.fifo") + if err = syscall.Mkfifo(fifoName, 0644); err != nil { + return err + } + defer os.Remove(fifoName) + + go func() { + fifoFile, err := os.OpenFile(fifoName, os.O_WRONLY, os.ModeNamedPipe) + if err != nil { + log.L.Warnf("can not open fifo file, err %v", err) + return + } + defer fifoFile.Close() + if _, err := io.Copy(fifoFile, io.TeeReader(tarReader, tarFile)); err != nil { + log.L.Warnf("tar stream copy err %v", err) + } + }() + + options := []string{ + "create", + "--type", "tar-tarfs", + "--bootstrap", layerMetaFileTmp, + "--blob-id", layerBlobID, + "--blob-dir", t.cacheDirPath, + fifoName, + } + cmd := exec.Command(t.nydusImagePath, options...) + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + log.L.Debugf("nydus image command %v", options) + err = cmd.Run() + if err != nil { + log.L.Warnf("nydus image exec failed, %s", errb.String()) + return errors.Wrap(err, "converting OCIv1 layer blob to tarfs") + } + log.L.Debugf("nydus image output %s", outb.String()) + log.L.Debugf("nydus image err %s", errb.String()) + + if err := os.Rename(layerTarFileTmp, layerTarFile); err != nil { + return errors.Wrapf(err, "rename file %s to %s", layerTarFileTmp, layerTarFile) + } + if err := os.Rename(layerMetaFileTmp, layerMetaFile); err != nil { + return errors.Wrapf(err, "rename file %s to %s", layerMetaFileTmp, layerMetaFile) + } + + return nil +} + +func (t *Manager) getImageBlobInfo(metaFilePath string) (string, error) { + if _, err := os.Stat(metaFilePath); err != nil { + return "", err + } + + options := []string{ + "inspect", + "-R blobs", + metaFilePath, + } + cmd := exec.Command(t.nydusImagePath, options...) 
+ var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + log.L.Debugf("nydus image command %v", options) + err := cmd.Run() + if err != nil { + log.L.Warnf("nydus image exec failed, %s", errb.String()) + return "", errors.Wrap(err, "inspect tarfs bootstrap") + } + + return outb.String(), nil +} + +// download & uncompress an oci/docker blob, and then generate the tarfs bootstrap +func (t *Manager) blobProcess(ctx context.Context, snapshotID, ref string, manifestDigest, layerDigest digest.Digest, + layerBlobID, upperDirPath string) error { + keyChain, err := auth.GetKeyChainByRef(ref, nil) + if err != nil { + return err + } + remote := remote.New(keyChain, t.insecure) + + handle := func() error { + rc, _, err := t.getBlobStream(ctx, remote, ref, layerDigest) + if err != nil { + return err + } + defer rc.Close() + ds, err := compression.DecompressStream(rc) + if err != nil { + return errors.Wrap(err, "unpack layer blob stream for tarfs") + } + defer ds.Close() + + if t.validateDiffID { + diffID, err := t.getBlobDiffID(ctx, remote, ref, manifestDigest, layerDigest) + if err != nil { + return errors.Wrap(err, "get image layer diffID") + } + digester := digest.Canonical.Digester() + dr := io.TeeReader(ds, digester.Hash()) + err = t.generateBootstrap(dr, snapshotID, layerBlobID, upperDirPath) + if err != nil && !errdefs.IsAlreadyExists(err) { + return errors.Wrap(err, "generate tarfs data from image layer blob") + } + if err == nil { + if digester.Digest() != diffID { + return errors.Errorf("image layer diffID %s for tarfs does not match", diffID) + } + log.L.Infof("tarfs data for layer %s is ready, digest %s", snapshotID, digester.Digest()) + } + } else { + err = t.generateBootstrap(ds, snapshotID, layerBlobID, upperDirPath) + if err != nil && !errdefs.IsAlreadyExists(err) { + return errors.Wrap(err, "generate tarfs data from image layer blob") + } + log.L.Infof("tarfs data for layer %s is ready", snapshotID) + } + return nil + } + + err = handle() + if err != nil && remote.RetryWithPlainHTTP(ref, err) { + err = handle() + } + + return err +} + +func (t *Manager) PrepareLayer(snapshotID, ref string, manifestDigest, layerDigest digest.Digest, + upperDirPath string) error { + t.mutex.Lock() + if _, ok := t.snapshotMap[snapshotID]; ok { + t.mutex.Unlock() + return errors.Errorf("snapshot %s has already been prepared", snapshotID) + } + wg := &sync.WaitGroup{} + wg.Add(1) + ctx, cancel := context.WithCancel(context.Background()) + + t.snapshotMap[snapshotID] = &snapshotStatus{ + status: TarfsStatusPrepare, + wg: wg, + cancel: cancel, + } + t.mutex.Unlock() + + go func() { + defer wg.Done() + + layerBlobID := layerDigest.Hex() + err := t.blobProcess(ctx, snapshotID, ref, manifestDigest, layerDigest, layerBlobID, upperDirPath) + + st, err1 := t.getSnapshotStatus(snapshotID, true) + if err1 != nil { + // return errors.Errorf("can not find status object for snapshot %s after prepare", snapshotID) + err1 = errors.Wrapf(err1, "can not find status object for snapshot %s after prepare", snapshotID) + log.L.WithError(err1).Errorf("async prepare tarfs layer of snapshot ID %s", snapshotID) + return + } + defer st.mutex.Unlock() + + st.blobID = layerBlobID + st.blobTarFilePath = t.layerTarFilePath(layerBlobID) + if err != nil { + log.L.WithError(err).Errorf("failed to convert OCI image to tarfs") + st.status = TarfsStatusFailed + } else { + st.status = TarfsStatusReady + } + log.L.Debugf("finish converting snapshot %s to tarfs, status %d", snapshotID, st.status) + }() + + return 
nil +} + +func (t *Manager) MergeLayers(s storage.Snapshot, storageLocater func(string) string) error { + mergedBootstrap := t.imageMetaFilePath(storageLocater(s.ParentIDs[0])) + if _, err := os.Stat(mergedBootstrap); err == nil { + log.L.Debugf("tarfs snapshot %s already has merged bootstrap %s", s.ParentIDs[0], mergedBootstrap) + return nil + } + + bootstraps := []string{} + // When merging bootstrap, we need to arrange layer bootstrap in order from low to high + for idx := len(s.ParentIDs) - 1; idx >= 0; idx-- { + snapshotID := s.ParentIDs[idx] + err := t.waitLayerReady(snapshotID) + if err != nil { + return errors.Wrapf(err, "wait for tarfs snapshot %s to get ready", snapshotID) + } + + st, err := t.getSnapshotStatus(snapshotID, false) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + return errors.Errorf("tarfs snapshot %s is not ready, %d", snapshotID, st.status) + } + + metaFilePath := t.layerMetaFilePath(storageLocater(snapshotID)) + bootstraps = append(bootstraps, metaFilePath) + } + + mergedBootstrapTmp := mergedBootstrap + ".tarfs.tmp" + defer os.Remove(mergedBootstrapTmp) + + options := []string{ + "merge", + "--bootstrap", mergedBootstrapTmp, + } + options = append(options, bootstraps...) + cmd := exec.Command(t.nydusImagePath, options...) + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + log.L.Debugf("nydus image command %v", options) + err := cmd.Run() + if err != nil { + return errors.Wrap(err, "merge tarfs image layers") + } + + err = os.Rename(mergedBootstrapTmp, mergedBootstrap) + if err != nil { + return errors.Wrap(err, "rename merged bootstrap file") + } + + return nil +} + +func (t *Manager) ExportBlockData(s storage.Snapshot, perLayer bool, labels map[string]string, storageLocater func(string) string) ([]string, error) { + updateFields := []string{} + + wholeImage, exportDisk, withVerity := config.GetTarfsExportFlags() + // Nothing to do for this case, all needed datum are ready. + if !exportDisk && !withVerity { + return updateFields, nil + } else if !wholeImage != perLayer { + return updateFields, nil + } + + var snapshotID string + if perLayer { + snapshotID = s.ID + } else { + if len(s.ParentIDs) == 0 { + return updateFields, errors.Errorf("snapshot %s has no parent", s.ID) + } + snapshotID = s.ParentIDs[0] + } + err := t.waitLayerReady(snapshotID) + if err != nil { + return updateFields, errors.Wrapf(err, "wait for tarfs snapshot %s to get ready", snapshotID) + } + st, err := t.getSnapshotStatus(snapshotID, false) + if err != nil { + return updateFields, err + } + if st.status != TarfsStatusReady { + return updateFields, errors.Errorf("tarfs snapshot %s is not ready, %d", snapshotID, st.status) + } + + var metaFileName, diskFileName string + if wholeImage { + metaFileName = t.imageMetaFilePath(storageLocater(snapshotID)) + diskFileName = t.imageDiskFilePath(st.blobID) + } else { + metaFileName = t.layerMetaFilePath(storageLocater(snapshotID)) + diskFileName = t.layerDiskFilePath(st.blobID) + } + + // Do not regenerate if the disk image already exists. 
+ if _, err := os.Stat(diskFileName); err == nil { + return updateFields, nil + } + diskFileNameTmp := diskFileName + ".tarfs.tmp" + defer os.Remove(diskFileNameTmp) + + options := []string{ + "export", + "--block", + "--localfs-dir", t.cacheDirPath, + "--bootstrap", metaFileName, + "--output", diskFileNameTmp, + } + if withVerity { + options = append(options, "--verity") + } + log.L.Warnf("nydus image command %v", options) + cmd := exec.Command(t.nydusImagePath, options...) + var errb, outb bytes.Buffer + cmd.Stderr = &errb + cmd.Stdout = &outb + err = cmd.Run() + if err != nil { + return updateFields, errors.Wrap(err, "merge tarfs image layers") + } + log.L.Debugf("nydus image export command, stdout: %s, stderr: %s", &outb, &errb) + + if withVerity { + pattern := "dm-verity options: --no-superblock --format=1 -s \"\" --hash=sha256 --data-block-size=512 --hash-block-size=4096 --data-blocks %d --hash-offset %d %s\n" + var dataBlobks, hashOffset uint64 + var rootHash string + if count, err := fmt.Sscanf(outb.String(), pattern, &dataBlobks, &hashOffset, &rootHash); err != nil || count != 3 { + return updateFields, errors.Errorf("failed to parse dm-verity options from nydus image output: %s", outb.String()) + } + + blockInfo := strconv.FormatUint(dataBlobks, 10) + "," + strconv.FormatUint(hashOffset, 10) + "," + "sha256:" + rootHash + if wholeImage { + labels[label.NydusImageBlockInfo] = blockInfo + updateFields = append(updateFields, "labels."+label.NydusImageBlockInfo) + } else { + labels[label.NydusLayerBlockInfo] = blockInfo + updateFields = append(updateFields, "labels."+label.NydusLayerBlockInfo) + } + log.L.Warnf("export block labels %v", labels) + } + + err = os.Rename(diskFileNameTmp, diskFileName) + if err != nil { + return updateFields, errors.Wrap(err, "rename disk image file") + } + + return updateFields, nil +} + +func (t *Manager) MountTarErofs(snapshotID string, s *storage.Snapshot, rafs *daemon.Rafs) error { + if s == nil { + return errors.New("snapshot object for MountTarErofs() is nil") + } + + upperDirPath := path.Join(rafs.GetSnapshotDir(), "fs") + mergedBootstrap := t.imageMetaFilePath(upperDirPath) + blobInfo, err := t.getImageBlobInfo(mergedBootstrap) + if err != nil { + return errors.Wrapf(err, "get image blob info") + } + + var devices []string + // When merging bootstrap, we need to arrange layer bootstrap in order from low to high + for idx := len(s.ParentIDs) - 1; idx >= 0; idx-- { + snapshotID := s.ParentIDs[idx] + err := t.waitLayerReady(snapshotID) + if err != nil { + return errors.Wrapf(err, "wait for tarfs conversion task") + } + + st, err := t.getSnapshotStatus(snapshotID, true) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + st.mutex.Unlock() + return errors.Errorf("snapshot %s tarfs format error %d", snapshotID, st.status) + } + + var blobMarker = "\"blob_id\":\"" + st.blobID + "\"" + if strings.Contains(blobInfo, blobMarker) { + if st.dataLoopdev == nil { + loopdev, err := t.attachLoopdev(st.blobTarFilePath) + if err != nil { + st.mutex.Unlock() + return errors.Wrapf(err, "attach layer tar file %s to loopdev", st.blobTarFilePath) + } + st.dataLoopdev = loopdev + } + devices = append(devices, "device="+st.dataLoopdev.Path()) + } + + st.mutex.Unlock() + } + mountOpts := strings.Join(devices, ",") + + st, err := t.getSnapshotStatus(snapshotID, true) + if err != nil { + return err + } + defer st.mutex.Unlock() + + mountPoint := path.Join(rafs.GetSnapshotDir(), "mnt") + if len(st.erofsMountPoint) > 0 { + if st.erofsMountPoint == 
mountPoint { + log.L.Debugf("tarfs for snapshot %s has already been mounted at %s", snapshotID, mountPoint) + return nil + } + return errors.Errorf("tarfs for snapshot %s has already been mounted at %s", snapshotID, st.erofsMountPoint) + } + + if st.metaLoopdev == nil { + loopdev, err := t.attachLoopdev(mergedBootstrap) + if err != nil { + return errors.Wrapf(err, "attach merged bootstrap %s to loopdev", mergedBootstrap) + } + st.metaLoopdev = loopdev + } + devName := st.metaLoopdev.Path() + + if err = os.MkdirAll(mountPoint, 0750); err != nil { + return errors.Wrapf(err, "create tarfs mount dir %s", mountPoint) + } + + err = unix.Mount(devName, mountPoint, "erofs", 0, mountOpts) + if err != nil { + return errors.Wrapf(err, "mount erofs at %s with opts %s", mountPoint, mountOpts) + } + st.erofsMountPoint = mountPoint + rafs.SetMountpoint(mountPoint) + return nil +} + +func (t *Manager) UmountTarErofs(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID, true) + if err != nil { + return errors.Wrapf(err, "umount a tarfs snapshot %s which is already removed", snapshotID) + } + defer st.mutex.Unlock() + + if len(st.erofsMountPoint) > 0 { + err := unix.Unmount(st.erofsMountPoint, 0) + if err != nil { + return errors.Wrapf(err, "umount erofs tarfs %s", st.erofsMountPoint) + } + } + st.erofsMountPoint = "" + return nil +} + +func (t *Manager) DetachLayer(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID, true) + if err != nil { + return os.ErrNotExist + } + + if len(st.erofsMountPoint) > 0 { + err := unix.Unmount(st.erofsMountPoint, 0) + if err != nil { + st.mutex.Unlock() + return errors.Wrapf(err, "umount erofs tarfs %s", st.erofsMountPoint) + } + } + + if st.metaLoopdev != nil { + err := st.metaLoopdev.Detach() + if err != nil { + st.mutex.Unlock() + return errors.Wrapf(err, "detach merged bootstrap loopdev for tarfs snapshot %s", snapshotID) + } + st.metaLoopdev = nil + } + + if st.dataLoopdev != nil { + err := st.dataLoopdev.Detach() + if err != nil { + st.mutex.Unlock() + return errors.Wrapf(err, "detach layer bootstrap loopdev for tarfs snapshot %s", snapshotID) + } + st.dataLoopdev = nil + } + + st.mutex.Unlock() + // TODO: check order + st.cancel() + + t.mutex.Lock() + delete(t.snapshotMap, snapshotID) + t.mutex.Unlock() + return nil +} + +func (t *Manager) getSnapshotStatus(snapshotID string, lock bool) (*snapshotStatus, error) { + t.mutex.Lock() + defer t.mutex.Unlock() + st, ok := t.snapshotMap[snapshotID] + if ok { + if lock { + st.mutex.Lock() + } + return st, nil + } + return nil, errors.Errorf("not found snapshot %s", snapshotID) +} + +func (t *Manager) waitLayerReady(snapshotID string) error { + st, err := t.getSnapshotStatus(snapshotID, false) + if err != nil { + return err + } + if st.status != TarfsStatusReady { + log.L.Debugf("wait tarfs conversion task for snapshot %s", snapshotID) + } + st.wg.Wait() + if st.status != TarfsStatusReady { + return errors.Errorf("snapshot %s is in state %d instead of ready state", snapshotID, st.status) + } + return nil +} + +func (t *Manager) attachLoopdev(blob string) (*losetup.Device, error) { + // losetup.Attach() is not thread-safe hold lock here + t.mutexLoopDev.Lock() + defer t.mutexLoopDev.Unlock() + dev, err := losetup.Attach(blob, 0, false) + return &dev, err +} + +func (t *Manager) CheckTarfsHintAnnotation(ctx context.Context, ref string, manifestDigest digest.Digest) (bool, error) { + if !t.checkTarfsHint { + return true, nil + } + + keyChain, err := auth.GetKeyChainByRef(ref, nil) + if err != 
nil { + return false, err + } + remote := remote.New(keyChain, t.insecure) + + handle := func() (bool, error) { + if tarfsHint, ok := t.tarfsHintCache.Get(ref); ok { + return tarfsHint.(bool), nil + } + + if _, err, _ := t.sg.Do(ref, func() (interface{}, error) { + err := t.fetchImageInfo(ctx, remote, ref, manifestDigest) + return nil, err + }); err != nil { + return false, err + } + + if tarfsHint, ok := t.tarfsHintCache.Get(ref); ok { + return tarfsHint.(bool), nil + } + + return false, errors.Errorf("get tarfs hint annotation failed") + } + + tarfsHint, err := handle() + if err != nil && remote.RetryWithPlainHTTP(ref, err) { + tarfsHint, err = handle() + } + return tarfsHint, err +} + +func (t *Manager) GetConcurrentLimiter(ref string) *semaphore.Weighted { + if t.maxConcurrentProcess <= 0 { + return nil + } + + if limiter, ok := t.processLimiterCache.Get(ref); ok { + return limiter.(*semaphore.Weighted) + } + + limiter := semaphore.NewWeighted(t.maxConcurrentProcess) + t.processLimiterCache.Add(ref, limiter) + return limiter +} + +func (t *Manager) layerTarFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID) +} + +func (t *Manager) layerDiskFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID+"."+TarfsLayerDiskName) +} + +func (t *Manager) imageDiskFilePath(blobID string) string { + return filepath.Join(t.cacheDirPath, blobID+"."+TarfsImageDiskName) +} + +func (t *Manager) layerMetaFilePath(upperDirPath string) string { + return filepath.Join(upperDirPath, "image", TarfsLayerBootstrapName) +} + +func (t *Manager) imageMetaFilePath(upperDirPath string) string { + return filepath.Join(upperDirPath, "image", TarfsImageBootstrapName) +} diff --git a/snapshot/mount_option.go b/snapshot/mount_option.go new file mode 100644 index 0000000000..59784fe696 --- /dev/null +++ b/snapshot/mount_option.go @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2023. Nydus Developers. All rights reserved. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package snapshot
+
+import (
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/containerd/containerd/log"
+	"github.com/containerd/containerd/mount"
+	"github.com/containerd/containerd/snapshots/storage"
+	"github.com/containerd/nydus-snapshotter/config/daemonconfig"
+	"github.com/containerd/nydus-snapshotter/pkg/daemon"
+	"github.com/containerd/nydus-snapshotter/pkg/layout"
+	"github.com/pkg/errors"
+)
+
+type ExtraOption struct {
+	Source      string `json:"source"`
+	Config      string `json:"config"`
+	Snapshotdir string `json:"snapshotdir"`
+	Version     string `json:"fs_version"`
+}
+
+func (o *snapshotter) remoteMountWithExtraOptions(ctx context.Context, s storage.Snapshot, id string, overlayOptions []string) ([]mount.Mount, error) {
+	source, err := o.fs.BootstrapFile(id)
+	if err != nil {
+		return nil, err
+	}
+
+	instance := daemon.RafsSet.Get(id)
+	daemon, err := o.fs.GetDaemonByID(instance.DaemonID)
+	if err != nil {
+		return nil, errors.Wrapf(err, "get daemon with ID %s", instance.DaemonID)
+	}
+
+	var c daemonconfig.DaemonConfig
+	if daemon.IsSharedDaemon() {
+		c, err = daemonconfig.NewDaemonConfig(daemon.States.FsDriver, daemon.ConfigFile(instance.SnapshotID))
+		if err != nil {
+			return nil, errors.Wrapf(err, "Failed to load instance configuration %s",
+				daemon.ConfigFile(instance.SnapshotID))
+		}
+	} else {
+		c = daemon.Config
+	}
+	configContent, err := c.DumpString()
+	if err != nil {
+		return nil, errors.Wrapf(err, "remoteMounts: failed to marshal config")
+	}
+
+	// get version from bootstrap
+	f, err := os.Open(source)
+	if err != nil {
+		return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to open bootstrap")
+	}
+	defer f.Close()
+	header := make([]byte, 4096)
+	sz, err := f.Read(header)
+	if err != nil {
+		return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to read bootstrap")
+	}
+	version, err := layout.DetectFsVersion(header[0:sz])
+	if err != nil {
+		return nil, errors.Wrapf(err, "remoteMounts: failed to detect filesystem version")
+	}
+
+	// when nydus-overlayfs is enabled, return a unified mount slice for runc and kata
+	extraOption := &ExtraOption{
+		Source:      source,
+		Config:      configContent,
+		Snapshotdir: o.snapshotDir(s.ID),
+		Version:     version,
+	}
+	no, err := json.Marshal(extraOption)
+	if err != nil {
+		return nil, errors.Wrapf(err, "remoteMounts: failed to marshal NydusOption")
+	}
+	// XXX: Log options without extraoptions as it might contain secrets.
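+	// The marshalled daemon config may embed registry credentials, so only overlayOptions is logged here.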
+ log.G(ctx).Debugf("fuse.nydus-overlayfs mount options %v", overlayOptions) + // base64 to filter easily in `nydus-overlayfs` + opt := fmt.Sprintf("extraoption=%s", base64.StdEncoding.EncodeToString(no)) + overlayOptions = append(overlayOptions, opt) + + return []mount.Mount{ + { + Type: "fuse.nydus-overlayfs", + Source: "overlay", + Options: overlayOptions, + }, + }, nil +} diff --git a/snapshot/process.go b/snapshot/process.go index 7d36941c77..93e4387736 100644 --- a/snapshot/process.go +++ b/snapshot/process.go @@ -13,10 +13,10 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" - "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" snpkg "github.com/containerd/containerd/pkg/snapshotters" "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/nydus-snapshotter/config" "github.com/containerd/nydus-snapshotter/pkg/label" "github.com/containerd/nydus-snapshotter/pkg/snapshot" ) @@ -24,9 +24,8 @@ import ( // `storageLocater` provides a local storage for each handler to save their intermediates. // Different actions for different layer types func chooseProcessor(ctx context.Context, logger *logrus.Entry, - sn *snapshotter, s storage.Snapshot, - key, parent string, labels map[string]string, storageLocater func() string) (_ func() (bool, []mount.Mount, error), target string, err error) { - + sn *snapshotter, s storage.Snapshot, key, parent string, labels map[string]string, + storageLocater func() string) (_ func() (bool, []mount.Mount, error), target string, err error) { var handler func() (bool, []mount.Mount, error) // Handler to prepare a directory for containerd to download and unpacking layer. @@ -42,8 +41,8 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, remoteHandler := func(id string, labels map[string]string) func() (bool, []mount.Mount, error) { return func() (bool, []mount.Mount, error) { - logger.Debugf("Found nydus meta layer id %s", id) - if err := sn.prepareRemoteSnapshot(id, labels); err != nil { + logger.Debugf("Prepare remote snapshot %s", id) + if err := sn.fs.Mount(id, labels, &s); err != nil { return false, nil, err } @@ -52,15 +51,16 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, return false, nil, err } - log.L.Infof("Nydus remote snapshot %s is ready", id) - mounts, err := sn.remoteMounts(ctx, s, id) + logger.Infof("Nydus remote snapshot %s is ready", id) + mounts, err := sn.remoteMounts(ctx, labels, s, id) return false, mounts, err } } - target, remote := labels[label.TargetSnapshotRef] + // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd + target, isRoLayer := labels[label.TargetSnapshotRef] - if remote { + if isRoLayer { // Containerd won't consume mount slice for below snapshots switch { case label.IsNydusMetaLayer(labels): @@ -72,35 +72,54 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, case sn.fs.CheckReferrer(ctx, labels): logger.Debugf("found referenced nydus manifest") handler = skipHandler - case sn.fs.StargzEnabled(): - // Check if the blob is format of estargz - if ok, blob := sn.fs.IsStargzDataLayer(labels); ok { - err := sn.fs.PrepareStargzMetaLayer(blob, storageLocater(), labels) + default: + if sn.fs.StargzEnabled() { + // Check if the blob is format of estargz + if ok, blob := sn.fs.IsStargzDataLayer(labels); ok { + err := sn.fs.PrepareStargzMetaLayer(blob, storageLocater(), labels) + if err != nil { + logger.Errorf("prepare stargz layer of snapshot ID %s, err: %v", s.ID, err) + } else { + 
logger.Debugf("found estargz data layer") + // Mark this snapshot as stargz layer since estargz image format does not + // has special annotation or media type. + labels[label.StargzLayer] = "true" + handler = skipHandler + } + } + } + + if handler == nil && sn.fs.TarfsEnabled() { + err := sn.fs.PrepareTarfsLayer(ctx, labels, s.ID, sn.upperPath(s.ID)) if err != nil { - logger.Errorf("prepare stargz layer of snapshot ID %s, err: %v", s.ID, err) + logger.Warnf("snapshot ID %s can't be converted into tarfs, fallback to containerd, err: %v", s.ID, err) } else { - // Mark this snapshot as stargz layer since estargz image format does not - // has special annotation or media type. - labels[label.StargzLayer] = "true" + logger.Debugf("convert OCIv1 layer to tarfs") + if config.GetTarfsExportEnabled() { + _, err = sn.fs.ExportBlockData(s, true, labels, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "export layer as tarfs block device") + } + } + labels[label.NydusTarfsLayer] = "true" + handler = skipHandler } } - default: - // OCI image is also marked with "containerd.io/snapshot.ref" by Containerd - handler = defaultHandler } } else { - // Container writable layer comes into this branch. It can't be committed within this Prepare + // Container writable layer comes into this branch. + // It should not be committed during this Prepare() operation. // Hope to find bootstrap layer and prepares to start nydusd // TODO: Trying find nydus meta layer will slow down setting up rootfs to OCI images if id, info, err := sn.findMetaLayer(ctx, key); err == nil { - logger.Infof("Prepares active snapshot %s, nydusd should start afterwards", key) + logger.Infof("Prepare active Nydus snapshot %s", key) handler = remoteHandler(id, info.Labels) } if handler == nil && sn.fs.ReferrerDetectEnabled() { if id, info, err := sn.findReferrerLayer(ctx, key); err == nil { - logger.Infof("found referenced nydus manifest for image: %s", info.Labels[snpkg.TargetRefLabel]) + logger.Infof("Found referenced nydus manifest for image: %s", info.Labels[snpkg.TargetRefLabel]) metaPath := path.Join(sn.snapshotDir(id), "fs", "image.boot") if err := sn.fs.TryFetchMetadata(ctx, info.Labels, metaPath); err != nil { return nil, "", errors.Wrap(err, "try fetch metadata") @@ -111,7 +130,7 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, if handler == nil && sn.fs.StargzEnabled() { // `pInfo` must be the uppermost parent layer - _, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) + id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) if err != nil { return nil, "", errors.Wrap(err, "get parent snapshot info") } @@ -120,6 +139,34 @@ func chooseProcessor(ctx context.Context, logger *logrus.Entry, if err := sn.fs.MergeStargzMetaLayer(ctx, s); err != nil { return nil, "", errors.Wrap(err, "merge stargz meta layers") } + handler = remoteHandler(id, pInfo.Labels) + logger.Infof("Generated estargz merged meta for %s", key) + } + } + + if handler == nil && sn.fs.TarfsEnabled() { + // Merge and mount tarfs on the uppermost parent layer. 
+ id, pInfo, _, err := snapshot.GetSnapshotInfo(ctx, sn.ms, parent) + switch { + case err != nil: + logger.Warnf("Tarfs enabled but can't get parent of snapshot %s", s.ID) + case !label.IsTarfsDataLayer(pInfo.Labels): + logger.Debugf("Tarfs enabled but Parent (%s) of snapshot %s is not a tarfs layer", id, s.ID) + default: + // TODO may need to check all parrent layers, in case share layers with other images + // which have already been prepared by overlay snapshotter + + err := sn.fs.MergeTarfsLayers(s, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "merge tarfs layers") + } + if config.GetTarfsExportEnabled() { + _, err = sn.fs.ExportBlockData(s, false, labels, func(id string) string { return sn.upperPath(id) }) + if err != nil { + return nil, "", errors.Wrap(err, "export image as tarfs block device") + } + } + handler = remoteHandler(id, pInfo.Labels) } } } diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index 007ec95755..ee384f2b65 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -9,8 +9,6 @@ package snapshot import ( "context" - "encoding/base64" - "encoding/json" "fmt" "os" "path/filepath" @@ -31,15 +29,14 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/cache" "github.com/containerd/nydus-snapshotter/pkg/cgroup" v2 "github.com/containerd/nydus-snapshotter/pkg/cgroup/v2" - "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/errdefs" - "github.com/containerd/nydus-snapshotter/pkg/layout" mgr "github.com/containerd/nydus-snapshotter/pkg/manager" "github.com/containerd/nydus-snapshotter/pkg/metrics" "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" "github.com/containerd/nydus-snapshotter/pkg/pprof" "github.com/containerd/nydus-snapshotter/pkg/referrer" "github.com/containerd/nydus-snapshotter/pkg/system" + "github.com/containerd/nydus-snapshotter/pkg/tarfs" "github.com/containerd/nydus-snapshotter/pkg/store" @@ -52,12 +49,11 @@ import ( var _ snapshots.Snapshotter = &snapshotter{} type snapshotter struct { - root string - nydusdPath string - // Storing snapshots' state, parentage and other metadata - ms *storage.MetaStore + root string + nydusdPath string + ms *storage.MetaStore // Storing snapshots' state, parentage and other metadata fs *filesystem.Filesystem - manager *mgr.Manager + cgroupManager *cgroup.Manager enableNydusOverlayFS bool syncRemove bool cleanupOnClose bool @@ -101,23 +97,64 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } } - manager, err := mgr.NewManager(mgr.Opt{ - NydusdBinaryPath: cfg.DaemonConfig.NydusdPath, - Database: db, - CacheDir: cfg.CacheManagerConfig.CacheDir, - RootDir: cfg.Root, - RecoverPolicy: rp, - FsDriver: config.GetFsDriver(), - DaemonConfig: daemonConfig, - CgroupMgr: cgroupMgr, - }) - if err != nil { - return nil, errors.Wrap(err, "create daemons manager") + var blockdevManager *mgr.Manager + if cfg.Experimental.TarfsConfig.EnableTarfs { + blockdevManager, err = mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: "", + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + RecoverPolicy: rp, + FsDriver: config.FsDriverBlockdev, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create blockdevice manager") + } + } + + var fscacheManager *mgr.Manager + if config.GetFsDriver() == config.FsDriverFscache { + mgr, err := mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: 
cfg.DaemonConfig.NydusdPath, + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + RecoverPolicy: rp, + FsDriver: config.FsDriverFscache, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create fscache manager") + } + fscacheManager = mgr + } + + var fusedevManager *mgr.Manager + if config.GetFsDriver() == config.FsDriverFusedev { + mgr, err := mgr.NewManager(mgr.Opt{ + NydusdBinaryPath: cfg.DaemonConfig.NydusdPath, + Database: db, + CacheDir: cfg.CacheManagerConfig.CacheDir, + RootDir: cfg.Root, + RecoverPolicy: rp, + FsDriver: config.FsDriverFusedev, + DaemonConfig: daemonConfig, + CgroupMgr: cgroupMgr, + }) + if err != nil { + return nil, errors.Wrap(err, "create fusedev manager") + } + fusedevManager = mgr } metricServer, err := metrics.NewServer( ctx, - metrics.WithProcessManager(manager), + metrics.WithProcessManager(blockdevManager), + metrics.WithProcessManager(fscacheManager), + metrics.WithProcessManager(fusedevManager), ) if err != nil { return nil, errors.Wrap(err, "create metrics server") @@ -138,7 +175,9 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho } opts := []filesystem.NewFSOpt{ - filesystem.WithManager(manager), + filesystem.WithManager(blockdevManager), + filesystem.WithManager(fscacheManager), + filesystem.WithManager(fusedevManager), filesystem.WithNydusImageBinaryPath(cfg.DaemonConfig.NydusdPath), filesystem.WithVerifier(verifier), filesystem.WithRootMountpoint(config.GetRootMountpoint()), @@ -165,13 +204,31 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho opts = append(opts, filesystem.WithReferrerManager(referrerMgr)) } + if cfg.Experimental.TarfsConfig.EnableTarfs { + // FIXME: get the insecure option from nydusd config. + _, backendConfig := daemonConfig.StorageBackend() + tarfsMgr := tarfs.NewManager(backendConfig.SkipVerify, cfg.Experimental.TarfsConfig.TarfsHint, + cacheConfig.CacheDir, cfg.DaemonConfig.NydusImagePath, + int64(cfg.Experimental.TarfsConfig.MaxConcurrentProc)) + opts = append(opts, filesystem.WithTarfsManager(tarfsMgr)) + } + nydusFs, err := filesystem.NewFileSystem(ctx, opts...) 
if err != nil { return nil, errors.Wrap(err, "initialize filesystem thin layer") } if config.IsSystemControllerEnabled() { - managers := []*mgr.Manager{manager} + managers := []*mgr.Manager{} + if blockdevManager != nil { + managers = append(managers, blockdevManager) + } + if fscacheManager != nil { + managers = append(managers, fscacheManager) + } + if fusedevManager != nil { + managers = append(managers, fusedevManager) + } systemController, err := system.NewSystemController(nydusFs, managers, config.SystemControllerAddress()) if err != nil { return nil, errors.Wrap(err, "create system controller") @@ -214,7 +271,7 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho syncRemove := cfg.SnapshotsConfig.SyncRemove if config.GetFsDriver() == config.FsDriverFscache { - log.L.Infof("for fscache mode enable syncRemove") + log.L.Infof("enable syncRemove for fscache mode") syncRemove = true } @@ -224,7 +281,7 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho ms: ms, syncRemove: syncRemove, fs: nydusFs, - manager: manager, + cgroupManager: cgroupMgr, enableNydusOverlayFS: cfg.SnapshotsConfig.EnableNydusOverlayFS, cleanupOnClose: cfg.CleanupOnClose, }, nil @@ -266,24 +323,28 @@ func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, e return snapshots.Usage{}, err } - if info.Kind == snapshots.KindActive { + switch info.Kind { + case snapshots.KindActive: upperPath := o.upperPath(id) du, err := fs.DiskUsage(ctx, upperPath) if err != nil { return snapshots.Usage{}, err } usage = snapshots.Usage(du) - } - - // Blob layers are all committed snapshots - if info.Kind == snapshots.KindCommitted && label.IsNydusDataLayer(info.Labels) { - blobDigest := info.Labels[snpkg.TargetLayerDigestLabel] - // Try to get nydus meta layer/snapshot disk usage - cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) - if err != nil { - return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + case snapshots.KindCommitted: + // Caculate disk space usage under cacheDir of committed snapshots. + if label.IsNydusDataLayer(info.Labels) || label.IsTarfsDataLayer(info.Labels) { + if blobDigest, ok := info.Labels[snpkg.TargetLayerDigestLabel]; ok { + // Try to get nydus meta layer/snapshot disk usage + cacheUsage, err := o.fs.CacheUsage(ctx, blobDigest) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "try to get snapshot %s nydus disk usage", id) + } + usage.Add(cacheUsage) + } } - usage.Add(cacheUsage) + case snapshots.KindUnknown: + case snapshots.KindView: } return usage, nil @@ -305,41 +366,52 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } log.L.Infof("[Mounts] snapshot %s ID %s Kind %s", key, id, info.Kind) - if label.IsNydusMetaLayer(info.Labels) { - err = o.fs.WaitUntilReady(id) - if err != nil { - // Skip waiting if clients is unpacking nydus artifacts to `mounts` - // For example, nydus-snapshotter's client like Buildkit is calling snapshotter in below workflow: - // 1. [Prepare] snapshot for the uppermost layer - bootstrap - // 2. [Mounts] - // 3. Unpacking by applying the mounts, then we get bootstrap in its path position. - // In above steps, no container write layer is called to set up from nydus-snapshotter. So it has no - // chance to start nydusd, during which the Rafs instance is created. 
- if !errors.Is(err, errdefs.ErrNotFound) { - return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", id, err) + switch info.Kind { + case snapshots.KindView: + if label.IsNydusMetaLayer(info.Labels) { + err = o.fs.WaitUntilReady(id) + if err != nil { + // Skip waiting if clients is unpacking nydus artifacts to `mounts` + // For example, nydus-snapshotter's client like Buildkit is calling snapshotter in below workflow: + // 1. [Prepare] snapshot for the uppermost layer - bootstrap + // 2. [Mounts] + // 3. Unpacking by applying the mounts, then we get bootstrap in its path position. + // In above steps, no container write layer is called to set up from nydus-snapshotter. So it has no + // chance to start nydusd, during which the Rafs instance is created. + if !errors.Is(err, errdefs.ErrNotFound) { + return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", id, err) + } + } else { + needRemoteMounts = true + metaSnapshotID = id } - } else { + } else if label.IsTarfsDataLayer(info.Labels) { needRemoteMounts = true metaSnapshotID = id } - } - - if info.Kind == snapshots.KindActive && info.Parent != "" { - pKey := info.Parent - if pID, info, _, err := snapshot.GetSnapshotInfo(ctx, o.ms, pKey); err == nil { - if label.IsNydusMetaLayer(info.Labels) { - if err = o.fs.WaitUntilReady(pID); err != nil { - return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", pID, err) + case snapshots.KindActive: + if info.Parent != "" { + pKey := info.Parent + if pID, info, _, err := snapshot.GetSnapshotInfo(ctx, o.ms, pKey); err == nil { + if label.IsNydusMetaLayer(info.Labels) { + if err = o.fs.WaitUntilReady(pID); err != nil { + return nil, errors.Wrapf(err, "mounts: snapshot %s is not ready, err: %v", pID, err) + } + needRemoteMounts = true + metaSnapshotID = pID + } else if o.fs.TarfsEnabled() && label.IsTarfsDataLayer(info.Labels) { + needRemoteMounts = true + metaSnapshotID = pID } - needRemoteMounts = true - metaSnapshotID = pID + } else { + return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } - } else { - return nil, errors.Wrapf(err, "get parent snapshot info, parent key=%q", pKey) } + case snapshots.KindCommitted: + case snapshots.KindUnknown: } - if o.fs.ReferrerDetectEnabled() { + if o.fs.ReferrerDetectEnabled() && !needRemoteMounts { if id, _, err := o.findReferrerLayer(ctx, key); err == nil { needRemoteMounts = true metaSnapshotID = id @@ -352,7 +424,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, er } if needRemoteMounts { - return o.remoteMounts(ctx, *snap, metaSnapshotID) + return o.remoteMounts(ctx, info.Labels, *snap, metaSnapshotID) } return o.mounts(ctx, info.Labels, *snap) @@ -410,7 +482,7 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap // Nydusd might not be running. We should run nydusd to reflect the rootfs. 
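+	// WaitUntilReady returns ErrNotFound when no Rafs instance exists yet; start one below in that case.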
if err = o.fs.WaitUntilReady(pID); err != nil { if errors.Is(err, errdefs.ErrNotFound) { - if err := o.fs.Mount(pID, pInfo.Labels); err != nil { + if err := o.fs.Mount(pID, pInfo.Labels, nil); err != nil { return nil, errors.Wrapf(err, "mount rafs, instance id %s", pID) } @@ -434,10 +506,33 @@ func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snap return nil, err } + if o.fs.TarfsEnabled() && label.IsTarfsDataLayer(pInfo.Labels) { + if err := o.fs.MergeTarfsLayers(s, func(id string) string { return o.upperPath(id) }); err != nil { + return nil, errors.Wrapf(err, "tarfs merge fail %s", pID) + } + if config.GetTarfsExportEnabled() { + updateFields, err := o.fs.ExportBlockData(s, false, pInfo.Labels, func(id string) string { return o.upperPath(id) }) + if err != nil { + return nil, errors.Wrap(err, "export tarfs as block image") + } + if len(updateFields) > 0 { + _, err = o.Update(ctx, pInfo, updateFields...) + if err != nil { + return nil, errors.Wrapf(err, "update snapshot label information") + } + } + } + if err := o.fs.Mount(pID, pInfo.Labels, &s); err != nil { + return nil, errors.Wrapf(err, "mount tarfs, snapshot id %s", pID) + } + needRemoteMounts = true + metaSnapshotID = pID + } + log.L.Infof("[View] snapshot with key %s parent %s", key, parent) if needRemoteMounts { - return o.remoteMounts(ctx, s, metaSnapshotID) + return o.remoteMounts(ctx, base.Labels, s, metaSnapshotID) } return o.mounts(ctx, base.Labels, s) @@ -460,21 +555,18 @@ func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snap }() // grab the existing id - id, info, _, err := storage.GetInfo(ctx, key) + id, _, _, err := storage.GetInfo(ctx, key) if err != nil { return err } log.L.Infof("[Commit] snapshot with key %q snapshot id %s", key, id) - var usage fs.Usage - // For OCI compatibility, we calculate disk usage and commit the usage to DB. - // Nydus disk usage calculation will be delayed until containerd queries. - if !label.IsNydusMetaLayer(info.Labels) && !label.IsNydusDataLayer(info.Labels) { - usage, err = fs.DiskUsage(ctx, o.upperPath(id)) - if err != nil { - return err - } + // For OCI compatibility, we calculate disk usage of the snapshotDir and commit the usage to DB. + // Nydus disk usage under the cacheDir will be delayed until containerd queries. 
+ usage, err := fs.DiskUsage(ctx, o.upperPath(id)) + if err != nil { + return err } if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil { @@ -517,6 +609,8 @@ func (o *snapshotter) Remove(ctx context.Context, key string) error { if label.IsNydusMetaLayer(info.Labels) { log.L.Infof("[Remove] nydus meta snapshot with key %s snapshot id %s", key, id) + } else if label.IsTarfsDataLayer(info.Labels) { + log.L.Infof("[Remove] nydus tarfs snapshot with key %s snapshot id %s", key, id) } if info.Kind == snapshots.KindCommitted { @@ -581,8 +675,8 @@ func (o *snapshotter) Close() error { o.fs.TryStopSharedDaemon() - if o.manager.CgroupMgr != nil { - if err := o.manager.CgroupMgr.Delete(); err != nil { + if o.cgroupManager != nil { + if err := o.cgroupManager.Delete(); err != nil { log.L.Errorf("failed to destroy cgroup, err %v", err) } } @@ -717,24 +811,23 @@ func overlayMount(options []string) []mount.Mount { } } -func (o *snapshotter) prepareRemoteSnapshot(id string, labels map[string]string) error { - return o.fs.Mount(id, labels) -} - // `s` is the upmost snapshot and `id` refers to the nydus meta snapshot // `s` and `id` can represent a different layer, it's useful when View an image -func (o *snapshotter) remoteMounts(ctx context.Context, s storage.Snapshot, id string) ([]mount.Mount, error) { +func (o *snapshotter) remoteMounts(ctx context.Context, labels map[string]string, s storage.Snapshot, id string) ([]mount.Mount, error) { var overlayOptions []string - lowerPaths := make([]string, 0, 8) if s.Kind == snapshots.KindActive { overlayOptions = append(overlayOptions, fmt.Sprintf("workdir=%s", o.workPath(s.ID)), fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)), ) + if _, ok := labels[label.OverlayfsVolatileOpt]; ok { + overlayOptions = append(overlayOptions, "volatile") + } } else if len(s.ParentIDs) == 1 { return bindMount(o.upperPath(s.ParentIDs[0]), "ro"), nil } + lowerPaths := make([]string, 0, 8) lowerPathNydus, err := o.lowerPath(id) if err != nil { return nil, errors.Wrapf(err, "failed to locate overlay lowerdir") @@ -761,82 +854,6 @@ func (o *snapshotter) remoteMounts(ctx context.Context, s storage.Snapshot, id s return overlayMount(overlayOptions), nil } -type ExtraOption struct { - Source string `json:"source"` - Config string `json:"config"` - Snapshotdir string `json:"snapshotdir"` - Version string `json:"fs_version"` -} - -func (o *snapshotter) remoteMountWithExtraOptions(ctx context.Context, s storage.Snapshot, id string, overlayOptions []string) ([]mount.Mount, error) { - source, err := o.fs.BootstrapFile(id) - if err != nil { - return nil, err - } - - instance := daemon.RafsSet.Get(id) - daemon, err := o.fs.GetDaemonByID(instance.DaemonID) - if err != nil { - return nil, errors.Wrapf(err, "get daemon with ID %s", instance.DaemonID) - } - - var c daemonconfig.DaemonConfig - if daemon.IsSharedDaemon() { - c, err = daemonconfig.NewDaemonConfig(daemon.States.FsDriver, daemon.ConfigFile(instance.SnapshotID)) - if err != nil { - return nil, errors.Wrapf(err, "Failed to load instance configuration %s", - daemon.ConfigFile(instance.SnapshotID)) - } - } else { - c = daemon.Config - } - configContent, err := c.DumpString() - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to marshal config") - } - - // get version from bootstrap - f, err := os.Open(source) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to open bootstrap") - } - defer f.Close() - header := make([]byte, 4096) - sz, 
err := f.Read(header) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: check bootstrap version: failed to read bootstrap") - } - version, err := layout.DetectFsVersion(header[0:sz]) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to detect filesystem version") - } - - // when enable nydus-overlayfs, return unified mount slice for runc and kata - extraOption := &ExtraOption{ - Source: source, - Config: configContent, - Snapshotdir: o.snapshotDir(s.ID), - Version: version, - } - no, err := json.Marshal(extraOption) - if err != nil { - return nil, errors.Wrapf(err, "remoteMounts: failed to marshal NydusOption") - } - // XXX: Log options without extraoptions as it might contain secrets. - log.G(ctx).Debugf("fuse.nydus-overlayfs mount options %v", overlayOptions) - // base64 to filter easily in `nydus-overlayfs` - opt := fmt.Sprintf("extraoption=%s", base64.StdEncoding.EncodeToString(no)) - overlayOptions = append(overlayOptions, opt) - - return []mount.Mount{ - { - Type: "fuse.nydus-overlayfs", - Source: "overlay", - Options: overlayOptions, - }, - }, nil -} - func (o *snapshotter) mounts(ctx context.Context, labels map[string]string, s storage.Snapshot) ([]mount.Mount, error) { if len(s.ParentIDs) == 0 { // if we only have one layer/no parents then just return a bind mount as overlay will not work @@ -948,6 +965,12 @@ func (o *snapshotter) cleanupSnapshotDirectory(ctx context.Context, dir string) log.G(ctx).WithError(err).WithField("dir", dir).Error("failed to unmount") } + if o.fs.TarfsEnabled() { + if err := o.fs.DetachTarfsLayer(snapshotID); err != nil && !os.IsNotExist(err) { + log.G(ctx).WithError(err).Errorf("failed to detach tarfs layer for snapshot %s", snapshotID) + } + } + if err := os.RemoveAll(dir); err != nil { return errors.Wrapf(err, "remove directory %q", dir) }
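For reviewers, a minimal sketch (not part of this change) of how a mount helper in the spirit of `nydus-overlayfs` could recover the payload that `remoteMountWithExtraOptions` appends. The `extraOption` mirror struct and the `extraoption=` option name follow the code above; the helper name and sample paths are illustrative only:

```
package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
	"strings"
)

// extraOption mirrors the ExtraOption struct marshalled by remoteMountWithExtraOptions.
type extraOption struct {
	Source      string `json:"source"`
	Config      string `json:"config"`
	Snapshotdir string `json:"snapshotdir"`
	Version     string `json:"fs_version"`
}

// parseExtraOption looks for the "extraoption=" entry among overlay mount options
// and decodes its base64-encoded JSON body.
func parseExtraOption(options []string) (*extraOption, error) {
	for _, opt := range options {
		if !strings.HasPrefix(opt, "extraoption=") {
			continue
		}
		raw, err := base64.StdEncoding.DecodeString(strings.TrimPrefix(opt, "extraoption="))
		if err != nil {
			return nil, err
		}
		var eo extraOption
		if err := json.Unmarshal(raw, &eo); err != nil {
			return nil, err
		}
		return &eo, nil
	}
	return nil, fmt.Errorf("no extraoption entry found")
}

func main() {
	// Build an option the same way the snapshotter does, then parse it back.
	payload, _ := json.Marshal(extraOption{Source: "/path/to/image.boot", Config: "{}", Snapshotdir: "/path/to/snapshotdir", Version: "6"})
	opts := []string{"lowerdir=/a:/b", "extraoption=" + base64.StdEncoding.EncodeToString(payload)}
	eo, err := parseExtraOption(opts)
	fmt.Println(eo, err)
}
```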