From 0527ca96202633625f79dfe06277b96cfb522000 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 25 Jul 2024 14:18:34 -0400 Subject: [PATCH] Initialize a containers-storage: owned by bootc, use for bound images Closes: https://github.com/containers/bootc/issues/721 - Initialize a containers-storage: instance at install time (that defaults to empty) - Open it at the same time we open the ostree repo/sysroot - Change bound images to use this We are *NOT* yet changing the base bootc image pull to use this. That's an obvious next step (xref https://github.com/containers/bootc/pull/215 ) but will come later. Signed-off-by: Colin Walters --- Makefile | 1 + .../experimental-logically-bound-images.md | 21 +- lib/src/boundimage.rs | 43 ++- lib/src/cli.rs | 66 +++- lib/src/deploy.rs | 104 +++--- lib/src/image.rs | 25 +- lib/src/imgstorage.rs | 295 ++++++++++++++++++ lib/src/install.rs | 69 ++-- lib/src/lib.rs | 2 +- lib/src/podman.rs | 11 +- lib/src/store/mod.rs | 14 +- tests-integration/src/install.rs | 1 + .../010-test-bootc-container-store.nu | 12 + tests/booted/test-logically-bound.nu | 2 +- 14 files changed, 550 insertions(+), 116 deletions(-) create mode 100644 lib/src/imgstorage.rs create mode 100644 tests/booted/readonly/010-test-bootc-container-store.nu diff --git a/Makefile b/Makefile index d3e82e0c7..0b8a86305 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ install: install -D -m 0755 -t $(DESTDIR)$(prefix)/bin target/release/bootc install -d -m 0755 $(DESTDIR)$(prefix)/lib/bootc/bound-images.d install -d -m 0755 $(DESTDIR)$(prefix)/lib/bootc/kargs.d + ln -s /sysroot/ostree/bootc/storage $(DESTDIR)$(prefix)/lib/bootc/storage install -d -m 0755 $(DESTDIR)$(prefix)/lib/systemd/system-generators/ ln -f $(DESTDIR)$(prefix)/bin/bootc $(DESTDIR)$(prefix)/lib/systemd/system-generators/bootc-systemd-generator install -d $(DESTDIR)$(prefix)/lib/bootc/install diff --git a/docs/src/experimental-logically-bound-images.md 
b/docs/src/experimental-logically-bound-images.md index 8f3046f9d..306c9a2a2 100644 --- a/docs/src/experimental-logically-bound-images.md +++ b/docs/src/experimental-logically-bound-images.md @@ -14,7 +14,11 @@ This experimental feature enables an association of container "app" images to a ## Using logically bound images -Each image is defined in a [Podman Quadlet](https://docs.podman.io/en/latest/markdown/podman-systemd.unit.5.html) `.image` or `.container` file. An image is selected to be bound by creating a symlink in the `/usr/lib/bootc/bound-images.d` directory pointing to a `.image` or `.container` file. With these defined, during a `bootc upgrade` or `bootc switch` the bound images defined in the new bootc image will be automatically pulled via podman. +Each image is defined in a [Podman Quadlet](https://docs.podman.io/en/latest/markdown/podman-systemd.unit.5.html) `.image` or `.container` file. An image is selected to be bound by creating a symlink in the `/usr/lib/bootc/bound-images.d` directory pointing to a `.image` or `.container` file. + +With these defined, during a `bootc upgrade` or `bootc switch` the bound images defined in the new bootc image will be automatically pulled into the bootc image storage, and are available to container runtimes such as podman by explicitly configuring them to point to the bootc storage as an "additional image store", via e.g.: + +`podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage run ...` An example Containerfile @@ -28,8 +32,17 @@ RUN ln -s /usr/share/containers/systemd/my-app.image /usr/lib/bootc/bound-images ln -s /usr/share/containers/systemd/my-app.image /usr/lib/bootc/bound-images.d/my-app.image ``` +In the `.container` definition, you should use: + +``` +GlobalArgs=--storage-opt=additionalimagestore=/usr/lib/bootc/storage +``` + +## Pull secret + +Images are fetched using the global bootc pull secret by default (`/etc/ostree/auth.json`). 
It is not yet supported to configure `PullSecret` in these image definitions. + ## Limitations -- Currently, only the Image field of a `.image` or `.container` file is used to pull the image. Any other field is ignored. -- There is no cleanup during rollback. -- Images are subject to default garbage collection semantics; e.g. a background job pruning images without a running container may prune them. They can also be manually removed via e.g. podman rmi. +- Currently, only the Image field of a `.image` or `.container` file is used to pull the image; per above not even `PullSecret=` is supported. +- Images are not yet garbage collected diff --git a/lib/src/boundimage.rs b/lib/src/boundimage.rs index 9a0d82e68..71df834b8 100644 --- a/lib/src/boundimage.rs +++ b/lib/src/boundimage.rs @@ -5,7 +5,6 @@ //! pre-pulled (and in the future, pinned) before a new image root //! is considered ready. -use crate::task::Task; use anyhow::{Context, Result}; use camino::Utf8Path; use cap_std_ext::cap_std::fs::Dir; @@ -13,7 +12,9 @@ use cap_std_ext::dirext::CapStdExtDirExt; use fn_error_context::context; use ostree_ext::containers_image_proxy; use ostree_ext::ostree::Deployment; -use ostree_ext::sysroot::SysrootLock; + +use crate::imgstorage::PullMode; +use crate::store::Storage; /// The path in a root for bound images; this directory should only contain /// symbolic links to `.container` or `.image` files. @@ -26,8 +27,8 @@ const BOUND_IMAGE_DIR: &str = "usr/lib/bootc/bound-images.d"; /// other pull options. #[derive(Debug, PartialEq, Eq)] pub(crate) struct BoundImage { - image: String, - auth_file: Option, + pub(crate) image: String, + pub(crate) auth_file: Option, } #[derive(Debug, PartialEq, Eq)] @@ -37,10 +38,18 @@ pub(crate) struct ResolvedBoundImage { } /// Given a deployment, pull all container images it references. 
-pub(crate) fn pull_bound_images(sysroot: &SysrootLock, deployment: &Deployment) -> Result<()> { +pub(crate) async fn pull_bound_images(sysroot: &Storage, deployment: &Deployment) -> Result<()> { + let bound_images = query_bound_images_for_deployment(sysroot, deployment)?; + pull_images(sysroot, bound_images).await +} + +#[context("Querying bound images")] +pub(crate) fn query_bound_images_for_deployment( + sysroot: &Storage, + deployment: &Deployment, +) -> Result> { let deployment_root = &crate::utils::deployment_fd(sysroot, deployment)?; - let bound_images = query_bound_images(deployment_root)?; - pull_images(deployment_root, bound_images) + query_bound_images(deployment_root) } #[context("Querying bound images")] @@ -133,18 +142,20 @@ fn parse_container_file(file_contents: &tini::Ini) -> Result { Ok(bound_image) } -#[context("pull bound images")] -pub(crate) fn pull_images(_deployment_root: &Dir, bound_images: Vec) -> Result<()> { +#[context("Pulling bound images")] +pub(crate) async fn pull_images(sysroot: &Storage, bound_images: Vec) -> Result<()> { tracing::debug!("Pulling bound images: {}", bound_images.len()); //TODO: do this in parallel for bound_image in bound_images { - let mut task = Task::new("Pulling bound image", "/usr/bin/podman") - .arg("pull") - .arg(&bound_image.image); - if let Some(auth_file) = &bound_image.auth_file { - task = task.arg("--authfile").arg(auth_file); - } - task.run()?; + let image = &bound_image.image; + let desc = format!("Updating bound image: {image}"); + crate::utils::async_task_with_spinner(&desc, async move { + sysroot + .imgstore + .pull(&bound_image.image, PullMode::IfNotExists) + .await + }) + .await?; } Ok(()) diff --git a/lib/src/cli.rs b/lib/src/cli.rs index 31707cfec..1bde26ed9 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -199,6 +199,31 @@ pub(crate) enum ContainerOpts { Lint, } +/// Subcommands which operate on images. 
+#[derive(Debug, clap::Subcommand, PartialEq, Eq)]
+pub(crate) enum ImageCmdOpts {
+    /// Wrapper for `podman image list` in bootc storage.
+    List {
+        #[clap(allow_hyphen_values = true)]
+        args: Vec<OsString>,
+    },
+    /// Wrapper for `podman image build` in bootc storage.
+    Build {
+        #[clap(allow_hyphen_values = true)]
+        args: Vec<OsString>,
+    },
+    /// Wrapper for `podman image pull` in bootc storage.
+    Pull {
+        #[clap(allow_hyphen_values = true)]
+        args: Vec<OsString>,
+    },
+    /// Wrapper for `podman image push` in bootc storage.
+    Push {
+        #[clap(allow_hyphen_values = true)]
+        args: Vec<OsString>,
+    },
+}
+
 /// Subcommands which operate on images.
 #[derive(Debug, clap::Subcommand, PartialEq, Eq)]
 pub(crate) enum ImageOpts {
@@ -232,6 +257,16 @@ pub(crate) enum ImageOpts {
         /// this will make the image accessible via e.g. `podman run localhost/bootc` and for builds.
         target: Option<String>,
     },
+    /// Copy a container image from the default `containers-storage:` to the bootc-owned container storage.
+    PullFromDefaultStorage {
+        /// The image to pull
+        image: String,
+    },
+    /// Wrapper for selected `podman image` subcommands in bootc storage.
+    ///
+    /// Images in the bootc storage are distinct from those stored via e.g. `podman`.
+ #[clap(subcommand)] + Cmd(ImageCmdOpts), } /// Hidden, internal only options @@ -247,6 +282,8 @@ pub(crate) enum InternalsOpts { FixupEtcFstab, /// Should only be used by `make update-generated` PrintJsonSchema, + /// Perform cleanup actions + Cleanup, } impl InternalsOpts { @@ -430,10 +467,12 @@ pub(crate) async fn get_locked_sysroot() -> Result Result { + let global_run = Dir::open_ambient_dir("/run", cap_std::ambient_authority())?; let sysroot = get_locked_sysroot().await?; - crate::store::Storage::new(sysroot) + crate::store::Storage::new(sysroot, &global_run) } #[context("Querying root privilege")] @@ -798,6 +837,27 @@ async fn run_from_opt(opt: Opt) -> Result<()> { ImageOpts::CopyToStorage { source, target } => { crate::image::push_entrypoint(source.as_deref(), target.as_deref()).await } + ImageOpts::PullFromDefaultStorage { image } => { + let sysroot = get_storage().await?; + sysroot.imgstore.pull_from_host_storage(&image).await + } + ImageOpts::Cmd(opt) => { + let sysroot = get_storage().await?; + match opt { + ImageCmdOpts::List { args } => { + crate::image::imgcmd_entrypoint(&sysroot.imgstore, "list", &args).await + } + ImageCmdOpts::Build { args } => { + crate::image::imgcmd_entrypoint(&sysroot.imgstore, "build", &args).await + } + ImageCmdOpts::Pull { args } => { + crate::image::imgcmd_entrypoint(&sysroot.imgstore, "pull", &args).await + } + ImageCmdOpts::Push { args } => { + crate::image::imgcmd_entrypoint(&sysroot.imgstore, "push", &args).await + } + } + } }, #[cfg(feature = "install")] Opt::Install(opts) => match opts { @@ -831,6 +891,10 @@ async fn run_from_opt(opt: Opt) -> Result<()> { serde_json::to_writer_pretty(&mut stdout, &schema)?; Ok(()) } + InternalsOpts::Cleanup => { + let sysroot = get_storage().await?; + crate::deploy::cleanup(&sysroot).await + } }, #[cfg(feature = "docgen")] Opt::Man(manopts) => crate::docgen::generate_manpages(&manopts.directory), diff --git a/lib/src/deploy.rs b/lib/src/deploy.rs index e2b065f72..836f3de1e 100644 
--- a/lib/src/deploy.rs +++ b/lib/src/deploy.rs @@ -2,6 +2,7 @@ //! //! Create a merged filesystem tree with the image and mounted configmaps. +use std::collections::HashSet; use std::io::{BufRead, Write}; use anyhow::Ok; @@ -268,53 +269,76 @@ pub(crate) async fn pull( Ok(Box::new((*import).into())) } +/// Gather all bound images in all deployments, then prune the image store, +/// using the gathered images as the roots (that will not be GC'd). +pub(crate) async fn prune_container_store(sysroot: &Storage) -> Result<()> { + let deployments = sysroot.deployments(); + let mut all_bound_images = Vec::new(); + for deployment in deployments { + let bound = crate::boundimage::query_bound_images_for_deployment(sysroot, &deployment)?; + all_bound_images.extend(bound.into_iter()); + } + // Convert to a hashset of just the image names + let image_names = HashSet::from_iter(all_bound_images.iter().map(|img| img.image.as_str())); + let pruned = sysroot.imgstore.prune_except_roots(&image_names).await?; + tracing::debug!("Pruned images: {}", pruned.len()); + Ok(()) +} + pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { + let bound_prune = prune_container_store(sysroot); + // We create clones (just atomic reference bumps) here to move to the thread. let repo = sysroot.repo(); let sysroot = sysroot.sysroot.clone(); - ostree_ext::tokio_util::spawn_blocking_cancellable_flatten(move |cancellable| { - let locked_sysroot = &SysrootLock::from_assumed_locked(&sysroot); - let cancellable = Some(cancellable); - let repo = &repo; - let txn = repo.auto_transaction(cancellable)?; - let repo = txn.repo(); - - // Regenerate our base references. First, we delete the ones that exist - for ref_entry in repo - .list_refs_ext( - Some(BASE_IMAGE_PREFIX), - ostree::RepoListRefsExtFlags::NONE, - cancellable, - ) - .context("Listing refs")? 
- .keys() - { - repo.transaction_set_refspec(ref_entry, None); - } + let repo_prune = + ostree_ext::tokio_util::spawn_blocking_cancellable_flatten(move |cancellable| { + let locked_sysroot = &SysrootLock::from_assumed_locked(&sysroot); + let cancellable = Some(cancellable); + let repo = &repo; + let txn = repo.auto_transaction(cancellable)?; + let repo = txn.repo(); + + // Regenerate our base references. First, we delete the ones that exist + for ref_entry in repo + .list_refs_ext( + Some(BASE_IMAGE_PREFIX), + ostree::RepoListRefsExtFlags::NONE, + cancellable, + ) + .context("Listing refs")? + .keys() + { + repo.transaction_set_refspec(ref_entry, None); + } + + // Then, for each deployment which is derived (e.g. has configmaps) we synthesize + // a base ref to ensure that it's not GC'd. + for (i, deployment) in sysroot.deployments().into_iter().enumerate() { + let commit = deployment.csum(); + if let Some(base) = get_base_commit(repo, &commit)? { + repo.transaction_set_refspec(&format!("{BASE_IMAGE_PREFIX}/{i}"), Some(&base)); + } + } - // Then, for each deployment which is derived (e.g. has configmaps) we synthesize - // a base ref to ensure that it's not GC'd. - for (i, deployment) in sysroot.deployments().into_iter().enumerate() { - let commit = deployment.csum(); - if let Some(base) = get_base_commit(repo, &commit)? 
{ - repo.transaction_set_refspec(&format!("{BASE_IMAGE_PREFIX}/{i}"), Some(&base)); + let pruned = + ostree_container::deploy::prune(locked_sysroot).context("Pruning images")?; + if !pruned.is_empty() { + let size = glib::format_size(pruned.objsize); + println!( + "Pruned images: {} (layers: {}, objsize: {})", + pruned.n_images, pruned.n_layers, size + ); + } else { + tracing::debug!("Nothing to prune"); } - } - let pruned = ostree_container::deploy::prune(locked_sysroot).context("Pruning images")?; - if !pruned.is_empty() { - let size = glib::format_size(pruned.objsize); - println!( - "Pruned images: {} (layers: {}, objsize: {})", - pruned.n_images, pruned.n_layers, size - ); - } else { - tracing::debug!("Nothing to prune"); - } + Ok(()) + }); - Ok(()) - }) - .await + // We run these in parallel mostly because we can. + tokio::try_join!(repo_prune, bound_prune)?; + Ok(()) } /// If commit is a bootc-derived commit (e.g. has configmaps), return its base. @@ -399,7 +423,7 @@ pub(crate) async fn stage( ) .await?; - crate::boundimage::pull_bound_images(sysroot, &deployment)?; + crate::boundimage::pull_bound_images(sysroot, &deployment).await?; crate::deploy::cleanup(sysroot).await?; println!("Queued for next boot: {:#}", spec.image); diff --git a/lib/src/image.rs b/lib/src/image.rs index 296f41e7d..e9d7051ef 100644 --- a/lib/src/image.rs +++ b/lib/src/image.rs @@ -6,19 +6,29 @@ use anyhow::{Context, Result}; use fn_error_context::context; use ostree_ext::container::{ImageReference, Transport}; +use crate::{imgstorage::Storage, utils::CommandRunExt}; + /// The name of the image we push to containers-storage if nothing is specified. 
const IMAGE_DEFAULT: &str = "localhost/bootc"; #[context("Listing images")] pub(crate) async fn list_entrypoint() -> Result<()> { - let sysroot = crate::cli::get_locked_sysroot().await?; + let sysroot = crate::cli::get_storage().await?; let repo = &sysroot.repo(); let images = ostree_ext::container::store::list_images(repo).context("Querying images")?; + println!("# Host images"); for image in images { println!("{image}"); } + println!(""); + + println!("# Logically bound images"); + let mut listcmd = sysroot.imgstore.new_image_cmd()?; + listcmd.arg("list"); + listcmd.run()?; + Ok(()) } @@ -64,3 +74,16 @@ pub(crate) async fn push_entrypoint(source: Option<&str>, target: Option<&str>) println!("Pushed: {target} {r}"); Ok(()) } + +/// Thin wrapper for invoking `podman image ` but set up for our internal +/// image store (as distinct from /var/lib/containers default). +pub(crate) async fn imgcmd_entrypoint( + storage: &Storage, + arg: &str, + args: &[std::ffi::OsString], +) -> std::result::Result<(), anyhow::Error> { + let mut cmd = storage.new_image_cmd()?; + cmd.arg(arg); + cmd.args(args); + cmd.run() +} diff --git a/lib/src/imgstorage.rs b/lib/src/imgstorage.rs new file mode 100644 index 000000000..c8007bfca --- /dev/null +++ b/lib/src/imgstorage.rs @@ -0,0 +1,295 @@ +//! # bootc-managed container storage +//! +//! The default storage for this project uses ostree, canonically storing all of its state in +//! `/sysroot/ostree`. +//! +//! This containers-storage: which canonically lives in `/sysroot/ostree/bootc`. 
+ +use std::collections::HashSet; +use std::io::Seek; +use std::os::unix::process::CommandExt; +use std::process::{Command, Stdio}; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use camino::Utf8Path; +use cap_std_ext::cap_std; +use cap_std_ext::cap_std::fs::Dir; +use cap_std_ext::cap_tempfile::TempDir; +use cap_std_ext::cmdext::CapStdExtCommandExt; +use cap_std_ext::dirext::CapStdExtDirExt; +use fn_error_context::context; +use std::os::fd::OwnedFd; +use tokio::process::Command as AsyncCommand; + +use crate::utils::{AsyncCommandRunExt, CommandRunExt, ExitStatusExt}; + +// Pass only 100 args at a time just to avoid potentially overflowing argument +// vectors; not that this should happen in reality, but just in case. +const SUBCMD_ARGV_CHUNKING: usize = 100; + +/// Global directory path which we use for podman to point +/// it at our storage. Unfortunately we can't yet use the +/// /proc/self/fd/N trick because it currently breaks due +/// to how the untar process is forked in the child. +pub(crate) const STORAGE_ALIAS_DIR: &str = "/run/bootc/storage"; +/// We pass this via /proc/self/fd to the child process. +const STORAGE_RUN_FD: i32 = 3; + +/// The path to the storage, relative to the physical system root. 
+pub(crate) const SUBPATH: &str = "ostree/bootc/storage"; +/// The path to the "runroot" with transient runtime state; this is +/// relative to the /run directory +const RUNROOT: &str = "bootc/storage"; +pub(crate) struct Storage { + /// The root directory + sysroot: Dir, + /// The location of container storage + storage_root: Dir, + #[allow(dead_code)] + /// Our runtime state + run: Dir, +} + +#[derive(Debug, PartialEq, Eq)] +pub(crate) enum PullMode { + /// Pull only if the image is not present + IfNotExists, + /// Always check for an update + #[allow(dead_code)] + Always, +} + +#[allow(unsafe_code)] +#[context("Binding storage roots")] +fn bind_storage_roots(cmd: &mut Command, storage_root: &Dir, run_root: &Dir) -> Result<()> { + // podman requires an absolute path, for two reasons right now: + // - It writes the file paths into `db.sql`, a sqlite database for unknown reasons + // - It forks helper binaries, so just giving it /proc/self/fd won't work as + // those helpers may not get the fd passed. (which is also true of skopeo) + // We create a new mount namespace, which also has the helpful side effect + // of automatically cleaning up the global bind mount that the storage stack + // creates. + + let storage_root = Arc::new(storage_root.try_clone().context("Cloning storage root")?); + let run_root: Arc = Arc::new(run_root.try_clone().context("Cloning runroot")?.into()); + // SAFETY: All the APIs we call here are safe to invoke between fork and exec. + unsafe { + cmd.pre_exec(move || { + use rustix::fs::{Mode, OFlags}; + // For reasons I don't understand, we can't just `mount("/proc/self/fd/N", "/path/to/target")` + // but it *does* work to fchdir(fd) + mount(".", "/path/to/target"). + // I think it may be that mount doesn't like operating on the magic links? + // This trick only works if we set our working directory to the target *before* + // creating the new namespace too. 
+ // + // I think we may be hitting this: + // + // " EINVAL A bind operation (MS_BIND) was requested where source referred a mount namespace magic link (i.e., a /proc/pid/ns/mnt magic link or a bind mount to such a link) and the propagation type of the parent mount of target was + // MS_SHARED, but propagation of the requested bind mount could lead to a circular dependency that might prevent the mount namespace from ever being freed." + // + // But...how did we avoid that circular dependency by using the process cwd? + // + // I tried making the mounts recursively private, but that didn't help. + let oldwd = rustix::fs::open( + ".", + OFlags::DIRECTORY | OFlags::CLOEXEC | OFlags::RDONLY, + Mode::empty(), + )?; + rustix::process::fchdir(&storage_root)?; + rustix::thread::unshare(rustix::thread::UnshareFlags::NEWNS)?; + rustix::mount::mount_bind(".", STORAGE_ALIAS_DIR)?; + rustix::process::fchdir(&oldwd)?; + Ok(()) + }) + }; + cmd.take_fd_n(run_root, STORAGE_RUN_FD); + Ok(()) +} + +fn new_podman_cmd_in(storage_root: &Dir, run_root: &Dir) -> Result { + let mut cmd = Command::new("podman"); + bind_storage_roots(&mut cmd, storage_root, run_root)?; + let run_root = format!("/proc/self/fd/{}", STORAGE_RUN_FD); + cmd.args(["--root", STORAGE_ALIAS_DIR, "--runroot", run_root.as_str()]); + Ok(cmd) +} + +impl Storage { + /// Create a `podman image` Command instance prepared to operate on our alternative + /// root. + pub(crate) fn new_image_cmd(&self) -> Result { + let mut r = new_podman_cmd_in(&self.storage_root, &self.run)?; + // We want to limit things to only manipulating images by default. 
+ r.arg("image"); + Ok(r) + } + + fn init_globals() -> Result<()> { + // Ensure our global storage alias dirs exist + for d in [STORAGE_ALIAS_DIR] { + std::fs::create_dir_all(d).with_context(|| format!("Creating {d}"))?; + } + Ok(()) + } + + #[context("Creating imgstorage")] + pub(crate) fn create(sysroot: &Dir, run: &Dir) -> Result { + Self::init_globals()?; + let subpath = Utf8Path::new(SUBPATH); + // SAFETY: We know there's a parent + let parent = subpath.parent().unwrap(); + if !sysroot + .try_exists(subpath) + .with_context(|| format!("Querying {subpath}"))? + { + let tmp = format!("{SUBPATH}.tmp"); + sysroot.remove_all_optional(&tmp).context("Removing tmp")?; + sysroot + .create_dir_all(parent) + .with_context(|| format!("Creating {parent}"))?; + sysroot.create_dir_all(&tmp).context("Creating tmpdir")?; + let storage_root = sysroot.open_dir(&tmp).context("Open tmp")?; + // There's no explicit API to initialize a containers-storage: + // root, simply passing a path will attempt to auto-create it. + // We run "podman images" in the new root. + new_podman_cmd_in(&storage_root, &run)? 
+ .arg("images") + .run() + .context("Initializing images")?; + drop(storage_root); + sysroot + .rename(&tmp, sysroot, subpath) + .context("Renaming tmpdir")?; + } + Self::open(sysroot, run) + } + + #[context("Opening imgstorage")] + pub(crate) fn open(sysroot: &Dir, run: &Dir) -> Result { + Self::init_globals()?; + let storage_root = sysroot + .open_dir(SUBPATH) + .with_context(|| format!("Opening {SUBPATH}"))?; + // Always auto-create this if missing + run.create_dir_all(RUNROOT) + .with_context(|| format!("Creating {RUNROOT}"))?; + let run = run.open_dir(RUNROOT)?; + Ok(Self { + sysroot: sysroot.try_clone()?, + storage_root, + run, + }) + } + + #[context("Listing images")] + pub(crate) async fn list_images(&self) -> Result> { + let mut cmd = self.new_image_cmd()?; + cmd.args(["list", "--format=json"]); + cmd.stdin(Stdio::null()); + // It's maximally convenient for us to just pipe the whole output to a tempfile + let mut stdout = tempfile::tempfile()?; + cmd.stdout(stdout.try_clone()?); + // Allocate stderr, which is passed to the status checker + let stderr = tempfile::tempfile()?; + cmd.stderr(stderr.try_clone()?); + + // Spawn the child and wait + AsyncCommand::from(cmd) + .status() + .await? + .check_status(stderr)?; + // Spawn a helper thread to avoid blocking the main thread + // parsing JSON. + tokio::task::spawn_blocking(move || -> Result<_> { + stdout.seek(std::io::SeekFrom::Start(0))?; + let stdout = std::io::BufReader::new(stdout); + let r = serde_json::from_reader(stdout)?; + Ok(r) + }) + .await? 
+        .map_err(Into::into)
+    }
+
+    #[context("Pruning")]
+    pub(crate) async fn prune_except_roots(&self, roots: &HashSet<&str>) -> Result<Vec<String>> {
+        let all_images = self.list_images().await?;
+        tracing::debug!("Images total: {}", all_images.len(),);
+        let mut garbage = Vec::new();
+        for image in all_images {
+            if image
+                .names
+                .iter()
+                .flatten()
+                .all(|name| !roots.contains(name.as_str()))
+            {
+                garbage.push(image.id); // garbage only when *no* name of the image is a root
+            }
+        }
+        tracing::debug!("Images to prune: {}", garbage.len());
+        for garbage in garbage.chunks(SUBCMD_ARGV_CHUNKING) {
+            let mut cmd = self.new_image_cmd()?;
+            cmd.stdin(Stdio::null());
+            cmd.stdout(Stdio::null());
+            cmd.arg("rm");
+            cmd.args(garbage);
+            AsyncCommand::from(cmd).run().await?;
+        }
+        Ok(garbage)
+    }
+
+    /// Fetch the image if it is not already present; return whether
+    /// or not the image was fetched.
+    pub(crate) async fn pull(&self, image: &str, mode: PullMode) -> Result<bool> {
+        match mode {
+            PullMode::IfNotExists => {
+                // Sadly https://docs.rs/containers-image-proxy/latest/containers_image_proxy/struct.ImageProxy.html#method.open_image_optional
+                // doesn't work with containers-storage yet
+                let mut cmd = AsyncCommand::from(self.new_image_cmd()?);
+                cmd.args(["exists", image]);
+                let exists = cmd.status().await?.success();
+                if exists {
+                    tracing::debug!("Image is already present: {image}");
+                    return Ok(false);
+                }
+            }
+            PullMode::Always => {}
+        };
+        let mut cmd = self.new_image_cmd()?;
+        cmd.stdin(Stdio::null());
+        cmd.stdout(Stdio::null());
+        cmd.args(["pull", image]);
+        let authfile = ostree_ext::globals::get_global_authfile(&self.sysroot)?
+            .map(|(authfile, _fd)| authfile);
+        if let Some(authfile) = authfile {
+            cmd.args(["--authfile", authfile.as_str()]);
+        }
+        tracing::debug!("Pulling image: {image}");
+        let mut cmd = AsyncCommand::from(cmd);
+        cmd.run().await.context("Failed to pull image")?;
+        Ok(true)
+    }
+
+    /// Copy an image from the default container storage (/var/lib/containers/)
+    /// to this storage.
+ #[context("Pulling from host storage: {image}")] + pub(crate) async fn pull_from_host_storage(&self, image: &str) -> Result<()> { + let mut cmd = Command::new("podman"); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + // An ephemeral place for the transient state; + let temp_runroot = TempDir::new(cap_std::ambient_authority())?; + bind_storage_roots(&mut cmd, &self.storage_root, &temp_runroot)?; + + // The destination (target stateroot) + container storage dest + let storage_dest = &format!( + "containers-storage:[overlay@{STORAGE_ALIAS_DIR}+/proc/self/fd/{STORAGE_RUN_FD}]" + ); + cmd.args(["image", "push", image]) + .arg(format!("{storage_dest}{image}")); + let mut cmd = AsyncCommand::from(cmd); + cmd.run().await?; + temp_runroot.close()?; + Ok(()) + } +} diff --git a/lib/src/install.rs b/lib/src/install.rs index 025293e93..16b19a817 100644 --- a/lib/src/install.rs +++ b/lib/src/install.rs @@ -12,7 +12,7 @@ mod osbuild; pub(crate) mod osconfig; use std::io::Write; -use std::os::fd::{AsFd, OwnedFd}; +use std::os::fd::AsFd; use std::os::unix::process::CommandExt; use std::path::Path; use std::process::Command; @@ -598,6 +598,19 @@ async fn initialize_ostree_root(state: &State, root_setup: &RootSetup) -> Result .cwd(rootfs_dir)? 
.run()?; + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path(rootfs))); + sysroot.load(cancellable)?; + let sysroot_dir = Dir::reopen_dir(&crate::utils::sysroot_fd(&sysroot))?; + + state.tempdir.create_dir("temp-run")?; + let temp_run = state.tempdir.open_dir("temp-run")?; + sysroot_dir + .create_dir_all(Utf8Path::new(crate::imgstorage::SUBPATH).parent().unwrap()) + .context("creating bootc dir")?; + let imgstore = crate::imgstorage::Storage::create(&sysroot_dir, &temp_run)?; + // And drop it again - we'll reopen it after this + drop(imgstore); + // Bootstrap the initial labeling of the /ostree directory as usr_t if let Some(policy) = sepolicy { let ostree_dir = rootfs_dir.open_dir("ostree")?; @@ -613,7 +626,7 @@ async fn initialize_ostree_root(state: &State, root_setup: &RootSetup) -> Result let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path(rootfs))); sysroot.load(cancellable)?; let sysroot = SysrootLock::new_from_sysroot(&sysroot).await?; - Storage::new(sysroot) + Storage::new(sysroot, &temp_run) } #[context("Creating ostree deployment")] @@ -1252,8 +1265,7 @@ async fn install_with_sysroot( ) -> Result<()> { // And actually set up the container in that root, returning a deployment and // the aleph state (see below). - let (deployment, aleph) = install_container(state, rootfs, &sysroot).await?; - let stateroot = deployment.osname(); + let (_deployment, aleph) = install_container(state, rootfs, &sysroot).await?; // Write the aleph data that captures the system state at the time of provisioning for aid in future debugging. rootfs .rootfs_fd @@ -1276,51 +1288,10 @@ async fn install_with_sysroot( tracing::debug!("Installed bootloader"); tracing::debug!("Perfoming post-deployment operations"); - if !bound_images.is_empty() { - // TODO: We shouldn't hardcode the overlay driver for source or - // target, but we currently need to in order to reference the location. 
- // For this one, containers-storage: is actually the *host*'s /var/lib/containers - // which we are accessing directly. - let storage_src = "containers-storage:"; - // TODO: We only do this dance to initialize `/var` at install time if - // there are bound images today; it minimizes side effects. - // However going forward we really do need to handle a separate /var partition... - // and to do that we may in the general case need to run the `var.mount` - // target from the new root. - // Probably the best fix is for us to switch bound images to use the bootc storage. - let varpath = format!("ostree/deploy/{stateroot}/var"); - let var = rootfs - .rootfs_fd - .open_dir(&varpath) - .with_context(|| format!("Opening {varpath}"))?; - - // The skopeo API expects absolute paths, so we make a temporary bind - let tmp_dest_var_abs = tempfile::tempdir()?; - let tmp_dest_var_abs: &Utf8Path = tmp_dest_var_abs.path().try_into()?; - let mut t = Task::new("Mounting deployment /var", "mount") - .args(["--bind", "/proc/self/fd/3"]) - .arg(tmp_dest_var_abs); - t.cmd.take_fd_n(Arc::new(OwnedFd::from(var)), 3); - t.run()?; - - // And an ephemeral place for the transient state - let tmp_runroot = tempfile::tempdir()?; - let tmp_runroot: &Utf8Path = tmp_runroot.path().try_into()?; - - // The destination (target stateroot) + container storage dest - let storage_dest = &format!( - "containers-storage:[overlay@{tmp_dest_var_abs}/lib/containers/storage+{tmp_runroot}]" - ); - - // Now copy each bound image from the host's container storage into the target. - for image in bound_images { - let image = image.image.as_str(); - Task::new(format!("Copying image to target: {}", image), "skopeo") - .arg("copy") - .arg(format!("{storage_src}{image}")) - .arg(format!("{storage_dest}{image}")) - .run()?; - } + // Now copy each bound image from the host's container storage into the target. 
+ for image in bound_images { + let image = image.image.as_str(); + sysroot.imgstore.pull_from_host_storage(image).await?; } Ok(()) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 6d05aa64c..b2263b017 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -38,9 +38,9 @@ mod k8sapitypes; mod kernel; #[cfg(feature = "install")] pub(crate) mod mount; -#[cfg(feature = "install")] mod podman; pub mod spec; #[cfg(feature = "docgen")] mod docgen; +mod imgstorage; diff --git a/lib/src/podman.rs b/lib/src/podman.rs index a2ea36507..4e2d40445 100644 --- a/lib/src/podman.rs +++ b/lib/src/podman.rs @@ -3,7 +3,6 @@ use camino::Utf8Path; use cap_std_ext::cap_std::fs::Dir; use serde::Deserialize; -use crate::install::run_in_host_mountns; use crate::task::Task; /// Where we look inside our container to find our own image @@ -16,8 +15,18 @@ pub(crate) struct Inspect { pub(crate) digest: String, } +/// This is output from `podman image list --format=json`. +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct ImageListEntry { + pub(crate) id: String, + pub(crate) names: Option>, +} + /// Given an image ID, return its manifest digest +#[cfg(feature = "install")] pub(crate) fn imageid_to_digest(imgid: &str) -> Result { + use crate::install::run_in_host_mountns; let out = Task::new_cmd("podman inspect", run_in_host_mountns("podman")) .args(["inspect", imgid]) .quiet() diff --git a/lib/src/store/mod.rs b/lib/src/store/mod.rs index 713341772..b501347a1 100644 --- a/lib/src/store/mod.rs +++ b/lib/src/store/mod.rs @@ -2,6 +2,7 @@ use std::env; use std::ops::Deref; use anyhow::Result; +use cap_std_ext::cap_std::fs::Dir; use clap::ValueEnum; use ostree_ext::container::OstreeImageReference; @@ -15,6 +16,8 @@ mod ostree_container; pub(crate) struct Storage { pub sysroot: SysrootLock, + #[allow(dead_code)] + pub imgstore: crate::imgstorage::Storage, pub store: Box, } @@ -48,7 +51,7 @@ impl Deref for Storage { } impl Storage { - pub fn new(sysroot: SysrootLock) -> 
Result { + pub fn new(sysroot: SysrootLock, run: &Dir) -> Result { let store = match env::var("BOOTC_STORAGE") { Ok(val) => crate::spec::Store::from_str(&val, true).unwrap_or_else(|_| { let default = crate::spec::Store::default(); @@ -58,9 +61,16 @@ impl Storage { Err(_) => crate::spec::Store::default(), }; + let sysroot_dir = Dir::reopen_dir(&crate::utils::sysroot_fd(&sysroot))?; + let imgstore = crate::imgstorage::Storage::open(&sysroot_dir, run)?; + let store = load(store); - Ok(Self { sysroot, store }) + Ok(Self { + sysroot, + store, + imgstore, + }) } } diff --git a/tests-integration/src/install.rs b/tests-integration/src/install.rs index 87a0b6dcf..1f40dde85 100644 --- a/tests-integration/src/install.rs +++ b/tests-integration/src/install.rs @@ -57,6 +57,7 @@ fn find_deployment_root() -> Result { // Hook relatively cheap post-install tests here fn generic_post_install_verification() -> Result<()> { assert!(Utf8Path::new("/ostree/repo").try_exists()?); + assert!(Utf8Path::new("/ostree/bootc/storage/overlay").try_exists()?); Ok(()) } diff --git a/tests/booted/readonly/010-test-bootc-container-store.nu b/tests/booted/readonly/010-test-bootc-container-store.nu new file mode 100644 index 000000000..fc8a3d1d8 --- /dev/null +++ b/tests/booted/readonly/010-test-bootc-container-store.nu @@ -0,0 +1,12 @@ +use std assert +use tap.nu + +tap begin "verify bootc-owned container storage" + +# Just verifying that the additional store works +podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage images + +# And verify this works +bootc image cmd list -q o>/dev/null + +tap ok diff --git a/tests/booted/test-logically-bound.nu b/tests/booted/test-logically-bound.nu index f29fe6d56..f1846a7aa 100644 --- a/tests/booted/test-logically-bound.nu +++ b/tests/booted/test-logically-bound.nu @@ -61,7 +61,7 @@ def verify_images [images containers] { let bound_containers = $containers | where bound == true let num_bound = ($bound_images | length) + ($bound_containers | length) - 
let image_names = podman images --format json | from json | select -i Names + let image_names = podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage images --format json | from json | select -i Names for $image in $bound_images { let found = $image_names | where Names == [$image.image]