diff --git a/Cargo.lock b/Cargo.lock index 5831ad14162..0386ed2e945 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1303,6 +1303,7 @@ dependencies = [ "anyhow", "async-std", "document-features", + "gix", "gix-actor 0.32.0", "gix-archive", "gix-attributes 0.22.5", @@ -1360,6 +1361,7 @@ dependencies = [ "serial_test", "signal-hook", "smallvec", + "termtree", "thiserror", "walkdir", ] @@ -2089,6 +2091,8 @@ dependencies = [ "gix-date 0.9.0", "gix-features 0.38.2", "gix-hash 0.14.2", + "gix-hashtable 0.5.2", + "gix-odb", "gix-testtools", "gix-utils 0.1.12", "gix-validate 0.9.0", @@ -2096,6 +2100,7 @@ dependencies = [ "pretty_assertions", "serde", "smallvec", + "termtree", "thiserror", "winnow", ] @@ -2110,6 +2115,7 @@ dependencies = [ "gix-features 0.38.2", "gix-fs 0.11.3", "gix-hash 0.14.2", + "gix-hashtable 0.5.2", "gix-object 0.44.0", "gix-pack", "gix-path 0.10.10", @@ -4676,6 +4682,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "thiserror" version = "1.0.63" diff --git a/Cargo.toml b/Cargo.toml index 332d0b9b76f..a4d4a2ad2fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ edition = "2021" license = "MIT OR Apache-2.0" version = "0.38.0" default-run = "gix" -include = ["src/**/*", "LICENSE-*", "README.md"] +include = ["src/**/*", "/build.rs", "LICENSE-*", "README.md"] resolver = "2" [[bin]] diff --git a/README.md b/README.md index 4e986796616..49bbcf1150f 100644 --- a/README.md +++ b/README.md @@ -198,20 +198,20 @@ There are various build configurations, all of them are [documented here](https: for packagers who need to tune external dependencies. ``` -# A certain way to install `gitoxide` with just Rust and a C compiler installed. +# A way to install `gitoxide` with just Rust and a C compiler installed. # If there are problems with SSL certificates during clones, try to omit `--locked`. cargo install gitoxide --locked --no-default-features --features max-pure -# The default installation, 'max', is the fastest, but also needs some libraries available to build successfully. -# Installing these is platform-dependent and thus can't be explained here. +# The default installation, 'max', is the fastest, but also needs `cmake` to build successfully. +# Installing it is platform-dependent. cargo install gitoxide -# For smaller binaries and even faster build times that are traded for a less fancy CLI implementation, use `lean` -# or `lean-termion` respectively. +# For smaller binaries and even faster build times that are traded for a less fancy CLI implementation, +# use the `lean` feature. cargo install gitoxide --locked --no-default-features --features lean ``` -The following installs the latest unpublished release directly from git: +The following installs the latest unpublished `max` release directly from git: ```sh cargo install --git https://github.com/Byron/gitoxide gitoxide diff --git a/gix-features/src/hash.rs b/gix-features/src/hash.rs index 4c5389fad1c..eebc40fa075 100644 --- a/gix-features/src/hash.rs +++ b/gix-features/src/hash.rs @@ -5,7 +5,7 @@ //! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance. #[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))] mod _impl { - use super::Sha1Digest; + use super::Digest; /// A implementation of the Sha1 hash, which can be used once. 
#[derive(Default, Clone)] @@ -17,22 +17,20 @@ mod _impl { self.0.update(bytes); } /// Finalize the hash and produce a digest. - pub fn digest(self) -> Sha1Digest { + pub fn digest(self) -> Digest { self.0.digest().bytes() } } } -/// A 20 bytes digest produced by a [`Sha1`] hash implementation. +/// A hash-digest produced by a [`Hasher`] hash implementation. #[cfg(any(feature = "fast-sha1", feature = "rustsha1"))] -pub type Sha1Digest = [u8; 20]; +pub type Digest = [u8; 20]; #[cfg(feature = "fast-sha1")] mod _impl { use sha1::Digest; - use super::Sha1Digest; - /// A implementation of the Sha1 hash, which can be used once. #[derive(Default, Clone)] pub struct Sha1(sha1::Sha1); @@ -43,14 +41,14 @@ mod _impl { self.0.update(bytes); } /// Finalize the hash and produce a digest. - pub fn digest(self) -> Sha1Digest { + pub fn digest(self) -> super::Digest { self.0.finalize().into() } } } #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -pub use _impl::Sha1; +pub use _impl::Sha1 as Hasher; /// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash. /// @@ -76,9 +74,9 @@ pub fn crc32(bytes: &[u8]) -> u32 { /// Produce a hasher suitable for the given kind of hash. #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -pub fn hasher(kind: gix_hash::Kind) -> Sha1 { +pub fn hasher(kind: gix_hash::Kind) -> Hasher { match kind { - gix_hash::Kind::Sha1 => Sha1::default(), + gix_hash::Kind::Sha1 => Hasher::default(), } } @@ -127,7 +125,7 @@ pub fn bytes( pub fn bytes_with_hasher( read: &mut dyn std::io::Read, num_bytes_from_start: u64, - mut hasher: Sha1, + mut hasher: Hasher, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { @@ -160,12 +158,12 @@ pub fn bytes_with_hasher( #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] mod write { - use crate::hash::Sha1; + use crate::hash::Hasher; /// A utility to automatically generate a hash while writing into an inner writer. pub struct Write { /// The hash implementation. - pub hash: Sha1, + pub hash: Hasher, /// The inner writer. pub inner: T, } @@ -194,7 +192,7 @@ mod write { match object_hash { gix_hash::Kind::Sha1 => Write { inner, - hash: Sha1::default(), + hash: Hasher::default(), }, } } diff --git a/gix-features/tests/hash.rs b/gix-features/tests/hash.rs index 4cccdd77043..7e7f6654dbd 100644 --- a/gix-features/tests/hash.rs +++ b/gix-features/tests/hash.rs @@ -1,16 +1,16 @@ -use gix_features::hash::Sha1; +use gix_features::hash::Hasher; #[cfg(not(feature = "fast-sha1"))] #[test] fn size_of_sha1() { - assert_eq!(std::mem::size_of::(), 96); + assert_eq!(std::mem::size_of::(), 96); } #[cfg(feature = "fast-sha1")] #[test] fn size_of_sha1() { assert_eq!( - std::mem::size_of::(), + std::mem::size_of::(), if cfg!(target_arch = "x86") { 96 } else { 104 } ); } diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index f3047525668..dbe6e1fc0d5 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -19,6 +19,11 @@ name = "decode-objects" harness = false path = "./benches/decode_objects.rs" +[[bench]] +name = "edit-tree" +harness = false +path = "./benches/edit_tree.rs" + [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
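The `Sha1`/`Sha1Digest` pair becomes `Hasher`/`Digest`, leaving the call-site pattern untouched. A minimal sketch of driving the renamed API, assuming `gix-hash` is on hand and one of the `fast-sha1`/`rustsha1` features is enabled:

```rust
use gix_features::hash::{hasher, Digest, Hasher};

// Hash arbitrary bytes with whichever implementation was selected at compile
// time (`fast-sha1` or `rustsha1`); the returned `Hasher` is single-use.
fn sha1_of(bytes: &[u8]) -> Digest {
    let mut h: Hasher = hasher(gix_hash::Kind::Sha1);
    h.update(bytes);
    h.digest()
}

fn main() {
    let digest = sha1_of(b"hello world");
    // `Digest` is still the plain `[u8; 20]` that `Sha1Digest` used to be.
    assert_eq!(digest.len(), 20);
}
```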
@@ -41,6 +46,7 @@ gix-features = { version = "^0.38.2", path = "../gix-features", features = [ "progress", ] } gix-hash = { version = "^0.14.2", path = "../gix-hash" } +gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" } gix-validate = { version = "^0.9.0", path = "../gix-validate" } gix-actor = { version = "^0.32.0", path = "../gix-actor" } gix-date = { version = "^0.9.0", path = "../gix-date" } @@ -64,6 +70,8 @@ document-features = { version = "0.2.0", optional = true } criterion = "0.5.1" pretty_assertions = "1.0.0" gix-testtools = { path = "../tests/tools" } +gix-odb = { path = "../gix-odb" } +termtree = "0.5.1" [package.metadata.docs.rs] all-features = true diff --git a/gix-object/benches/edit_tree.rs b/gix-object/benches/edit_tree.rs new file mode 100644 index 00000000000..ba063d89a08 --- /dev/null +++ b/gix-object/benches/edit_tree.rs @@ -0,0 +1,190 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; +use gix_hash::ObjectId; +use gix_hashtable::hash_map::Entry; +use gix_object::tree::EntryKind; +use gix_object::{tree, Tree, WriteTo}; +use std::cell::RefCell; +use std::rc::Rc; + +fn create_new_tree_add_and_remove(c: &mut Criterion) { + let (storage, mut write) = new_inmemory_writes(); + let mut editor = tree::Editor::new(Tree::default(), &gix_object::find::Never, gix_hash::Kind::Sha1); + let mut group = c.benchmark_group("editor"); + let small_throughput = Throughput::Elements((1 + 2 + 4) + 3); + group.throughput(small_throughput.clone()); + group.bench_function("small tree (empty -> full -> empty)", |b| { + b.iter(|| { + let tree_id = editor + .upsert(Some("file"), EntryKind::Blob, any_blob()) + .unwrap() + .upsert(["dir", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .upsert(["more", "deeply", "nested", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .write(&mut write) + .unwrap(); + black_box(tree_id); + let actual = editor + .remove(Some("file")) + .unwrap() + .remove(Some("dir")) + .unwrap() + .remove(Some("more")) + .unwrap() + .write(&mut write) + .unwrap(); + assert_eq!(actual, gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1)); + }); + }); + + let odb = StorageOdb(storage); + let mut editor = tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1); + let prefixed_throughput = Throughput::Elements((1 + 2 + 4) + 6 * 3 + (3 + 6 * 3)); + group.throughput(prefixed_throughput.clone()); + group.bench_function("deeply nested tree (empty -> full -> empty)", |b| { + b.iter(|| { + let tree_id = editor + .upsert(["a", "b", "c", "d", "e", "f", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .upsert( + ["a", "b", "c", "d", "e", "f", "dir", "file"], + EntryKind::Blob, + any_blob(), + ) + .unwrap() + .upsert( + ["a", "b", "c", "d", "e", "f", "more", "deeply", "nested", "file"], + EntryKind::Blob, + any_blob(), + ) + .unwrap() + .write(&mut write) + .unwrap(); + black_box(tree_id); + let tree_id = editor + .remove(["a", "b", "c", "d", "e", "f", "file"]) + .unwrap() + .remove(["a", "b", "c", "d", "e", "f", "dir"]) + .unwrap() + .remove(["a", "b", "c", "d", "e", "f", "more"]) + .unwrap() + .write(&mut write) + .unwrap(); + black_box(tree_id); + }); + }); + + drop(group); + let mut group = c.benchmark_group("cursor"); + group.throughput(small_throughput); + group.bench_function("small tree (empty -> full -> empty)", |b| { + let mut editor = editor.to_cursor(); + b.iter(|| { + let tree_id = editor + .upsert(Some("file"), EntryKind::Blob, any_blob()) + .unwrap() + .upsert(["dir", "file"], EntryKind::Blob, any_blob()) + 
.unwrap() + .upsert(["more", "deeply", "nested", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .write(&mut write) + .unwrap(); + black_box(tree_id); + let actual = editor + .remove(Some("file")) + .unwrap() + .remove(Some("dir")) + .unwrap() + .remove(Some("more")) + .unwrap() + .write(&mut write) + .unwrap(); + assert_eq!(actual, gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1)); + }); + }); + + group.throughput(prefixed_throughput); + group.bench_function("deeply nested tree (empty -> full -> empty)", |b| { + let mut editor = editor.cursor_at(["a", "b", "c", "d", "e", "f"]).unwrap(); + b.iter(|| { + let tree_id = editor + .upsert(["file"], EntryKind::Blob, any_blob()) + .unwrap() + .upsert(["dir", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .upsert(["more", "deeply", "nested", "file"], EntryKind::Blob, any_blob()) + .unwrap() + .write(&mut write) + .unwrap(); + black_box(tree_id); + let actual = editor + .remove(["file"]) + .unwrap() + .remove(["dir"]) + .unwrap() + .remove(["more"]) + .unwrap() + .write(&mut write) + .unwrap(); + assert_eq!(actual, gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1)); + }); + }); +} + +criterion_group!(benches, create_new_tree_add_and_remove); +criterion_main!(benches); + +type TreeStore = Rc>>; + +fn new_inmemory_writes() -> (TreeStore, impl FnMut(&Tree) -> Result) { + let store = TreeStore::default(); + let write_tree = { + let store = store.clone(); + let mut buf = Vec::with_capacity(512); + move |tree: &Tree| { + buf.clear(); + tree.write_to(&mut buf)?; + let header = gix_object::encode::loose_header(gix_object::Kind::Tree, buf.len() as u64); + let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); + hasher.update(&header); + hasher.update(&buf); + let id = hasher.digest().into(); + let mut borrowed = store.borrow_mut(); + match borrowed.entry(id) { + Entry::Occupied(_) => {} + Entry::Vacant(e) => { + e.insert(tree.clone()); + } + }; + Ok(id) + } + }; + (store, write_tree) +} + +struct StorageOdb(TreeStore); + +impl gix_object::Find for StorageOdb { + fn try_find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result>, gix_object::find::Error> { + let borrow = self.0.borrow(); + match borrow.get(id) { + None => Ok(None), + Some(tree) => { + buffer.clear(); + tree.write_to(buffer).expect("valid trees can always be serialized"); + Ok(Some(gix_object::Data { + kind: gix_object::Kind::Tree, + data: &*buffer, + })) + } + } + } +} + +fn any_blob() -> ObjectId { + ObjectId::from_hex("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".as_bytes()).unwrap() +} diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 43cb5f5bfd0..0ef56d52954 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -232,7 +232,7 @@ pub struct TreeRefIter<'a> { } /// A mutable Tree, containing other trees, blobs or commits. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Tree { /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename]. 
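The benchmark's `StorageOdb` boils down to implementing `gix_object::Find` over a shared in-memory map of trees. A self-contained sketch of that pattern with the type parameters spelled out; the `InMemoryTrees` name and the use of `std::collections::HashMap` instead of `gix_hashtable` are illustrative assumptions:

```rust
use std::cell::RefCell;
use std::collections::HashMap;
use std::rc::Rc;

use gix_hash::ObjectId;
use gix_object::{Tree, WriteTo};

/// Trees shared between the store and the editor's write-callback.
type TreeStore = Rc<RefCell<HashMap<ObjectId, Tree>>>;

struct InMemoryTrees(TreeStore);

impl gix_object::Find for InMemoryTrees {
    fn try_find<'a>(
        &self,
        id: &gix_hash::oid,
        buffer: &'a mut Vec<u8>,
    ) -> Result<Option<gix_object::Data<'a>>, gix_object::find::Error> {
        let trees = self.0.borrow();
        match trees.get(id) {
            None => Ok(None),
            Some(tree) => {
                // Re-serialize into the caller-provided buffer, just like a
                // loose-object lookup would.
                buffer.clear();
                tree.write_to(buffer).expect("valid trees always serialize");
                Ok(Some(gix_object::Data {
                    kind: gix_object::Kind::Tree,
                    data: &*buffer,
                }))
            }
        }
    }
}
```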
diff --git a/gix-object/src/tree/editor.rs b/gix-object/src/tree/editor.rs new file mode 100644 index 00000000000..d7632b7728d --- /dev/null +++ b/gix-object/src/tree/editor.rs @@ -0,0 +1,439 @@ +use crate::tree::{Editor, EntryKind}; +use crate::{tree, Tree}; +use bstr::{BStr, BString, ByteSlice, ByteVec}; +use gix_hash::ObjectId; +use std::cmp::Ordering; +use std::collections::{hash_map, HashMap}; +use std::fmt::Formatter; + +/// A way to constrain all [tree-edits](Editor) to a given subtree. +pub struct Cursor<'a, 'find> { + /// The underlying editor + parent: &'a mut Editor<'find>, + /// Our own location, used as prefix for all operations. + /// Note that it's assumed to always contain a tree. + prefix: BString, +} + +impl std::fmt::Debug for Editor<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Editor") + .field("object_hash", &self.object_hash) + .field("path_buf", &self.path_buf) + .field("trees", &self.trees) + .finish() + } +} + +/// The error returned by [Editor] or [Cursor] edit operation. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Empty path components are not allowed")] + EmptyPathComponent, + #[error(transparent)] + FindExistingObject(#[from] crate::find::existing_object::Error), +} + +/// Lifecycle +impl<'a> Editor<'a> { + /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing + /// trees when edits are made. Each tree will only be looked-up once and then edited in place from + /// that point on. + /// `object_hash` denotes the kind of hash to create. + pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self { + Editor { + find, + object_hash, + trees: HashMap::from_iter(Some((empty_path(), root))), + path_buf: Vec::with_capacity(256).into(), + tree_buf: Vec::with_capacity(512), + } + } +} + +/// Operations +impl<'a> Editor<'a> { + /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`. + /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing + /// was added to the tree. + /// + /// The last call to `out` will be the changed root tree, whose object-id will also be returned. + /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist. + /// We don't assure that as there is no validation that inserted entries are valid object ids. + /// + /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the + /// just-written root-tree. + /// If this is not desired, use [set_root()](Self::set_root()). + /// + /// ### Validation + /// + /// Note that no additional validation is performed to assure correctness of entry-names. + /// It is absolutely and intentionally possible to write out invalid trees with this method. + /// Higher layers are expected to perform detailed validation. + pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { + self.path_buf.clear(); + self.write_at_pathbuf(out, WriteMode::Normal) + } + + /// Remove the entry at `rela_path`, loading all trees on the path accordingly. + /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all. + /// + /// Note that trying to remove a path with an empty component is also forbidden. 
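A hedged sketch of the basic flow described above: start from a root tree, `upsert` a few entries, then `write` everything out through a caller-provided callback. The callback shown here mirrors the loose-object hashing done by the benchmark helpers, and `fake_blob` is a placeholder id:

```rust
use gix_hash::ObjectId;
use gix_object::tree::{Editor, EntryKind};
use gix_object::{Tree, WriteTo};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `find::Never` suffices because nothing needs to be looked up when
    // building a tree from scratch.
    let mut editor = Editor::new(Tree::default(), &gix_object::find::Never, gix_hash::Kind::Sha1);

    // Hash each tree like a loose object to derive its id; a real callback
    // would also persist the bytes to an object database.
    let mut write = |tree: &Tree| -> Result<ObjectId, std::io::Error> {
        let mut buf = Vec::new();
        tree.write_to(&mut buf)?;
        let header = gix_object::encode::loose_header(gix_object::Kind::Tree, buf.len() as u64);
        let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
        hasher.update(&header);
        hasher.update(&buf);
        Ok(hasher.digest().into())
    };

    let fake_blob = ObjectId::from_hex(b"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")?;
    let root_id = editor
        .upsert(["README.md"], EntryKind::Blob, fake_blob)?
        .upsert(["src", "lib.rs"], EntryKind::Blob, fake_blob)?
        .write(&mut write)?;
    println!("new root tree: {root_id}");
    Ok(())
}
```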
+ pub fn remove(&mut self, rela_path: I) -> Result<&mut Self, Error> + where + I: IntoIterator, + C: AsRef, + { + self.path_buf.clear(); + self.upsert_or_remove_at_pathbuf(rela_path, None) + } + + /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree, + /// like `a/b/c`. Names are matched case-sensitively. + /// + /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database + /// or will be made available at a later point to assure the integrity of the produced tree. + /// + /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries + /// will be inserted into them instead. + /// + /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading + /// through them will not be considered a problem. + /// + /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed + /// in Git trees. + pub fn upsert(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error> + where + I: IntoIterator, + C: AsRef, + { + self.path_buf.clear(); + self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal))) + } + + fn write_at_pathbuf( + &mut self, + mut out: impl FnMut(&Tree) -> Result, + mode: WriteMode, + ) -> Result { + assert_ne!(self.trees.len(), 0, "there is at least the root tree"); + + // back is for children, front is for parents. + let mut parents = vec![( + None::, + self.path_buf.clone(), + self.trees + .remove(&path_hash(&self.path_buf)) + .expect("root tree is always present"), + )]; + let mut children = Vec::new(); + while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) { + let mut all_entries_unchanged_or_written = true; + for entry in &tree.entries { + if entry.mode.is_tree() { + let prev_len = push_path_component(&mut rela_path, &entry.filename); + if let Some(sub_tree) = self.trees.remove(&path_hash(&rela_path)) { + all_entries_unchanged_or_written = false; + let next_parent_idx = parents.len(); + children.push((Some(next_parent_idx), rela_path.clone(), sub_tree)); + } + rela_path.truncate(prev_len); + } + } + if all_entries_unchanged_or_written { + tree.entries.retain(|e| !e.oid.is_null()); + if let Some((_, _, parent_to_adjust)) = + parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero")) + { + let name = filename(rela_path.as_bstr()); + let entry_idx = parent_to_adjust + .entries + .binary_search_by(|e| cmp_entry_with_name(e, name, true)) + .expect("the parent always knows us by name"); + if tree.entries.is_empty() { + parent_to_adjust.entries.remove(entry_idx); + } else { + match out(&tree) { + Ok(id) => { + parent_to_adjust.entries[entry_idx].oid = id; + } + Err(err) => { + let root_tree = parents.into_iter().next().expect("root wasn't consumed yet"); + self.trees.insert(root_tree.1, root_tree.2); + return Err(err); + } + } + } + } else if parents.is_empty() { + debug_assert!(children.is_empty(), "we consume children before parents"); + debug_assert_eq!(rela_path, self.path_buf, "this should always be the root tree"); + + // There may be left-over trees if they are replaced with blobs for example. 
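To make the null-id placeholder rule from the `upsert` documentation above concrete, a small illustrative sketch (the write callback is a stub): placeholder entries and the intermediate trees they created are pruned at write time, so only the empty root ever reaches the callback.

```rust
use gix_hash::ObjectId;
use gix_object::tree::{Editor, EntryKind};
use gix_object::Tree;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut editor = Editor::new(Tree::default(), &gix_object::find::Never, gix_hash::Kind::Sha1);

    let mut write = |tree: &Tree| -> Result<ObjectId, std::io::Error> {
        // Null-id placeholders never make it here.
        assert!(tree.entries.is_empty(), "only the pruned, empty root is written");
        Ok(ObjectId::empty_tree(gix_hash::Kind::Sha1))
    };

    // The null id marks an entry whose object will be provided later.
    let root = editor
        .upsert(["a", "b", "c"], EntryKind::Blob, gix_hash::Kind::Sha1.null())?
        .write(&mut write)?;
    assert_eq!(root, ObjectId::empty_tree(gix_hash::Kind::Sha1));
    Ok(())
}
```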
+ match out(&tree) { + Ok(id) => { + let root_tree_id = id; + match mode { + WriteMode::Normal => { + self.trees.clear(); + } + WriteMode::FromCursor => {} + } + self.trees.insert(rela_path, tree); + return Ok(root_tree_id); + } + Err(err) => { + self.trees.insert(rela_path, tree); + return Err(err); + } + } + } else if !tree.entries.is_empty() { + out(&tree)?; + } + } else { + parents.push((parent_idx, rela_path, tree)); + } + } + + unreachable!("we exit as soon as everything is consumed") + } + + fn upsert_or_remove_at_pathbuf( + &mut self, + rela_path: I, + kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>, + ) -> Result<&mut Self, Error> + where + I: IntoIterator, + C: AsRef, + { + let mut cursor = self + .trees + .get_mut(&path_hash(&self.path_buf)) + .expect("root is always present"); + let mut rela_path = rela_path.into_iter().peekable(); + let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _, _)| kind == EntryKind::Tree); + while let Some(name) = rela_path.next() { + let name = name.as_ref(); + if name.is_empty() { + return Err(Error::EmptyPathComponent); + } + let is_last = rela_path.peek().is_none(); + let mut needs_sorting = false; + let current_level_must_be_tree = !is_last || new_kind_is_tree; + let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree; + let tree_to_lookup = match cursor + .entries + .binary_search_by(|e| cmp_entry_with_name(e, name, false)) + .or_else(|file_insertion_idx| { + cursor + .entries + .binary_search_by(|e| cmp_entry_with_name(e, name, true)) + .map_err(|dir_insertion_index| { + if current_level_must_be_tree { + dir_insertion_index + } else { + file_insertion_idx + } + }) + }) { + Ok(idx) => { + match kind_and_id { + None => { + if is_last { + cursor.entries.remove(idx); + break; + } else { + let entry = &cursor.entries[idx]; + if entry.mode.is_tree() { + Some(entry.oid) + } else { + break; + } + } + } + Some((kind, id, _mode)) => { + let entry = &mut cursor.entries[idx]; + if is_last { + // unconditionally overwrite what's there. + entry.oid = id; + needs_sorting = check_type_change(entry); + entry.mode = kind.into(); + None + } else if entry.mode.is_tree() { + // Possibly lookup the existing tree on our way down the path. + Some(entry.oid) + } else { + // it is no tree, but we are traversing a path, so turn it into one. + entry.oid = id.kind().null(); + needs_sorting = check_type_change(entry); + entry.mode = EntryKind::Tree.into(); + None + } + } + } + } + Err(insertion_idx) => match kind_and_id { + None => break, + Some((kind, id, _mode)) => { + cursor.entries.insert( + insertion_idx, + tree::Entry { + filename: name.into(), + mode: if is_last { kind.into() } else { EntryKind::Tree.into() }, + oid: if is_last { id } else { id.kind().null() }, + }, + ); + None + } + }, + }; + if needs_sorting { + cursor.entries.sort(); + } + if is_last && kind_and_id.map_or(false, |(_, _, mode)| mode == UpsertMode::Normal) { + break; + } + push_path_component(&mut self.path_buf, name); + let path_id = path_hash(&self.path_buf); + cursor = match self.trees.entry(path_id) { + hash_map::Entry::Occupied(e) => e.into_mut(), + hash_map::Entry::Vacant(e) => e.insert( + if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) { + self.find.find_tree(&tree_id, &mut self.tree_buf)?.into() + } else { + Tree::default() + }, + ), + }; + } + Ok(self) + } + + /// Set the root tree of the modification to `root`, assuring it has a well-known state. + /// + /// Note that this erases all previous edits. 
+ /// + /// This is useful if the same editor is re-used for various trees. + pub fn set_root(&mut self, root: Tree) -> &mut Self { + self.trees.clear(); + self.trees.insert(empty_path(), root); + self + } +} + +mod cursor { + use crate::tree::editor::{Cursor, UpsertMode, WriteMode}; + use crate::tree::{Editor, EntryKind}; + use crate::Tree; + use bstr::{BStr, BString}; + use gix_hash::ObjectId; + + /// Cursor handling + impl<'a> Editor<'a> { + /// Turn ourselves as a cursor, which points to the same tree as the editor. + /// + /// This is useful if a method takes a [`Cursor`], not an [`Editor`]. + pub fn to_cursor(&mut self) -> Cursor<'_, 'a> { + Cursor { + parent: self, + prefix: BString::default(), + } + } + + /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit. + /// + /// The returned cursor will then allow applying edits to the tree at `rela_path` as root. + /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself. + pub fn cursor_at(&mut self, rela_path: I) -> Result, super::Error> + where + I: IntoIterator, + C: AsRef, + { + self.path_buf.clear(); + self.upsert_or_remove_at_pathbuf( + rela_path, + Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)), + )?; + Ok(Cursor { + prefix: self.path_buf.clone(), /* set during the upsert call */ + parent: self, + }) + } + } + + impl<'a, 'find> Cursor<'a, 'find> { + /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree. + pub fn upsert(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error> + where + I: IntoIterator, + C: AsRef, + { + self.parent.path_buf.clone_from(&self.prefix); + self.parent + .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?; + Ok(self) + } + + /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree. + pub fn remove(&mut self, rela_path: I) -> Result<&mut Self, super::Error> + where + I: IntoIterator, + C: AsRef, + { + self.parent.path_buf.clone_from(&self.prefix); + self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?; + Ok(self) + } + + /// Like [`Editor::write()`], but will write only the subtree of the cursor. 
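A sketch of how a cursor is meant to be driven, mirroring the tests further below: every path is interpreted relative to the prefix handed to `cursor_at()`, and `write()` flushes only that subtree. The write callback is a stand-in for real persistence:

```rust
use gix_hash::ObjectId;
use gix_object::tree::{Editor, EntryKind};
use gix_object::Tree;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut editor = Editor::new(Tree::default(), &gix_object::find::Never, gix_hash::Kind::Sha1);
    let blob = ObjectId::from_hex(b"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")?;

    // Stub persistence: a real callback would store `tree` and return the id
    // it was stored under.
    let mut write = |_tree: &Tree| -> Result<ObjectId, std::io::Error> {
        Ok(ObjectId::empty_tree(gix_hash::Kind::Sha1))
    };

    // Edits below are rooted at `some/deeply/nested`; the prefix is never repeated.
    let mut cursor = editor.cursor_at(["some", "deeply", "nested"])?;
    let subtree_id = cursor
        .upsert(["file"], EntryKind::Blob, blob)?
        .upsert(["dir", "file"], EntryKind::Blob, blob)?
        .write(&mut write)?;
    println!("tree at the cursor prefix: {subtree_id}");
    Ok(())
}
```

After the cursor is done, the parent `Editor` still tracks the full `some/deeply/nested/...` paths and can be written as a whole, which is exactly what the `from_empty_cursor` test below exercises.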
+ pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { + self.parent.path_buf.clone_from(&self.prefix); + self.parent.write_at_pathbuf(out, WriteMode::FromCursor) + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq)] +enum UpsertMode { + Normal, + /// Only make sure there is a tree at the given location (requires kind tree and null-id) + AssureTreeOnly, +} + +enum WriteMode { + Normal, + /// Perform less cleanup to assure parent-editor still stays intact + FromCursor, +} + +fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering { + let common = a.filename.len().min(filename.len()); + a.filename[..common].cmp(&filename[..common]).then_with(|| { + let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/')); + let b = filename.get(common).or_else(|| is_tree.then_some(&b'/')); + a.cmp(&b) + }) +} + +fn filename(path: &BStr) -> &BStr { + path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..]) +} + +fn empty_path() -> BString { + BString::default() +} + +fn path_hash(path: &[u8]) -> BString { + path.to_vec().into() +} + +fn push_path_component(base: &mut BString, component: &[u8]) -> usize { + let prev_len = base.len(); + debug_assert!(base.last() != Some(&b'/')); + if !base.is_empty() { + base.push_byte(b'/'); + } + base.push_str(component); + prev_len +} diff --git a/gix-object/src/tree/mod.rs b/gix-object/src/tree/mod.rs index 1014fcfde29..1d3c0838df0 100644 --- a/gix-object/src/tree/mod.rs +++ b/gix-object/src/tree/mod.rs @@ -1,14 +1,40 @@ -use std::cmp::Ordering; - use crate::{ bstr::{BStr, BString}, - tree, + tree, Tree, TreeRef, }; +use std::cmp::Ordering; + +/// +pub mod editor; mod ref_iter; /// pub mod write; +/// The state needed to apply edits instantly to in-memory trees. +/// +/// It's made so that each tree is looked at in the object database at most once, and held in memory for +/// all edits until everything is flushed to write all changed trees. +/// +/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well +/// with some penalties. +#[doc(alias = "TreeUpdateBuilder", alias = "git2")] +#[derive(Clone)] +pub struct Editor<'a> { + /// A way to lookup trees. + find: &'a dyn crate::FindExt, + /// The kind of hashes to produce + object_hash: gix_hash::Kind, + /// All trees we currently hold in memory. Each of these may change while adding and removing entries. + /// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be + /// dropped when writing at the latest. + trees: std::collections::HashMap, + /// A buffer to build up paths when finding the tree to edit. + path_buf: BString, + /// Our buffer for storing tree-data in, right before decoding it. + tree_buf: Vec, +} + /// The mode of items storable in a tree, similar to the file mode on a unix file system. /// /// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`]. @@ -168,6 +194,21 @@ impl EntryMode { } } +impl TreeRef<'_> { + /// Convert this instance into its own version, creating a copy of all data. + /// + /// This will temporarily allocate an extra copy in memory, so at worst three copies of the tree exist + /// at some intermediate point in time. Use [`Self::into_owned()`] to avoid this. + pub fn to_owned(&self) -> Tree { + self.clone().into() + } + + /// Convert this instance into its own version, creating a copy of all data. + pub fn into_owned(self) -> Tree { + self.into() + } +} + /// An element of a [`TreeRef`][crate::TreeRef::entries]. 
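The double binary-search in `upsert_or_remove_at_pathbuf()` exists because of the ordering that `cmp_entry_with_name()` models: git sorts tree entries by name, but a directory sorts as if its name ended with `/`. A standalone sketch of that rule; `git_tree_order` is a hypothetical helper written only for illustration:

```rust
use std::cmp::Ordering;

// Compare two entry names the way git orders them inside a tree: a directory
// behaves as if it had a trailing '/' appended to its name.
fn git_tree_order(a: &[u8], a_is_tree: bool, b: &[u8], b_is_tree: bool) -> Ordering {
    let common = a.len().min(b.len());
    a[..common].cmp(&b[..common]).then_with(|| {
        let a = a.get(common).or_else(|| a_is_tree.then_some(&b'/'));
        let b = b.get(common).or_else(|| b_is_tree.then_some(&b'/'));
        a.cmp(&b)
    })
}

fn main() {
    // As a blob, the shorter name `foo` sorts before `foo.bar`...
    assert_eq!(git_tree_order(b"foo", false, b"foo.bar", false), Ordering::Less);
    // ...but as a directory it sorts as `foo/`, and '/' (0x2f) > '.' (0x2e),
    // which is why the editor probes a name once as a blob and once as a tree.
    assert_eq!(git_tree_order(b"foo", true, b"foo.bar", false), Ordering::Greater);
}
```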
#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -201,7 +242,7 @@ impl<'a> Ord for EntryRef<'a> { } } -/// An entry in a [`Tree`][crate::Tree], similar to an entry in a directory. +/// An entry in a [`Tree`], similar to an entry in a directory. #[derive(PartialEq, Eq, Debug, Hash, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Entry { diff --git a/gix-object/src/tree/write.rs b/gix-object/src/tree/write.rs index 9c43dcc99a4..831a65a236f 100644 --- a/gix-object/src/tree/write.rs +++ b/gix-object/src/tree/write.rs @@ -12,8 +12,8 @@ use crate::{ #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("Newlines are invalid in file paths: {name:?}")] - NewlineInFilename { name: BString }, + #[error("Nullbytes are invalid in file paths as they are separators: {name:?}")] + NullbyteInFilename { name: BString }, } impl From for io::Error { @@ -27,12 +27,12 @@ impl crate::WriteTo for Tree { /// Serialize this tree to `out` in the git internal format. fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { debug_assert_eq!( + &self.entries, &{ let mut entries_sorted = self.entries.clone(); entries_sorted.sort(); entries_sorted }, - &self.entries, "entries for serialization must be sorted by filename" ); let mut buf = Default::default(); @@ -40,8 +40,8 @@ impl crate::WriteTo for Tree { out.write_all(mode.as_bytes(&mut buf))?; out.write_all(SPACE)?; - if filename.find_byte(b'\n').is_some() { - return Err(Error::NewlineInFilename { + if filename.find_byte(0).is_some() { + return Err(Error::NullbyteInFilename { name: (*filename).to_owned(), } .into()); @@ -87,8 +87,8 @@ impl<'a> crate::WriteTo for TreeRef<'a> { out.write_all(mode.as_bytes(&mut buf))?; out.write_all(SPACE)?; - if filename.find_byte(b'\n').is_some() { - return Err(Error::NewlineInFilename { + if filename.find_byte(0).is_some() { + return Err(Error::NullbyteInFilename { name: (*filename).to_owned(), } .into()); diff --git a/gix-object/tests/encode/mod.rs b/gix-object/tests/encode/mod.rs index 503933c354d..00365cdd35e 100644 --- a/gix-object/tests/encode/mod.rs +++ b/gix-object/tests/encode/mod.rs @@ -88,6 +88,41 @@ mod commit { } mod tree { + use gix_object::tree::EntryKind; + use gix_object::{tree, WriteTo}; + + #[test] + fn write_to_does_not_validate() { + let mut tree = gix_object::Tree::empty(); + tree.entries.push(tree::Entry { + mode: EntryKind::Blob.into(), + filename: "".into(), + oid: gix_hash::Kind::Sha1.null(), + }); + tree.entries.push(tree::Entry { + mode: EntryKind::Tree.into(), + filename: "something\nwith\newlines\n".into(), + oid: gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1), + }); + tree.write_to(&mut std::io::sink()) + .expect("write succeeds, no validation is performed"); + } + + #[test] + fn write_to_does_not_allow_separator() { + let mut tree = gix_object::Tree::empty(); + tree.entries.push(tree::Entry { + mode: EntryKind::Blob.into(), + filename: "hi\0ho".into(), + oid: gix_hash::Kind::Sha1.null(), + }); + let err = tree.write_to(&mut std::io::sink()).unwrap_err(); + assert_eq!( + err.to_string(), + "Nullbytes are invalid in file paths as they are separators: \"hi\\0ho\"" + ); + } + round_trip!(gix_object::Tree, gix_object::TreeRef, "tree/everything.tree"); } diff --git a/gix-object/tests/tree/editor.rs b/gix-object/tests/tree/editor.rs new file mode 100644 index 00000000000..f74b5c15602 --- /dev/null +++ b/gix-object/tests/tree/editor.rs @@ 
-0,0 +1,870 @@ +use gix_object::tree::EntryKind; +use gix_object::Tree; + +#[test] +fn from_empty_cursor() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new(storage.clone()); + let mut edit = gix_object::tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1); + + edit.upsert(Some("root-file"), EntryKind::Blob, any_blob())?.upsert( + ["nested", "from", "root"], + EntryKind::BlobExecutable, + any_blob(), + )?; + let cursor_path = ["some", "deeply", "nested", "path"]; + let mut cursor = edit.cursor_at(cursor_path)?; + let actual = cursor + .upsert(Some("file"), EntryKind::Blob, any_blob())? + .upsert(Some("empty-dir-via-cursor"), EntryKind::Tree, empty_tree())? + .upsert(["with-subdir", "dir", "file"], EntryKind::Blob, any_blob())? + .upsert(["with-subdir2", "dir", "file"], EntryKind::Blob, any_blob())? + .remove(Some("file"))? + .remove(["with-subdir", "dir", "file"])? + .remove(Some("with-subdir2"))? + .remove(Some("with-subdir2"))? + .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "e2339a3f62e2f3fc54a406739a62a4173ee3b5ac +└── empty-dir-via-cursor (empty) +", + "only one item is left in the tree, which also keeps it alive" + ); + assert_eq!(num_writes_and_clear(), 1, "root tree"); + + let actual = edit.write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "76e0729de84047d19711d90cfcbb4e60bb432682 +├── nested +│ └── from +│ └── root bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +├── root-file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +└── some + └── deeply + └── nested + └── path + └── empty-dir-via-cursor (empty) +" + ); + + let mut cursor = edit.cursor_at(cursor_path)?; + let actual = cursor.remove(Some("empty-dir-via-cursor"))?.write(&mut write)?; + assert_eq!(actual, empty_tree(), "it keeps the empty tree like the editor would"); + + let actual = edit.write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "6cc592046dcaac06d3c619b4892d9ac78738fb5d +├── nested +│ └── from +│ └── root bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +└── root-file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "now the editor naturally prunes all empty trees thus far, removing the cursor root" + ); + + let mut cursor = edit.cursor_at(cursor_path)?; + let actual = cursor + .upsert(Some("root-file"), EntryKind::BlobExecutable, any_blob())? + .upsert(["nested", "from"], EntryKind::BlobExecutable, any_blob())? 
+ .write(&mut write)?; + + assert_eq!( + display_tree(actual, &storage), + "4580ae6d4c22b407cee521d7575e69708ff980a1 +├── nested +│ └── from bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +└── root-file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +", + "it is able to write the sub-tree, even though names from the top-level tree are re-used" + ); + + let actual = edit.write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "8febb45a1c34e405d70a7ae059d57abdd8254063 +├── nested +│ └── from +│ └── root bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +├── root-file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +└── some + └── deeply + └── nested + └── path + ├── nested + │ └── from bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 + └── root-file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +", + "it places the subtree exactly where it's expected" + ); + Ok(()) +} + +#[test] +fn from_existing_cursor() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new_with_odb(storage.clone(), tree_odb()?); + let root_tree_id = hex_to_id("ff7e7d2aecae1c3fb15054b289a4c58aa65b8646"); + let root_tree = find_tree(&odb, root_tree_id)?; + odb.access_count_and_clear(); + let mut edit = gix_object::tree::Editor::new(root_tree.clone(), &odb, gix_hash::Kind::Sha1); + + let mut cursor = edit.to_cursor(); + let actual = cursor + .remove(Some("bin"))? + .remove(Some("bin.d"))? + .remove(Some("file.to"))? + .remove(Some("file.toml"))? + .remove(Some("file.toml.bin"))? + .upsert(["some", "nested", "file"], EntryKind::Blob, any_blob())? + .write(&mut write)?; + assert_eq!( + num_writes_and_clear(), + 1 + 2, + "just the altered root tree, and two of trees towards `some/tested/file`" + ); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "10364deb76aeee372eb486c1216dca2a98dbd379 +├── file +│ └── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── some + └── nested + └── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "a cursor at '' is equivalent to 'as_cursor()', or the editor itself" + ); + let mut cursor = edit.cursor_at(["some", "nested"])?; + let actual = cursor + .upsert(Some("hello-from-cursor"), EntryKind::Blob, any_blob())? + .remove(Some("file"))? + .write(&mut write)?; + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "0f090b7c09c94f7895d0d8ce63c1da7693c026b3 +└── hello-from-cursor bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +" + ); + + let mut cursor = edit.set_root(root_tree).to_cursor(); + let actual = cursor + .remove(Some("bin"))? + .remove(Some("bin.d"))? + .remove(Some("file.to"))? + .remove(Some("file.toml"))? + .remove(Some("file.toml.bin"))? + .upsert(["some", "nested", "file"], EntryKind::Blob, any_blob())? 
+ .write(&mut write)?; + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "10364deb76aeee372eb486c1216dca2a98dbd379 +├── file +│ └── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── some + └── nested + └── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "this cursor is the same as the editor" + ); + let actual = cursor.remove(["some", "nested", "file"])?.write(&mut write)?; + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "6e2806ab1e4d4ae2c9d24ce113a9bb54f8eff97b +├── file +│ └── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +", + "it's possible to delete a deeply nested file" + ); + Ok(()) +} + +#[test] +fn from_empty_removal() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new(storage.clone()); + let mut edit = gix_object::tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1); + + let actual = edit + .remove(Some("non-existing"))? + .remove(["still", "does", "not", "exist"])? + .write(&mut write)?; + assert_eq!(actual, empty_tree(), "nothing was actually done"); + assert_eq!(num_writes_and_clear(), 1, "it has to write the empty tree though"); + assert_eq!(storage.borrow().len(), 1, "the empty tree ends up in storage, too"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(Some("file"), EntryKind::Blob, any_blob())? + .upsert(Some("empty-dir"), EntryKind::Tree, empty_tree())? + .upsert(["with-subdir", "dir", "file"], EntryKind::Blob, any_blob())? + .upsert(["with-subdir2", "dir", "file"], EntryKind::Blob, any_blob())? + .remove(Some("file"))? + .remove(Some("empty-dir"))? + .remove(Some("with-subdir"))? + .remove(["with-subdir2", "dir"])? + .remove(Some("with-subdir2"))? + .write(&mut write)?; + assert_eq!(actual, empty_tree(), "nothing was actually done"); + assert_eq!(num_writes_and_clear(), 1, "still nothing to write but the empty tree"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(Some("file"), EntryKind::Blob, any_blob())? + .upsert(Some("empty-dir"), EntryKind::Tree, empty_tree())? + .upsert(["with-subdir", "dir", "file"], EntryKind::Blob, any_blob())? + .upsert(["with-subdir2", "dir", "file"], EntryKind::Blob, any_blob())? + .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "9e608223b4cbc733abd20fb6d5b8ea80b074be17 +├── empty-dir (empty) +├── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +├── with-subdir +│ └── dir +│ └── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +└── with-subdir2 + └── dir + └── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +" + ); + assert_eq!(num_writes_and_clear(), 5); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .remove(Some("file"))? + .remove(Some("empty-dir"))? + .remove(Some("with-subdir"))? + .remove(["with-subdir2", "dir"])? + .remove(Some("with-subdir2"))? + .write(&mut write)?; + assert_eq!(actual, empty_tree(), "everything was removed, leaving nothing"); + assert_eq!(num_writes_and_clear(), 1, "only the empty tree to write"); + assert_eq!( + odb.access_count_and_clear(), + 1, + "has to get `with-subdir2` to remove child-entry" + ); + + let actual = edit + .upsert(["with-subdir", "file"], EntryKind::Blob, any_blob())? + .upsert(["with-subdir", "dir", "file"], EntryKind::Blob, any_blob())? + .remove(["with-subdir", "dir"])? 
+ .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "da2277079f7e9e5a012c9a03d1aac710866ee2c5 +└── with-subdir + └── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "only one file remains, empty dirs are removed automatically" + ); + assert_eq!(num_writes_and_clear(), 1 + 1, "root and one subtree"); + assert_eq!(storage.borrow().len(), 1 + 4, "empty tree and 4 unique trees"); + assert_eq!(odb.access_count_and_clear(), 0); + + Ok(()) +} + +#[test] +fn from_existing_remove() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new_with_odb(storage.clone(), tree_odb()?); + let root_tree_id = hex_to_id("ff7e7d2aecae1c3fb15054b289a4c58aa65b8646"); + let root_tree = find_tree(&odb, root_tree_id)?; + odb.access_count_and_clear(); + let mut edit = gix_object::tree::Editor::new(root_tree.clone(), &odb, gix_hash::Kind::Sha1); + + let actual = edit + .remove(["file"])? + .remove(Some("does not exist"))? + .remove(["also", "does", "not", "exist"])? + .remove(Some("bin.d"))? + .remove(Some("file.toml.bin"))? + .remove(Some("file.0"))? + .write(&mut write)?; + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "dfd0d048f8e852879ad8e1a6a9b810873de16a9c +├── bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +" + ); + assert_eq!(num_writes_and_clear(), 1, "only the root tree is written"); + assert_eq!( + odb.access_count_and_clear(), + 0, + "no sub-tree has to be queried for removal" + ); + + let actual = edit + .remove(Some("bin"))? + .remove(Some("file.to"))? + .remove(Some("file.toml"))? + .remove(Some("file0"))? + .write(&mut write)?; + assert_eq!(actual, empty_tree(), "nothing is left"); + assert_eq!(num_writes_and_clear(), 1, "only the empty tree is written"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit.set_root(root_tree).remove(["file", "a"])?.write(&mut write)?; + assert_eq!(num_writes_and_clear(), 1, "it writes the changed root-tree"); + assert_eq!( + odb.access_count_and_clear(), + 1, + "lookup `file` to delete its (only) content" + ); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "38a66b78d1d5dd8daedf6188d2fafd98357a870c +├── bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── bin.d e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml.bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +", + "`file` is removed as it remains empty" + ); + + Ok(()) +} + +#[test] +fn from_empty_invalid_write() -> crate::Result { + let (storage, mut write, _num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new(storage.clone()); + let mut edit = gix_object::tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1); + + let actual = edit + .upsert(["a", "\n"], EntryKind::Blob, any_blob())? + .write(&mut write) + .expect("no validation is performed"); + assert_eq!( + display_tree(actual, &storage), + "d23290ea39c284156731188dce62c17ac6b71bda +└── a + └── \n bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +" + ); + + let err = edit + .upsert(Some("with\0null"), EntryKind::Blob, any_blob())? 
+ .write(&mut write) + .unwrap_err(); + assert_eq!( + err.to_string(), + "Nullbytes are invalid in file paths as they are separators: \"with\\0null\"" + ); + + let err = edit.upsert(Some(""), EntryKind::Blob, any_blob()).unwrap_err(); + let expected_msg = "Empty path components are not allowed"; + assert_eq!(err.to_string(), expected_msg); + let err = edit + .upsert(["fine", "", "previous is not fine"], EntryKind::Blob, any_blob()) + .unwrap_err(); + assert_eq!(err.to_string(), expected_msg); + + let actual = edit + .remove(Some("a"))? + .remove(Some("with\0null"))? + .upsert(Some("works"), EntryKind::Blob, any_blob())? + .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "d5b913c39b06507c7c64adb16c268ce1102ef5c1 +└── works bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "after removing invalid entries, it can write again" + ); + Ok(()) +} + +#[test] +fn from_empty_add() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new(storage.clone()); + let mut edit = gix_object::tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1); + + let actual = edit.write(&mut write).expect("no changes are fine"); + assert_eq!(actual, empty_tree(), "empty stays empty"); + assert_eq!(num_writes_and_clear(), 1, "the empty tree was written"); + assert_eq!( + display_tree(actual, &storage), + "4b825dc642cb6eb9a060e54bf8d69288fbee4904\n" + ); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(Some("hi"), EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .write(&mut write) + .expect("effectively no changes are fine"); + assert_eq!( + actual, + empty_tree(), + "null-ids are dropped automatically, they act as placeholders" + ); + assert_eq!(num_writes_and_clear(), 1, "the empty tree was written, nothing new"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(["a", "b", "c"], EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .upsert(["a", "b", "d", "e"], EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .write(&mut write) + .expect("effectively no changes are fine"); + assert_eq!( + actual, + empty_tree(), + "null-ids are dropped automatically, recursively, they act as placeholders" + ); + assert_eq!( + num_writes_and_clear(), + 1, + "the empty tree was written as root, nothing new" + ); + assert_eq!(storage.borrow().len(), 1, "still nothing but empty trees"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(["a", "b"], EntryKind::Tree, empty_tree())? + .upsert(["a", "b", "c"], EntryKind::Tree, empty_tree())? + .upsert(["a", "b", "d", "e"], EntryKind::Tree, empty_tree())? + .write(&mut write) + .expect("it's OK to write empty trees"); + assert_eq!( + display_tree(actual, &storage), + "bf91a94ae659ac8a9da70d26acf42df1a36adb6e +└── a + └── b + ├── c (empty) + └── d + └── e (empty) +", + "one can write through trees, and empty trees are also fine" + ); + assert_eq!(num_writes_and_clear(), 4, "it wrote the trees it needed to write"); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(["a"], EntryKind::Blob, any_blob())? + .upsert(["a", "b"], EntryKind::Blob, any_blob())? + .upsert(["a", "b", "c"], EntryKind::BlobExecutable, any_blob())? + .upsert(["x", "z"], EntryKind::Blob, any_blob())? 
+ .write(&mut write) + .expect("writing made-up blobs is fine"); + assert_eq!( + display_tree(actual, &storage), + "7534352b2f718388b6c6d0f70aaf12399f38258c +├── a +│ └── b +│ └── c bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +└── x + └── z bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "it's possible to write through previously added blobs" + ); + assert_eq!(num_writes_and_clear(), 4); + assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit.upsert(["x"], EntryKind::Blob, any_blob())?.write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "c2bb1616d4db21d99a30a1219d7d47e969f42e26 +├── a +│ └── b +│ └── c bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +└── x bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +" + ); + assert_eq!(num_writes_and_clear(), 1, "just the root tree changed"); + assert_eq!(odb.access_count_and_clear(), 0); + + let prev_tree = actual; + let actual = edit + .upsert(["a", "b", "c"], EntryKind::BlobExecutable, any_blob())? + .write(&mut write)?; + assert_eq!(actual, prev_tree, "writing the same path again is a no-op"); + assert_eq!( + num_writes_and_clear(), + 3, + "it still rewrites all paths that (potentially) changed. \ + There is no actual change tracking as no-changes aren't the default case for an editor" + ); + assert_eq!(odb.access_count_and_clear(), 2, "`a` and `a/b`"); + + let actual = edit + .upsert(["a", "b", "c"], EntryKind::Blob, any_blob())? + .upsert(["a"], EntryKind::Blob, any_blob())? + .write(&mut write) + .expect("we can turn overwrite a newly added tree (at 'a/') with a blob"); + assert_eq!( + display_tree(actual, &storage), + "7c66d7d5cbfdbbb37085ff5c8c6e5b048727cf88 +├── a bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +└── x bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +", + "now a tree was once again changed into a blob" + ); + assert_eq!(num_writes_and_clear(), 1, "only the root-tree changes effectively"); + assert_eq!(odb.access_count_and_clear(), 2, "`a` and `a/b`"); + + let actual = edit + .set_root(Tree::default()) + .upsert(["a", "b", "c"], EntryKind::Blob, any_blob())? + .upsert(["a"], EntryKind::BlobExecutable, any_blob())? 
+ .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "f7b85940c3afa596829cacf98e98ff8bfd7c68ed +└── a bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +", + "now the root is back to a well-known state, so edits are more intuitive" + ); + assert_eq!( + num_writes_and_clear(), + 1, + "still, only the root-tree changes effectively" + ); + assert_eq!(odb.access_count_and_clear(), 0); + Ok(()) +} + +#[test] +fn from_existing_add() -> crate::Result { + let (storage, mut write, num_writes_and_clear) = new_inmemory_writes(); + let odb = StorageOdb::new_with_odb(storage.clone(), tree_odb()?); + let root_tree_id = hex_to_id("ff7e7d2aecae1c3fb15054b289a4c58aa65b8646"); + let root_tree = find_tree(&odb, root_tree_id)?; + odb.access_count_and_clear(); + let mut edit = gix_object::tree::Editor::new(root_tree.clone(), &odb, gix_hash::Kind::Sha1); + + let actual = edit.write(&mut write).expect("no changes are fine"); + assert_eq!(actual, root_tree_id, "it rewrites the same tree"); + assert_eq!(odb.access_count_and_clear(), 0); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "ff7e7d2aecae1c3fb15054b289a4c58aa65b8646 +├── bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── bin.d e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml.bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file +│ └── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +" + ); + assert_eq!( + num_writes_and_clear(), + 1, + "only the root is written - there is no change tracking" + ); + + let actual = edit + .upsert(["file", "hi"], EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .write(&mut write) + .expect("effectively no changes are fine"); + assert_eq!( + actual, root_tree_id, + "null-ids are dropped automatically, they act as placeholders, ultimately the tree is not changed" + ); + assert_eq!( + storage.borrow().len(), + 2, + "it writes two trees, even though none is new" + ); + assert_eq!(num_writes_and_clear(), 2, "the write-count reflects that"); + + odb.access_count_and_clear(); + let actual = edit + .upsert(["a", "b", "c"], EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .upsert(["a", "b", "d", "e"], EntryKind::Blob, gix_hash::Kind::Sha1.null())? + .write(&mut write) + .expect("effectively no changes are fine"); + assert_eq!( + actual, root_tree_id, + "null-ids are dropped automatically, recursively, and empty intermediate trees are removed as well" + ); + assert_eq!(storage.borrow().len(), 2, "still the same amount of trees"); + assert_eq!( + num_writes_and_clear(), + 1, + "but only the root-tree is written (with nulls pruned)" + ); + assert_eq!(odb.access_count_and_clear(), 0); + + odb.access_count_and_clear(); + let actual = edit + .upsert(["bin", "b"], EntryKind::Tree, empty_tree())? + .upsert(["bin", "b", "c"], EntryKind::Tree, empty_tree())? + .upsert(["a", "b", "d", "e"], EntryKind::Tree, empty_tree())? 
+ .write(&mut write) + .expect("it's OK to write empty leaf-trees"); + assert_eq!( + odb.access_count_and_clear(), + 0, + "we write through blobs, and thus create trees on the fly" + ); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "a57b69b5c216b8417d332e64a4f3014eb9962099 +├── a +│ └── b +│ └── d +│ └── e (empty) +├── bin.d e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── bin +│ └── b +│ └── c (empty) +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml.bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file +│ └── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +", + "one can write through trees and blobs, and empty leaf trees are also fine" + ); + assert_eq!( + num_writes_and_clear(), + 1 + 2 + 3, + "each changed tree is written: root, the two subtrees" + ); + + odb.access_count_and_clear(); + let actual = edit + .upsert(["a", "b", "c"], EntryKind::Blob, any_blob())? + .upsert(["a", "b"], EntryKind::Blob, any_blob())? + .upsert(["file"], EntryKind::BlobExecutable, any_blob())? + .write(&mut write)?; + assert_eq!(odb.access_count_and_clear(), 2, "`a` and `a/b`"); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "25a0bd46bc61342e17f03fafcc029e9db52d4c64 +├── a +│ └── b bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644 +├── bin.d e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── bin +│ └── b +│ └── c (empty) +├── file bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml.bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +", + "it properly sorts entries after type-changes" + ); + assert_eq!(num_writes_and_clear(), 1 + 1, "the root and one subtree"); + + odb.access_count_and_clear(); + let actual = edit + .set_root(root_tree) + .upsert(["file", "subdir", "exe"], EntryKind::BlobExecutable, any_blob())? + .write(&mut write)?; + assert_eq!( + odb.access_count_and_clear(), + 1, + "`file` only, everything else is inserted" + ); + assert_eq!( + display_tree_with_odb(actual, &storage, &odb), + "6a5115bfb88b8303b837854199b90232621f8535 +├── bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── bin.d e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.to e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file.toml.bin e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +├── file +│ ├── a e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +│ └── subdir +│ └── exe bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +└── file0 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.100644 +", + "now the root is back to a well-known state" + ); + assert_eq!(num_writes_and_clear(), 1 + 2, "the root and one subtree with directory"); + Ok(()) +} + +mod utils { + use crate::hex_to_id; + use bstr::{BStr, ByteSlice}; + use gix_hash::ObjectId; + use gix_object::{Tree, WriteTo}; + use std::cell::{Cell, RefCell}; + use std::rc::Rc; + + type TreeStore = Rc>>; + pub(super) struct StorageOdb(TreeStore, Option, Cell); + + pub(super) fn tree_odb() -> gix_testtools::Result { + let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; + Ok(gix_odb::at(root.join(".git/objects"))?) 
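Outside of the fixtures, the same two calls used by these helpers are enough to turn an object directory into an editable tree: `gix_odb::at()` for the handle and `FindExt::find_tree()` for the lookup. A hedged sketch; the path handling and error boxing are assumptions:

```rust
use gix_object::{FindExt, Tree};

// Open `.git/objects` and load the tree with the given id as an owned `Tree`,
// ready to serve as the root of a `tree::Editor`.
fn load_tree(
    objects_dir: &std::path::Path,
    id: gix_hash::ObjectId,
) -> Result<Tree, Box<dyn std::error::Error>> {
    let odb = gix_odb::at(objects_dir)?;
    let mut buf = Vec::new();
    // `find_tree()` yields a borrowed `TreeRef`; `.into()` copies it into an owned `Tree`.
    let tree: Tree = odb.find_tree(&id, &mut buf)?.into();
    Ok(tree)
}
```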
+ } + + pub(super) fn find_tree(odb: &impl gix_object::FindExt, id: ObjectId) -> gix_testtools::Result { + let mut buf = Vec::new(); + Ok(odb.find_tree(&id, &mut buf)?.into()) + } + + pub(super) fn new_inmemory_writes() -> ( + TreeStore, + impl FnMut(&Tree) -> Result, + impl Fn() -> usize, + ) { + let store = TreeStore::default(); + let num_writes = Rc::new(Cell::new(0_usize)); + let write_tree = { + let store = store.clone(); + let num_writes = num_writes.clone(); + let mut buf = Vec::with_capacity(512); + move |tree: &Tree| { + buf.clear(); + tree.write_to(&mut buf)?; + let header = gix_object::encode::loose_header(gix_object::Kind::Tree, buf.len() as u64); + let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); + hasher.update(&header); + hasher.update(&buf); + let id = hasher.digest().into(); + store.borrow_mut().insert(id, tree.clone()); + let old = num_writes.get(); + num_writes.set(old + 1); + Ok(id) + } + }; + let obtain_num_writes = { + let c = num_writes.clone(); + move || { + let res = c.get(); + c.set(0); + res + } + }; + (store, write_tree, obtain_num_writes) + } + + impl StorageOdb { + pub fn new(storage: TreeStore) -> Self { + Self(storage, None, Cell::new(0)) + } + pub fn new_with_odb(storage: TreeStore, odb: gix_odb::Handle) -> Self { + Self(storage, Some(odb), Cell::new(0)) + } + pub fn access_count_and_clear(&self) -> usize { + let res = self.2.get(); + self.2.set(0); + res + } + } + + impl gix_object::Find for StorageOdb { + fn try_find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result>, gix_object::find::Error> { + let borrow = self.0.borrow(); + let old = self.2.get(); + self.2.set(old + 1); + match borrow.get(id) { + None => self.1.as_ref().map_or(Ok(None), |odb| odb.try_find(id, buffer)), + Some(tree) => { + buffer.clear(); + tree.write_to(buffer).expect("valid trees can always be serialized"); + Ok(Some(gix_object::Data { + kind: gix_object::Kind::Tree, + data: &*buffer, + })) + } + } + } + } + + fn display_tree_recursive( + tree_id: ObjectId, + storage: &TreeStore, + odb: Option<&dyn gix_object::FindExt>, + name: Option<&BStr>, + buf: &mut Vec, + ) -> termtree::Tree { + let mut tree_storage = None; + let borrow = storage.borrow(); + let tree = borrow + .get(&tree_id) + .or_else(|| { + if tree_id.is_empty_tree() { + tree_storage = Some(Tree::default()); + tree_storage.as_ref() + } else { + odb.and_then(|odb| { + tree_storage = odb.find_tree(&tree_id, buf).map(Into::into).ok(); + tree_storage.as_ref() + }) + } + }) + .unwrap_or_else(|| panic!("tree {tree_id} is always present")); + + let mut termtree = termtree::Tree::new(if let Some(name) = name { + if tree.entries.is_empty() { + format!("{name} (empty)") + } else { + name.to_string() + } + } else { + tree_id.to_string() + }); + + for entry in &tree.entries { + if entry.mode.is_tree() { + termtree.push(display_tree_recursive( + entry.oid, + storage, + odb, + Some(entry.filename.as_bstr()), + buf, + )); + } else { + termtree.push(format!( + "{} {}.{}", + entry.filename, + entry.oid, + entry.mode.kind().as_octal_str() + )); + } + } + termtree + } + + pub(super) fn display_tree(tree_id: ObjectId, storage: &TreeStore) -> String { + let mut buf = Vec::new(); + display_tree_recursive(tree_id, storage, None, None, &mut buf).to_string() + } + + pub(super) fn display_tree_with_odb( + tree_id: ObjectId, + storage: &TreeStore, + odb: &impl gix_object::FindExt, + ) -> String { + let mut buf = Vec::new(); + display_tree_recursive(tree_id, storage, Some(odb), None, &mut buf).to_string() + } + 
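// Editor's note - an illustrative sketch, not part of the patch, relying on the imports already
// present in this `utils` module: the manual `loose_header()` + `gix_features::hash::hasher()`
// combination in `new_inmemory_writes()` above is the expanded form of
// `gix_object::compute_hash()`, which the new `gix_odb::memory` proxy later in this patch uses
// for the same purpose.
#[allow(dead_code)]
pub(super) fn tree_id_via_compute_hash(tree: &Tree) -> Result<ObjectId, std::io::Error> {
    let mut buf = Vec::with_capacity(512);
    // `write_to()` serializes the entries only; `compute_hash()` prepends the loose header itself.
    tree.write_to(&mut buf)?;
    Ok(gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Tree, &buf))
}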
+ pub(super) fn empty_tree() -> ObjectId { + ObjectId::empty_tree(gix_hash::Kind::Sha1) + } + + pub(super) fn any_blob() -> ObjectId { + hex_to_id("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb") + } +} +use crate::hex_to_id; +use utils::{ + any_blob, display_tree, display_tree_with_odb, empty_tree, find_tree, new_inmemory_writes, tree_odb, StorageOdb, +}; diff --git a/gix-object/tests/tree/entries.rs b/gix-object/tests/tree/entries.rs new file mode 100644 index 00000000000..f60e342da9d --- /dev/null +++ b/gix-object/tests/tree/entries.rs @@ -0,0 +1,42 @@ +use gix_object::{Tree, TreeRef}; + +#[test] +fn sort_order_is_correct() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; + let input = std::fs::read(root.join("tree.baseline"))?; + + let mut tree = TreeRef::from_bytes(&input)?; + let expected = tree.entries.clone(); + + tree.entries.sort(); + assert_eq!(tree.entries, expected); + let mut failures_when_searching_by_name = 0; + for entry in expected { + assert!( + tree.entries.binary_search_by(|e| e.cmp(&entry)).is_ok(), + "ordering works with binary search" + ); + failures_when_searching_by_name += usize::from( + tree.entries + .binary_search_by(|e| e.filename.cmp(entry.filename)) + .is_err(), + ); + assert_eq!( + tree.bisect_entry(entry.filename, entry.mode.is_tree()) + .expect("entry is present"), + entry + ); + } + + assert_ne!( + failures_when_searching_by_name, 0, + "it's not possible to do a binary search by name alone" + ); + + let mut tree: Tree = tree.into(); + let expected = tree.entries.clone(); + tree.entries.sort(); + + assert_eq!(tree.entries, expected); + Ok(()) +} diff --git a/gix-object/tests/tree/entry_mode.rs b/gix-object/tests/tree/entry_mode.rs new file mode 100644 index 00000000000..0fe4c17fc59 --- /dev/null +++ b/gix-object/tests/tree/entry_mode.rs @@ -0,0 +1,71 @@ +use gix_object::tree::{EntryKind, EntryMode}; + +#[test] +fn size_in_bytes() { + assert_eq!( + std::mem::size_of::(), + 2, + "it should not change without notice" + ); +} + +#[test] +fn is_methods() { + fn mode(kind: EntryKind) -> EntryMode { + kind.into() + } + + assert!(mode(EntryKind::Blob).is_blob()); + assert!(EntryMode(0o100645).is_blob()); + assert_eq!(EntryMode(0o100645).kind(), EntryKind::Blob); + assert!(!EntryMode(0o100675).is_executable()); + assert!(EntryMode(0o100700).is_executable()); + assert_eq!(EntryMode(0o100700).kind(), EntryKind::BlobExecutable); + assert!(!mode(EntryKind::Blob).is_link()); + assert!(mode(EntryKind::BlobExecutable).is_blob()); + assert!(mode(EntryKind::BlobExecutable).is_executable()); + assert!(mode(EntryKind::Blob).is_blob_or_symlink()); + assert!(mode(EntryKind::BlobExecutable).is_blob_or_symlink()); + + assert!(!mode(EntryKind::Link).is_blob()); + assert!(mode(EntryKind::Link).is_link()); + assert!(EntryMode(0o121234).is_link()); + assert_eq!(EntryMode(0o121234).kind(), EntryKind::Link); + assert!(mode(EntryKind::Link).is_blob_or_symlink()); + assert!(mode(EntryKind::Tree).is_tree()); + assert!(EntryMode(0o040101).is_tree()); + assert_eq!(EntryMode(0o040101).kind(), EntryKind::Tree); + assert!(mode(EntryKind::Commit).is_commit()); + assert!(EntryMode(0o167124).is_commit()); + assert_eq!(EntryMode(0o167124).kind(), EntryKind::Commit); + assert_eq!( + EntryMode(0o000000).kind(), + EntryKind::Commit, + "commit is really 'anything else' as `kind()` can't fail" + ); +} + +#[test] +fn as_bytes() { + let mut buf = Default::default(); + for (mode, expected) in [ + (EntryMode::from(EntryKind::Tree), EntryKind::Tree.as_octal_str()), + 
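// Editor's note (not part of the patch): for reference, the canonical octal strings are
// `40000` (Tree), `100644` (Blob), `100755` (BlobExecutable), `120000` (Link) and
// `160000` (Commit); the `EntryMode::try_from()` cases further down show that non-canonical
// but well-formed modes such as `100744` round-trip unchanged.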
(EntryKind::Blob.into(), EntryKind::Blob.as_octal_str()), + ( + EntryKind::BlobExecutable.into(), + EntryKind::BlobExecutable.as_octal_str(), + ), + (EntryKind::Link.into(), EntryKind::Link.as_octal_str()), + (EntryKind::Commit.into(), EntryKind::Commit.as_octal_str()), + ( + EntryMode::try_from(b"100744 ".as_ref()).expect("valid"), + "100744".into(), + ), + ( + EntryMode::try_from(b"100644 ".as_ref()).expect("valid"), + "100644".into(), + ), + ] { + assert_eq!(mode.as_bytes(&mut buf), expected); + } +} diff --git a/gix-object/tests/tree/from_bytes.rs b/gix-object/tests/tree/from_bytes.rs new file mode 100644 index 00000000000..97b68d8daa0 --- /dev/null +++ b/gix-object/tests/tree/from_bytes.rs @@ -0,0 +1,106 @@ +use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, Tree, TreeRef, TreeRefIter, WriteTo}; + +use crate::{fixture_name, hex_to_id}; + +#[test] +fn empty() -> crate::Result { + let tree_ref = TreeRef::from_bytes(&[])?; + assert_eq!( + tree_ref, + TreeRef { entries: vec![] }, + "empty trees are valid despite usually rare in the wild" + ); + + let mut buf = Vec::new(); + tree_ref.write_to(&mut buf)?; + assert!(buf.is_empty()); + + buf.clear(); + Tree::from(tree_ref).write_to(&mut buf)?; + assert!(buf.is_empty()); + Ok(()) +} + +#[test] +fn everything() -> crate::Result { + let fixture = fixture_name("tree", "everything.tree"); + let tree_ref = TreeRef::from_bytes(&fixture)?; + assert_eq!( + tree_ref, + TreeRef { + entries: vec![ + EntryRef { + mode: tree::EntryKind::BlobExecutable.into(), + filename: b"exe".as_bstr(), + oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + EntryRef { + mode: tree::EntryKind::Blob.into(), + filename: b"file".as_bstr(), + oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + EntryRef { + mode: tree::EntryKind::Commit.into(), + filename: b"grit-submodule".as_bstr(), + oid: &hex_to_id("b2d1b5d684bdfda5f922b466cc13d4ce2d635cf8") + }, + EntryRef { + mode: tree::EntryKind::Tree.into(), + filename: b"subdir".as_bstr(), + oid: &hex_to_id("4d5fcadc293a348e88f777dc0920f11e7d71441c") + }, + EntryRef { + mode: tree::EntryKind::Link.into(), + filename: b"symlink".as_bstr(), + oid: &hex_to_id("1a010b1c0f081b2e8901d55307a15c29ff30af0e") + } + ] + } + ); + Ok(()) +} + +#[test] +fn invalid() { + let fixture = fixture_name("tree", "definitely-special.tree"); + let partial_tree = &fixture[..fixture.len() / 2]; + let err = TreeRef::from_bytes(partial_tree).unwrap_err().to_string(); + if cfg!(feature = "verbose-object-parsing-errors") { + assert!(err.starts_with("object parsing failed at `100644"), "{err}"); + } else { + assert_eq!(err, "object parsing failed"); + } + assert_eq!( + TreeRefIter::from_bytes(partial_tree).take_while(Result::is_ok).count(), + 9, + "we can decode about half of it before failing" + ); +} + +#[test] +fn fuzzed() { + assert!(gix_object::TreeRef::from_bytes(b"2").is_err(), "fail, but don't crash"); +} + +#[test] +fn special_trees() -> crate::Result { + for (name, expected_entry_count) in [ + ("maybe-special", 160), + ("definitely-special", 19), + ("special-1", 5), + ("special-2", 18), + ("special-3", 5), + ("special-4", 18), + ("special-5", 17), + ] { + let fixture = fixture_name("tree", &format!("{name}.tree")); + let actual = TreeRef::from_bytes(&fixture)?; + assert_eq!(actual.entries.len(), expected_entry_count, "{name}"); + assert_eq!( + TreeRefIter::from_bytes(&fixture).map(Result::unwrap).count(), + expected_entry_count, + "{name}" + ); + } + Ok(()) +} diff --git a/gix-object/tests/tree/iter.rs 
b/gix-object/tests/tree/iter.rs new file mode 100644 index 00000000000..6ba637b2546 --- /dev/null +++ b/gix-object/tests/tree/iter.rs @@ -0,0 +1,54 @@ +use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, TreeRefIter}; + +use crate::{fixture_name, hex_to_id}; + +#[test] +fn empty() { + assert_eq!(TreeRefIter::from_bytes(&[]).count(), 0, "empty trees are definitely ok"); +} + +#[test] +fn error_handling() { + let data = fixture_name("tree", "everything.tree"); + let iter = TreeRefIter::from_bytes(&data[..data.len() / 2]); + let entries = iter.collect::>(); + assert!( + entries.last().expect("at least one token").is_err(), + "errors are propagated and none is returned from that point on" + ); +} + +#[test] +fn everything() -> crate::Result { + assert_eq!( + TreeRefIter::from_bytes(&fixture_name("tree", "everything.tree")).collect::, _>>()?, + vec![ + EntryRef { + mode: tree::EntryKind::BlobExecutable.into(), + filename: b"exe".as_bstr(), + oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + EntryRef { + mode: tree::EntryKind::Blob.into(), + filename: b"file".as_bstr(), + oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + EntryRef { + mode: tree::EntryKind::Commit.into(), + filename: b"grit-submodule".as_bstr(), + oid: &hex_to_id("b2d1b5d684bdfda5f922b466cc13d4ce2d635cf8") + }, + EntryRef { + mode: tree::EntryKind::Tree.into(), + filename: b"subdir".as_bstr(), + oid: &hex_to_id("4d5fcadc293a348e88f777dc0920f11e7d71441c") + }, + EntryRef { + mode: tree::EntryKind::Link.into(), + filename: b"symlink".as_bstr(), + oid: &hex_to_id("1a010b1c0f081b2e8901d55307a15c29ff30af0e") + } + ] + ); + Ok(()) +} diff --git a/gix-object/tests/tree/mod.rs b/gix-object/tests/tree/mod.rs index bb1055c776a..1265f21b5d0 100644 --- a/gix-object/tests/tree/mod.rs +++ b/gix-object/tests/tree/mod.rs @@ -1,276 +1,5 @@ -mod iter { - use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, TreeRefIter}; - - use crate::{fixture_name, hex_to_id}; - - #[test] - fn empty() { - assert_eq!(TreeRefIter::from_bytes(&[]).count(), 0, "empty trees are definitely ok"); - } - - #[test] - fn error_handling() { - let data = fixture_name("tree", "everything.tree"); - let iter = TreeRefIter::from_bytes(&data[..data.len() / 2]); - let entries = iter.collect::>(); - assert!( - entries.last().expect("at least one token").is_err(), - "errors are propagated and none is returned from that point on" - ); - } - - #[test] - fn everything() -> crate::Result { - assert_eq!( - TreeRefIter::from_bytes(&fixture_name("tree", "everything.tree")).collect::, _>>()?, - vec![ - EntryRef { - mode: tree::EntryKind::BlobExecutable.into(), - filename: b"exe".as_bstr(), - oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - EntryRef { - mode: tree::EntryKind::Blob.into(), - filename: b"file".as_bstr(), - oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - EntryRef { - mode: tree::EntryKind::Commit.into(), - filename: b"grit-submodule".as_bstr(), - oid: &hex_to_id("b2d1b5d684bdfda5f922b466cc13d4ce2d635cf8") - }, - EntryRef { - mode: tree::EntryKind::Tree.into(), - filename: b"subdir".as_bstr(), - oid: &hex_to_id("4d5fcadc293a348e88f777dc0920f11e7d71441c") - }, - EntryRef { - mode: tree::EntryKind::Link.into(), - filename: b"symlink".as_bstr(), - oid: &hex_to_id("1a010b1c0f081b2e8901d55307a15c29ff30af0e") - } - ] - ); - Ok(()) - } -} - -mod from_bytes { - use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, TreeRef, TreeRefIter}; - - use crate::{fixture_name, hex_to_id}; - - #[test] - fn empty() -> 
crate::Result { - assert_eq!( - TreeRef::from_bytes(&[])?, - TreeRef { entries: vec![] }, - "empty trees are valid despite usually rare in the wild" - ); - Ok(()) - } - - #[test] - fn everything() -> crate::Result { - assert_eq!( - TreeRef::from_bytes(&fixture_name("tree", "everything.tree"))?, - TreeRef { - entries: vec![ - EntryRef { - mode: tree::EntryKind::BlobExecutable.into(), - filename: b"exe".as_bstr(), - oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - EntryRef { - mode: tree::EntryKind::Blob.into(), - filename: b"file".as_bstr(), - oid: &hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - EntryRef { - mode: tree::EntryKind::Commit.into(), - filename: b"grit-submodule".as_bstr(), - oid: &hex_to_id("b2d1b5d684bdfda5f922b466cc13d4ce2d635cf8") - }, - EntryRef { - mode: tree::EntryKind::Tree.into(), - filename: b"subdir".as_bstr(), - oid: &hex_to_id("4d5fcadc293a348e88f777dc0920f11e7d71441c") - }, - EntryRef { - mode: tree::EntryKind::Link.into(), - filename: b"symlink".as_bstr(), - oid: &hex_to_id("1a010b1c0f081b2e8901d55307a15c29ff30af0e") - } - ] - } - ); - Ok(()) - } - - #[test] - fn invalid() { - let fixture = fixture_name("tree", "definitely-special.tree"); - let partial_tree = &fixture[..fixture.len() / 2]; - let err = TreeRef::from_bytes(partial_tree).unwrap_err().to_string(); - if cfg!(feature = "verbose-object-parsing-errors") { - assert!(err.starts_with("object parsing failed at `100644"), "{err}"); - } else { - assert_eq!(err, "object parsing failed"); - } - assert_eq!( - TreeRefIter::from_bytes(partial_tree).take_while(Result::is_ok).count(), - 9, - "we can decode about half of it before failing" - ); - } - - #[test] - fn fuzzed() { - assert!(gix_object::TreeRef::from_bytes(b"2").is_err(), "fail, but don't crash"); - } - - #[test] - fn special_trees() -> crate::Result { - for (name, expected_entry_count) in [ - ("maybe-special", 160), - ("definitely-special", 19), - ("special-1", 5), - ("special-2", 18), - ("special-3", 5), - ("special-4", 18), - ("special-5", 17), - ] { - let fixture = fixture_name("tree", &format!("{name}.tree")); - assert_eq!( - TreeRef::from_bytes(&fixture)?.entries.len(), - expected_entry_count, - "{name}" - ); - assert_eq!( - TreeRefIter::from_bytes(&fixture).map(Result::unwrap).count(), - expected_entry_count, - "{name}" - ); - } - Ok(()) - } -} - -mod entries { - use gix_object::{Tree, TreeRef}; - - #[test] - fn sort_order_is_correct() -> crate::Result { - let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; - let input = std::fs::read(root.join("tree.baseline"))?; - - let mut tree = TreeRef::from_bytes(&input)?; - let expected = tree.entries.clone(); - - tree.entries.sort(); - assert_eq!(tree.entries, expected); - let mut failures_when_searching_by_name = 0; - for entry in expected { - assert!( - tree.entries.binary_search_by(|e| e.cmp(&entry)).is_ok(), - "ordering works with binary search" - ); - failures_when_searching_by_name += usize::from( - tree.entries - .binary_search_by(|e| e.filename.cmp(entry.filename)) - .is_err(), - ); - assert_eq!( - tree.bisect_entry(entry.filename, entry.mode.is_tree()) - .expect("entry is present"), - entry - ); - } - - assert_ne!( - failures_when_searching_by_name, 0, - "it's not possible to do a binary search by name alone" - ); - - let mut tree: Tree = tree.into(); - let expected = tree.entries.clone(); - tree.entries.sort(); - - assert_eq!(tree.entries, expected); - Ok(()) - } -} - -mod entry_mode { - use gix_object::tree::{EntryKind, EntryMode}; - - #[test] - fn 
size_in_bytes() { - assert_eq!( - std::mem::size_of::(), - 2, - "it should not change without notice" - ); - } - - #[test] - fn is_methods() { - fn mode(kind: EntryKind) -> EntryMode { - kind.into() - } - - assert!(mode(EntryKind::Blob).is_blob()); - assert!(EntryMode(0o100645).is_blob()); - assert_eq!(EntryMode(0o100645).kind(), EntryKind::Blob); - assert!(!EntryMode(0o100675).is_executable()); - assert!(EntryMode(0o100700).is_executable()); - assert_eq!(EntryMode(0o100700).kind(), EntryKind::BlobExecutable); - assert!(!mode(EntryKind::Blob).is_link()); - assert!(mode(EntryKind::BlobExecutable).is_blob()); - assert!(mode(EntryKind::BlobExecutable).is_executable()); - assert!(mode(EntryKind::Blob).is_blob_or_symlink()); - assert!(mode(EntryKind::BlobExecutable).is_blob_or_symlink()); - - assert!(!mode(EntryKind::Link).is_blob()); - assert!(mode(EntryKind::Link).is_link()); - assert!(EntryMode(0o121234).is_link()); - assert_eq!(EntryMode(0o121234).kind(), EntryKind::Link); - assert!(mode(EntryKind::Link).is_blob_or_symlink()); - assert!(mode(EntryKind::Tree).is_tree()); - assert!(EntryMode(0o040101).is_tree()); - assert_eq!(EntryMode(0o040101).kind(), EntryKind::Tree); - assert!(mode(EntryKind::Commit).is_commit()); - assert!(EntryMode(0o167124).is_commit()); - assert_eq!(EntryMode(0o167124).kind(), EntryKind::Commit); - assert_eq!( - EntryMode(0o000000).kind(), - EntryKind::Commit, - "commit is really 'anything else' as `kind()` can't fail" - ); - } - - #[test] - fn as_bytes() { - let mut buf = Default::default(); - for (mode, expected) in [ - (EntryMode::from(EntryKind::Tree), EntryKind::Tree.as_octal_str()), - (EntryKind::Blob.into(), EntryKind::Blob.as_octal_str()), - ( - EntryKind::BlobExecutable.into(), - EntryKind::BlobExecutable.as_octal_str(), - ), - (EntryKind::Link.into(), EntryKind::Link.as_octal_str()), - (EntryKind::Commit.into(), EntryKind::Commit.as_octal_str()), - ( - EntryMode::try_from(b"100744 ".as_ref()).expect("valid"), - "100744".into(), - ), - ( - EntryMode::try_from(b"100644 ".as_ref()).expect("valid"), - "100644".into(), - ), - ] { - assert_eq!(mode.as_bytes(&mut buf), expected); - } - } -} +mod editor; +mod entries; +mod entry_mode; +mod from_bytes; +mod iter; diff --git a/gix-odb/Cargo.toml b/gix-odb/Cargo.toml index ce87448814c..bad6f16033b 100644 --- a/gix-odb/Cargo.toml +++ b/gix-odb/Cargo.toml @@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"] [dependencies] gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] } +gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" } gix-hash = { version = "^0.14.2", path = "../gix-hash" } gix-date = { version = "^0.9.0", path = "../gix-date" } gix-path = { version = "^0.10.10", path = "../gix-path" } diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs index 40d7c2aea90..aaedc74a175 100644 --- a/gix-odb/src/lib.rs +++ b/gix-odb/src/lib.rs @@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink { } } +/// +pub mod memory; + mod sink; /// diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs new file mode 100644 index 00000000000..5fdbd2abe3e --- /dev/null +++ b/gix-odb/src/memory.rs @@ -0,0 +1,258 @@ +use crate::find::Header; +use crate::Cache; +use gix_object::Data; +use std::cell::RefCell; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; +use std::sync::Arc; + +/// An object database to read from any implementation but write to memory. 
+/// Previously written objects can be returned from memory upon query, which makes the view of objects consistent. +/// In-Memory objects can be disabled by [taking out its storage](Proxy::take_object_memory). From there in-memory +/// object can also be persisted one by one. +/// +/// It's possible to turn off the memory by removing it from the instance. +pub struct Proxy { + /// The actual odb implementation + inner: T, + /// The kind of hash to produce when writing new objects. + object_hash: gix_hash::Kind, + /// The storage for in-memory objects. + /// If `None`, the proxy will always read from and write-through to `inner`. + memory: Option>, +} + +/// Lifecycle +impl Proxy { + /// Create a new instance using `odb` as actual object provider, with an empty in-memory store for + /// objects that are to be written. + /// Use `object_hash` to determine the kind of hash to produce when writing new objects. + pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy { + Proxy { + inner: odb, + object_hash, + memory: Some(Default::default()), + } + } + + /// Turn ourselves into our inner object database, while deallocating objects stored in memory. + pub fn into_inner(self) -> T { + self.inner + } + + /// Strip object memory off this instance, which means that writes will go through to the inner object database + /// right away. + /// This mode makes the proxy fully transparent. + pub fn with_write_passthrough(mut self) -> Self { + self.memory.take(); + self + } +} + +impl Proxy>>> { + /// No op, as we are containing an arc handle already. + pub fn into_arc(self) -> std::io::Result>>>> { + Ok(self) + } +} + +impl Proxy>>> { + /// Create an entirely new instance, but with the in-memory objects moving between them. + pub fn into_arc(self) -> std::io::Result>>>> { + Ok(Proxy { + inner: self.inner.into_arc()?, + object_hash: self.object_hash, + memory: self.memory, + }) + } +} + +impl From for Proxy { + fn from(odb: crate::Handle) -> Self { + let object_hash = odb.store.object_hash; + Proxy::new(odb, object_hash) + } +} + +/// Memory Access +impl Proxy { + /// Take all the objects in memory so far, with the memory storage itself and return it. + /// + /// The instance will remain in a state where it won't be able to store objects in memory at all, + /// they will now be stored in the underlying object database. + /// This mode makes the proxy fully transparent. + /// + /// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or return the storage + /// using [`set_object_memory()`](Self::set_object_memory()). + pub fn take_object_memory(&mut self) -> Option { + self.memory.take().map(RefCell::into_inner) + } + + /// Set the object storage to contain only `new` objects, and return whichever objects were there previously. + pub fn set_object_memory(&mut self, new: Storage) -> Option { + let previous = self.take_object_memory(); + self.memory = Some(RefCell::new(new)); + previous + } + + /// If objects aren't written to memory yet, this will happen after the call. + /// + /// Otherwise, no change will be performed. + pub fn enable_object_memory(&mut self) -> &mut Self { + if self.memory.is_none() { + self.memory = Some(Default::default()); + } + self + } + + /// Reset the internal storage to be empty, and return the previous storage, with all objects + /// it contained. + /// + /// Note that this does nothing if this instance didn't contain object memory in the first place. + /// In that case, set it explicitly. 
+ pub fn reset_object_memory(&self) -> Option { + self.memory.as_ref().map(|m| std::mem::take(&mut *m.borrow_mut())) + } + + /// Return the amount of objects currently stored in memory. + pub fn num_objects_in_memory(&self) -> usize { + self.memory.as_ref().map_or(0, |m| m.borrow().len()) + } +} + +impl Clone for Proxy +where + T: Clone, +{ + fn clone(&self) -> Self { + Proxy { + inner: self.inner.clone(), + object_hash: self.object_hash, + memory: self.memory.clone(), + } + } +} + +impl gix_object::Find for Proxy +where + T: gix_object::Find, +{ + fn try_find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result>, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + buffer.clear(); + buffer.extend_from_slice(data); + return Ok(Some(Data { + kind: *kind, + data: &*buffer, + })); + } + } + self.inner.try_find(id, buffer) + } +} + +impl gix_object::Exists for Proxy +where + T: gix_object::Exists, +{ + fn exists(&self, id: &gix_hash::oid) -> bool { + self.memory.as_ref().map_or(false, |map| map.borrow().contains_key(id)) || self.inner.exists(id) + } +} + +impl crate::Header for Proxy +where + T: crate::Header, +{ + fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + return Ok(Some(Header::Loose { + kind: *kind, + size: data.len() as u64, + })); + } + } + self.inner.try_header(id) + } +} + +impl gix_object::FindHeader for Proxy +where + T: gix_object::FindHeader, +{ + fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + return Ok(Some(gix_object::Header { + kind: *kind, + size: data.len() as u64, + })); + } + } + self.inner.try_header(id) + } +} + +impl crate::Write for Proxy +where + T: crate::Write, +{ + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_stream(kind, size, from); + }; + + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + + let id = gix_object::compute_hash(self.object_hash, kind, &buf); + map.borrow_mut().insert(id, (kind, buf)); + Ok(id) + } +} + +impl Deref for Proxy { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for Proxy { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +/// A mapping between an object id and all data corresponding to an object, acting like a `HashMap`. 
+#[derive(Default, Debug, Clone, Eq, PartialEq)] +pub struct Storage(gix_hashtable::HashMap)>); + +impl Deref for Storage { + type Target = gix_hashtable::HashMap)>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Storage { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/gix-odb/tests/odb/memory.rs b/gix-odb/tests/odb/memory.rs new file mode 100644 index 00000000000..d0d8ddb2bfd --- /dev/null +++ b/gix-odb/tests/odb/memory.rs @@ -0,0 +1,115 @@ +use crate::odb::hex_to_id; +use gix_object::{tree, Exists, FindExt}; +use gix_odb::Write; +use gix_testtools::tempfile::TempDir; + +#[test] +fn without_memory() -> crate::Result { + use gix_odb::HeaderExt; + let (mut odb, _tmp) = db_rw()?; + let mut buf = Vec::new(); + let mem = odb.take_object_memory().expect("it starts out with memory set"); + assert_eq!(mem.len(), 0, "no object is stored initially"); + let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c"); + let tree = odb.find_tree(&existing, &mut buf).expect("present and valid"); + assert_eq!(tree.entries.len(), 1); + odb.header(existing).expect("header can be found just the same"); + assert!(odb.exists(&existing)); + + let mut tree = tree.to_owned(); + tree.entries.push(tree::Entry { + mode: tree::EntryKind::Blob.into(), + filename: "z-for-sorting_another-file-with-same-content".into(), + oid: existing, + }); + let new_tree_id = odb.write(&tree)?; + assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c")); + let actual = odb.header(new_tree_id).expect("header of new objects can be found"); + assert_eq!(actual.kind(), gix_object::Kind::Tree); + assert_eq!(actual.size(), 104); + + let new_tree = odb + .find_tree(&new_tree_id, &mut buf) + .expect("new tree is also available as object") + .to_owned(); + assert_eq!(new_tree, tree); + assert!(!odb.exists(&gix_hash::Kind::Sha1.null())); + + Ok(()) +} + +#[test] +fn with_memory() -> crate::Result { + use gix_object::FindHeader; + let mut odb = db()?; + assert_eq!( + (*odb).iter()?.count(), + 6, + "let's be sure we didn't accidentally write anything" + ); + let mut buf = Vec::new(); + let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c"); + let tree = odb.find_tree(&existing, &mut buf).expect("present and valid"); + assert!(odb.exists(&existing)); + assert_eq!(tree.entries.len(), 1); + odb.try_header(&existing)?.expect("header can be found just the same"); + assert_eq!( + odb.num_objects_in_memory(), + 0, + "nothing is stored when fetching objects - it's not an object cache" + ); + + let mut tree = tree.to_owned(); + tree.entries.push(tree::Entry { + mode: tree::EntryKind::Blob.into(), + filename: "z-for-sorting_another-file-with-same-content".into(), + oid: existing, + }); + let new_tree_id = odb.write(&tree)?; + assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c")); + let actual = odb + .try_header(&new_tree_id)? 
+ .expect("header of new objects can be found"); + assert_eq!(actual.kind, gix_object::Kind::Tree); + assert_eq!(actual.size, 104); + + let new_tree = odb + .find_tree(&new_tree_id, &mut buf) + .expect("new tree is also available as object") + .to_owned(); + assert_eq!(new_tree, tree); + + let mem = odb.reset_object_memory().expect("memory is still available"); + assert_eq!(mem.len(), 1, "one new object was just written"); + + assert_eq!( + odb.try_header(&new_tree_id)?, + None, + "without memory, the object can't be found anymore" + ); + + let prev_mem = odb.set_object_memory(mem).expect("reset means it's just cleared"); + assert_eq!(prev_mem.len(), 0, "nothing was stored after the reset"); + + assert_eq!(odb.num_objects_in_memory(), 1, "we put all previous objects back"); + + let odb2 = odb.clone(); + assert_eq!(odb2.num_objects_in_memory(), 1, "it also clones the object memory"); + + assert!(!odb.exists(&gix_hash::Kind::Sha1.null())); + + Ok(()) +} + +fn db() -> crate::Result> { + let odb = gix_odb::at( + gix_testtools::scripted_fixture_read_only_standalone("repo_with_loose_objects.sh")?.join(".git/objects"), + )?; + Ok(gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1)) +} + +fn db_rw() -> crate::Result<(gix_odb::memory::Proxy, TempDir)> { + let tmp = gix_testtools::scripted_fixture_writable_standalone("repo_with_loose_objects.sh")?; + let odb = gix_odb::at(tmp.path().join(".git/objects"))?; + Ok((gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1), tmp)) +} diff --git a/gix-odb/tests/odb/mod.rs b/gix-odb/tests/odb/mod.rs index 6e4b1506bae..353bb78ee66 100644 --- a/gix-odb/tests/odb/mod.rs +++ b/gix-odb/tests/odb/mod.rs @@ -18,6 +18,7 @@ fn db_small_packs() -> gix_odb::Handle { pub mod alternate; pub mod find; pub mod header; +pub mod memory; pub mod regression; pub mod sink; pub mod store; diff --git a/gix-pack/src/data/input/bytes_to_entries.rs b/gix-pack/src/data/input/bytes_to_entries.rs index f42cf016444..ed5902d60b9 100644 --- a/gix-pack/src/data/input/bytes_to_entries.rs +++ b/gix-pack/src/data/input/bytes_to_entries.rs @@ -1,6 +1,6 @@ use std::{fs, io}; -use gix_features::{hash::Sha1, zlib::Decompress}; +use gix_features::{hash::Hasher, zlib::Decompress}; use gix_hash::ObjectId; use crate::data::input; @@ -15,7 +15,7 @@ pub struct BytesToEntriesIter
{ had_error: bool, version: crate::data::Version, objects_left: u32, - hash: Option, + hash: Option, mode: input::Mode, compressed: input::EntryDataMode, compressed_buf: Option>, @@ -303,7 +303,7 @@ where /// A utility to automatically generate a hash while writing into an inner writer. pub struct HashWrite<'a, T> { /// The hash implementation. - pub hash: &'a mut Sha1, + pub hash: &'a mut Hasher, /// The inner writer. pub inner: T, } diff --git a/gix-packetline-blocking/src/encode/async_io.rs b/gix-packetline-blocking/src/encode/async_io.rs index fbf734b685c..27566ada140 100644 --- a/gix-packetline-blocking/src/encode/async_io.rs +++ b/gix-packetline-blocking/src/encode/async_io.rs @@ -178,7 +178,7 @@ async fn prefixed_data_to_write(prefix: &[u8], data: &[u8], out: impl AsyncWrite /// Write a `text` message to `out`, which is assured to end in a newline. pub async fn text_to_write(text: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { - prefixed_and_suffixed_data_to_write(&[], text, &[b'\n'], out).await + prefixed_and_suffixed_data_to_write(&[], text, b"\n", out).await } /// Write a `data` message to `out`. diff --git a/gix-packetline-blocking/src/write/async_io.rs b/gix-packetline-blocking/src/write/async_io.rs index 6684ab19fd8..a77dbe274a7 100644 --- a/gix-packetline-blocking/src/write/async_io.rs +++ b/gix-packetline-blocking/src/write/async_io.rs @@ -54,7 +54,7 @@ impl Writer { /// If called, each call to [`write()`][io::Write::write()] will write the input as text, appending a trailing newline /// if needed before writing. pub fn enable_text_mode(&mut self) { - self.inner.suffix = &[b'\n']; + self.inner.suffix = b"\n"; } } diff --git a/gix-packetline/src/encode/async_io.rs b/gix-packetline/src/encode/async_io.rs index 57b379225a8..983644c1d9f 100644 --- a/gix-packetline/src/encode/async_io.rs +++ b/gix-packetline/src/encode/async_io.rs @@ -176,7 +176,7 @@ async fn prefixed_data_to_write(prefix: &[u8], data: &[u8], out: impl AsyncWrite /// Write a `text` message to `out`, which is assured to end in a newline. pub async fn text_to_write(text: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { - prefixed_and_suffixed_data_to_write(&[], text, &[b'\n'], out).await + prefixed_and_suffixed_data_to_write(&[], text, b"\n", out).await } /// Write a `data` message to `out`. diff --git a/gix-packetline/src/write/async_io.rs b/gix-packetline/src/write/async_io.rs index 35b93012675..707d2b7b576 100644 --- a/gix-packetline/src/write/async_io.rs +++ b/gix-packetline/src/write/async_io.rs @@ -52,7 +52,7 @@ impl Writer { /// If called, each call to [`write()`][io::Write::write()] will write the input as text, appending a trailing newline /// if needed before writing. pub fn enable_text_mode(&mut self) { - self.inner.suffix = &[b'\n']; + self.inner.suffix = b"\n"; } } diff --git a/gix/Cargo.toml b/gix/Cargo.toml index bffb10dc27a..db6616465c2 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -64,9 +64,12 @@ extras = [ "credentials", "interrupt", "status", - "dirwalk", + "dirwalk" ] +## A collection of features that need a larger MSRV, and thus are disabled by default. +need-more-recent-msrv = ["tree-editor"] + ## Various progress-related features that improve the look of progress message units. comfort = [ "gix-features/progress-unit-bytes", @@ -103,6 +106,12 @@ worktree-mutation = ["attributes", "dep:gix-worktree-state"] ## Retrieve a worktree stack for querying exclude information excludes = ["dep:gix-ignore", "dep:gix-worktree", "index"] +## Provide facilities to edit trees conveniently. 
+## +## Not that currently, this requires [Rust 1.75](https://caniuse.rs/features/return_position_impl_trait_in_trait). +## This feature toggle is likely going away then. +tree-editor = [] + ## Query attributes and excludes. Enables access to pathspecs, worktree checkouts, filter-pipelines and submodules. attributes = [ "excludes", @@ -384,6 +393,8 @@ parking_lot = { version = "0.12.1", optional = true } document-features = { version = "0.2.0", optional = true } [dev-dependencies] +# For additional features that aren't enabled by default due to MSRV +gix = { path = ".", default-features = false, features = ["tree-editor"] } pretty_assertions = "1.4.0" gix-testtools = { path = "../tests/tools" } is_ci = "1.1.1" @@ -391,6 +402,7 @@ anyhow = "1" walkdir = "2.3.2" serial_test = { version = "3.1.0", default-features = false } async-std = { version = "1.12.0", features = ["attributes"] } +termtree = "0.5.1" [package.metadata.docs.rs] features = [ @@ -398,5 +410,6 @@ features = [ "max-performance", "blocking-network-client", "blocking-http-transport-curl", + "need-more-recent-msrv", "serde", ] diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 2ebf2c5c06b..2575901df0e 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -251,7 +251,7 @@ impl Cache { }) } - #[cfg(feature = "index")] + #[cfg(any(feature = "index", feature = "tree-editor"))] pub(crate) fn protect_options(&self) -> Result { const IS_WINDOWS: bool = cfg!(windows); const IS_MACOS: bool = cfg!(target_os = "macos"); diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 56fd5e7e21d..6176e2461b5 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -158,7 +158,10 @@ pub mod path; /// The standard type for a store to handle git references. pub type RefStore = gix_ref::file::Store; /// A handle for finding objects in an object database, abstracting away caches for thread-local use. -pub type OdbHandle = gix_odb::Handle; +pub type OdbHandle = gix_odb::memory::Proxy; +/// A handle for finding objects in an object database, abstracting away caches for moving across threads. +pub type OdbHandleArc = gix_odb::memory::Proxy; + /// A way to access git configuration pub(crate) type Config = OwnShared>; diff --git a/gix/src/object/tree/editor.rs b/gix/src/object/tree/editor.rs new file mode 100644 index 00000000000..fdcdcf87d40 --- /dev/null +++ b/gix/src/object/tree/editor.rs @@ -0,0 +1,271 @@ +use crate::bstr::{BStr, BString}; +use crate::prelude::ObjectIdExt; +use crate::{Id, Repository}; +use gix_hash::ObjectId; +use gix_object::tree::EntryKind; + +/// +pub mod init { + /// The error returned by [`Editor::new()](crate::object::tree::Editor::new()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + DecodeTree(#[from] gix_object::decode::Error), + #[error(transparent)] + ValidationOptions(#[from] crate::config::boolean::Error), + } +} + +/// +pub mod write { + use crate::bstr::BString; + + /// The error returned by [`Editor::write()](crate::object::tree::Editor::write()) and [`Cursor::write()](super::Cursor::write). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + WriteTree(#[from] crate::object::write::Error), + #[error("The object {} ({}) at '{}' could not be found", id, kind.as_octal_str(), filename)] + MissingObject { + filename: BString, + kind: gix_object::tree::EntryKind, + id: gix_hash::ObjectId, + }, + #[error("The object {} ({}) has an invalid filename: '{}'", id, kind.as_octal_str(), filename)] + InvalidFilename { + filename: BString, + kind: gix_object::tree::EntryKind, + id: gix_hash::ObjectId, + source: gix_validate::path::component::Error, + }, + } +} + +/// A cursor at a specific portion of a tree to [edit](super::Editor). +pub struct Cursor<'a, 'repo> { + inner: gix_object::tree::editor::Cursor<'a, 'repo>, + validate: gix_validate::path::component::Options, + repo: &'repo Repository, +} + +/// Lifecycle +impl<'repo> super::Editor<'repo> { + /// Initialize a new editor from the given `tree`. + pub fn new(tree: &crate::Tree<'repo>) -> Result { + let tree_ref = tree.decode()?; + let repo = tree.repo; + let validate = repo.config.protect_options()?; + Ok(super::Editor { + inner: gix_object::tree::Editor::new(tree_ref.into(), &repo.objects, repo.object_hash()), + validate, + repo, + }) + } +} + +/// Tree editing +#[cfg(feature = "tree-editor")] +impl<'repo> crate::Tree<'repo> { + /// Start editing a new tree based on this one. + #[doc(alias = "treebuilder", alias = "git2")] + pub fn edit(&self) -> Result, init::Error> { + super::Editor::new(self) + } +} + +/// Obtain an iterator over `BStr`-components. +/// +/// Note that the implementation is simple, and it's mainly meant for statically known strings +/// or locations obtained during a merge. +pub trait ToComponents { + /// Return an iterator over the components of a path, without the separator. + fn to_components(&self) -> impl Iterator; +} + +impl ToComponents for &str { + fn to_components(&self) -> impl Iterator { + self.split('/').map(Into::into) + } +} + +impl ToComponents for String { + fn to_components(&self) -> impl Iterator { + self.split('/').map(Into::into) + } +} + +impl ToComponents for &String { + fn to_components(&self) -> impl Iterator { + self.split('/').map(Into::into) + } +} + +impl ToComponents for BString { + fn to_components(&self) -> impl Iterator { + self.split(|b| *b == b'/').map(Into::into) + } +} + +impl ToComponents for &BString { + fn to_components(&self) -> impl Iterator { + self.split(|b| *b == b'/').map(Into::into) + } +} + +impl ToComponents for &BStr { + fn to_components(&self) -> impl Iterator { + self.split(|b| *b == b'/').map(Into::into) + } +} + +/// Cursor Handling +impl<'repo> super::Editor<'repo> { + /// Turn ourselves as a cursor, which points to the same tree as the editor. + /// + /// This is useful if a method takes a [`Cursor`], not an [`Editor`](super::Editor). + pub fn to_cursor(&mut self) -> Cursor<'_, 'repo> { + Cursor { + inner: self.inner.to_cursor(), + validate: self.validate, + repo: self.repo, + } + } + + /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit. + /// + /// The returned cursor will then allow applying edits to the tree at `rela_path` as root. + /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself. 
+ pub fn cursor_at( + &mut self, + rela_path: impl ToComponents, + ) -> Result, gix_object::tree::editor::Error> { + Ok(Cursor { + inner: self.inner.cursor_at(rela_path.to_components())?, + validate: self.validate, + repo: self.repo, + }) + } +} +/// Operations +impl<'repo> Cursor<'_, 'repo> { + /// Like [`Editor::upsert()`](super::Editor::upsert()), but with the constraint of only editing in this cursor's tree. + pub fn upsert( + &mut self, + rela_path: impl ToComponents, + kind: EntryKind, + id: impl Into, + ) -> Result<&mut Self, gix_object::tree::editor::Error> { + self.inner.upsert(rela_path.to_components(), kind, id.into())?; + Ok(self) + } + + /// Like [`Editor::remove()`](super::Editor::remove), but with the constraint of only editing in this cursor's tree. + pub fn remove(&mut self, rela_path: impl ToComponents) -> Result<&mut Self, gix_object::tree::editor::Error> { + self.inner.remove(rela_path.to_components())?; + Ok(self) + } + + /// Like [`Editor::write()`](super::Editor::write()), but will write only the subtree of the cursor. + pub fn write(&mut self) -> Result, write::Error> { + write_cursor(self) + } +} + +/// Operations +impl<'repo> super::Editor<'repo> { + /// Set the root tree of the modification to `root`, assuring it has a well-known state. + /// + /// Note that this erases all previous edits. + /// + /// This is useful if the same editor is re-used for various trees. + pub fn set_root(&mut self, root: &crate::Tree<'repo>) -> Result<&mut Self, init::Error> { + let new_editor = super::Editor::new(root)?; + self.inner = new_editor.inner; + self.repo = new_editor.repo; + Ok(self) + } + /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree, + /// like `a/b/c`. Names are matched case-sensitively. + /// + /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database + /// or will be made available at a later point to assure the integrity of the produced tree. + /// + /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries + /// will be inserted into them instead. + /// + /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading + /// through them will not be considered a problem. + /// + /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed + /// in Git trees. + /// + /// Validation of path-components will not be performed here, but when [writing the tree](Self::write()). + pub fn upsert( + &mut self, + rela_path: impl ToComponents, + kind: EntryKind, + id: impl Into, + ) -> Result<&mut Self, gix_object::tree::editor::Error> { + self.inner.upsert(rela_path.to_components(), kind, id.into())?; + Ok(self) + } + + /// Remove the entry at `rela_path`, loading all trees on the path accordingly. + /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all. + pub fn remove(&mut self, rela_path: impl ToComponents) -> Result<&mut Self, gix_object::tree::editor::Error> { + self.inner.remove(rela_path.to_components())?; + Ok(self) + } + + /// Write the entire in-memory state of all changed trees (and only changed trees) to the object database. + /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing + /// was added to the tree. 
+ /// + /// The last call to `out` will be the changed root tree, whose object-id will also be returned. + /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist. + /// We don't assure that as there is no validation that inserted entries are valid object ids. + /// + /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the + /// just-written root-tree. + /// If this is not desired, use [set_root()](Self::set_root()). + /// + /// Before writing a tree, all of its entries (not only added ones), will be validated to assure they are + /// correct. The objects pointed to by entries also have to exist already. + pub fn write(&mut self) -> Result, write::Error> { + write_cursor(&mut self.to_cursor()) + } +} + +fn write_cursor<'repo>(cursor: &mut Cursor<'_, 'repo>) -> Result, write::Error> { + cursor + .inner + .write(|tree| -> Result { + for entry in &tree.entries { + gix_validate::path::component( + entry.filename.as_ref(), + entry + .mode + .is_link() + .then_some(gix_validate::path::component::Mode::Symlink), + cursor.validate, + ) + .map_err(|err| write::Error::InvalidFilename { + filename: entry.filename.clone(), + kind: entry.mode.into(), + id: entry.oid, + source: err, + })?; + if !cursor.repo.has_object(entry.oid) { + return Err(write::Error::MissingObject { + filename: entry.filename.clone(), + kind: entry.mode.into(), + id: entry.oid, + }); + } + } + Ok(cursor.repo.write_object(tree)?.detach()) + }) + .map(|id| id.attach(cursor.repo)) +} diff --git a/gix/src/object/tree/mod.rs b/gix/src/object/tree/mod.rs index 72f2a9e973c..65906c42fda 100644 --- a/gix/src/object/tree/mod.rs +++ b/gix/src/object/tree/mod.rs @@ -4,6 +4,14 @@ use gix_object::{bstr::BStr, FindExt, TreeRefIter}; use crate::{object::find, Id, ObjectDetached, Tree}; +/// All state needed to conveniently edit a tree, using only [update-or-insert](Editor::upsert()) and [removals](Editor::remove()). +#[cfg(feature = "tree-editor")] +pub struct Editor<'repo> { + inner: gix_object::tree::Editor<'repo>, + validate: gix_validate::path::component::Options, + repo: &'repo crate::Repository, +} + /// Initialization impl<'repo> Tree<'repo> { /// Obtain a tree instance by handing in all components that it is made up of. @@ -41,7 +49,6 @@ impl<'repo> Tree<'repo> { /// Follow a sequence of `path` components starting from this instance, and look them up one by one until the last component /// is looked up and its tree entry is returned. - /// Use `buf` as temporary location for sub-trees to avoid allocating a temporary buffer for each lookup. /// /// # Performance Notes /// @@ -49,16 +56,18 @@ impl<'repo> Tree<'repo> { /// to reuse a vector and use a binary search instead, which might be able to improve performance over all. /// However, a benchmark should be created first to have some data and see which trade-off to choose here. 
/// - pub fn lookup_entry(&self, path: I, buf: &mut Vec) -> Result>, find::existing::Error> + pub fn lookup_entry(&self, path: I) -> Result>, find::existing::Error> where I: IntoIterator, P: PartialEq, { - let mut path = path.into_iter().peekable(); + let mut buf = self.repo.shared_empty_buf(); buf.clear(); + + let mut path = path.into_iter().peekable(); buf.extend_from_slice(&self.data); while let Some(component) = path.next() { - match TreeRefIter::from_bytes(buf) + match TreeRefIter::from_bytes(&buf) .filter_map(Result::ok) .find(|entry| component.eq(entry.filename)) { @@ -70,7 +79,7 @@ impl<'repo> Tree<'repo> { })); } else { let next_id = entry.oid.to_owned(); - let obj = self.repo.objects.find(&next_id, buf)?; + let obj = self.repo.objects.find(&next_id, &mut buf)?; if !obj.kind.is_tree() { return Ok(None); } @@ -134,17 +143,13 @@ impl<'repo> Tree<'repo> { pub fn lookup_entry_by_path( &self, relative_path: impl AsRef, - buf: &mut Vec, ) -> Result>, find::existing::Error> { use crate::bstr::ByteSlice; - self.lookup_entry( - relative_path.as_ref().components().map(|c: std::path::Component<'_>| { - gix_path::os_str_into_bstr(c.as_os_str()) - .unwrap_or_else(|_| "".into()) - .as_bytes() - }), - buf, - ) + self.lookup_entry(relative_path.as_ref().components().map(|c: std::path::Component<'_>| { + gix_path::os_str_into_bstr(c.as_os_str()) + .unwrap_or_else(|_| "".into()) + .as_bytes() + })) } /// Like [`Self::peel_to_entry()`], but takes a `Path` directly via `relative_path`, a path relative to this tree. @@ -166,6 +171,10 @@ impl<'repo> Tree<'repo> { } } +/// +#[cfg(feature = "tree-editor")] +pub mod editor; + /// #[cfg(feature = "blob-diff")] pub mod diff; diff --git a/gix/src/repository/cache.rs b/gix/src/repository/cache.rs index cd765203e90..c499d6e5d41 100644 --- a/gix/src/repository/cache.rs +++ b/gix/src/repository/cache.rs @@ -39,3 +39,14 @@ impl crate::Repository { (ten_mb_for_every_10k_files as usize).max(4 * 1024) } } + +/// Handling of InMemory object writing +impl crate::Repository { + /// When writing objects, keep them in memory instead of writing them to disk. + /// This makes any change to the object database non-persisting, while keeping the view + /// to the object database consistent for this instance. 
+ pub fn with_object_memory(mut self) -> Self { + self.objects.enable_object_memory(); + self + } +} diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 36fd788dc6c..4b7fbf57d5b 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -38,7 +38,7 @@ impl From<&crate::ThreadSafeRepository> for crate::Repository { fn from(repo: &crate::ThreadSafeRepository) -> Self { crate::Repository::from_refs_and_objects( repo.refs.clone(), - repo.objects.to_handle().into(), + gix_odb::memory::Proxy::from(gix_odb::Cache::from(repo.objects.to_handle())).with_write_passthrough(), repo.work_tree.clone(), repo.common_dir.clone(), repo.config.clone(), @@ -56,7 +56,7 @@ impl From for crate::Repository { fn from(repo: crate::ThreadSafeRepository) -> Self { crate::Repository::from_refs_and_objects( repo.refs, - repo.objects.to_handle().into(), + gix_odb::memory::Proxy::from(gix_odb::Cache::from(repo.objects.to_handle())).with_write_passthrough(), repo.work_tree, repo.common_dir, repo.config, diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index 77467f77f9a..41b2aaeca52 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -72,6 +72,20 @@ mod submodule; mod thread_safe; mod worktree; +/// +#[cfg(feature = "tree-editor")] +pub mod edit_tree { + /// The error returned by [Repository::edit_tree()](crate::Repository::edit_tree). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + FindTree(#[from] crate::object::find::existing::with_conversion::Error), + #[error(transparent)] + InitEditor(#[from] crate::object::tree::editor::init::Error), + } +} + /// #[cfg(feature = "revision")] pub mod merge_base { diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index db2344a83c3..7fe3ded853f 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -12,7 +12,23 @@ use smallvec::SmallVec; use crate::{commit, ext::ObjectIdExt, object, tag, Blob, Commit, Id, Object, Reference, Tag, Tree}; -/// Methods related to object creation. +/// Tree editing +#[cfg(feature = "tree-editor")] +impl crate::Repository { + /// Return an editor for adjusting the tree at `id`. + /// + /// This can be the [empty tree id](ObjectId::empty_tree) to build a tree from scratch. + #[doc(alias = "treebuilder", alias = "git2")] + pub fn edit_tree( + &self, + id: impl Into, + ) -> Result, crate::repository::edit_tree::Error> { + let tree = self.find_tree(id)?; + Ok(tree.edit()?) + } +} + +/// Find objects of various kins impl crate::Repository { /// Find the object with `id` in the object database or return an error if it could not be found. /// @@ -93,7 +109,7 @@ impl crate::Repository { #[doc(alias = "exists", alias = "git2")] pub fn has_object(&self, id: impl AsRef) -> bool { let id = id.as_ref(); - if id == ObjectId::empty_tree(self.object_hash()) { + if id.to_owned().is_empty_tree() { true } else { self.objects.exists(id) @@ -138,8 +154,11 @@ impl crate::Repository { None => Ok(None), } } +} - fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec> { +/// Write objects of any type. 
+impl crate::Repository { + pub(crate) fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec> { let mut bufs = self.bufs.borrow_mut(); if bufs.last().is_none() { bufs.push(Vec::with_capacity(512)); @@ -217,7 +236,10 @@ impl crate::Repository { .map_err(Into::into) .map(|oid| oid.attach(self)) } +} +/// Create commits and tags +impl crate::Repository { /// Create a tag reference named `name` (without `refs/tags/` prefix) pointing to a newly created tag object /// which in turn points to `target` and return the newly created reference. /// diff --git a/gix/src/types.rs b/gix/src/types.rs index fc726309b17..fa03f601e95 100644 --- a/gix/src/types.rs +++ b/gix/src/types.rs @@ -67,7 +67,7 @@ impl<'a> Drop for Blob<'a> { /// A decoded tree object with access to its owning repository. #[derive(Clone)] pub struct Tree<'repo> { - /// The id of the tree + /// Thek[ id of the tree pub id: ObjectId, /// The fully decoded tree data pub data: Vec, @@ -247,7 +247,7 @@ pub struct PathspecDetached { /// The prepared search to use for checking matches. pub search: gix_pathspec::Search, /// A thread-safe version of an ODB. - pub odb: gix_odb::HandleArc, + pub odb: crate::OdbHandleArc, } /// A stand-in for the submodule of a particular name. diff --git a/gix/tests/object/tree/mod.rs b/gix/tests/object/tree/mod.rs index a331adb2fa4..4c0dc350db5 100644 --- a/gix/tests/object/tree/mod.rs +++ b/gix/tests/object/tree/mod.rs @@ -17,11 +17,6 @@ fn find_entry() -> crate::Result { fn lookup_entry_by_path() -> crate::Result { let repo = named_subrepo_opts("make_worktree_repo.sh", "repo", gix::open::Options::isolated())?; let tree = repo.head_commit()?.tree()?; - assert_eq!( - tree.lookup_entry_by_path("dir/c", &mut Vec::new())? - .expect("present") - .filename(), - "c" - ); + assert_eq!(tree.lookup_entry_by_path("dir/c")?.expect("present").filename(), "c"); Ok(()) } diff --git a/gix/tests/repository/object.rs b/gix/tests/repository/object.rs index 2aa2607cc69..27993e23b4b 100644 --- a/gix/tests/repository/object.rs +++ b/gix/tests/repository/object.rs @@ -1,5 +1,210 @@ use gix_testtools::tempfile; +#[cfg(feature = "tree-editor")] +mod edit_tree { + use crate::util::hex_to_id; + use gix::bstr::{BStr, BString}; + use gix_object::tree::EntryKind; + + #[test] + // Some part of the test validation the implementation for this exists, but it's needless nonetheless. + #[allow(clippy::needless_borrows_for_generic_args)] + fn from_head_tree() -> crate::Result { + let (repo, _tmp) = crate::repo_rw("make_packed_and_loose.sh")?; + let head_tree_id = repo.head_tree_id()?; + assert_eq!( + display_tree(head_tree_id, &repo), + "24374df94315568adfaee119d038f710d1f45397 +├── that ce013625030ba8dba906f756967f9e9ca394464a.100644 +└── this 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644 +" + ); + let this_id = hex_to_id("317e9677c3bcffd006f9fc84bbb0a54ef1676197"); + let that_id = hex_to_id("ce013625030ba8dba906f756967f9e9ca394464a"); + let mut editor = repo.edit_tree(head_tree_id)?; + let actual = editor + .upsert("a/b", EntryKind::Blob, this_id)? + .upsert(String::from("this/subdir/that"), EntryKind::Blob, this_id)? + .upsert(BString::from("that/other/that"), EntryKind::Blob, that_id)? + .remove(BStr::new("that"))? + .remove(&String::from("that"))? + .remove(&BString::from("that"))? 
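// Editor's note (not part of the patch): the three `remove("that")` calls above deliberately go
// through the `&BStr`, `&String` and `&BString` impls of `ToComponents`, and since
// `Editor::remove()` documents that removing a missing entry is not an error, the repeated
// removals of the same path are harmless.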
+            .write()?;
+
+        assert_eq!(
+            display_tree(actual, &repo),
+            "fe02a8bd15e4c0476d938f772f1eece6d164b1bd
+├── a
+│   └── b 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+└── this
+    └── subdir
+        └── that 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+",
+            "all trees are actually written, or else we couldn't visualize them."
+        );
+
+        let actual = editor
+            .upsert("a/b", EntryKind::Blob, that_id)?
+            .upsert(String::from("this/subdir/that"), EntryKind::Blob, this_id)?
+            .remove(BStr::new("does-not-exist"))?
+            .write()?;
+        assert_eq!(
+            display_tree(actual, &repo),
+            "219596ff52fc84b6b39bc327f202d408cc02e1db
+├── a
+│   └── b ce013625030ba8dba906f756967f9e9ca394464a.100644
+└── this
+    └── subdir
+        └── that 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+",
+            "existing blobs can also be changed"
+        );
+
+        let mut cursor = editor.cursor_at("something/very/nested/to/add/entries/to")?;
+        let actual = cursor
+            .upsert("a/b", EntryKind::Blob, this_id)?
+            .upsert(String::from("this/subdir/that"), EntryKind::Blob, that_id)?
+            .upsert(BString::from("that/other/that"), EntryKind::Blob, that_id)?
+            .remove(BStr::new("that"))?
+            .write()?;
+
+        assert_eq!(
+            display_tree(actual, &repo),
+            "35ea623106198f21b6959dd2731740e5153db2bb
+├── a
+│   └── b 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+└── this
+    └── subdir
+        └── that ce013625030ba8dba906f756967f9e9ca394464a.100644
+",
+            "all remaining subtrees are written from the cursor position"
+        );
+
+        let actual = editor.write()?;
+        assert_eq!(
+            display_tree(actual, &repo),
+            "9ebdc2c1d22e91636fa876a51521464f8a88dd6f
+├── a
+│   └── b ce013625030ba8dba906f756967f9e9ca394464a.100644
+├── something
+│   └── very
+│       └── nested
+│           └── to
+│               └── add
+│                   └── entries
+│                       └── to
+│                           ├── a
+│                           │   └── b 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+│                           └── this
+│                               └── subdir
+│                                   └── that ce013625030ba8dba906f756967f9e9ca394464a.100644
+└── this
+    └── subdir
+        └── that 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+",
+            "it looks as it should when seen from the root tree"
+        );
+
+        editor.set_root(&head_tree_id.object()?.into_tree())?;
+        let actual = editor.write()?;
+        assert_eq!(
+            display_tree(actual, &repo),
+            "24374df94315568adfaee119d038f710d1f45397
+├── that ce013625030ba8dba906f756967f9e9ca394464a.100644
+└── this 317e9677c3bcffd006f9fc84bbb0a54ef1676197.100644
+",
+            "it's possible to set the editor to any tree after creating it, which could help with memory re-use"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn missing_objects_and_illformed_path_components_trigger_error() -> crate::Result {
+        let (repo, _tmp) = crate::repo_rw("make_packed_and_loose.sh")?;
+        let tree = repo.head_tree_id()?.object()?.into_tree();
+        let mut editor = tree.edit()?;
+        let actual = editor
+            .upsert("non-existing", EntryKind::Blob, repo.object_hash().null())?
+            .write()?;
+        assert_eq!(
+            actual,
+            tree.id(),
+            "nulls are pruned before writing the tree, so it just rewrites the same tree"
+        );
+
+        let err = editor
+            .upsert(
+                "non-existing",
+                EntryKind::Blob,
+                hex_to_id("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+            )?
+            .write()
+            .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "The object aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa (100644) at 'non-existing' could not be found",
+            "each entry to be written is checked for existence"
+        );
+
+        let this_id = hex_to_id("317e9677c3bcffd006f9fc84bbb0a54ef1676197");
+        let err = editor
+            .remove("non-existing")?
+            .upsert(".git", EntryKind::Blob, this_id)?
+            .write()
+            .expect_err(".git is universally forbidden in trees");
+        assert_eq!(
+            err.to_string(),
+            "The object 317e9677c3bcffd006f9fc84bbb0a54ef1676197 (100644) has an invalid filename: '.git'",
+            "each component is validated"
+        );
+
+        Ok(())
+    }
+
+    mod utils {
+        use gix::bstr::{BStr, ByteSlice};
+        use gix::Repository;
+        use gix_hash::ObjectId;
+
+        fn display_tree_recursive(
+            tree_id: ObjectId,
+            repo: &Repository,
+            name: Option<&BStr>,
+        ) -> anyhow::Result<termtree::Tree<String>> {
+            let tree = repo.find_tree(tree_id)?.decode()?.to_owned();
+            let mut termtree = termtree::Tree::new(if let Some(name) = name {
+                if tree.entries.is_empty() {
+                    format!("{name} (empty)")
+                } else {
+                    name.to_string()
+                }
+            } else {
+                tree_id.to_string()
+            });
+
+            for entry in &tree.entries {
+                if entry.mode.is_tree() {
+                    termtree.push(display_tree_recursive(entry.oid, repo, Some(entry.filename.as_bstr()))?);
+                } else {
+                    termtree.push(format!(
+                        "{} {}.{}",
+                        entry.filename,
+                        entry.oid,
+                        entry.mode.kind().as_octal_str()
+                    ));
+                }
+            }
+            Ok(termtree)
+        }
+
+        pub(super) fn display_tree(tree_id: impl Into<ObjectId>, odb: &Repository) -> String {
+            display_tree_recursive(tree_id.into(), odb, None)
+                .expect("tree exists and everything was written")
+                .to_string()
+        }
+    }
+    use utils::display_tree;
+}
 mod write_object {
     use crate::repository::object::empty_bare_repo;
@@ -24,8 +229,24 @@ mod write_blob {
     #[test]
     fn from_slice() -> crate::Result {
         let (_tmp, repo) = empty_bare_repo()?;
+        let expected = hex_to_id("95d09f2b10159347eece71399a7e2e907ea3df4f");
+        assert!(!repo.has_object(expected));
+
         let oid = repo.write_blob(b"hello world")?;
-        assert_eq!(oid, hex_to_id("95d09f2b10159347eece71399a7e2e907ea3df4f"));
+        assert_eq!(oid, expected);
+
+        let mut other_repo = gix::open_opts(repo.path(), gix::open::Options::isolated())?;
+        other_repo.objects.enable_object_memory();
+        assert!(
+            other_repo.has_object(oid),
+            "we definitely don't accidentally write to memory only"
+        );
+        let in_memory_id = other_repo.write_blob("hello world - to memory")?;
+        assert!(!repo.has_object(in_memory_id), "the object was never written to disk…");
+        assert!(
+            other_repo.has_object(in_memory_id),
+            "…and exists only in the instance that wrote it"
+        );
         Ok(())
     }
diff --git a/justfile b/justfile
index 4a215d02f0a..758646d0f2c 100755
--- a/justfile
+++ b/justfile
@@ -146,8 +146,8 @@ check:
 
 # Run cargo doc on all crates
 doc $RUSTDOCFLAGS="-D warnings":
-    cargo doc --all --no-deps
-    cargo doc --features=max,lean,small --all --no-deps
+    cargo doc --all --no-deps --features need-more-recent-msrv
+    cargo doc --features=max,lean,small --all --no-deps --features need-more-recent-msrv
 
 # run all unit tests
 unit-tests:
@@ -183,7 +183,7 @@ unit-tests:
     cargo nextest run -p gix-protocol --features blocking-client
    cargo nextest run -p gix-protocol --features async-client
    cargo nextest run -p gix --no-default-features
-    cargo nextest run -p gix --no-default-features --features basic,extras,comfort
+    cargo nextest run -p gix --no-default-features --features basic,extras,comfort,need-more-recent-msrv
    cargo nextest run -p gix --features async-network-client
    cargo nextest run -p gix --features blocking-network-client
    cargo nextest run -p gitoxide-core --lib
diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs
index 82a74e51123..6265cf959f8 100644
--- a/src/plumbing/options/mod.rs
+++ b/src/plumbing/options/mod.rs
@@ -83,7 +83,7 @@ pub enum Subcommands {
     Archive(archive::Platform),
     #[cfg(feature = "gitoxide-core-tools-clean")]
     Clean(clean::Command),
-    /// Subcommands for interacting with commit-graphs
+    /// Subcommands for interacting with commit-graph files
     #[clap(subcommand)]
     CommitGraph(commitgraph::Subcommands),
     /// Interact with the object database.
@@ -738,13 +738,13 @@ pub mod credential {
 pub mod commitgraph {
     #[derive(Debug, clap::Subcommand)]
     pub enum Subcommands {
-        /// Verify the integrity of a commit graph
+        /// Verify the integrity of a commit graph file
         Verify {
-            /// output statistical information about the pack
+            /// output statistical information about the graph.
             #[clap(long, short = 's')]
             statistics: bool,
         },
-        /// List all entries in the commit-graph as reachable by starting from `HEAD`.
+        /// List all entries in the commit-graph file as reachable by starting from `HEAD`.
         List {
             /// The rev-spec to list reachable commits from.
             #[clap(default_value = "@")]