diff --git a/firewood/src/db.rs b/firewood/src/db.rs index 449770b22..b9a422619 100644 --- a/firewood/src/db.rs +++ b/firewood/src/db.rs @@ -262,7 +262,8 @@ where } } let nodestore = merkle.into_inner(); - let immutable: Arc> = Arc::new(nodestore.into()); + let immutable: Arc, FileBacked>> = + Arc::new(nodestore.into()); self.manager .write() .expect("poisoned lock") @@ -312,13 +313,13 @@ impl Db { #[derive(Debug)] pub struct Proposal<'p> { - nodestore: Arc>, + nodestore: Arc, FileBacked>>, db: &'p Db, } #[async_trait] impl<'a> api::DbView for Proposal<'a> { - type Stream<'b> = MerkleKeyValueStream<'b, NodeStore> where Self: 'b; + type Stream<'b> = MerkleKeyValueStream<'b, NodeStore, FileBacked>> where Self: 'b; async fn root_hash(&self) -> Result, api::Error> { todo!() @@ -363,7 +364,6 @@ impl<'a> api::Proposal for Proposal<'a> { todo!() } - // When committing a proposal, refuse to commit if there are any cloned proposals. async fn commit(self: Arc) -> Result<(), api::Error> { match Arc::into_inner(self) { Some(proposal) => { diff --git a/firewood/src/manager.rs b/firewood/src/manager.rs index b22196967..9daadede6 100644 --- a/firewood/src/manager.rs +++ b/firewood/src/manager.rs @@ -26,7 +26,7 @@ pub struct RevisionManagerConfig { } type CommittedRevision = Arc>; -type ProposedRevision = Arc>; +type ProposedRevision = Arc, FileBacked>>; #[derive(Debug)] pub(crate) struct RevisionManager { @@ -108,7 +108,7 @@ impl RevisionManager { let current_revision = self.current_revision(); if !proposal .kind - .parent_is(¤t_revision.kind.as_nodestore_parent()) + .parent_hash_is(current_revision.kind.root_hash()) { return Err(RevisionManagerError::NotLatest); } @@ -133,16 +133,16 @@ impl RevisionManager { proposal.flush_header()?; // 7. Proposal Cleanup - self.proposals.retain(|p| { - // TODO: reparent proposals; this needs a lock on the parent element of immutable proposals - // if p - // .kind - // .parent_is(&proposal.kind.as_nodestore_parent()) - // { - // p.kind.reparent_to(&committed.kind.as_nodestore_parent()); - // } - !Arc::ptr_eq(&proposal, p) - }); + // first remove the committing proposal from the list of outstanding proposals + self.proposals.retain(|p| !Arc::ptr_eq(&proposal, p)); + + // then reparent any proposals that have this proposal as a parent + for p in self.proposals.iter() { + proposal.commit_reparent(p); + } + + // 8. Revision reaping + // TODO Ok(()) } diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index b24f6fd51..5c9ffbc9d 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -385,7 +385,7 @@ impl Merkle { } impl From>> - for Merkle> + for Merkle, S>> { fn from(m: Merkle>) -> Self { Merkle { @@ -395,7 +395,7 @@ impl From>> } impl Merkle> { - pub fn hash(self) -> Merkle> { + pub fn hash(self) -> Merkle, S>> { self.into() } diff --git a/storage/Cargo.toml b/storage/Cargo.toml index 70a458e87..7bf019dbe 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -14,6 +14,7 @@ serde = { version = "1.0.199", features = ["derive"] } smallvec = { version = "1.13.2", features = ["serde", "write", "union"] } sha2 = "0.10.8" integer-encoding = "4.0.0" +arc-swap = "1.7.1" lru = "0.12.4" [dev-dependencies] diff --git a/storage/src/nodestore.rs b/storage/src/nodestore.rs index 6e4c59ba2..6eee06c20 100644 --- a/storage/src/nodestore.rs +++ b/storage/src/nodestore.rs @@ -1,6 +1,8 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. +use arc_swap::access::DynAccess; +use arc_swap::ArcSwap; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt::Debug; @@ -26,6 +28,24 @@ use std::fmt::Debug; /// I --> |commit|N("New commit NodeStore<Committed, S>") /// style E color:#FFFFFF, fill:#AA00FF, stroke:#AA00FF /// ``` +/// +/// Nodestores represent a revision of the trie. There are three types of nodestores: +/// - Committed: A committed revision of the trie. It has no in-memory changes. +/// - MutableProposal: A proposal that is still being modified. It has some nodes in memory. +/// - ImmutableProposal: A proposal that has been hashed and assigned addresses. It has no in-memory changes. +/// +/// The general lifecycle of nodestores is as follows: +/// ```mermaid +/// flowchart TD +/// subgraph subgraph["Committed Revisions"] +/// L("Latest Nodestore<Committed, S>") --- |...|O("Oldest NodeStore<Committed, S>") +/// end +/// O --> E("Expire") +/// L --> |start propose|M("NodeStore<ProposedMutable, S>") +/// M --> |finish propose + hash|I("NodeStore<ProposedImmutable, S>") +/// I --> |commit|N("New commit NodeStore<Committed, S>") +/// style E color:#FFFFFF, fill:#AA00FF, stroke:#AA00FF +/// ``` use std::io::{Error, ErrorKind, Write}; use std::iter::once; use std::mem::offset_of; @@ -222,15 +242,36 @@ pub trait Parentable { fn root_hash(&self) -> Option; } -impl Parentable for ImmutableProposal { +impl Parentable for Arc { fn as_nodestore_parent(&self) -> NodeStoreParent { - NodeStoreParent::Proposed(Arc::new(self.clone())) + NodeStoreParent::Proposed(self.clone()) } fn root_hash(&self) -> Option { self.root_hash.clone() } } +impl NodeStore, S> { + /// When an immutable proposal commits, we need to reparent any proposal that + /// has the committed proposal as it's parent + pub fn commit_reparent(&self, other: &Arc, S>>) -> bool { + match *other.kind.parent.load() { + NodeStoreParent::Proposed(ref parent) => { + if Arc::ptr_eq(&self.kind, parent) { + other + .kind + .parent + .store(NodeStoreParent::Committed(self.kind.root_hash()).into()); + true + } else { + false + } + } + NodeStoreParent::Committed(_) => false, + } + } +} + impl Parentable for Committed { fn as_nodestore_parent(&self) -> NodeStoreParent { NodeStoreParent::Committed(self.root_hash.clone()) @@ -313,7 +354,7 @@ impl NodeStore { } } -impl NodeStore { +impl NodeStore, S> { /// Attempts to allocate `n` bytes from the free lists. /// If successful returns the address of the newly allocated area /// and the index of the free list that was used. @@ -616,19 +657,26 @@ pub struct ImmutableProposal { /// Nodes that have been deleted in this proposal. deleted: Box<[LinearAddress]>, /// The parent of this proposal. - parent: NodeStoreParent, + parent: Arc>, /// The hash of the root node for this proposal root_hash: Option, } impl ImmutableProposal { - /// Returns true if the parent of this proposal is `parent`. - pub fn parent_is(&self, parent: &NodeStoreParent) -> bool { - &self.parent == parent + /// Returns true if the parent of this proposal is committed and has the given hash. + pub fn parent_hash_is(&self, hash: Option) -> bool { + match > as arc_swap::access::DynAccess>>::load( + &self.parent, + ) + .as_ref() + { + NodeStoreParent::Committed(root_hash) => *root_hash == hash, + _ => false, + } } } -impl ReadInMemoryNode for ImmutableProposal { +impl ReadInMemoryNode for Arc { fn read_in_memory_node(&self, addr: LinearAddress) -> Option> { // Check if the node being requested was created in this proposal. if let Some((_, node)) = self.new.get(&addr) { @@ -637,7 +685,7 @@ impl ReadInMemoryNode for ImmutableProposal { // It wasn't. Try our parent, and its parent, and so on until we find it or find // a committed revision. - match self.parent { + match *self.parent.load() { NodeStoreParent::Proposed(ref parent) => parent.read_in_memory_node(addr), NodeStoreParent::Committed(_) => None, } @@ -704,15 +752,6 @@ impl ReadInMemoryNode for MutableProposal { fn read_in_memory_node(&self, addr: LinearAddress) -> Option> { self.parent.read_in_memory_node(addr) } - - // fn read_in_memory_root(&self) -> Option>> { - // let Some(root) = &self.root else { - // return Some(None); - // }; - - // let root = Arc::new(root.clone()); - // Some(Some(root)) - // } } impl, S: ReadableStorage> From> @@ -745,10 +784,15 @@ impl From> for NodeStore NodeStore { +impl NodeStore, S> { /// Hashes `node`, which is at the given `path_prefix`, and its children recursively. /// Returns the hashed node and its hash. - fn hash_helper(&mut self, mut node: Node, path_prefix: &mut Path) -> (LinearAddress, TrieHash) { + fn hash_helper( + &mut self, + mut node: Node, + path_prefix: &mut Path, + new_nodes: &mut HashMap)>, + ) -> (LinearAddress, TrieHash) { // Allocate addresses and calculate hashes for all new nodes match node { Node::Branch(ref mut b) => { @@ -766,7 +810,8 @@ impl NodeStore { .0 .extend(b.partial_path.0.iter().copied().chain(once(nibble as u8))); - let (child_addr, child_hash) = self.hash_helper(child_node, path_prefix); + let (child_addr, child_hash) = + self.hash_helper(child_node, path_prefix, new_nodes); *child = Some(Child::AddressWithHash(child_addr, child_hash)); path_prefix.0.truncate(original_length); } @@ -777,7 +822,7 @@ impl NodeStore { let hash = hash_node(&node, path_prefix); let (addr, size) = self.allocate_node(&node).expect("TODO handle error"); - self.kind.new.insert(addr, (size, Arc::new(node))); + new_nodes.insert(addr, (size, Arc::new(node))); (addr, hash) } @@ -845,7 +890,71 @@ impl NodeStore { } } -impl From> for NodeStore { +impl NodeStore, S> { + /// Persist the freelist from this proposal to storage. + pub fn flush_freelist(&self) -> Result<(), Error> { + // Write the free lists to storage + let free_list_bytes = bincode::serialize(&self.header.free_lists) + .map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + let free_list_offset = offset_of!(NodeStoreHeader, free_lists) as u64; + self.storage + .write(free_list_offset, free_list_bytes.as_slice())?; + Ok(()) + } + + /// Persist the header from this proposal to storage. + pub fn flush_header(&self) -> Result<(), Error> { + let header_bytes = bincode::serialize(&self.header).map_err(|e| { + Error::new( + ErrorKind::InvalidData, + format!("Failed to serialize header: {}", e), + ) + })?; + + self.storage.write(0, header_bytes.as_slice())?; + + Ok(()) + } + + /// Persist all the nodes of a proposal to storage. + pub fn flush_nodes(&self) -> Result<(), Error> { + for (addr, (area_size_index, node)) in self.kind.new.iter() { + let stored_area = StoredArea { + area_size_index: *area_size_index, + area: Area::<_, FreeArea>::Node(node.as_ref()), + }; + + let stored_area_bytes = bincode::serialize(&stored_area) + .map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + + self.storage + .write(addr.get(), stored_area_bytes.as_slice())?; + } + self.storage + .write_cached_nodes(self.kind.new.iter().map(|(addr, (_, node))| (addr, node)))?; + + Ok(()) + } +} + +impl NodeStore, FileBacked> { + /// Return a Committed version of this proposal, which doesn't have any modified nodes. + /// This function is used during commit. + pub fn as_committed(&self) -> NodeStore { + NodeStore { + header: self.header.clone(), + kind: Committed { + deleted: self.kind.deleted.clone(), + root_hash: self.kind.root_hash.clone(), + }, + storage: self.storage.clone(), + } + } +} + +impl From> + for NodeStore, S> +{ fn from(val: NodeStore) -> Self { let NodeStore { header, @@ -855,12 +964,12 @@ impl From> for NodeStore From> for NodeStore RootReader for NodeStore { trait Hashed {} impl Hashed for Committed {} -impl Hashed for ImmutableProposal {} +impl Hashed for Arc {} impl RootReader for NodeStore { fn root_node(&self) -> Option> { @@ -932,6 +1049,7 @@ where #[allow(clippy::unwrap_used)] mod tests { use crate::linear::memory::MemStore; + use arc_swap::access::DynGuard; use super::*; @@ -963,6 +1081,39 @@ mod tests { assert!(area_size_to_index(MAX_AREA_SIZE + 1).is_err()); } + #[test] + fn test_reparent() { + // create an empty base revision + let memstore = MemStore::new(vec![]); + let base = NodeStore::new_empty_committed(memstore.into()) + .unwrap() + .into(); + + // create an empty r1, check that it's parent is the empty committed version + let r1 = NodeStore::new(base).unwrap(); + let r1: Arc, _>> = Arc::new(r1.into()); + let parent: DynGuard> = r1.kind.parent.load(); + assert!(matches!(**parent, NodeStoreParent::Committed(None))); + + // create an empty r2, check that it's parent is the proposed version r1 + let r2: NodeStore = NodeStore::new(r1.clone()).unwrap(); + let r2: Arc, _>> = Arc::new(r2.into()); + let parent: DynGuard> = r2.kind.parent.load(); + assert!(matches!(**parent, NodeStoreParent::Proposed(_))); + + // reparent r2 + r1.commit_reparent(&r2); + + // now check r2's parent, should match the hash of r1 (which is still None) + let parent: DynGuard> = r2.kind.parent.load(); + if let NodeStoreParent::Committed(hash) = &**parent { + assert_eq!(*hash, r1.root_hash().unwrap()); + assert_eq!(*hash, None); + } else { + panic!("expected committed parent"); + } + } + // TODO add new tests // #[test] // fn test_create() {