diff --git a/firewood/src/merkle/proof.rs b/firewood/src/merkle/proof.rs index a312ef0d8..6774cf641 100644 --- a/firewood/src/merkle/proof.rs +++ b/firewood/src/merkle/proof.rs @@ -3,7 +3,6 @@ use std::cmp::Ordering; use std::collections::HashMap; -use std::ops::Deref; use crate::shale::{disk_address::DiskAddress, ShaleError, ShaleStore}; use crate::v2::api::HashKey; @@ -19,7 +18,7 @@ use crate::{ merkle_util::{new_merkle, DataStoreError, MerkleSetup}, }; -use super::{BinarySerde, TRIE_HASH_LEN}; +use super::{BinarySerde, ObjRef, TRIE_HASH_LEN}; #[derive(Debug, Error)] pub enum ProofError { @@ -57,6 +56,8 @@ pub enum ProofError { Shale(ShaleError), #[error("invalid root hash")] InvalidRootHash, + #[error("failed writing the node")] + WriteError, } impl From for ProofError { @@ -258,261 +259,150 @@ impl + Send> Proof { /// necessary nodes will be resolved and leave the remaining as hashnode. /// /// The given edge proof is allowed to be an existent or non-existent proof. - fn proof_to_path, S: ShaleStore + Send + Sync, T: BinarySerde>( + fn proof_to_path, S: ShaleStore + Send + Sync, T: BinarySerde>( &self, - key: KV, + key: K, root_hash: [u8; 32], merkle_setup: &mut MerkleSetup, allow_non_existent_node: bool, ) -> Result>, ProofError> { // Start with the sentinel root - let root = merkle_setup.get_root(); + let sentinel = merkle_setup.get_sentinel_address(); let merkle = merkle_setup.get_merkle_mut(); - let mut u_ref = merkle.get_node(root).map_err(|_| ProofError::NoSuchNode)?; + let mut parent_node_ref = merkle + .get_node(sentinel) + .map_err(|_| ProofError::NoSuchNode)?; - let mut chunks = Vec::new(); - chunks.extend(key.as_ref().iter().copied().flat_map(to_nibble_array)); + let mut key_nibbles = Nibbles::<0>::new(key.as_ref()).into_iter(); - let mut cur_key: &[u8] = &chunks; - let mut cur_hash = root_hash; + let mut child_hash = root_hash; let proofs_map = &self.0; - let mut key_index = 0; - let mut branch_index = 0; - - loop { - let cur_proof = proofs_map - .get(&cur_hash) - .ok_or(ProofError::ProofNodeMissing)?; - // TODO(Hao): (Optimization) If a node is already decode we don't need to decode again. - let (mut chd_ptr, sub_proof, size) = - self.decode_node(merkle, cur_key, cur_proof.as_ref(), false)?; + let mut child_index = 0; + let sub_proof = loop { // Link the child to the parent based on the node type. - match &u_ref.inner() { + // if a child is already linked, use it instead + let child_node = match &parent_node_ref.inner() { #[allow(clippy::indexing_slicing)] - NodeType::Branch(n) => match n.chd()[branch_index] { + NodeType::Branch(n) => match n.chd()[child_index] { // If the child already resolved, then use the existing node. - Some(node) => { - chd_ptr = node; - } + Some(node) => merkle.get_node(node)?, None => { + let child_node = decode_subproof(merkle, proofs_map, &child_hash)?; + // insert the leaf to the empty slot - #[allow(clippy::unwrap_used)] - u_ref - .write(|u| { - let uu = u.inner_mut().as_branch_mut().unwrap(); + parent_node_ref + .write(|node| { #[allow(clippy::indexing_slicing)] - (uu.chd_mut()[branch_index] = Some(chd_ptr)); + let node = node + .inner_mut() + .as_branch_mut() + .expect("parent_node_ref is a branch"); + node.chd_mut()[child_index] = Some(child_node.as_ptr()); }) - .unwrap(); + .map_err(|_| ProofError::WriteError)?; + + child_node } }, - NodeType::Extension(_) => { - // If the child already resolved, then use the existing node. - #[allow(clippy::unwrap_used)] - let node = u_ref.inner().as_extension().unwrap().chd(); - - #[allow(clippy::unwrap_used)] - if node.is_null() { - u_ref - .write(|u| { - let uu = u.inner_mut().as_extension_mut().unwrap(); - *uu.chd_mut() = chd_ptr; - }) - .unwrap(); - } else { - chd_ptr = node; - } + NodeType::Extension(n) if n.chd().is_null() => { + let child_node = decode_subproof(merkle, proofs_map, &child_hash)?; + + parent_node_ref + .write(|node| { + let node = node + .inner_mut() + .as_extension_mut() + .expect("parent_node_ref is an extension"); + *node.chd_mut() = child_node.as_ptr(); + }) + .map_err(|_| ProofError::WriteError)?; + + child_node } + NodeType::Extension(n) => merkle.get_node(n.chd())?, + // We should not hit a leaf node as a parent. _ => return Err(ProofError::InvalidNode(MerkleError::ParentLeafBranch)), }; - u_ref = merkle.get_node(chd_ptr)?; - - // If the new parent is a branch node, record the index to correctly link the next child to it. - if u_ref.inner().as_branch().is_some() { - #[allow(clippy::indexing_slicing)] - (branch_index = chunks[key_index] as usize); - } - - key_index += size; - - match sub_proof { - // The trie doesn't contain the key. It's possible - // the proof is a non-existing proof, but at least - // we can prove all resolved nodes are correct, it's - // enough for us to prove range. - None => { - return allow_non_existent_node - .then_some(None) - .ok_or(ProofError::NodeNotInTrie); + // find the encoded subproof of the child if the partial-path and nibbles match + let encoded_sub_proof = match child_node.inner() { + NodeType::Leaf(n) => { + break n + .path + .iter() + .copied() + .eq(key_nibbles) // all nibbles have to match + .then(|| n.data().to_vec()); } - Some(p) => { - // Return when reaching the end of the key. - if key_index == chunks.len() { - #[allow(clippy::indexing_slicing)] - (cur_key = &chunks[key_index..]); - let mut data = None; - - // Decode the last subproof to get the value. - if let SubProof::Hash(p_hash) = p { - let proof = proofs_map - .get(&p_hash) - .ok_or(ProofError::ProofNodeMissing)?; - chd_ptr = self.decode_node(merkle, cur_key, proof.as_ref(), true)?.0; + NodeType::Extension(n) => { + let paths_match = n + .path + .iter() + .copied() + .all(|nibble| Some(nibble) == key_nibbles.next()); - // Link the child to the parent based on the node type. - match &u_ref.inner() { - // TODO: add path - #[allow(clippy::indexing_slicing)] - NodeType::Branch(n) if n.chd()[branch_index].is_none() => { - // insert the leaf to the empty slot - #[allow(clippy::unwrap_used)] - u_ref - .write(|u| { - let uu = u.inner_mut().as_branch_mut().unwrap(); - #[allow(clippy::indexing_slicing)] - (uu.chd_mut()[branch_index] = Some(chd_ptr)); - }) - .unwrap(); - } - - // If the child already resolved, then use the existing node. - NodeType::Branch(_) => {} - - #[allow(clippy::unwrap_used)] - NodeType::Extension(_) - if u_ref.inner().as_extension().unwrap().chd().is_null() => - { - u_ref - .write(|u| { - let uu = u.inner_mut().as_extension_mut().unwrap(); - *uu.chd_mut() = chd_ptr; - }) - .unwrap(); - } - - // If the child already resolved, then use the existing node. - NodeType::Extension(_) => {} - - // We should not hit a leaf node as a parent. - _ => { - return Err(ProofError::InvalidNode( - MerkleError::ParentLeafBranch, - )) - } - }; - } - - drop(u_ref); - - let c_ref = merkle.get_node(chd_ptr)?; - - match &c_ref.inner() { - NodeType::Branch(n) => { - if let Some(v) = n.value() { - data = Some(v.deref().to_vec()); - } - } - NodeType::Leaf(n) => { - // Return the value on the node only when the key matches exactly - // (e.g. the length path of subproof node is 0). - match p { - SubProof::Data(_) => data = Some(n.data().deref().to_vec()), - SubProof::Hash(_) if n.path().len() == 0 => { - data = Some(n.data().deref().to_vec()) - } - _ => (), - } - } - _ => (), - } - - return Ok(data); + if !paths_match { + break None; } - // The trie doesn't contain the key. - let SubProof::Hash(hash) = p else { - return if allow_non_existent_node { - Ok(None) - } else { - Err(ProofError::NodeNotInTrie) - }; - }; - - cur_hash = hash; - #[allow(clippy::indexing_slicing)] - (cur_key = &chunks[key_index..]); + Some(n.chd_encoded().ok_or(ProofError::InvalidData)?) } - } - } - } - - /// Decode the value to generate the corresponding type of node, and locate the subproof. - /// - /// # Arguments - /// - /// * `end_node` - A boolean indicates whether this is the end node to decode, thus no `key` - /// to be present. - fn decode_node + Send + Sync, T: BinarySerde>( - &self, - merkle: &Merkle, - key: &[u8], - buf: &[u8], - end_node: bool, - ) -> Result<(DiskAddress, Option, usize), ProofError> { - let node = NodeType::decode(buf)?; - let new_node = merkle - .put_node(Node::from(node)) - .map_err(ProofError::InvalidNode)?; - let addr = new_node.as_ptr(); - - match new_node.inner() { - NodeType::Leaf(n) => { - let cur_key = n.path().0.as_ref(); - if !key.contains_other(cur_key) { - return Ok((addr, None, 0)); - } + NodeType::Branch(n) => { + if let Some(index) = key_nibbles.next() { + let subproof = n + .chd_encode() + .get(index as usize) + .and_then(|inner| inner.as_ref()) + .map(|data| &**data); - let subproof = SubProof::Data(n.data().to_vec()); + child_index = index as usize; - Ok((addr, subproof.into(), cur_key.len())) - } - NodeType::Extension(n) => { - let cur_key = n.path.0.as_ref(); + subproof + } else { + break n.value.as_ref().map(|data| data.to_vec()); + } + } + }; - if !key.contains_other(cur_key) { - return Ok((addr, None, 0)); + match encoded_sub_proof { + None => break None, + Some(encoded) => { + let hash = generate_subproof_hash(encoded)?; + child_hash = hash; } + } - let encoded = n.chd_encoded().ok_or(ProofError::InvalidData)?.to_vec(); - let subproof = generate_subproof(encoded).map(Some)?; + parent_node_ref = child_node; + }; - Ok((addr, subproof, cur_key.len())) - } - // If the node is the last one to be decoded, then no subproof to be extracted. - NodeType::Branch(_) if end_node => Ok((addr, None, 1)), - NodeType::Branch(_) if key.is_empty() => Err(ProofError::NoSuchNode), - NodeType::Branch(n) => { - // Check if the subproof with the given key exist. - #[allow(clippy::indexing_slicing)] - let index = key[0] as usize; - #[allow(clippy::indexing_slicing)] - let Some(data) = &n.chd_encode()[index] else { - return Ok((addr, None, 1)); - }; - let subproof = generate_subproof(data.to_vec())?; - Ok((addr, Some(subproof), 1)) - } + match sub_proof { + Some(data) => Ok(Some(data)), + None if allow_non_existent_node => Ok(None), + None => Err(ProofError::NodeNotInTrie), } } } +fn decode_subproof<'a, S: ShaleStore, T, N: AsRef<[u8]>>( + merkle: &'a Merkle, + proofs_map: &HashMap<[u8; 32], N>, + child_hash: &[u8; 32], +) -> Result, ProofError> { + let child_proof = proofs_map + .get(child_hash) + .ok_or(ProofError::ProofNodeMissing)?; + let child_node = NodeType::decode(child_proof.as_ref())?; + merkle + .put_node(Node::from(child_node)) + .map_err(ProofError::InvalidNode) +} + fn locate_subproof( mut key_nibbles: NibblesIterator<'_, 0>, node: NodeType, @@ -545,7 +435,7 @@ fn locate_subproof( if does_not_match { return Ok((None, Nibbles::<0>::new(&[]).into_iter())); } - let data = n.chd_encoded().ok_or(ProofError::InvalidData)?.to_vec(); + let data = n.chd_encoded().ok_or(ProofError::InvalidData)?; let sub_proof = generate_subproof(data)?; Ok((sub_proof.into(), key_nibbles)) @@ -563,26 +453,25 @@ fn locate_subproof( #[allow(clippy::indexing_slicing)] let data = n.chd_encode()[index] .as_ref() - .ok_or(ProofError::InvalidData)? - .to_vec(); + .ok_or(ProofError::InvalidData)?; generate_subproof(data).map(|subproof| (Some(subproof), key_nibbles)) } } } -fn generate_subproof(encoded: Vec) -> Result { +fn generate_subproof_hash(encoded: &[u8]) -> Result<[u8; 32], ProofError> { match encoded.len() { 0..=31 => { - let sub_hash = sha3::Keccak256::digest(&encoded).into(); - Ok(SubProof::Hash(sub_hash)) + let sub_hash = sha3::Keccak256::digest(encoded).into(); + Ok(sub_hash) } 32 => { - let sub_hash: &[u8] = &encoded; - #[allow(clippy::unwrap_used)] - let sub_hash = sub_hash.try_into().unwrap(); + let sub_hash = encoded + .try_into() + .expect("slice length checked in match arm"); - Ok(SubProof::Hash(sub_hash)) + Ok(sub_hash) } len => Err(ProofError::DecodeError(Box::new( @@ -591,6 +480,10 @@ fn generate_subproof(encoded: Vec) -> Result { } } +fn generate_subproof(encoded: &[u8]) -> Result { + Ok(SubProof::Hash(generate_subproof_hash(encoded)?)) +} + // unset_internal removes all internal node references. // It should be called after a trie is constructed with two edge paths. Also // the given boundary keys must be the one used to construct the edge paths. @@ -615,7 +508,7 @@ fn unset_internal, S: ShaleStore + Send + Sync, T: BinarySe // Add the sentinel root let mut right_chunks = vec![0]; right_chunks.extend(right.as_ref().iter().copied().flat_map(to_nibble_array)); - let root = merkle_setup.get_root(); + let root = merkle_setup.get_sentinel_address(); let merkle = merkle_setup.get_merkle_mut(); let mut u_ref = merkle.get_node(root).map_err(|_| ProofError::NoSuchNode)?; let mut parent = DiskAddress::null(); @@ -1021,17 +914,3 @@ fn unset_node_ref, S: ShaleStore + Send + Sync, T: BinarySe } } } - -pub trait ContainsOtherExt { - fn contains_other(&self, other: Self) -> bool; -} - -impl ContainsOtherExt for &[T] -where - [T]: PartialEq<[T]>, -{ - #[allow(clippy::indexing_slicing)] - fn contains_other(&self, other: Self) -> bool { - self.len() >= other.len() && &self[..other.len()] == other - } -} diff --git a/firewood/src/merkle_util.rs b/firewood/src/merkle_util.rs index df2a1164a..328ae03cd 100644 --- a/firewood/src/merkle_util.rs +++ b/firewood/src/merkle_util.rs @@ -64,7 +64,7 @@ impl + Send + Sync, T: BinarySerde> MerkleSetup { .map_err(|_err| DataStoreError::GetError) } - pub const fn get_root(&self) -> DiskAddress { + pub const fn get_sentinel_address(&self) -> DiskAddress { self.root } diff --git a/firewood/src/shale/compact.rs b/firewood/src/shale/compact.rs index 35d3f8b7e..544f58103 100644 --- a/firewood/src/shale/compact.rs +++ b/firewood/src/shale/compact.rs @@ -629,7 +629,7 @@ impl Sh if ptr < DiskAddress::from(CompactSpaceHeader::MSIZE as usize) { return Err(ShaleError::InvalidAddressLength { expected: DiskAddress::from(CompactSpaceHeader::MSIZE as usize), - found: ptr.0.unwrap().get() as u64, + found: ptr.0.map(|inner| inner.get()).unwrap_or_default() as u64, }); }