From 279e7e2437fe0f6ab2fb8af3ef6845bfcaa34e2c Mon Sep 17 00:00:00 2001 From: Ron Kuris Date: Sun, 3 Nov 2024 12:34:51 -0800 Subject: [PATCH] Add feature branch_factor_256 (#746) --- firewood/Cargo.toml | 1 + firewood/src/merkle.rs | 39 +++++++++++++++++++++++------------ firewood/src/proof.rs | 4 +++- firewood/src/stream.rs | 33 +++++++++++++++++++++++------ storage/Cargo.toml | 1 + storage/benches/serializer.rs | 33 ++++++++++------------------- storage/src/node/branch.rs | 14 +++++++++---- storage/src/node/path.rs | 24 ++++++++++++++++++++- storage/src/nodestore.rs | 10 ++++++++- 9 files changed, 111 insertions(+), 48 deletions(-) diff --git a/firewood/Cargo.toml b/firewood/Cargo.toml index d5f4a253e..dac5243c8 100644 --- a/firewood/Cargo.toml +++ b/firewood/Cargo.toml @@ -39,6 +39,7 @@ default = [] nightly = [] iouring = ["io-uring"] logger = ["storage/logger"] +branch_factor_256 = [ "storage/branch_factor_256" ] [dev-dependencies] criterion = {version = "0.5.1", features = ["async_tokio"]} diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index 32e601161..66812c31d 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -50,6 +50,7 @@ pub enum MerkleError { // convert a set of nibbles into a printable string // panics if there is a non-nibble byte in the set +#[cfg(not(feature = "branch_factor_256"))] fn nibbles_formatter>(nib: X) -> String { nib.into_iter() .map(|c| { @@ -60,6 +61,12 @@ fn nibbles_formatter>(nib: X) -> String { .collect::() } +#[cfg(feature = "branch_factor_256")] +fn nibbles_formatter>(nib: X) -> String { + let collected: Box<[u8]> = nib.into_iter().collect(); + hex::encode(&collected) +} + macro_rules! write_attributes { ($writer:ident, $node:expr, $value:expr) => { if !$node.partial_path.0.is_empty() { @@ -182,7 +189,8 @@ impl Merkle { // No nodes, even the root, are before `key`. // The root alone proves the non-existence of `key`. // TODO reduce duplicate code with ProofNode::from - let mut child_hashes: [Option; BranchNode::MAX_CHILDREN] = Default::default(); + let mut child_hashes: [Option; BranchNode::MAX_CHILDREN] = + [const { None }; BranchNode::MAX_CHILDREN]; if let Some(branch) = root.as_branch() { // TODO danlaine: can we avoid indexing? #[allow(clippy::indexing_slicing)] @@ -480,7 +488,7 @@ impl Merkle> { let mut branch = BranchNode { partial_path: path_overlap.shared.into(), value: Some(value), - children: Default::default(), + children: [const { None }; BranchNode::MAX_CHILDREN], }; // Shorten the node's partial path since it has a new parent. @@ -528,7 +536,7 @@ impl Merkle> { let mut branch = BranchNode { partial_path: std::mem::replace(&mut leaf.partial_path, Path::new()), value: Some(std::mem::take(&mut leaf.value).into_boxed_slice()), - children: Default::default(), + children: [const { None }; BranchNode::MAX_CHILDREN], }; let new_leaf = Node::Leaf(LeafNode { @@ -554,7 +562,7 @@ impl Merkle> { let mut branch = BranchNode { partial_path: path_overlap.shared.into(), value: None, - children: Default::default(), + children: [const { None }; BranchNode::MAX_CHILDREN], }; node.update_partial_path(node_partial_path); @@ -1464,19 +1472,24 @@ mod tests { #[test_case(vec![(&[0],&[0]),(&[0,1],&[0,1])], Some("c3bdc20aff5cba30f81ffd7689e94e1dbeece4a08e27f0104262431604cf45c6"); "root with leaf child")] #[test_case(vec![(&[0],&[0]),(&[0,1],&[0,1]),(&[0,1,2],&[0,1,2])], Some("229011c50ad4d5c2f4efe02b8db54f361ad295c4eee2bf76ea4ad1bb92676f97"); "root with branch child")] #[test_case(vec![(&[0],&[0]),(&[0,1],&[0,1]),(&[0,8],&[0,8]),(&[0,1,2],&[0,1,2])], Some("a683b4881cb540b969f885f538ba5904699d480152f350659475a962d6240ef9"); "root with branch child and leaf child")] + #[allow(unused_variables)] fn test_root_hash_merkledb_compatible(kvs: Vec<(&[u8], &[u8])>, expected_hash: Option<&str>) { - let merkle = merkle_build_test(kvs).unwrap().hash(); - let Some(got_hash) = merkle.nodestore.root_hash().unwrap() else { - assert!(expected_hash.is_none()); - return; - }; + // TODO: get the hashes from merkledb and verify compatibility with branch factor 256 + #[cfg(not(feature = "branch_factor_256"))] + { + let merkle = merkle_build_test(kvs).unwrap().hash(); + let Some(got_hash) = merkle.nodestore.root_hash().unwrap() else { + assert!(expected_hash.is_none()); + return; + }; - let expected_hash = expected_hash.unwrap(); + let expected_hash = expected_hash.unwrap(); - // This hash is from merkledb - let expected_hash: [u8; 32] = hex::decode(expected_hash).unwrap().try_into().unwrap(); + // This hash is from merkledb + let expected_hash: [u8; 32] = hex::decode(expected_hash).unwrap().try_into().unwrap(); - assert_eq!(got_hash, TrieHash::from(expected_hash)); + assert_eq!(got_hash, TrieHash::from(expected_hash)); + } } #[test] diff --git a/firewood/src/proof.rs b/firewood/src/proof.rs index 615933e1c..8e82ade79 100644 --- a/firewood/src/proof.rs +++ b/firewood/src/proof.rs @@ -71,7 +71,8 @@ impl Hashable for ProofNode { impl From for ProofNode { fn from(item: PathIterItem) -> Self { - let mut child_hashes: [Option; BranchNode::MAX_CHILDREN] = Default::default(); + let mut child_hashes: [Option; BranchNode::MAX_CHILDREN] = + [const { None }; BranchNode::MAX_CHILDREN]; if let Some(branch) = item.node.as_branch() { // TODO danlaine: can we avoid indexing? @@ -170,6 +171,7 @@ impl Proof { // Assert that only nodes whose keys are an even number of nibbles // have a `value_digest`. + #[cfg(not(feature = "branch_factor_256"))] if node.key().count() % 2 != 0 && node.value_digest().is_some() { return Err(ProofError::ValueAtOddNibbleLength); } diff --git a/firewood/src/stream.rs b/firewood/src/stream.rs index 21a5154fa..6f305c4b0 100644 --- a/firewood/src/stream.rs +++ b/firewood/src/stream.rs @@ -561,6 +561,12 @@ fn as_enumerated_children_iter(branch: &BranchNode) -> impl Iterator>(nibbles: Iter) -> Key { + nibbles.collect() +} + +#[cfg(not(feature = "branch_factor_256"))] fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { let mut data = Vec::with_capacity(nibbles.size_hint().0 / 2); @@ -620,10 +626,13 @@ mod tests { }; assert!(should_yield_elt); + #[cfg(not(feature = "branch_factor_256"))] assert_eq!( node.key_nibbles, vec![0x0B, 0x0E, 0x0E, 0x0F].into_boxed_slice() ); + #[cfg(feature = "branch_factor_256")] + assert_eq!(node.key_nibbles, vec![0xBE, 0xEF].into_boxed_slice()); assert_eq!(node.node.as_leaf().unwrap().value, SmallVec::from([0x42])); assert_eq!(node.next_nibble, None); @@ -643,7 +652,10 @@ mod tests { Some(Err(e)) => panic!("{:?}", e), None => panic!("unexpected end of iterator"), }; + #[cfg(not(feature = "branch_factor_256"))] assert_eq!(node.key_nibbles, vec![0x00, 0x00].into_boxed_slice()); + #[cfg(feature = "branch_factor_256")] + assert_eq!(node.key_nibbles, vec![0].into_boxed_slice()); assert_eq!(node.next_nibble, Some(0)); assert!(node.node.as_branch().unwrap().value.is_none()); @@ -652,11 +664,19 @@ mod tests { Some(Err(e)) => panic!("{:?}", e), None => panic!("unexpected end of iterator"), }; + #[cfg(not(feature = "branch_factor_256"))] assert_eq!( node.key_nibbles, vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00].into_boxed_slice() ); + #[cfg(feature = "branch_factor_256")] + assert_eq!(node.key_nibbles, vec![0, 0, 0].into_boxed_slice()); + + #[cfg(not(feature = "branch_factor_256"))] assert_eq!(node.next_nibble, Some(0x0F)); + #[cfg(feature = "branch_factor_256")] + assert_eq!(node.next_nibble, Some(0xFF)); + assert_eq!( node.node.as_branch().unwrap().value, Some(vec![0x00, 0x00, 0x00].into_boxed_slice()), @@ -667,6 +687,7 @@ mod tests { Some(Err(e)) => panic!("{:?}", e), None => panic!("unexpected end of iterator"), }; + #[cfg(not(feature = "branch_factor_256"))] assert_eq!( node.key_nibbles, vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x0F].into_boxed_slice() @@ -693,7 +714,10 @@ mod tests { Some(Err(e)) => panic!("{:?}", e), None => panic!("unexpected end of iterator"), }; + // TODO: make this branch factor 16 compatible + #[cfg(not(feature = "branch_factor_256"))] assert_eq!(node.key_nibbles, vec![0x00, 0x00].into_boxed_slice()); + assert!(node.node.as_branch().unwrap().value.is_none()); assert_eq!(node.next_nibble, Some(0)); @@ -702,6 +726,7 @@ mod tests { Some(Err(e)) => panic!("{:?}", e), None => panic!("unexpected end of iterator"), }; + #[cfg(not(feature = "branch_factor_256"))] assert_eq!( node.key_nibbles, vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00].into_boxed_slice() @@ -799,40 +824,34 @@ mod tests { assert_eq!(key, vec![0x00].into_boxed_slice()); let node = node.as_branch().unwrap(); assert!(node.value.is_none()); - assert_eq!(node.partial_path.to_vec(), vec![0x00, 0x00]); // Covers case of branch with value let (key, node) = stream.next().await.unwrap().unwrap(); assert_eq!(key, vec![0x00, 0x00, 0x00].into_boxed_slice()); let node = node.as_branch().unwrap(); assert_eq!(node.value.clone().unwrap().to_vec(), vec![0x00, 0x00, 0x00]); - assert_eq!(node.partial_path.to_vec(), vec![0x00, 0x00, 0x00]); // Covers case of leaf with partial path let (key, node) = stream.next().await.unwrap().unwrap(); assert_eq!(key, vec![0x00, 0x00, 0x00, 0x01].into_boxed_slice()); let node = node.as_leaf().unwrap(); assert_eq!(node.clone().value.to_vec(), vec![0x00, 0x00, 0x00, 0x01]); - assert_eq!(node.partial_path.to_vec(), vec![0x01]); let (key, node) = stream.next().await.unwrap().unwrap(); assert_eq!(key, vec![0x00, 0x00, 0x00, 0xFF].into_boxed_slice()); let node = node.as_leaf().unwrap(); assert_eq!(node.clone().value.to_vec(), vec![0x00, 0x00, 0x00, 0xFF]); - assert_eq!(node.partial_path.to_vec(), vec![0x0F]); let (key, node) = stream.next().await.unwrap().unwrap(); assert_eq!(key, vec![0x00, 0xD0, 0xD0].into_boxed_slice()); let node = node.as_leaf().unwrap(); assert_eq!(node.clone().value.to_vec(), vec![0x00, 0xD0, 0xD0]); - assert_eq!(node.partial_path.to_vec(), vec![0x00, 0x0D, 0x00]); // 0x0D00 becomes 0xDO // Covers case of leaf with no partial path let (key, node) = stream.next().await.unwrap().unwrap(); assert_eq!(key, vec![0x00, 0xFF].into_boxed_slice()); let node = node.as_leaf().unwrap(); assert_eq!(node.clone().value.to_vec(), vec![0x00, 0xFF]); - assert_eq!(node.partial_path.to_vec(), vec![0x0F]); check_stream_is_done(stream).await; } @@ -1059,6 +1078,8 @@ mod tests { let mut stream = merkle.key_value_iter(); + println!("{}", merkle.dump().unwrap()); + assert_eq!( stream.next().await.unwrap().unwrap(), (branch.to_vec().into_boxed_slice(), branch.to_vec()) diff --git a/storage/Cargo.toml b/storage/Cargo.toml index ad9bf3013..95c16808b 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -28,6 +28,7 @@ criterion = { version = "0.5.1", features = ["async_tokio", "html_reports"] } [features] logger = ["log"] +branch_factor_256 = [] [[bench]] name = "serializer" diff --git a/storage/benches/serializer.rs b/storage/benches/serializer.rs index d96bd5cd1..75b0030e0 100644 --- a/storage/benches/serializer.rs +++ b/storage/benches/serializer.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use std::num::NonZeroU64; +use std::{array::from_fn, num::NonZeroU64}; use bincode::Options; use criterion::{criterion_group, criterion_main, Criterion}; @@ -27,27 +27,16 @@ fn branch(c: &mut Criterion) { let mut input = Node::Branch(Box::new(storage::BranchNode { partial_path: Path(SmallVec::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), value: Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_boxed_slice()), - children: [ - Some(storage::Child::AddressWithHash( - NonZeroU64::new(1).unwrap(), - storage::TrieHash::from([0; 32]), - )), - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - ], + children: from_fn(|i| { + if i == 0 { + Some(storage::Child::AddressWithHash( + NonZeroU64::new(1).unwrap(), + storage::TrieHash::from([0; 32]), + )) + } else { + None + } + }), })); let serializer = bincode::DefaultOptions::new().with_varint_encoding(); let benchfn = |b: &mut criterion::Bencher, input: &storage::Node| { diff --git a/storage/src/node/branch.rs b/storage/src/node/branch.rs index d0a6514a3..2248178bc 100644 --- a/storage/src/node/branch.rs +++ b/storage/src/node/branch.rs @@ -42,7 +42,7 @@ impl Serialize for BranchNode { state.serialize_field("partial_path", &self.partial_path)?; state.serialize_field("value", &self.value)?; - let children: SmallVec<[(u8, LinearAddress, TrieHash); 16]> = self + let children: SmallVec<[(u8, LinearAddress, TrieHash); Self::MAX_CHILDREN]> = self .children .iter() .enumerate() @@ -71,12 +71,13 @@ impl<'de> Deserialize<'de> for BranchNode { struct SerializedBranchNode { partial_path: Path, value: Option>, - children: SmallVec<[(u8, LinearAddress, TrieHash); 16]>, + children: SmallVec<[(u8, LinearAddress, TrieHash); BranchNode::MAX_CHILDREN]>, } let s: SerializedBranchNode = Deserialize::deserialize(deserializer)?; - let mut children: [Option; BranchNode::MAX_CHILDREN] = Default::default(); + let mut children: [Option; BranchNode::MAX_CHILDREN] = + [const { None }; BranchNode::MAX_CHILDREN]; for (offset, addr, hash) in s.children.iter() { children[*offset as usize] = Some(Child::AddressWithHash(*addr, hash.clone())); } @@ -119,6 +120,11 @@ impl Debug for BranchNode { impl BranchNode { /// The maximum number of children in a [BranchNode] + #[cfg(feature = "branch_factor_256")] + pub const MAX_CHILDREN: usize = 256; + + /// The maximum number of children in a [BranchNode] + #[cfg(not(feature = "branch_factor_256"))] pub const MAX_CHILDREN: usize = 16; /// Returns the address of the child at the given index. @@ -158,7 +164,7 @@ impl From<&LeafNode> for BranchNode { BranchNode { partial_path: leaf.partial_path.clone(), value: Some(Box::from(&leaf.value[..])), - children: Default::default(), + children: [const { None }; BranchNode::MAX_CHILDREN], } } } diff --git a/storage/src/node/path.rs b/storage/src/node/path.rs index f24ea01b9..777d1a395 100644 --- a/storage/src/node/path.rs +++ b/storage/src/node/path.rs @@ -157,6 +157,17 @@ impl<'a> FusedIterator for NibblesIterator<'a> {} impl<'a> Iterator for NibblesIterator<'a> { type Item = u8; + #[cfg(feature = "branch_factor_256")] + fn next(&mut self) -> Option { + if self.is_empty() { + return None; + } + let result = self.data[self.head]; + self.head += 1; + Some(result) + } + + #[cfg(not(feature = "branch_factor_256"))] fn next(&mut self) -> Option { if self.is_empty() { return None; @@ -184,6 +195,11 @@ impl<'a> Iterator for NibblesIterator<'a> { } impl<'a> NibblesIterator<'a> { + #[cfg(not(feature = "branch_factor_256"))] + const BYTES_PER_NIBBLE: usize = 2; + #[cfg(feature = "branch_factor_256")] + const BYTES_PER_NIBBLE: usize = 1; + #[inline(always)] const fn is_empty(&self) -> bool { self.head == self.tail @@ -195,7 +211,7 @@ impl<'a> NibblesIterator<'a> { NibblesIterator { data, head: 0, - tail: 2 * data.len(), + tail: Self::BYTES_PER_NIBBLE * data.len(), } } } @@ -231,9 +247,11 @@ mod test { use std::fmt::Debug; use test_case::test_case; + #[cfg(not(feature = "branch_factor_256"))] static TEST_BYTES: [u8; 4] = [0xde, 0xad, 0xbe, 0xef]; #[test] + #[cfg(not(feature = "branch_factor_256"))] fn happy_regular_nibbles() { let iter = NibblesIterator::new(&TEST_BYTES); let expected = [0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; @@ -242,6 +260,7 @@ mod test { } #[test] + #[cfg(not(feature = "branch_factor_256"))] fn size_hint() { let mut iter = NibblesIterator::new(&TEST_BYTES); assert_eq!((8, Some(8)), iter.size_hint()); @@ -250,6 +269,7 @@ mod test { } #[test] + #[cfg(not(feature = "branch_factor_256"))] fn backwards() { let iter = NibblesIterator::new(&TEST_BYTES).rev(); let expected = [0xf, 0xe, 0xe, 0xb, 0xd, 0xa, 0xe, 0xd]; @@ -257,6 +277,7 @@ mod test { } #[test] + #[cfg(not(feature = "branch_factor_256"))] fn nth_back() { let mut iter = NibblesIterator::new(&TEST_BYTES); assert_eq!(iter.nth_back(0), Some(0xf)); @@ -277,6 +298,7 @@ mod test { } #[test] + #[cfg(not(feature = "branch_factor_256"))] fn not_empty_because_of_data() { let mut iter = NibblesIterator::new(&[1]); assert!(!iter.is_empty()); diff --git a/storage/src/nodestore.rs b/storage/src/nodestore.rs index a16895657..20232b9e2 100644 --- a/storage/src/nodestore.rs +++ b/storage/src/nodestore.rs @@ -1145,6 +1145,8 @@ impl NodeStore { #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use std::array::from_fn; + use crate::linear::memory::MemStore; use crate::{BranchNode, LeafNode}; use arc_swap::access::DynGuard; @@ -1232,7 +1234,13 @@ mod tests { #[test_case(BranchNode { partial_path: Path::from([6, 7, 8]), value: Some(vec![9, 10, 11].into_boxed_slice()), - children: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, Some(Child::AddressWithHash(LinearAddress::new(1).unwrap(), std::array::from_fn::(|i| i as u8).into()))], + children: from_fn(|i| { + if i == 15 { + Some(Child::AddressWithHash(LinearAddress::new(1).unwrap(), std::array::from_fn::(|i| i as u8).into())) + } else { + None + } + }), }; "branch node with 1 child")] #[test_case( Node::Leaf(LeafNode {