Skip to content

Commit

Permalink
Generalize Node Encoding Scheme (#269)
Browse files Browse the repository at this point in the history
Co-authored-by: hhao <[email protected]>
  • Loading branch information
richardpringle and hhao authored Sep 25, 2023
1 parent 063e5a7 commit 9f6cc49
Show file tree
Hide file tree
Showing 6 changed files with 329 additions and 109 deletions.
1 change: 1 addition & 0 deletions firewood/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ sha3 = "0.10.2"
thiserror = "1.0.38"
tokio = { version = "1.21.1", features = ["rt", "sync", "macros"] }
typed-builder = "0.16.0"
bincode = "1.3.3"

[dev-dependencies]
criterion = "0.5.1"
Expand Down
87 changes: 86 additions & 1 deletion firewood/src/merkle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod node;
mod partial_path;
mod trie_hash;

pub(crate) use node::Encoded;
pub use node::{BranchNode, Data, ExtNode, LeafNode, Node, NodeType, NBRANCH};
pub use partial_path::PartialPath;
pub use trie_hash::{TrieHash, TRIE_HASH_LEN};
Expand Down Expand Up @@ -1256,9 +1257,10 @@ pub fn from_nibbles(nibbles: &[u8]) -> impl Iterator<Item = u8> + '_ {
#[cfg(test)]
mod test {
use super::*;
use shale::cached::PlainMem;
use shale::cached::{DynamicMem, PlainMem};
use shale::{CachedStore, Storable};
use std::ops::Deref;
use std::sync::Arc;
use test_case::test_case;

#[test_case(vec![0x12, 0x34, 0x56], vec![0x1, 0x2, 0x3, 0x4, 0x5, 0x6])]
Expand Down Expand Up @@ -1381,4 +1383,87 @@ mod test {
check(node);
}
}
#[test]
fn test_encode() {
    // Value passed as both arguments of the compact-space header.
    const RESERVED: usize = 0x1000;

    // Write a dehydrated header at the null address of the metadata store,
    // then read it back as a stored object.
    let reserved = std::num::NonZeroUsize::new(RESERVED).unwrap();
    let header_addr = DiskAddress::null();
    let mut meta_mem = DynamicMem::new(0x10000, 0);
    let header_bytes =
        shale::to_dehydrated(&shale::compact::CompactSpaceHeader::new(reserved, reserved))
            .unwrap();
    meta_mem.write(header_addr.into(), &header_bytes);
    let header_obj = shale::StoredView::ptr_to_obj(
        &meta_mem,
        header_addr,
        shale::compact::CompactHeader::MSIZE,
    )
    .unwrap();

    // Build the merkle instance on top of a compact space backed by two
    // in-memory stores.
    let space = shale::compact::CompactSpace::new(
        Arc::new(meta_mem),
        Arc::new(DynamicMem::new(0x10000, 0x1)),
        header_obj,
        shale::ObjCache::new(1),
        10,
        16,
    )
    .expect("CompactSpace init fail");
    let merkle = Merkle::new(Box::new(space));

    // Case 1: a leaf round-trips through encode/decode, and so does a branch
    // that carries the leaf's encoding in child slot 0.
    {
        let leaf = Node::new(NodeType::Leaf(LeafNode(
            PartialPath(vec![0x1, 0x2, 0x3]),
            Data(vec![0x4, 0x5]),
        )));
        let leaf_ref = merkle.new_node(leaf.clone()).unwrap();
        let leaf_bytes = leaf_ref.get_eth_rlp(merkle.store.as_ref());
        let leaf_again = Node::new(NodeType::decode(leaf_bytes).unwrap());
        let leaf_again_bytes = leaf_again.get_eth_rlp(merkle.store.as_ref());
        assert_eq!(leaf_bytes, leaf_again_bytes);

        let mut child_encodings: [Option<Vec<u8>>; NBRANCH] = Default::default();
        child_encodings[0] = Some(leaf_bytes.to_vec());
        let branch = Node::new(NodeType::Branch(BranchNode {
            chd: [None; NBRANCH],
            value: Some(Data("value1".as_bytes().to_vec())),
            chd_eth_rlp: child_encodings,
        }));

        let branch_ref = merkle.new_node(branch.clone()).unwrap();

        let branch_bytes = branch_ref.get_eth_rlp(merkle.store.as_ref());
        let branch_again = Node::new(NodeType::decode(branch_bytes).unwrap());
        let branch_again_bytes = branch_again.get_eth_rlp(merkle.store.as_ref());
        assert_eq!(branch_bytes, branch_again_bytes);
    }

    // Case 2: a childless branch round-trips, and so does an extension node
    // that carries the branch's encoding as its pre-computed child.
    {
        let branch = Node::new(NodeType::Branch(BranchNode {
            chd: [None; NBRANCH],
            value: Some(Data("value1".as_bytes().to_vec())),
            chd_eth_rlp: Default::default(),
        }));
        let branch_ref = merkle.new_node(branch.clone()).unwrap();
        let branch_bytes = branch_ref.get_eth_rlp(merkle.store.as_ref());
        let branch_again = Node::new(NodeType::decode(branch_bytes).unwrap());
        let branch_again_bytes = branch_again.get_eth_rlp(merkle.store.as_ref());
        assert_eq!(branch_bytes, branch_again_bytes);

        let ext = Node::new(NodeType::Extension(ExtNode(
            PartialPath(vec![0x1, 0x2, 0x3]),
            DiskAddress::null(),
            Some(branch_bytes.to_vec()),
        )));
        let ext_ref = merkle.new_node(ext.clone()).unwrap();

        let ext_bytes = ext_ref.get_eth_rlp(merkle.store.as_ref());
        let ext_again = Node::new(NodeType::decode(ext_bytes).unwrap());
        let ext_again_bytes = ext_again.get_eth_rlp(merkle.store.as_ref());
        assert_eq!(ext_bytes, ext_again_bytes);
    }
}
}
165 changes: 134 additions & 31 deletions firewood/src/merkle/node.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Copyright (C) 2023, Ava Labs, Inc. All rights reserved.
// See the file LICENSE.md for licensing terms.

use bincode::{Error, Options};
use enum_as_inner::EnumAsInner;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use sha3::{Digest, Keccak256};
use shale::{disk_address::DiskAddress, CachedStore, ShaleError, ShaleStore, Storable};
use std::{
Expand All @@ -14,11 +16,15 @@ use std::{
};

use crate::merkle::to_nibble_array;
use crate::nibbles::Nibbles;

use super::{from_nibbles, PartialPath, TrieHash, TRIE_HASH_LEN};

/// Number of child slots in a branch node (length of the `chd` and
/// `chd_eth_rlp` arrays).
pub const NBRANCH: usize = 16;

/// Item count of an encoded leaf or extension node; `NodeType::decode`
/// matches the decoded list length against this value.
const EXT_NODE_SIZE: usize = 2;
/// Item count of an encoded branch node; `NodeType::decode` matches the
/// decoded list length against this value. Branch encoding emits
/// `NBRANCH + 1` items (one per child slot plus the value).
const BRANCH_NODE_SIZE: usize = 17;

/// Owned byte payload wrapper; used in this module as the value stored in
/// leaf and branch nodes.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Data(pub(super) Vec<u8>);

Expand All @@ -29,6 +35,28 @@ impl std::ops::Deref for Data {
}
}

/// One item of an encoded node.
///
/// The variant records how the payload must be treated by `decode`.
#[derive(Serialize, Deserialize, Debug)]
pub(crate) enum Encoded<T> {
// Payload is used as-is; `decode` returns it unchanged.
Raw(T),
// Payload holds bincode-serialized bytes; `decode` deserializes them into a `T`.
Data(T),
}

impl Default for Encoded<Vec<u8>> {
fn default() -> Self {
// This is the default serialized empty vector
Encoded::Data(vec![0])
}
}

impl<T: DeserializeOwned + AsRef<[u8]>> Encoded<T> {
    /// Consumes the item and yields its payload.
    ///
    /// `Raw` payloads are handed back untouched; `Data` payloads are run
    /// through bincode (default options) first.
    ///
    /// # Errors
    /// Returns the bincode error when a `Data` payload fails to deserialize.
    pub fn decode(self) -> Result<T, bincode::Error> {
        let serialized = match self {
            Self::Raw(payload) => return Ok(payload),
            Self::Data(serialized) => serialized,
        };

        bincode::DefaultOptions::new().deserialize(serialized.as_ref())
    }
}

#[derive(PartialEq, Eq, Clone)]
pub struct BranchNode {
pub(super) chd: [Option<DiskAddress>; NBRANCH],
Expand Down Expand Up @@ -77,45 +105,73 @@ impl BranchNode {
(only_chd, has_chd)
}

pub fn decode(buf: &[u8]) -> Result<Self, Error> {
let mut items: Vec<Encoded<Vec<u8>>> = bincode::DefaultOptions::new().deserialize(buf)?;

// we've already validated the size, that's why we can safely unwrap
let data = items.pop().unwrap().decode()?;
// Extract the value of the branch node and set to None if it's an empty Vec
let value = Some(data).filter(|data| !data.is_empty());

// Record rlp values of all children.
let mut chd_eth_rlp: [Option<Vec<u8>>; NBRANCH] = Default::default();

// we popped the last element, so their should only be NBRANCH items left
for (i, chd) in items.into_iter().enumerate() {
let data = chd.decode()?;
chd_eth_rlp[i] = Some(data).filter(|data| !data.is_empty());
}

Ok(BranchNode::new([None; NBRANCH], value, chd_eth_rlp))
}

// Encodes this branch node as bytes.
//
// NOTE(review): this span is a rendered diff hunk. The `stream.*` lines
// (`rlp::RlpStream`) appear to be the removed implementation and the
// `list[...]` / bincode lines the added one — confirm against the repository.
//
// In the `list` version: `list` has `NBRANCH + 1` entries, one per child slot
// plus a final entry for the node's value, and the whole list is serialized
// with bincode default options.
fn calc_eth_rlp<S: ShaleStore<Node>>(&self, store: &S) -> Vec<u8> {
let mut stream = rlp::RlpStream::new_list(17);
let mut list = <[Encoded<Vec<u8>>; NBRANCH + 1]>::default();

for (i, c) in self.chd.iter().enumerate() {
match c {
Some(c) => {
let mut c_ref = store.get_item(*c).unwrap();

// When `get_eth_rlp_long` is true the child's root hash is stored
// (serialized, as `Data`); otherwise its encoding is embedded as `Raw`.
if c_ref.get_eth_rlp_long::<S>(store) {
stream.append(&&(*c_ref.get_root_hash::<S>(store))[..]);
list[i] = Encoded::Data(
bincode::DefaultOptions::new()
.serialize(&&(*c_ref.get_root_hash::<S>(store))[..])
.unwrap(),
);

// See struct docs for ordering requirements
if c_ref.lazy_dirty.load(Ordering::Relaxed) {
c_ref.write(|_| {}).unwrap();
c_ref.lazy_dirty.store(false, Ordering::Relaxed)
}
} else {
let c_rlp = &c_ref.get_eth_rlp::<S>(store);
stream.append_raw(c_rlp, 1);
list[i] = Encoded::Raw(c_rlp.to_vec());
}
}
None => {
// Check if there is already a calculated rlp for the child, which
// can happen when manually constructing a trie from proof.
if self.chd_eth_rlp[i].is_none() {
stream.append_empty_data();
} else {
let v = self.chd_eth_rlp[i].clone().unwrap();
if let Some(v) = &self.chd_eth_rlp[i] {
// A stored encoding of exactly TRIE_HASH_LEN bytes is serialized as
// `Data`; any other length is embedded as `Raw`.
if v.len() == TRIE_HASH_LEN {
stream.append(&v);
list[i] =
Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap());
} else {
stream.append_raw(&v, 1);
list[i] = Encoded::Raw(v.clone());
}
}
}
};
}
match &self.value {
Some(val) => stream.append(&val.to_vec()),
None => stream.append_empty_data(),
};
stream.out().into()

// The value, if any, occupies the final slot; otherwise the slot keeps
// its default entry.
if let Some(Data(val)) = &self.value {
list[NBRANCH] = Encoded::Data(bincode::DefaultOptions::new().serialize(val).unwrap());
}

bincode::DefaultOptions::new()
.serialize(list.as_slice())
.unwrap()
}

pub fn new(
Expand Down Expand Up @@ -162,11 +218,15 @@ impl Debug for LeafNode {

impl LeafNode {
// Encodes this leaf node as bytes.
//
// NOTE(review): this span is a rendered diff hunk. The `rlp::encode_list`
// lines appear to be the removed implementation and the bincode lines the
// added one — confirm against the repository.
//
// In the bincode version the node is a two-item list, both `Encoded::Raw`:
// the path (`self.0.encode(true)` packed by `from_nibbles`) and the data bytes.
fn calc_eth_rlp(&self) -> Vec<u8> {
rlp::encode_list::<Vec<u8>, _>(&[
from_nibbles(&self.0.encode(true)).collect(),
self.1.to_vec(),
])
.into()
bincode::DefaultOptions::new()
.serialize(
[
Encoded::Raw(from_nibbles(&self.0.encode(true)).collect()),
Encoded::Raw(self.1.to_vec()),
]
.as_slice(),
)
.unwrap()
}

pub fn new(path: Vec<u8>, data: Vec<u8>) -> Self {
Expand Down Expand Up @@ -197,34 +257,45 @@ impl Debug for ExtNode {

impl ExtNode {
// Encodes this extension node as bytes.
//
// NOTE(review): this span is a rendered diff hunk. The `stream.*` lines
// (`rlp::RlpStream`) appear to be the removed implementation and the
// `list[...]` / bincode lines the added one — confirm against the repository.
//
// In the `list` version the node is a two-item list: the serialized path
// (`self.0.encode(false)` packed by `from_nibbles`) and the child entry.
fn calc_eth_rlp<S: ShaleStore<Node>>(&self, store: &S) -> Vec<u8> {
let mut stream = rlp::RlpStream::new_list(2);
let mut list = <[Encoded<Vec<u8>>; 2]>::default();
list[0] = Encoded::Data(
bincode::DefaultOptions::new()
.serialize(&from_nibbles(&self.0.encode(false)).collect::<Vec<_>>())
.unwrap(),
);

// A non-null child address means the child is fetched from the store.
if !self.1.is_null() {
let mut r = store.get_item(self.1).unwrap();
stream.append(&from_nibbles(&self.0.encode(false)).collect::<Vec<_>>());

// When `get_eth_rlp_long` is true the child's root hash is stored
// (serialized, as `Data`); otherwise its encoding is embedded as `Raw`.
if r.get_eth_rlp_long(store) {
stream.append(&&(*r.get_root_hash(store))[..]);
list[1] = Encoded::Data(
bincode::DefaultOptions::new()
.serialize(&&(*r.get_root_hash(store))[..])
.unwrap(),
);

if r.lazy_dirty.load(Ordering::Relaxed) {
r.write(|_| {}).unwrap();
r.lazy_dirty.store(false, Ordering::Relaxed);
}
} else {
stream.append_raw(r.get_eth_rlp(store), 1);
list[1] = Encoded::Raw(r.get_eth_rlp(store).to_vec());
}
} else {
// Check if there is already a calculated rlp for the child, which
// can happen when manually constructing a trie from proof.
if self.2.is_none() {
stream.append_empty_data();
} else {
let v = self.2.clone().unwrap();
if let Some(v) = &self.2 {
// A stored encoding of exactly TRIE_HASH_LEN bytes is serialized as
// `Data`; any other length is embedded as `Raw`.
if v.len() == TRIE_HASH_LEN {
stream.append(&v);
list[1] = Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap());
} else {
stream.append_raw(&v, 1);
list[1] = Encoded::Raw(v.clone());
}
}
}
stream.out().into()

bincode::DefaultOptions::new()
.serialize(list.as_slice())
.unwrap()
}

pub fn new(path: Vec<u8>, chd: DiskAddress, chd_eth_rlp: Option<Vec<u8>>) -> Self {
Expand Down Expand Up @@ -298,13 +369,45 @@ pub enum NodeType {
}

impl NodeType {
// Encodes the node by dispatching to the variant's own `calc_eth_rlp`.
// Only the extension and branch variants take the store.
//
// NOTE(review): this span is a rendered diff hunk; the two signature lines
// appear to be the removed (private) and added (`pub`) versions — confirm
// against the repository.
fn calc_eth_rlp<S: ShaleStore<Node>>(&self, store: &S) -> Vec<u8> {
pub fn calc_eth_rlp<S: ShaleStore<Node>>(&self, store: &S) -> Vec<u8> {
match &self {
NodeType::Leaf(n) => n.calc_eth_rlp(),
NodeType::Extension(n) => n.calc_eth_rlp(store),
NodeType::Branch(n) => n.calc_eth_rlp(store),
}
}

/// Decodes a node from its bincode-encoded item list.
///
/// The number of items selects the node kind: `EXT_NODE_SIZE` items give a
/// leaf or an extension (chosen by the terminator flag recovered from the
/// path), and `BRANCH_NODE_SIZE` items give a branch. A decoded extension
/// has a null child address and keeps the child's bytes as its
/// pre-computed encoding.
///
/// # Errors
/// Returns an error when `buf` or any item fails to deserialize, or when
/// the item count matches neither node size.
pub fn decode(buf: &[u8]) -> Result<NodeType, Error> {
    let items: Vec<Encoded<Vec<u8>>> = bincode::DefaultOptions::new().deserialize(buf)?;

    match items.len() {
        EXT_NODE_SIZE => {
            // Exactly two items are present, so both `next()` calls yield a value.
            let mut fields = items.into_iter();

            let path_bytes: Vec<u8> = fields.next().unwrap().decode()?;
            let path_nibbles = Nibbles::<0>::new(&path_bytes);
            let (path, is_terminal) = PartialPath::from_nibbles(path_nibbles.into_iter());
            let key = path.into_inner();

            let payload: Vec<u8> = fields.next().unwrap().decode()?;

            let node = if is_terminal {
                NodeType::Leaf(LeafNode::new(key, payload))
            } else {
                NodeType::Extension(ExtNode::new(key, DiskAddress::null(), Some(payload)))
            };

            Ok(node)
        }
        BRANCH_NODE_SIZE => BranchNode::decode(buf).map(NodeType::Branch),
        size => Err(Box::new(bincode::ErrorKind::Custom(format!(
            "invalid size: {size}"
        )))),
    }
}
}

impl Node {
Expand Down
2 changes: 1 addition & 1 deletion firewood/src/nibbles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ mod test {
/// Iterating `Nibbles::<1>` must yield one leading zero nibble followed by
/// the nibbles of the test bytes.
fn leading_zero_nibbles_iter() {
    let nib = Nibbles::<1>(&TEST_BYTES);
    let expected: [u8; 9] = [0u8, 0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf];
    // `Iterator::eq` only returns a bool. The result used to be discarded,
    // so the test could never fail; assert on it.
    assert!(expected.into_iter().eq(nib));
}

#[test]
Expand Down
Loading

0 comments on commit 9f6cc49

Please sign in to comment.