From 76dd6221aba81a54dc9cdc6564595d5ae349a3fa Mon Sep 17 00:00:00 2001 From: arvidn Date: Mon, 12 Feb 2024 09:21:46 +0100 Subject: [PATCH] introduce deserialize_backref_record() which records all the nodes being back-referenced to. This helps inform, e.g. tree_hash() which nodes' hashes should be cached. --- src/serde/de_br.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++- src/serde/mod.rs | 2 +- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/serde/de_br.rs b/src/serde/de_br.rs index a691882c..b5282614 100644 --- a/src/serde/de_br.rs +++ b/src/serde/de_br.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::io; use std::io::{Cursor, Read}; @@ -19,6 +20,7 @@ enum ParseOp { pub fn node_from_stream_backrefs( allocator: &mut Allocator, f: &mut Cursor<&[u8]>, + mut backref_callback: impl FnMut(NodePtr), ) -> io::Result<NodePtr> { let mut values = allocator.nil(); let mut ops = vec![ParseOp::SExp]; @@ -36,6 +38,7 @@ pub fn node_from_stream_backrefs( let path = parse_path(f)?; let reduction = traverse_path(allocator, path, values)?; let back_reference = reduction.1; + backref_callback(back_reference); values = allocator.new_pair(back_reference, values)?; } else { let new_atom = parse_atom(allocator, b[0], f)?; @@ -61,7 +64,19 @@ pub fn node_from_stream_backrefs( pub fn node_from_bytes_backrefs(allocator: &mut Allocator, b: &[u8]) -> io::Result<NodePtr> { let mut buffer = Cursor::new(b); - node_from_stream_backrefs(allocator, &mut buffer) + node_from_stream_backrefs(allocator, &mut buffer, |_node| {}) +} + +pub fn node_from_bytes_backrefs_record( + allocator: &mut Allocator, + b: &[u8], +) -> io::Result<(NodePtr, HashSet<NodePtr>)> { + let mut buffer = Cursor::new(b); + let mut backrefs = HashSet::<NodePtr>::new(); + let ret = node_from_stream_backrefs(allocator, &mut buffer, |node| { + backrefs.insert(node); + })?; + Ok((ret, backrefs)) } #[cfg(test)] @@ -113,3 +128,57 @@ fn test_deserialize_with_backrefs() { "e23c73777f814e8a4e2785487b272b8b22ddaded1f7cfb808b43f1148602882f", 
); } + +#[test] +fn test_deserialize_with_backrefs_record() { + fn deserialize_check(serialization_as_hex: &str, expected_backrefs: &[&'static str]) { + use crate::serde::node_to_bytes; + let buf = Vec::from_hex(serialization_as_hex).unwrap(); + let mut allocator = Allocator::new(); + let (_node, backrefs) = node_from_bytes_backrefs_record(&mut allocator, &buf) + .expect("node_from_bytes_backrefs_records"); + println!("backrefs: {:?}", backrefs); + assert_eq!(backrefs.len(), expected_backrefs.len()); + + let expected_backrefs = + HashSet::<String>::from_iter(expected_backrefs.iter().map(|s| s.to_string())); + let backrefs = HashSet::from_iter( + backrefs + .iter() + .map(|br| hex::encode(node_to_bytes(&allocator, *br).expect("node_to_bytes"))), + ); + + assert_eq!(backrefs, expected_backrefs); + } + + // ("foobar" "foobar") + // no-backrefs + deserialize_check("ff86666f6f626172ff86666f6f62617280", &[]); + // with back-refs + deserialize_check( + "ff86666f6f626172fe01", // ("foobar" "foobar") + &["ff86666f6f62617280"], + ); + + // ((1 2 3 4) 1 2 3 4) + // no-backrefs + deserialize_check("ffff01ff02ff03ff0480ff01ff02ff03ff0480", &[]); + // with back-refs + deserialize_check( + "ffff01ff02ff03ff0480fe02", // ((1 2 3 4) 1 2 3 4) + &["ff01ff02ff03ff0480"], + ); + + // `(((((a_very_long_repeated_string . 1) . (2 . 3)) . ((4 . 5) . (6 . 7))) . (8 . 
9)) 10 a_very_long_repeated_string)` + // no-backrefs + deserialize_check( + "ffffffffff9b615f766572795f6c6f6e675f72657065617465645f737472696e6701ff0203ffff04\ + 05ff0607ff0809ff0aff9b615f766572795f6c6f6e675f72657065617465645f737472696e6780", + &[], + ); + // with back-refs + deserialize_check( + "ffffffffff9b615f766572795f6c6f6e675f72657065617465645f737472696e6701ff0203ffff0405ff0607ff0809ff0afffe4180", + &["9b615f766572795f6c6f6e675f72657065617465645f737472696e67"], + ); +} diff --git a/src/serde/mod.rs b/src/serde/mod.rs index 430629a6..cbf597b6 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -16,7 +16,7 @@ pub mod write_atom; mod test; pub use de::node_from_bytes; -pub use de_br::node_from_bytes_backrefs; +pub use de_br::{node_from_bytes_backrefs, node_from_bytes_backrefs_record}; pub use de_tree::{parse_triples, ParsedTriple}; pub use ser::node_to_bytes; pub use ser_br::node_to_bytes_backrefs;