From d96d6f6266087c632d195b9141b3ba53b312020a Mon Sep 17 00:00:00 2001 From: arvidn Date: Sun, 7 Jan 2024 11:38:56 +0100 Subject: [PATCH 1/2] change the internal representation of NodePtr to use the top 6 bits as 'type' and the bottom 26 bits for 'index'. Currently we use non-negative numbers for pairs and negative numbers for atoms. The new representation supports more types --- src/allocator.rs | 134 +++++++++++++++++++++++++++++--------- src/op_utils.rs | 20 ++++-- src/serde/object_cache.rs | 25 +------ 3 files changed, 121 insertions(+), 58 deletions(-) diff --git a/src/allocator.rs b/src/allocator.rs index 6d75bbc7..11b1ad79 100644 --- a/src/allocator.rs +++ b/src/allocator.rs @@ -4,8 +4,56 @@ use crate::reduction::EvalErr; use chia_bls::{G1Element, G2Element}; use clvm_traits::{ClvmDecoder, ClvmEncoder, FromClvmError, ToClvmError}; +const MAX_NUM_ATOMS: usize = 62500000; +const MAX_NUM_PAIRS: usize = 62500000; +const NODE_PTR_IDX_BITS: u32 = 26; +const NODE_PTR_IDX_MASK: u32 = (1 << NODE_PTR_IDX_BITS) - 1; + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct NodePtr(pub i32); +pub struct NodePtr(u32); + +enum ObjectType { + Pair, + Bytes, +} + +// The top 6 bits of the NodePtr indicate what type of object it is +impl NodePtr { + pub fn null() -> Self { + Self::new(ObjectType::Bytes, 0) + } + + fn new(t: ObjectType, idx: usize) -> Self { + assert!(idx <= NODE_PTR_IDX_MASK as usize); + NodePtr(((t as u32) << NODE_PTR_IDX_BITS) | (idx as u32)) + } + + fn node_type(&self) -> (ObjectType, usize) { + ( + match self.0 >> NODE_PTR_IDX_BITS { + 0 => ObjectType::Pair, + 1 => ObjectType::Bytes, + _ => { + panic!("unknown NodePtr type"); + } + }, + (self.0 & NODE_PTR_IDX_MASK) as usize, + ) + } + + pub(crate) fn as_index(&self) -> usize { + match self.node_type() { + (ObjectType::Pair, idx) => idx * 2, + (ObjectType::Bytes, idx) => idx * 2 + 1, + } + } +} + +impl Default for NodePtr { + fn default() -> Self { + Self::null() + } +} pub enum SExp { 
Atom, @@ -58,9 +106,6 @@ pub struct Allocator { heap_limit: usize, } -const MAX_NUM_ATOMS: usize = 62500000; -const MAX_NUM_PAIRS: usize = 62500000; - impl Default for Allocator { fn default() -> Self { Self::new() @@ -122,13 +167,14 @@ impl Allocator { if (self.heap_limit - start as usize) < v.len() { return err(self.null(), "out of memory"); } - if self.atom_vec.len() == MAX_NUM_ATOMS { + let idx = self.atom_vec.len(); + if idx == MAX_NUM_ATOMS { return err(self.null(), "too many atoms"); } self.u8_vec.extend_from_slice(v); let end = self.u8_vec.len() as u32; self.atom_vec.push(AtomBuf { start, end }); - Ok(NodePtr(-(self.atom_vec.len() as i32))) + Ok(NodePtr::new(ObjectType::Bytes, idx)) } pub fn new_number(&mut self, v: Number) -> Result { @@ -144,22 +190,22 @@ impl Allocator { } pub fn new_pair(&mut self, first: NodePtr, rest: NodePtr) -> Result { - let r = self.pair_vec.len() as i32; - if self.pair_vec.len() == MAX_NUM_PAIRS { + let idx = self.pair_vec.len(); + if idx == MAX_NUM_PAIRS { return err(self.null(), "too many pairs"); } self.pair_vec.push(IntPair { first, rest }); - Ok(NodePtr(r)) + Ok(NodePtr::new(ObjectType::Pair, idx)) } pub fn new_substr(&mut self, node: NodePtr, start: u32, end: u32) -> Result { - if node.0 >= 0 { - return err(node, "(internal error) substr expected atom, got pair"); - } if self.atom_vec.len() == MAX_NUM_ATOMS { return err(self.null(), "too many atoms"); } - let atom = self.atom_vec[(-node.0 - 1) as usize]; + let (ObjectType::Bytes, idx) = node.node_type() else { + return err(node, "(internal error) substr expected atom, got pair"); + }; + let atom = self.atom_vec[idx]; let atom_len = atom.end - atom.start; if start > atom_len { return err(node, "substr start out of bounds"); @@ -170,11 +216,12 @@ impl Allocator { if end < start { return err(node, "substr invalid bounds"); } + let idx = self.atom_vec.len(); self.atom_vec.push(AtomBuf { start: atom.start + start, end: atom.start + end, }); - Ok(NodePtr(-(self.atom_vec.len() as 
i32))) + Ok(NodePtr::new(ObjectType::Bytes, idx)) } pub fn new_concat(&mut self, new_size: usize, nodes: &[NodePtr]) -> Result { @@ -189,12 +236,12 @@ impl Allocator { let mut counter: usize = 0; for node in nodes { - if node.0 >= 0 { + let (ObjectType::Bytes, idx) = node.node_type() else { self.u8_vec.truncate(start); return err(*node, "(internal error) concat expected atom, got pair"); - } + }; - let term = self.atom_vec[(-node.0 - 1) as usize]; + let term = self.atom_vec[idx]; if counter + term.len() > new_size { self.u8_vec.truncate(start); return err(*node, "(internal error) concat passed invalid new_size"); @@ -211,11 +258,12 @@ impl Allocator { ); } let end = self.u8_vec.len() as u32; + let idx = self.atom_vec.len(); self.atom_vec.push(AtomBuf { start: (start as u32), end, }); - Ok(NodePtr(-(self.atom_vec.len() as i32))) + Ok(NodePtr::new(ObjectType::Bytes, idx)) } pub fn atom_eq(&self, lhs: NodePtr, rhs: NodePtr) -> bool { @@ -223,13 +271,27 @@ impl Allocator { } pub fn atom(&self, node: NodePtr) -> &[u8] { - assert!(node.0 < 0, "expected atom, got pair"); - let atom = self.atom_vec[(-node.0 - 1) as usize]; - &self.u8_vec[atom.start as usize..atom.end as usize] + match node.node_type() { + (ObjectType::Bytes, idx) => { + let atom = self.atom_vec[idx]; + &self.u8_vec[atom.start as usize..atom.end as usize] + } + _ => { + panic!("expected atom, got pair"); + } + } } pub fn atom_len(&self, node: NodePtr) -> usize { - self.atom(node).len() + match node.node_type() { + (ObjectType::Bytes, idx) => { + let atom = self.atom_vec[idx]; + (atom.end - atom.start) as usize + } + _ => { + panic!("expected atom, got pair"); + } + } } pub fn number(&self, node: NodePtr) -> Number { @@ -265,11 +327,12 @@ impl Allocator { } pub fn sexp(&self, node: NodePtr) -> SExp { - if node.0 >= 0 { - let pair = self.pair_vec[node.0 as usize]; - SExp::Pair(pair.first, pair.rest) - } else { - SExp::Atom + match node.node_type() { + (ObjectType::Bytes, _) => SExp::Atom, + (ObjectType::Pair, 
idx) => { + let pair = self.pair_vec[idx]; + SExp::Pair(pair.first, pair.rest) + } } } @@ -286,11 +349,11 @@ impl Allocator { } pub fn null(&self) -> NodePtr { - NodePtr(-1) + NodePtr::new(ObjectType::Bytes, 0) } pub fn one(&self) -> NodePtr { - NodePtr(-2) + NodePtr::new(ObjectType::Bytes, 1) } #[cfg(feature = "counters")] @@ -345,6 +408,19 @@ impl ClvmDecoder for Allocator { } } +#[test] +fn test_node_as_index() { + assert_eq!(NodePtr::new(ObjectType::Pair, 0).as_index(), 0); + assert_eq!(NodePtr::new(ObjectType::Pair, 1).as_index(), 2); + assert_eq!(NodePtr::new(ObjectType::Pair, 2).as_index(), 4); + assert_eq!(NodePtr::new(ObjectType::Pair, 3).as_index(), 6); + assert_eq!(NodePtr::new(ObjectType::Bytes, 0).as_index(), 1); + assert_eq!(NodePtr::new(ObjectType::Bytes, 1).as_index(), 3); + assert_eq!(NodePtr::new(ObjectType::Bytes, 2).as_index(), 5); + assert_eq!(NodePtr::new(ObjectType::Bytes, 3).as_index(), 7); + assert_eq!(NodePtr::new(ObjectType::Bytes, 4).as_index(), 9); +} + #[test] fn test_atom_eq() { let mut a = Allocator::new(); diff --git a/src/op_utils.rs b/src/op_utils.rs index d3f2cda7..403ee912 100644 --- a/src/op_utils.rs +++ b/src/op_utils.rs @@ -18,7 +18,7 @@ pub fn get_args( ) -> Result<[NodePtr; N], EvalErr> { let mut next = args; let mut counter = 0; - let mut ret: [NodePtr; N] = [NodePtr(0); N]; + let mut ret: [NodePtr; N] = [NodePtr::null(); N]; while let Some((first, rest)) = a.next(next) { next = rest; @@ -91,7 +91,7 @@ pub fn get_varargs( ) -> Result<([NodePtr; N], usize), EvalErr> { let mut next = args; let mut counter = 0; - let mut ret: [NodePtr; N] = [NodePtr(0); N]; + let mut ret: [NodePtr; N] = [NodePtr::null(); N]; while let Some((first, rest)) = a.next(next) { next = rest; @@ -131,19 +131,27 @@ fn test_get_varargs() { ); assert_eq!( get_varargs::<4>(&a, args3, "test").unwrap(), - ([a1, a2, a3, NodePtr(0)], 3) + ([a1, a2, a3, NodePtr::null()], 3) ); assert_eq!( get_varargs::<4>(&a, args2, "test").unwrap(), - ([a2, a3, NodePtr(0), 
NodePtr(0)], 2) + ([a2, a3, NodePtr::null(), NodePtr::null()], 2) ); assert_eq!( get_varargs::<4>(&a, args1, "test").unwrap(), - ([a3, NodePtr(0), NodePtr(0), NodePtr(0)], 1) + ([a3, NodePtr::null(), NodePtr::null(), NodePtr::null()], 1) ); assert_eq!( get_varargs::<4>(&a, args0, "test").unwrap(), - ([NodePtr(0), NodePtr(0), NodePtr(0), NodePtr(0)], 0) + ( + [ + NodePtr::null(), + NodePtr::null(), + NodePtr::null(), + NodePtr::null() + ], + 0 + ) ); let r = get_varargs::<3>(&a, args4, "test").unwrap_err(); diff --git a/src/serde/object_cache.rs b/src/serde/object_cache.rs index ed479259..09bac2ab 100644 --- a/src/serde/object_cache.rs +++ b/src/serde/object_cache.rs @@ -26,18 +26,6 @@ pub struct ObjectCache<'a, T> { f: CachedFunction, } -/// turn a `NodePtr` into a `usize`. Positive values become even indices -/// and negative values become odd indices. - -fn node_to_index(node: &NodePtr) -> usize { - let value = node.0; - if value < 0 { - (-value - value - 1) as usize - } else { - (value + value) as usize - } -} - impl<'a, T: Clone> ObjectCache<'a, T> { pub fn new(allocator: &'a Allocator, f: CachedFunction) -> Self { let cache = vec![]; @@ -57,7 +45,7 @@ impl<'a, T: Clone> ObjectCache<'a, T> { /// return the cached value for this node, or `None` fn get_from_cache(&self, node: &NodePtr) -> Option<&T> { - let index = node_to_index(node); + let index = node.as_index(); if index < self.cache.len() { self.cache[index].as_ref() } else { @@ -67,7 +55,7 @@ impl<'a, T: Clone> ObjectCache<'a, T> { /// set the cached value for a node fn set(&mut self, node: &NodePtr, v: T) { - let index = node_to_index(node); + let index = node.as_index(); if index >= self.cache.len() { self.cache.resize(index + 1, None); } @@ -255,15 +243,6 @@ fn test_serialized_length() { check("ff01ff02ff03ff04ff05ff0680", 13); // (1 2 3 4 5 6) } -#[test] -fn test_node_to_index() { - assert_eq!(node_to_index(&NodePtr(0)), 0); - assert_eq!(node_to_index(&NodePtr(1)), 2); - 
assert_eq!(node_to_index(&NodePtr(2)), 4); - assert_eq!(node_to_index(&NodePtr(-1)), 1); - assert_eq!(node_to_index(&NodePtr(-2)), 3); -} - // this test takes a very long time (>60s) in debug mode, so it only runs in release mode #[cfg(not(debug_assertions))] From d9952fb663fb7f86c043d09c6fd5ea316003785f Mon Sep 17 00:00:00 2001 From: arvidn Date: Mon, 8 Jan 2024 19:59:23 +0100 Subject: [PATCH 2/2] make a hack in benchmark-clvm-cost more explicit --- src/allocator.rs | 5 +++++ tools/src/bin/benchmark-clvm-cost.rs | 25 ++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/allocator.rs b/src/allocator.rs index 11b1ad79..c2c52d4b 100644 --- a/src/allocator.rs +++ b/src/allocator.rs @@ -23,6 +23,11 @@ impl NodePtr { Self::new(ObjectType::Bytes, 0) } + // TODO: remove this + pub fn hack(val: usize) -> Self { + Self::new(ObjectType::Bytes, val) + } + fn new(t: ObjectType, idx: usize) -> Self { assert!(idx <= NODE_PTR_IDX_MASK as usize); NodePtr(((t as u32) << NODE_PTR_IDX_BITS) | (idx as u32)) diff --git a/tools/src/bin/benchmark-clvm-cost.rs b/tools/src/bin/benchmark-clvm-cost.rs index e24cb3f3..38ccbb43 100644 --- a/tools/src/bin/benchmark-clvm-cost.rs +++ b/tools/src/bin/benchmark-clvm-cost.rs @@ -16,7 +16,7 @@ enum OpArgs { // special argument to indicate it should be substituted for varied in the FreeBytes test to // measure cost per byte -const VARIABLE: NodePtr = NodePtr(999); +const VARIABLE_VAL: usize = 999; // builds calls in the form: // ( arg arg ...) 
@@ -99,9 +99,10 @@ fn quote(a: &mut Allocator, v: NodePtr) -> NodePtr { } fn subst_node(arg: NodePtr, substitution: NodePtr) -> NodePtr { - match arg { - VARIABLE => substitution, - _ => arg, + if arg == NodePtr::hack(VARIABLE_VAL) { + substitution + } else { + arg } } @@ -337,6 +338,8 @@ pub fn main() { let mut a = Allocator::new(); + let variable = NodePtr::hack(VARIABLE_VAL); + let g1 = a.new_atom(&hex::decode("97f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb").unwrap()).unwrap(); let g2 = a.new_atom(&hex::decode("93e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8").unwrap()).unwrap(); @@ -386,21 +389,21 @@ pub fn main() { Operator { opcode: 60, name: "modpow (modulus cost)", - arg: OpArgs::ThreeArgs(number, number, VARIABLE), + arg: OpArgs::ThreeArgs(number, number, variable), extra: None, flags: PER_BYTE_COST | EXPONENTIAL_COST, }, Operator { opcode: 60, name: "modpow (exponent cost)", - arg: OpArgs::ThreeArgs(number, VARIABLE, number), + arg: OpArgs::ThreeArgs(number, variable, number), extra: None, flags: PER_BYTE_COST | EXPONENTIAL_COST, }, Operator { opcode: 60, name: "modpow (value cost)", - arg: OpArgs::ThreeArgs(VARIABLE, number, number), + arg: OpArgs::ThreeArgs(variable, number, number), extra: None, flags: PER_BYTE_COST, }, @@ -421,7 +424,7 @@ pub fn main() { Operator { opcode: 50, name: "g1_multiply", - arg: OpArgs::TwoArgs(g1, VARIABLE), + arg: OpArgs::TwoArgs(g1, variable), extra: Some(g1), flags: PER_BYTE_COST, }, @@ -449,7 +452,7 @@ pub fn main() { Operator { opcode: 54, name: "g2_multiply", - arg: OpArgs::TwoArgs(g2, VARIABLE), + arg: OpArgs::TwoArgs(g2, variable), extra: Some(g2), flags: PER_BYTE_COST, }, @@ -463,14 +466,14 @@ pub fn main() { Operator { opcode: 56, name: "g1_map", - arg: OpArgs::SingleArg(VARIABLE), + arg: OpArgs::SingleArg(variable), 
extra: None, flags: PER_BYTE_COST | LARGE_BUFFERS, }, Operator { opcode: 57, name: "g2_map", - arg: OpArgs::SingleArg(VARIABLE), + arg: OpArgs::SingleArg(variable), extra: None, flags: PER_BYTE_COST | LARGE_BUFFERS, },