From d96d6f6266087c632d195b9141b3ba53b312020a Mon Sep 17 00:00:00 2001
From: arvidn <arvid@libtorrent.org>
Date: Sun, 7 Jan 2024 11:38:56 +0100
Subject: [PATCH 1/2] change the internal representation of NodePtr to use the
 top 6 bits as 'type' and the bottom 26 bits for 'index'. Currently we use
 non-negative numbers for pairs and negative numbers for atoms. The new
 representation supports more types

---
 src/allocator.rs          | 134 +++++++++++++++++++++++++++++---------
 src/op_utils.rs           |  20 ++++--
 src/serde/object_cache.rs |  25 +------
 3 files changed, 121 insertions(+), 58 deletions(-)

diff --git a/src/allocator.rs b/src/allocator.rs
index 6d75bbc7..11b1ad79 100644
--- a/src/allocator.rs
+++ b/src/allocator.rs
@@ -4,8 +4,56 @@ use crate::reduction::EvalErr;
 use chia_bls::{G1Element, G2Element};
 use clvm_traits::{ClvmDecoder, ClvmEncoder, FromClvmError, ToClvmError};
 
+const MAX_NUM_ATOMS: usize = 62500000;
+const MAX_NUM_PAIRS: usize = 62500000;
+const NODE_PTR_IDX_BITS: u32 = 26;
+const NODE_PTR_IDX_MASK: u32 = (1 << NODE_PTR_IDX_BITS) - 1;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct NodePtr(pub i32);
+pub struct NodePtr(u32);
+
+enum ObjectType {
+    Pair,
+    Bytes,
+}
+
+// The top 6 bits of the NodePtr indicate what type of object it is
+impl NodePtr {
+    pub fn null() -> Self {
+        Self::new(ObjectType::Bytes, 0)
+    }
+
+    fn new(t: ObjectType, idx: usize) -> Self {
+        assert!(idx <= NODE_PTR_IDX_MASK as usize);
+        NodePtr(((t as u32) << NODE_PTR_IDX_BITS) | (idx as u32))
+    }
+
+    fn node_type(&self) -> (ObjectType, usize) {
+        (
+            match self.0 >> NODE_PTR_IDX_BITS {
+                0 => ObjectType::Pair,
+                1 => ObjectType::Bytes,
+                _ => {
+                    panic!("unknown NodePtr type");
+                }
+            },
+            (self.0 & NODE_PTR_IDX_MASK) as usize,
+        )
+    }
+
+    pub(crate) fn as_index(&self) -> usize {
+        match self.node_type() {
+            (ObjectType::Pair, idx) => idx * 2,
+            (ObjectType::Bytes, idx) => idx * 2 + 1,
+        }
+    }
+}
+
+impl Default for NodePtr {
+    fn default() -> Self {
+        Self::null()
+    }
+}
 
 pub enum SExp {
     Atom,
@@ -58,9 +106,6 @@ pub struct Allocator {
     heap_limit: usize,
 }
 
-const MAX_NUM_ATOMS: usize = 62500000;
-const MAX_NUM_PAIRS: usize = 62500000;
-
 impl Default for Allocator {
     fn default() -> Self {
         Self::new()
@@ -122,13 +167,14 @@ impl Allocator {
         if (self.heap_limit - start as usize) < v.len() {
             return err(self.null(), "out of memory");
         }
-        if self.atom_vec.len() == MAX_NUM_ATOMS {
+        let idx = self.atom_vec.len();
+        if idx == MAX_NUM_ATOMS {
             return err(self.null(), "too many atoms");
         }
         self.u8_vec.extend_from_slice(v);
         let end = self.u8_vec.len() as u32;
         self.atom_vec.push(AtomBuf { start, end });
-        Ok(NodePtr(-(self.atom_vec.len() as i32)))
+        Ok(NodePtr::new(ObjectType::Bytes, idx))
     }
 
     pub fn new_number(&mut self, v: Number) -> Result<NodePtr, EvalErr> {
@@ -144,22 +190,22 @@ impl Allocator {
     }
 
     pub fn new_pair(&mut self, first: NodePtr, rest: NodePtr) -> Result<NodePtr, EvalErr> {
-        let r = self.pair_vec.len() as i32;
-        if self.pair_vec.len() == MAX_NUM_PAIRS {
+        let idx = self.pair_vec.len();
+        if idx == MAX_NUM_PAIRS {
             return err(self.null(), "too many pairs");
         }
         self.pair_vec.push(IntPair { first, rest });
-        Ok(NodePtr(r))
+        Ok(NodePtr::new(ObjectType::Pair, idx))
     }
 
     pub fn new_substr(&mut self, node: NodePtr, start: u32, end: u32) -> Result<NodePtr, EvalErr> {
-        if node.0 >= 0 {
-            return err(node, "(internal error) substr expected atom, got pair");
-        }
         if self.atom_vec.len() == MAX_NUM_ATOMS {
             return err(self.null(), "too many atoms");
         }
-        let atom = self.atom_vec[(-node.0 - 1) as usize];
+        let (ObjectType::Bytes, idx) = node.node_type() else {
+            return err(node, "(internal error) substr expected atom, got pair");
+        };
+        let atom = self.atom_vec[idx];
         let atom_len = atom.end - atom.start;
         if start > atom_len {
             return err(node, "substr start out of bounds");
@@ -170,11 +216,12 @@ impl Allocator {
         if end < start {
             return err(node, "substr invalid bounds");
         }
+        let idx = self.atom_vec.len();
         self.atom_vec.push(AtomBuf {
             start: atom.start + start,
             end: atom.start + end,
         });
-        Ok(NodePtr(-(self.atom_vec.len() as i32)))
+        Ok(NodePtr::new(ObjectType::Bytes, idx))
     }
 
     pub fn new_concat(&mut self, new_size: usize, nodes: &[NodePtr]) -> Result<NodePtr, EvalErr> {
@@ -189,12 +236,12 @@ impl Allocator {
 
         let mut counter: usize = 0;
         for node in nodes {
-            if node.0 >= 0 {
+            let (ObjectType::Bytes, idx) = node.node_type() else {
                 self.u8_vec.truncate(start);
                 return err(*node, "(internal error) concat expected atom, got pair");
-            }
+            };
 
-            let term = self.atom_vec[(-node.0 - 1) as usize];
+            let term = self.atom_vec[idx];
             if counter + term.len() > new_size {
                 self.u8_vec.truncate(start);
                 return err(*node, "(internal error) concat passed invalid new_size");
@@ -211,11 +258,12 @@ impl Allocator {
             );
         }
         let end = self.u8_vec.len() as u32;
+        let idx = self.atom_vec.len();
         self.atom_vec.push(AtomBuf {
             start: (start as u32),
             end,
         });
-        Ok(NodePtr(-(self.atom_vec.len() as i32)))
+        Ok(NodePtr::new(ObjectType::Bytes, idx))
     }
 
     pub fn atom_eq(&self, lhs: NodePtr, rhs: NodePtr) -> bool {
@@ -223,13 +271,27 @@ impl Allocator {
     }
 
     pub fn atom(&self, node: NodePtr) -> &[u8] {
-        assert!(node.0 < 0, "expected atom, got pair");
-        let atom = self.atom_vec[(-node.0 - 1) as usize];
-        &self.u8_vec[atom.start as usize..atom.end as usize]
+        match node.node_type() {
+            (ObjectType::Bytes, idx) => {
+                let atom = self.atom_vec[idx];
+                &self.u8_vec[atom.start as usize..atom.end as usize]
+            }
+            _ => {
+                panic!("expected atom, got pair");
+            }
+        }
     }
 
     pub fn atom_len(&self, node: NodePtr) -> usize {
-        self.atom(node).len()
+        match node.node_type() {
+            (ObjectType::Bytes, idx) => {
+                let atom = self.atom_vec[idx];
+                (atom.end - atom.start) as usize
+            }
+            _ => {
+                panic!("expected atom, got pair");
+            }
+        }
     }
 
     pub fn number(&self, node: NodePtr) -> Number {
@@ -265,11 +327,12 @@ impl Allocator {
     }
 
     pub fn sexp(&self, node: NodePtr) -> SExp {
-        if node.0 >= 0 {
-            let pair = self.pair_vec[node.0 as usize];
-            SExp::Pair(pair.first, pair.rest)
-        } else {
-            SExp::Atom
+        match node.node_type() {
+            (ObjectType::Bytes, _) => SExp::Atom,
+            (ObjectType::Pair, idx) => {
+                let pair = self.pair_vec[idx];
+                SExp::Pair(pair.first, pair.rest)
+            }
         }
     }
 
@@ -286,11 +349,11 @@ impl Allocator {
     }
 
     pub fn null(&self) -> NodePtr {
-        NodePtr(-1)
+        NodePtr::new(ObjectType::Bytes, 0)
     }
 
     pub fn one(&self) -> NodePtr {
-        NodePtr(-2)
+        NodePtr::new(ObjectType::Bytes, 1)
     }
 
     #[cfg(feature = "counters")]
@@ -345,6 +408,19 @@ impl ClvmDecoder for Allocator {
     }
 }
 
+#[test]
+fn test_node_as_index() {
+    assert_eq!(NodePtr::new(ObjectType::Pair, 0).as_index(), 0);
+    assert_eq!(NodePtr::new(ObjectType::Pair, 1).as_index(), 2);
+    assert_eq!(NodePtr::new(ObjectType::Pair, 2).as_index(), 4);
+    assert_eq!(NodePtr::new(ObjectType::Pair, 3).as_index(), 6);
+    assert_eq!(NodePtr::new(ObjectType::Bytes, 0).as_index(), 1);
+    assert_eq!(NodePtr::new(ObjectType::Bytes, 1).as_index(), 3);
+    assert_eq!(NodePtr::new(ObjectType::Bytes, 2).as_index(), 5);
+    assert_eq!(NodePtr::new(ObjectType::Bytes, 3).as_index(), 7);
+    assert_eq!(NodePtr::new(ObjectType::Bytes, 4).as_index(), 9);
+}
+
 #[test]
 fn test_atom_eq() {
     let mut a = Allocator::new();
diff --git a/src/op_utils.rs b/src/op_utils.rs
index d3f2cda7..403ee912 100644
--- a/src/op_utils.rs
+++ b/src/op_utils.rs
@@ -18,7 +18,7 @@ pub fn get_args<const N: usize>(
 ) -> Result<[NodePtr; N], EvalErr> {
     let mut next = args;
     let mut counter = 0;
-    let mut ret: [NodePtr; N] = [NodePtr(0); N];
+    let mut ret: [NodePtr; N] = [NodePtr::null(); N];
 
     while let Some((first, rest)) = a.next(next) {
         next = rest;
@@ -91,7 +91,7 @@ pub fn get_varargs<const N: usize>(
 ) -> Result<([NodePtr; N], usize), EvalErr> {
     let mut next = args;
     let mut counter = 0;
-    let mut ret: [NodePtr; N] = [NodePtr(0); N];
+    let mut ret: [NodePtr; N] = [NodePtr::null(); N];
 
     while let Some((first, rest)) = a.next(next) {
         next = rest;
@@ -131,19 +131,27 @@ fn test_get_varargs() {
     );
     assert_eq!(
         get_varargs::<4>(&a, args3, "test").unwrap(),
-        ([a1, a2, a3, NodePtr(0)], 3)
+        ([a1, a2, a3, NodePtr::null()], 3)
     );
     assert_eq!(
         get_varargs::<4>(&a, args2, "test").unwrap(),
-        ([a2, a3, NodePtr(0), NodePtr(0)], 2)
+        ([a2, a3, NodePtr::null(), NodePtr::null()], 2)
     );
     assert_eq!(
         get_varargs::<4>(&a, args1, "test").unwrap(),
-        ([a3, NodePtr(0), NodePtr(0), NodePtr(0)], 1)
+        ([a3, NodePtr::null(), NodePtr::null(), NodePtr::null()], 1)
     );
     assert_eq!(
         get_varargs::<4>(&a, args0, "test").unwrap(),
-        ([NodePtr(0), NodePtr(0), NodePtr(0), NodePtr(0)], 0)
+        (
+            [
+                NodePtr::null(),
+                NodePtr::null(),
+                NodePtr::null(),
+                NodePtr::null()
+            ],
+            0
+        )
     );
 
     let r = get_varargs::<3>(&a, args4, "test").unwrap_err();
diff --git a/src/serde/object_cache.rs b/src/serde/object_cache.rs
index ed479259..09bac2ab 100644
--- a/src/serde/object_cache.rs
+++ b/src/serde/object_cache.rs
@@ -26,18 +26,6 @@ pub struct ObjectCache<'a, T> {
     f: CachedFunction<T>,
 }
 
-/// turn a `NodePtr` into a `usize`. Positive values become even indices
-/// and negative values become odd indices.
-
-fn node_to_index(node: &NodePtr) -> usize {
-    let value = node.0;
-    if value < 0 {
-        (-value - value - 1) as usize
-    } else {
-        (value + value) as usize
-    }
-}
-
 impl<'a, T: Clone> ObjectCache<'a, T> {
     pub fn new(allocator: &'a Allocator, f: CachedFunction<T>) -> Self {
         let cache = vec![];
@@ -57,7 +45,7 @@ impl<'a, T: Clone> ObjectCache<'a, T> {
 
     /// return the cached value for this node, or `None`
     fn get_from_cache(&self, node: &NodePtr) -> Option<&T> {
-        let index = node_to_index(node);
+        let index = node.as_index();
         if index < self.cache.len() {
             self.cache[index].as_ref()
         } else {
@@ -67,7 +55,7 @@ impl<'a, T: Clone> ObjectCache<'a, T> {
 
     /// set the cached value for a node
     fn set(&mut self, node: &NodePtr, v: T) {
-        let index = node_to_index(node);
+        let index = node.as_index();
         if index >= self.cache.len() {
             self.cache.resize(index + 1, None);
         }
@@ -255,15 +243,6 @@ fn test_serialized_length() {
     check("ff01ff02ff03ff04ff05ff0680", 13); // (1 2 3 4 5 6)
 }
 
-#[test]
-fn test_node_to_index() {
-    assert_eq!(node_to_index(&NodePtr(0)), 0);
-    assert_eq!(node_to_index(&NodePtr(1)), 2);
-    assert_eq!(node_to_index(&NodePtr(2)), 4);
-    assert_eq!(node_to_index(&NodePtr(-1)), 1);
-    assert_eq!(node_to_index(&NodePtr(-2)), 3);
-}
-
 // this test takes a very long time (>60s) in debug mode, so it only runs in release mode
 
 #[cfg(not(debug_assertions))]

From d9952fb663fb7f86c043d09c6fd5ea316003785f Mon Sep 17 00:00:00 2001
From: arvidn <arvid@libtorrent.org>
Date: Mon, 8 Jan 2024 19:59:23 +0100
Subject: [PATCH 2/2] make a hack in benchmark-clvm-cost more explicit

---
 src/allocator.rs                     |  5 +++++
 tools/src/bin/benchmark-clvm-cost.rs | 25 ++++++++++++++-----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/allocator.rs b/src/allocator.rs
index 11b1ad79..c2c52d4b 100644
--- a/src/allocator.rs
+++ b/src/allocator.rs
@@ -23,6 +23,11 @@ impl NodePtr {
         Self::new(ObjectType::Bytes, 0)
     }
 
+    // TODO: remove this (only used by benchmark-clvm-cost to fabricate a placeholder atom node)
+    pub fn hack(val: usize) -> Self {
+        Self::new(ObjectType::Bytes, val)
+    }
+
     fn new(t: ObjectType, idx: usize) -> Self {
         assert!(idx <= NODE_PTR_IDX_MASK as usize);
         NodePtr(((t as u32) << NODE_PTR_IDX_BITS) | (idx as u32))
diff --git a/tools/src/bin/benchmark-clvm-cost.rs b/tools/src/bin/benchmark-clvm-cost.rs
index e24cb3f3..38ccbb43 100644
--- a/tools/src/bin/benchmark-clvm-cost.rs
+++ b/tools/src/bin/benchmark-clvm-cost.rs
@@ -16,7 +16,7 @@ enum OpArgs {
 
 // special argument to indicate it should be substituted for varied in the FreeBytes test to
 // measure cost per byte
-const VARIABLE: NodePtr = NodePtr(999);
+const VARIABLE_VAL: usize = 999;
 
 // builds calls in the form:
 // (<op> arg arg ...)
@@ -99,9 +99,10 @@ fn quote(a: &mut Allocator, v: NodePtr) -> NodePtr {
 }
 
 fn subst_node(arg: NodePtr, substitution: NodePtr) -> NodePtr {
-    match arg {
-        VARIABLE => substitution,
-        _ => arg,
+    if arg == NodePtr::hack(VARIABLE_VAL) {
+        substitution
+    } else {
+        arg
     }
 }
 
@@ -337,6 +338,8 @@ pub fn main() {
 
     let mut a = Allocator::new();
 
+    let variable = NodePtr::hack(VARIABLE_VAL);
+
     let g1 = a.new_atom(&hex::decode("97f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb").unwrap()).unwrap();
     let g2 = a.new_atom(&hex::decode("93e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8").unwrap()).unwrap();
 
@@ -386,21 +389,21 @@ pub fn main() {
         Operator {
             opcode: 60,
             name: "modpow (modulus cost)",
-            arg: OpArgs::ThreeArgs(number, number, VARIABLE),
+            arg: OpArgs::ThreeArgs(number, number, variable),
             extra: None,
             flags: PER_BYTE_COST | EXPONENTIAL_COST,
         },
         Operator {
             opcode: 60,
             name: "modpow (exponent cost)",
-            arg: OpArgs::ThreeArgs(number, VARIABLE, number),
+            arg: OpArgs::ThreeArgs(number, variable, number),
             extra: None,
             flags: PER_BYTE_COST | EXPONENTIAL_COST,
         },
         Operator {
             opcode: 60,
             name: "modpow (value cost)",
-            arg: OpArgs::ThreeArgs(VARIABLE, number, number),
+            arg: OpArgs::ThreeArgs(variable, number, number),
             extra: None,
             flags: PER_BYTE_COST,
         },
@@ -421,7 +424,7 @@ pub fn main() {
         Operator {
             opcode: 50,
             name: "g1_multiply",
-            arg: OpArgs::TwoArgs(g1, VARIABLE),
+            arg: OpArgs::TwoArgs(g1, variable),
             extra: Some(g1),
             flags: PER_BYTE_COST,
         },
@@ -449,7 +452,7 @@ pub fn main() {
         Operator {
             opcode: 54,
             name: "g2_multiply",
-            arg: OpArgs::TwoArgs(g2, VARIABLE),
+            arg: OpArgs::TwoArgs(g2, variable),
             extra: Some(g2),
             flags: PER_BYTE_COST,
         },
@@ -463,14 +466,14 @@ pub fn main() {
         Operator {
             opcode: 56,
             name: "g1_map",
-            arg: OpArgs::SingleArg(VARIABLE),
+            arg: OpArgs::SingleArg(variable),
             extra: None,
             flags: PER_BYTE_COST | LARGE_BUFFERS,
         },
         Operator {
             opcode: 57,
             name: "g2_map",
-            arg: OpArgs::SingleArg(VARIABLE),
+            arg: OpArgs::SingleArg(variable),
             extra: None,
             flags: PER_BYTE_COST | LARGE_BUFFERS,
         },