From a8581ab78933f9d36104366eff5f7a1055d6c25d Mon Sep 17 00:00:00 2001 From: arvidn Date: Tue, 13 Feb 2024 12:20:43 +0100 Subject: [PATCH] improve fidelity of the checks for small atoms to fit in the SmallAtom NodePtr representation --- fuzz/fuzz_targets/allocator.rs | 6 +++ src/allocator.rs | 96 +++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/fuzz/fuzz_targets/allocator.rs b/fuzz/fuzz_targets/allocator.rs index 6c23b88c..809beaed 100644 --- a/fuzz/fuzz_targets/allocator.rs +++ b/fuzz/fuzz_targets/allocator.rs @@ -1,6 +1,7 @@ #![no_main] use libfuzzer_sys::fuzz_target; +use clvmr::allocator::fits_in_small_atom; use clvmr::{Allocator, NodePtr}; fn run_tests(a: &mut Allocator, atom1: NodePtr, data: &[u8]) { @@ -21,6 +22,11 @@ fn run_tests(a: &mut Allocator, atom1: NodePtr, data: &[u8]) { assert_eq!(a.number(atom2), val.into()); assert_eq!(a.atom_len(atom2), data.len()); assert!(canonical); + assert_eq!(fits_in_small_atom(data), Some(val)); + } else { + assert_eq!(fits_in_small_atom(data), None); + let val = a.number(atom1); + assert!(!canonical || val < 0.into() || val > ((1 << 26) - 1).into()); } // number diff --git a/src/allocator.rs b/src/allocator.rs index 92fc610e..4e66e492 100644 --- a/src/allocator.rs +++ b/src/allocator.rs @@ -167,21 +167,28 @@ impl Default for Allocator { } } -pub fn canonical_positive_integer(v: &[u8]) -> bool { - if v.is_empty() { - // empty buffer is 0/nil - true - } else if (v.len() == 1 && v[0] == 0) +pub fn fits_in_small_atom(v: &[u8]) -> Option { + if !v.is_empty() + && (v.len() > 4 + || (v.len() == 1 && v[0] == 0) // a 1-byte buffer of 0 is not the canonical representation of 0 || (v[0] & 0x80) != 0 // if the top bit is set, it's a negative number (i.e. not positive) || (v[0] == 0 && (v[1] & 0x80) == 0) + // if the buffer is 4 bytes, the top byte can't use more than 2 bits. + // otherwise the integer won't fit in 26 bits + || (v.len() == 4 && v[0] > 0x03)) { - // if the top byte is a 0 but the top bit of the next byte is not set, that's a redundant - // leading zero. i.e. not canonical representation - false + // if the top byte is a 0 but the top bit of the next byte is not set, + // that's a redundant leading zero. i.e. not canonical representation + None } else { - true + let mut ret: u32 = 0; + for b in v { + ret <<= 8; + ret |= *b as u32; + } + Some(ret) } } @@ -259,12 +266,7 @@ impl Allocator { } let idx = self.atom_vec.len(); self.check_atom_limit()?; - if v.len() <= 3 && canonical_positive_integer(v) { - let mut ret: u32 = 0; - for b in v { - ret <<= 8; - ret |= *b as u32; - } + if let Some(ret) = fits_in_small_atom(v) { self.small_atoms += 1; Ok(NodePtr::new(ObjectType::SmallAtom, ret as usize)) } else { @@ -345,7 +347,10 @@ impl Allocator { let buf: [u8; 4] = val.to_be_bytes(); let buf = &buf[4 - len as usize..]; let substr = &buf[start as usize..end as usize]; - if !canonical_positive_integer(substr) { + if let Some(new_val) = fits_in_small_atom(substr) { + self.small_atoms += 1; + Ok(NodePtr::new(ObjectType::SmallAtom, new_val as usize)) + } else { let start = self.u8_vec.len(); let end = start + substr.len(); self.u8_vec.extend_from_slice(substr); @@ -355,14 +360,6 @@ impl Allocator { end: end as u32, }); Ok(NodePtr::new(ObjectType::Bytes, idx)) - } else { - let mut new_val: u32 = 0; - for i in substr { - new_val <<= 8; - new_val |= *i as u32; - } - self.small_atoms += 1; - Ok(NodePtr::new(ObjectType::SmallAtom, new_val as usize)) } } } @@ -509,16 +506,7 @@ impl Allocator { ObjectType::Bytes => { let atom = self.atom_vec[node.index() as usize]; let buf = &self.u8_vec[atom.start as usize..atom.end as usize]; - if buf.len() > 3 || !canonical_positive_integer(buf) { - None - } else { - let mut ret: u32 = 0; - for v in buf { - ret <<= 8; - ret |= *v as u32; - } - Some(ret) - } + fits_in_small_atom(buf) } _ => None, } @@ -1808,14 +1796,16 @@ fn test_auto_small_number(#[case] value: Number, #[case] expect_small: bool) { #[case(&[0xff], false)] #[case(&[0xff, 0xff], false)] #[case(&[0x80, 0xff, 0xff], false)] -// we use a simple heuristic, for atoms. if we have more than 3 bytes, we assume -// it's not small. Even though it would have fit in 26 bits -#[case(&[0x1, 0xff, 0xff, 0xff], false)] // small positive intergers can be small #[case(&[0x01], true)] #[case(&[0x00, 0xff], true)] #[case(&[0x7f, 0xff], true)] #[case(&[0x7f, 0xff, 0xff], true)] +#[case(&[0x00, 0xff, 0xff, 0xff], true)] +#[case(&[0x02, 0x00, 0x00, 0x00], true)] +#[case(&[0x03, 0xff, 0xff, 0xff], true)] +// too big +#[case(&[0x04, 0x00, 0x00, 0x00], false)] fn test_auto_small_number_from_buf(#[case] buf: &[u8], #[case] expect_small: bool) { let mut a = Allocator::new(); let atom = a.new_atom(buf).expect("new_atom()"); @@ -1826,3 +1816,35 @@ fn test_auto_small_number_from_buf(#[case] buf: &[u8], #[case] expect_small: boo } assert_eq!(buf, a.atom(atom).as_ref()); } + +#[cfg(test)] +#[rstest] +// redundant leading zeros are not canoncial +#[case(&[0x00], None)] +#[case(&[0x00, 0x7f], None)] +// negative numbers cannot be small ints +#[case(&[0x80], None)] +#[case(&[0xff], None)] +// redundant leading 0xff are still negative +#[case(&[0xff, 0xff], None)] +#[case(&[0x80, 0xff, 0xff], None)] +// to big +#[case(&[0x04, 0x00, 0x00, 0x00], None)] +#[case(&[0x05, 0x00, 0x00, 0x00], None)] +#[case(&[0x04, 0x00, 0x00, 0x00, 0x00], None)] +// small positive intergers can be small +#[case(&[0x01], Some(0x01))] +#[case(&[0x00, 0x80], Some(0x80))] +#[case(&[0x00, 0xff], Some(0xff))] +#[case(&[0x7f, 0xff], Some(0x7fff))] +#[case(&[0x00, 0x80, 0x00], Some(0x8000))] +#[case(&[0x00, 0xff, 0xff], Some(0xffff))] +#[case(&[0x7f, 0xff, 0xff], Some(0x7fffff))] +#[case(&[0x00, 0x80, 0x00, 0x00], Some(0x800000))] +#[case(&[0x00, 0xff, 0xff, 0xff], Some(0xffffff))] +#[case(&[0x02, 0x00, 0x00, 0x00], Some(0x2000000))] +#[case(&[0x03, 0x00, 0x00, 0x00], Some(0x3000000))] +#[case(&[0x03, 0xff, 0xff, 0xff], Some(0x3ffffff))] +fn test_fits_in_small_atom(#[case] buf: &[u8], #[case] expected: Option) { + assert_eq!(fits_in_small_atom(buf), expected); +}