Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

serialized_length_from_bytes() #355

Merged
merged 1 commit into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ criterion = "0.5.1"
[[bench]]
name = "run-program"
harness = false

[[bench]]
name = "deserialize"
harness = false
Binary file added benches/block_af9c3d98.bin
Binary file not shown.
58 changes: 58 additions & 0 deletions benches/deserialize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use clvmr::allocator::Allocator;
use clvmr::serde::node_from_bytes;
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::serialized_length_from_bytes;
use clvmr::serde::serialized_length_from_bytes_trusted;
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
use std::include_bytes;
use std::time::Instant;

fn deserialize_benchmark(c: &mut Criterion) {
let block = include_bytes!("block_af9c3d98.bin");

let mut group = c.benchmark_group("deserialize");
group.sample_size(10);
group.sampling_mode(SamplingMode::Flat);

group.bench_function("serialized_length_from_bytes", |b| {
b.iter(|| {
let start = Instant::now();
let _ = serialized_length_from_bytes(block);
start.elapsed()
})
});

group.bench_function("serialized_length_from_bytes_trusted", |b| {
b.iter(|| {
let start = Instant::now();
let _ = serialized_length_from_bytes_trusted(block);
start.elapsed()
})
});

let mut a = Allocator::new();
let iter_checkpoint = a.checkpoint();

group.bench_function("node_from_bytes_backrefs", |b| {
b.iter(|| {
a.restore_checkpoint(&iter_checkpoint);
let start = Instant::now();
let _ = node_from_bytes_backrefs(&mut a, block);
start.elapsed()
})
});

group.bench_function("node_from_bytes", |b| {
b.iter(|| {
a.restore_checkpoint(&iter_checkpoint);
let start = Instant::now();
let _ = node_from_bytes(&mut a, block);
start.elapsed()
})
});

group.finish();
}

// Register `deserialize_benchmark` under the `deserialize` group and let
// criterion generate the entry point for this bench target.
criterion_group!(deserialize, deserialize_benchmark);
criterion_main!(deserialize);
1 change: 0 additions & 1 deletion benches/run-program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ fn run_program_benchmark(c: &mut Criterion) {
] {
a.restore_checkpoint(&test_case_checkpoint);

println!("benchmark/{test}.hex");
let prg = read_to_string(format!("benchmark/{test}.hex"))
.expect("failed to load benchmark program");
let prg = hex::decode(prg.trim()).expect("invalid hex in benchmark program");
Expand Down
6 changes: 6 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ path = "fuzz_targets/serialized_length.rs"
test = false
doc = false

[[bin]]
name = "fuzz_serialized_length_trusted"
path = "fuzz_targets/serialized_length_trusted.rs"
test = false
doc = false

[[bin]]
name = "fuzz_deserialize"
path = "fuzz_targets/deserialize.rs"
Expand Down
29 changes: 24 additions & 5 deletions fuzz/fuzz_targets/serialized_length.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
#![no_main]
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::node_to_bytes;
use clvmr::serde::serialized_length_from_bytes;
use clvmr::Allocator;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
let _len = match serialized_length_from_bytes(data) {
Err(_) => {
return;
let len = serialized_length_from_bytes(data);

let mut allocator = Allocator::new();
let program = node_from_bytes_backrefs(&mut allocator, data);

match (len, program) {
(Ok(_), Ok(_)) => {
// this is expected
}
(Err(_), Err(_)) => {
// this is expected
}
(Ok(len), Err(e)) => {
panic!("discrepancy between serialized_length and node_from_bytes_backrefs().\n {len}\n{e}");
}
(Err(e), Ok(program)) => {
panic!(
"discrepancy between serialized_length and node_from_bytes_backrefs().\n {e}\n{:?}",
node_to_bytes(&allocator, program)
);
}
Ok(r) => r,
};
}
});
12 changes: 12 additions & 0 deletions fuzz/fuzz_targets/serialized_length_trusted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#![no_main]
use clvmr::serde::serialized_length_from_bytes_trusted;
use libfuzzer_sys::fuzz_target;

// Fuzz target for the non-validating length scanner. Both `Ok` and `Err` are
// acceptable outcomes; the target only looks for panics and hangs.
fuzz_target!(|data: &[u8]| {
    let _ = serialized_length_from_bytes_trusted(data);
});
4 changes: 3 additions & 1 deletion src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ pub use de_br::node_from_bytes_backrefs;
pub use de_tree::{parse_triples, ParsedTriple};
pub use ser::node_to_bytes;
pub use ser_br::node_to_bytes_backrefs;
pub use tools::{serialized_length_from_bytes, tree_hash_from_stream};
pub use tools::{
serialized_length_from_bytes, serialized_length_from_bytes_trusted, tree_hash_from_stream,
};
196 changes: 158 additions & 38 deletions src/serde/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const MAX_SINGLE_BYTE: u8 = 0x7f;
const BACK_REFERENCE: u8 = 0xfe;
const CONS_BOX_MARKER: u8 = 0xff;

pub fn serialized_length_from_bytes(b: &[u8]) -> io::Result<u64> {
pub fn serialized_length_from_bytes_trusted(b: &[u8]) -> io::Result<u64> {
let mut f = Cursor::new(b);
let mut ops_counter = 1;
let mut b = [0; 1];
Expand Down Expand Up @@ -107,6 +107,70 @@ pub fn tree_hash_from_stream(f: &mut Cursor<&[u8]>) -> io::Result<[u8; 32]> {
Ok(values.pop().unwrap())
}

/// validate that a buffer is a valid CLVM serialization, and return the length
/// of the CLVM object. This may fail if the serialization contains an invalid
/// back-reference or if the buffer is truncated.
pub fn serialized_length_from_bytes(b: &[u8]) -> io::Result<u64> {
use crate::serde::parse_atom::parse_path;
use crate::traverse_path::traverse_path;
use crate::{allocator::SExp, Allocator};

let mut f = Cursor::new(b);
let mut b = [0; 1];

// the allocator is just used to track the tree structure, in order to
// validate back-references
let mut allocator = Allocator::new();
arvidn marked this conversation as resolved.
Show resolved Hide resolved
let null = allocator.null();
let mut values = null;
let mut ops = vec![ParseOp::SExp];

while let Some(op) = ops.pop() {
match op {
ParseOp::SExp => {
f.read_exact(&mut b)?;
if b[0] == CONS_BOX_MARKER {
ops.push(ParseOp::Cons);
ops.push(ParseOp::SExp);
ops.push(ParseOp::SExp);
} else if b[0] == BACK_REFERENCE {
let path = parse_path(&mut f)?;
let back_reference = traverse_path(&allocator, path, values)?.1;
values = allocator.new_pair(back_reference, values)?;
} else if b[0] == 0x80 || b[0] <= MAX_SINGLE_BYTE {
// This one byte we just read was the whole atom.
// or the special case of NIL
values = allocator.new_pair(null, values)?;
} else {
let blob_size = decode_size(&mut f, b[0])?;
f.seek(SeekFrom::Current(blob_size as i64))?;
if (f.get_ref().len() as u64) < f.position() {
return Err(bad_encoding());
}
values = allocator.new_pair(null, values)?;
}
}
ParseOp::Cons => {
// cons
let SExp::Pair(v1, v2) = allocator.sexp(values) else {
return Err(bad_encoding());
};

let SExp::Pair(v3, v4) = allocator.sexp(v2) else {
return Err(bad_encoding());
};

let new_root = allocator.new_pair(v3, v1)?;
values = allocator.new_pair(new_root, v4)?;
}
}
}
match allocator.sexp(values) {
SExp::Pair(_, _) => Ok(f.position()),
_ => Err(bad_encoding()),
}
}

#[test]
fn test_tree_hash_max_single_byte() {
let mut ctx = Sha256::new();
Expand Down Expand Up @@ -212,50 +276,106 @@ fn test_tree_hash_tree_large_atom() {
);
}

#[test]
fn test_serialized_length_from_bytes() {
    // well-formed inputs paired with the expected length of the leading object;
    // trailing bytes after that object are ignored
    let accepted: [(&[u8], u64); 5] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
        (&[0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 16),
    ];
    for (input, expected) in accepted {
        assert_eq!(serialized_length_from_bytes(input).unwrap(), expected);
    }

    // inputs that must be rejected as "bad encoding"
    let rejected: [&[u8]; 3] = [
        &[0x8f, 0xff],
        &[0b11001111, 0xff],
        &[0b11001111, 0xff, 0, 0],
    ];
    for input in rejected {
        let e = serialized_length_from_bytes(input).unwrap_err();
        assert_eq!(e.kind(), bad_encoding().kind());
        assert_eq!(e.to_string(), "bad encoding");
    }
}

#[cfg(test)]
mod test {
use super::*;
use crate::serde::node_from_bytes_backrefs;
use crate::Allocator;
use rstest::rstest;

#[test]
fn test_serialized_length_from_bytes_trusted() {
    // inputs the trusted scanner accepts, paired with the expected length of
    // the leading object
    let accepted: [(&[u8], u64); 6] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
        // this is an invalid back-ref
        // but it's not validated
        (&[0xff, 0x01, 0xff, 0xfe, 0x10, 0x80, 0x00], 6),
        (&[0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 16),
    ];
    for (input, expected) in accepted {
        assert_eq!(
            serialized_length_from_bytes_trusted(input).unwrap(),
            expected
        );
    }

    // inputs that must be rejected as "bad encoding"
    let rejected: [&[u8]; 3] = [
        &[0x8f, 0xff],
        &[0b11001111, 0xff],
        &[0b11001111, 0xff, 0, 0],
    ];
    for input in rejected {
        let e = serialized_length_from_bytes_trusted(input).unwrap_err();
        assert_eq!(e.kind(), bad_encoding().kind());
        assert_eq!(e.to_string(), "bad encoding");
    }
}

#[test]
fn test_serialized_length_from_bytes() {
    use std::io::ErrorKind;

    // well-formed inputs paired with the expected length of the leading object
    let accepted: [(&[u8], u64); 5] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
        (&[0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 16),
    ];
    for (input, expected) in accepted {
        assert_eq!(serialized_length_from_bytes(input).unwrap(), expected);
    }

    // rejected inputs with the error kind and message each must produce
    let bad_kind = bad_encoding().kind();
    let rejected: [(&[u8], ErrorKind, &str); 4] = [
        // this is an invalid back-ref
        (
            &[0xff, 0x01, 0xff, 0xfe, 0x10, 0x80, 0x00],
            ErrorKind::Other,
            "path into atom",
        ),
        (&[0x8f, 0xff], bad_kind, "bad encoding"),
        (&[0b11001111, 0xff], bad_kind, "bad encoding"),
        (&[0b11001111, 0xff, 0, 0], bad_kind, "bad encoding"),
    ];
    for (input, kind, message) in rejected {
        let e = serialized_length_from_bytes(input).unwrap_err();
        assert_eq!(e.kind(), kind);
        assert_eq!(e.to_string(), message);
    }
}

#[rstest]
// ("foobar" "foobar")
#[case("ff86666f6f626172ff86666f6f62617280")]
Expand Down
Loading