From fced1223fec01184e61a6b99bbb5b1d4b8bf3020 Mon Sep 17 00:00:00 2001 From: Juan-M-V <102986292+Juan-M-V@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:02:22 -0300 Subject: [PATCH] Better json generation (#1356) * Generate program with * Simplify fuzz_json * Fuzz config * Test more args * Declare STEPS_LIMIT; derive Clone * Implement location arbitrary by hand * Implement arbitrary by hand * Use raw bytes for fuzz_json * Specify arbitrary for CairoConfig * Use and for json programs * Add words to dict * Run cargo fix * Remove unused crates * Remove unused crates * Reduce likeliness of programs triggering parsing errors * Order crates and test input * Clean comments * Add hint code * Make arbitrary location generation better * Clean Cargo.toml * Remove testing code * Avoid serializing if None * Update fuzzer.yml * Run cargo fmt * Run cargo fmt --------- Co-authored-by: Juanma Co-authored-by: juan.mv --- .github/workflows/fuzzer.yml | 2 +- fuzzer/Cargo.lock | 15 +- fuzzer/Cargo.toml | 2 + fuzzer/src/fuzz_json.rs | 356 +++++++++++++++++++++++++++- vm/src/cairo_run.rs | 1 + vm/src/serde/deserialize_program.rs | 61 +++-- 6 files changed, 386 insertions(+), 51 deletions(-) diff --git a/.github/workflows/fuzzer.yml b/.github/workflows/fuzzer.yml index 4512a0d383..aabe4a54cf 100644 --- a/.github/workflows/fuzzer.yml +++ b/.github/workflows/fuzzer.yml @@ -41,7 +41,7 @@ jobs: - name: Initializing fuzzer with previous inputs run: | cd fuzzer - HFUZZ_RUN_ARGS="--dict=json.dict --run_time 10800 --timeout 60 -T" cargo hfuzz run fuzz_json + HFUZZ_RUN_ARGS="--dict=json.dict --run_time 10800 --timeout 60" cargo hfuzz run fuzz_json # run the fuzzer with minimize to reduce inputs - name: Initializing fuzzer with minimize diff --git a/fuzzer/Cargo.lock b/fuzzer/Cargo.lock index 046bb7e15a..6cd9ce3125 100644 --- a/fuzzer/Cargo.lock +++ b/fuzzer/Cargo.lock @@ -193,11 +193,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.79" +version = "1.0.80" source =
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "51f1226cd9da55587234753d1245dd5b132343ea240f26b6a9003d68706141ba" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -290,6 +291,8 @@ dependencies = [ "cairo-vm", "honggfuzz", "libfuzzer-sys", + "serde", + "serde_json", ] [[package]] @@ -660,18 +663,18 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.179" +version = "1.0.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a5bf42b8d227d4abf38a1ddb08602e229108a517cd4e5bb28f9c7eaafdce5c0" +checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.179" +version = "1.0.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "741e124f5485c7e60c03b043f79f320bff3527f4bbf12cf3831750dc46a0ec2c" +checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" dependencies = [ "proc-macro2", "quote", diff --git a/fuzzer/Cargo.toml b/fuzzer/Cargo.toml index 5f9b815329..8dfae9d9f6 100644 --- a/fuzzer/Cargo.toml +++ b/fuzzer/Cargo.toml @@ -17,6 +17,8 @@ honggfuzz = "0.5.55" libfuzzer-sys = "0.4" cairo-vm = { path = "../vm", features = ["arbitrary"] } cairo-felt = { path = "../felt", features = ["arbitrary"] } +serde = { version = "1.0.180", features = ["derive"] } +serde_json = "1.0.104" [[bin]] name = "fuzz_json" diff --git a/fuzzer/src/fuzz_json.rs b/fuzzer/src/fuzz_json.rs index 021e92cdd8..ea80ea5017 100644 --- a/fuzzer/src/fuzz_json.rs +++ b/fuzzer/src/fuzz_json.rs @@ -1,23 +1,355 @@ +use arbitrary::{self, Arbitrary, Unstructured}; +use cairo_felt::Felt252; use cairo_vm::{ cairo_run::{cairo_run, CairoRunConfig}, - hint_processor::builtin_hint_processor::builtin_hint_processor_definition::BuiltinHintProcessor, + 
hint_processor::builtin_hint_processor::{ + builtin_hint_processor_definition::BuiltinHintProcessor, hint_code::*, + }, + serde::deserialize_program::{ + Attribute, DebugInfo, FlowTrackingData, Member, ReferenceManager, + }, }; use honggfuzz::fuzz; +use serde::{Deserialize, Serialize, Serializer}; +use std::collections::HashMap; + +const BUILTIN_NAMES: [&str; 9] = [ + "output", + "range_check", + "pedersen", + "ecdsa", + "keccak", + "bitwise", + "ec_op", + "poseidon", + "segment_arena", +]; + +const HEX_SYMBOLS: [&str; 16] = [ + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", +]; + +const HINTS_CODE: [&str; 184] = [ + ADD_SEGMENT, + VM_ENTER_SCOPE, + VM_EXIT_SCOPE, + MEMCPY_ENTER_SCOPE, + MEMCPY_CONTINUE_COPYING, + MEMSET_ENTER_SCOPE, + MEMSET_CONTINUE_LOOP, + POW, + IS_NN, + IS_NN_OUT_OF_RANGE, + IS_LE_FELT, + IS_POSITIVE, + ASSERT_NN, + ASSERT_NOT_ZERO, + ASSERT_NOT_ZERO, + ASSERT_NOT_EQUAL, + ASSERT_LE_FELT, + ASSERT_LE_FELT_V_0_6, + ASSERT_LE_FELT_V_0_8, + ASSERT_LE_FELT_EXCLUDED_0, + ASSERT_LE_FELT_EXCLUDED_1, + ASSERT_LE_FELT_EXCLUDED_2, + ASSERT_LT_FELT, + SPLIT_INT_ASSERT_RANGE, + ASSERT_250_BITS, + IS_250_BITS, + IS_ADDR_BOUNDED, + SPLIT_INT, + SPLIT_64, + SPLIT_FELT, + SQRT, + UNSIGNED_DIV_REM, + SIGNED_DIV_REM, + IS_QUAD_RESIDUE, + FIND_ELEMENT, + SEARCH_SORTED_LOWER, + SET_ADD, + DEFAULT_DICT_NEW, + DICT_NEW, + DICT_READ, + DICT_WRITE, + DICT_UPDATE, + SQUASH_DICT, + SQUASH_DICT_INNER_SKIP_LOOP, + SQUASH_DICT_INNER_FIRST_ITERATION, + SQUASH_DICT_INNER_CHECK_ACCESS_INDEX, + SQUASH_DICT_INNER_CONTINUE_LOOP, + SQUASH_DICT_INNER_ASSERT_LEN_KEYS, + SQUASH_DICT_INNER_LEN_ASSERT, + SQUASH_DICT_INNER_USED_ACCESSES_ASSERT, + SQUASH_DICT_INNER_NEXT_KEY, + DICT_SQUASH_COPY_DICT, + DICT_SQUASH_UPDATE_PTR, + BIGINT_TO_UINT256, + UINT256_ADD, + UINT256_ADD_LOW, + UINT128_ADD, + UINT256_SUB, + UINT256_SQRT, + UINT256_SQRT_FELT, + UINT256_SIGNED_NN, + UINT256_UNSIGNED_DIV_REM, + UINT256_EXPANDED_UNSIGNED_DIV_REM, + UINT256_MUL_DIV_MOD, + 
USORT_ENTER_SCOPE, + USORT_BODY, + USORT_VERIFY, + USORT_VERIFY_MULTIPLICITY_ASSERT, + USORT_VERIFY_MULTIPLICITY_BODY, + BLAKE2S_COMPUTE, + BLAKE2S_FINALIZE, + BLAKE2S_FINALIZE_V2, + BLAKE2S_FINALIZE_V3, + BLAKE2S_ADD_UINT256, + BLAKE2S_ADD_UINT256_BIGEND, + EXAMPLE_BLAKE2S_COMPRESS, + NONDET_BIGINT3_V1, + NONDET_BIGINT3_V2, + VERIFY_ZERO_V1, + VERIFY_ZERO_V2, + VERIFY_ZERO_V3, + VERIFY_ZERO_EXTERNAL_SECP, + REDUCE, + REDUCE_ED25519, + UNSAFE_KECCAK, + UNSAFE_KECCAK_FINALIZE, + IS_ZERO_NONDET, + IS_ZERO_INT, + IS_ZERO_PACK_V1, + IS_ZERO_PACK_V2, + IS_ZERO_PACK_EXTERNAL_SECP_V1, + IS_ZERO_PACK_EXTERNAL_SECP_V2, + IS_ZERO_PACK_ED25519, + IS_ZERO_ASSIGN_SCOPE_VARS, + IS_ZERO_ASSIGN_SCOPE_VARS_EXTERNAL_SECP, + IS_ZERO_ASSIGN_SCOPE_VARS_ED25519, + DIV_MOD_N_PACKED_DIVMOD_V1, + DIV_MOD_N_PACKED_DIVMOD_EXTERNAL_N, + DIV_MOD_N_SAFE_DIV, + GET_FELT_BIT_LENGTH, + BIGINT_PACK_DIV_MOD, + BIGINT_SAFE_DIV, + DIV_MOD_N_SAFE_DIV_PLUS_ONE, + GET_POINT_FROM_X, + EC_NEGATE, + EC_NEGATE_EMBEDDED_SECP, + EC_DOUBLE_SLOPE_V1, + EC_DOUBLE_SLOPE_V2, + EC_DOUBLE_SLOPE_V3, + EC_DOUBLE_SLOPE_EXTERNAL_CONSTS, + COMPUTE_SLOPE_V1, + COMPUTE_SLOPE_V2, + COMPUTE_SLOPE_SECP256R1, + IMPORT_SECP256R1_P, + COMPUTE_SLOPE_WHITELIST, + EC_DOUBLE_ASSIGN_NEW_X_V1, + EC_DOUBLE_ASSIGN_NEW_X_V2, + EC_DOUBLE_ASSIGN_NEW_X_V3, + EC_DOUBLE_ASSIGN_NEW_X_V4, + EC_DOUBLE_ASSIGN_NEW_Y, + SHA256_INPUT, + SHA256_MAIN_CONSTANT_INPUT_LENGTH, + SHA256_MAIN_ARBITRARY_INPUT_LENGTH, + SHA256_FINALIZE, + KECCAK_WRITE_ARGS, + COMPARE_BYTES_IN_WORD_NONDET, + COMPARE_KECCAK_FULL_RATE_IN_BYTES_NONDET, + BLOCK_PERMUTATION, + BLOCK_PERMUTATION_WHITELIST_V1, + BLOCK_PERMUTATION_WHITELIST_V2, + CAIRO_KECCAK_INPUT_IS_FULL_WORD, + CAIRO_KECCAK_FINALIZE_V1, + CAIRO_KECCAK_FINALIZE_V2, + FAST_EC_ADD_ASSIGN_NEW_X, + FAST_EC_ADD_ASSIGN_NEW_X_V2, + FAST_EC_ADD_ASSIGN_NEW_X_V3, + FAST_EC_ADD_ASSIGN_NEW_Y, + EC_MUL_INNER, + RELOCATE_SEGMENT, + TEMPORARY_ARRAY, + VERIFY_ECDSA_SIGNATURE, + SPLIT_OUTPUT_0, + SPLIT_OUTPUT_1, + SPLIT_INPUT_3, + 
SPLIT_INPUT_6, + SPLIT_INPUT_9, + SPLIT_INPUT_12, + SPLIT_INPUT_15, + SPLIT_N_BYTES, + SPLIT_OUTPUT_MID_LOW_HIGH, + NONDET_N_GREATER_THAN_10, + NONDET_N_GREATER_THAN_2, + RANDOM_EC_POINT, + CHAINED_EC_OP_RANDOM_EC_POINT, + RECOVER_Y, + PACK_MODN_DIV_MODN, + XS_SAFE_DIV, + UINT384_UNSIGNED_DIV_REM, + UINT384_SPLIT_128, + ADD_NO_UINT384_CHECK, + UINT384_SQRT, + SUB_REDUCED_A_AND_REDUCED_B, + UNSIGNED_DIV_REM_UINT768_BY_UINT384, + UNSIGNED_DIV_REM_UINT768_BY_UINT384_STRIPPED, + UINT384_SIGNED_NN, + IMPORT_SECP256R1_ALPHA, + IMPORT_SECP256R1_N, + UINT384_GET_SQUARE_ROOT, + UINT256_GET_SQUARE_ROOT, + UINT384_DIV, + INV_MOD_P_UINT256, + HI_MAX_BITLEN, + QUAD_BIT, + INV_MOD_P_UINT512, + DI_BIT, + EC_RECOVER_DIV_MOD_N_PACKED, + UINT512_UNSIGNED_DIV_REM, + EC_RECOVER_SUB_A_B, + A_B_BITAND_1, + EC_RECOVER_PRODUCT_MOD, + UINT256_MUL_INV_MOD_P, + EC_RECOVER_PRODUCT_DIV_M, + SQUARE_SLOPE_X_MOD_P, + SPLIT_XX, +]; + +#[derive(Arbitrary, Serialize, Deserialize)] +struct ProgramJson { + attributes: Vec, + #[arbitrary(with = arbitrary_builtins)] + builtins: Vec, + #[arbitrary(value = "0.11.0".to_string())] + compiler_version: String, + data: Vec, + debug_info: DebugInfo, + #[arbitrary(with = prepend_main_identifier)] + identifiers: HashMap, + hints: HashMap>, + #[arbitrary(value = "__main__".to_string())] + main_scope: String, + #[arbitrary(value = "0x800000000000011000000000000000000000000000000000000000000000001".to_string())] + prime: String, + reference_manager: ReferenceManager, +} + +#[derive(Deserialize)] +struct TextFelt { + value: String, +} + +#[derive(Serialize, Deserialize, Arbitrary)] +struct TextIdentifier { + #[serde(skip_serializing_if = "Option::is_none")] + pc: Option, + #[serde(rename(serialize = "type"))] + #[serde(skip_serializing_if = "Option::is_none")] + type_: Option, + #[serde(skip_serializing_if = "Option::is_none")] + value: Option, + #[serde(skip_serializing_if = "Option::is_none")] + full_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] 
+ members: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + cairo_type: Option, +} + +#[derive(Serialize, Deserialize, Arbitrary)] +pub struct TextHintParams { + #[arbitrary(with = get_hint_code)] + pub code: String, + #[arbitrary(with = prepend_mod_name)] + pub accessible_scopes: Vec, + pub flow_tracking_data: FlowTrackingData, +} + +impl<'a> Arbitrary<'a> for TextFelt { + fn arbitrary(u: &mut Unstructured) -> arbitrary::Result { + let felt_size = 16; + let mut digits = Vec::with_capacity(felt_size); + for _ in 0..felt_size { + digits.push(*u.choose(&HEX_SYMBOLS)?) + } + Ok(TextFelt { + value: digits.join(""), + }) + } +} + +impl Serialize for TextFelt { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.collect_str(&format!("0x{}", self.value)) + } +} + +fn arbitrary_builtins(u: &mut Unstructured) -> arbitrary::Result> { + let builtin_total = u.choose_index(BUILTIN_NAMES.len())?; + let mut selected_builtins = Vec::new(); + + for i in 0..=builtin_total { + if u.ratio(2, 3)? { + selected_builtins.push(BUILTIN_NAMES[i].to_string()) + } + } + + Ok(selected_builtins) +} + +fn prepend_main_identifier( + _u: &mut Unstructured, +) -> arbitrary::Result> { + let mut identifiers = HashMap::new(); + identifiers.insert( + String::from("__main__.main"), + TextIdentifier { + pc: Some(0), + type_: Some(String::from("function")), + value: None, + full_name: None, + members: None, + cairo_type: None, + }, + ); + Ok(identifiers) +} + +fn get_hint_code(u: &mut Unstructured) -> arbitrary::Result { + Ok(u.choose(&HINTS_CODE)?.to_string()) +} + +fn prepend_mod_name(u: &mut Unstructured) -> arbitrary::Result> { + let accessible_scopes: Vec = Vec::::arbitrary(u)? 
+ .iter() + .map(|scope| "starkware.common.".to_string() + scope) + .collect(); + Ok(accessible_scopes) +} fn main() { loop { - fuzz!(|data: (CairoRunConfig, &[u8])| { + fuzz!(|data: (CairoRunConfig, ProgramJson)| { let (cairo_run_config, program_json) = data; - let _ = cairo_run( - program_json, - &CairoRunConfig::default(), - &mut BuiltinHintProcessor::new_empty(), - ); - let _ = cairo_run( - program_json, - &cairo_run_config, - &mut BuiltinHintProcessor::new_empty(), - ); + match serde_json::to_string_pretty(&program_json) { + Ok(program_raw) => { + let _ = cairo_run( + program_raw.as_bytes(), + &CairoRunConfig::default(), + &mut BuiltinHintProcessor::new_empty(), + ); + let _ = cairo_run( + program_raw.as_bytes(), + &cairo_run_config, + &mut BuiltinHintProcessor::new_empty(), + ); + } + Err(_) => {} + } }); } } diff --git a/vm/src/cairo_run.rs b/vm/src/cairo_run.rs index 57c0ef5ff2..e6438fb7ac 100644 --- a/vm/src/cairo_run.rs +++ b/vm/src/cairo_run.rs @@ -19,6 +19,7 @@ use arbitrary::{self, Arbitrary, Unstructured}; #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] pub struct CairoRunConfig<'a> { + #[cfg_attr(feature = "arbitrary", arbitrary(value = "main"))] pub entrypoint: &'a str, pub trace_enabled: bool, pub relocate_mem: bool, diff --git a/vm/src/serde/deserialize_program.rs b/vm/src/serde/deserialize_program.rs index e892cf45dc..ee361a7d91 100644 --- a/vm/src/serde/deserialize_program.rs +++ b/vm/src/serde/deserialize_program.rs @@ -150,6 +150,10 @@ pub struct Attribute { pub start_pc: usize, pub end_pc: usize, pub value: String, + #[cfg_attr( + all(feature = "arbitrary", feature = "std"), + serde(skip_serializing_if = "Option::is_none") + )] pub flow_tracking_data: Option, } @@ -166,41 +170,34 @@ pub struct Location { #[cfg(all(feature = "arbitrary", feature = "std"))] impl<'a> Arbitrary<'a> for Location { fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let mut locations = Vec::new(); - - u.arbitrary_loop(Some(0), Some(512), |u| { - 
locations.push(Location { - end_line: u32::arbitrary(u)?, - end_col: u32::arbitrary(u)?, - input_file: InputFile::arbitrary(u)?, - parent_location: None, - start_line: u32::arbitrary(u)?, - start_col: u32::arbitrary(u)?, - }); - Ok(std::ops::ControlFlow::Continue(())) - })?; - - let mut iter_location = locations.pop().unwrap_or_else(|| Location { - end_line: 0, - end_col: 0, - input_file: InputFile { - filename: "".to_string(), - }, - parent_location: None, - start_line: 0, - start_col: 0, - }); - - while let Some(mut location) = locations.pop() { - location.parent_location = Some((Box::new(iter_location), String::arbitrary(u)?)); - iter_location = location; - } - - Ok(iter_location) + arbitrary_parent_location(u, 20) } } -#[cfg_attr(all(feature = "arbitrary", feature = "std"), derive(Arbitrary, Clone))] +#[cfg(all(feature = "arbitrary", feature = "std"))] +fn arbitrary_parent_location(u: &mut Unstructured, depth: u8) -> arbitrary::Result { + let parent_location = if depth > 0 { + Some(( + Box::new(arbitrary_parent_location(u, depth - 1)?), + String::arbitrary(u)?, + )) + } else { + None + }; + Ok(Location { + end_line: u32::arbitrary(u)?, + end_col: u32::arbitrary(u)?, + input_file: InputFile::arbitrary(u)?, + parent_location, + start_line: u32::arbitrary(u)?, + start_col: u32::arbitrary(u)?, + }) +} + +#[cfg_attr( + all(feature = "arbitrary", feature = "std"), + derive(Arbitrary, Clone, Serialize) +)] #[derive(Deserialize, Debug, PartialEq, Eq)] pub struct DebugInfo { instruction_locations: HashMap,