diff --git a/Cargo.lock b/Cargo.lock index 63a1ab2..c74314e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,12 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cairo-felt" version = "0.8.7" @@ -637,6 +643,7 @@ dependencies = [ "cairo-lang-syntax", "cairo-lang-utils", "clap", + "fxhash", "graphviz-rust", "insta", "num-bigint", @@ -979,6 +986,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "genco" version = "0.17.8" diff --git a/Cargo.toml b/Cargo.toml index 8b7846a..bf2ab7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ graphviz-rust = "0.6.2" cairo-felt = "0.8.7" thiserror = "1.0.47" rayon = "1.8" +fxhash = "0.2.1" cairo-lang-compiler = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" } cairo-lang-defs = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" } @@ -33,6 +34,7 @@ cairo-lang-utils = { git = "https://github.com/starkware-libs/cairo.git", tag = cairo-lang-sierra-generator = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" } cairo-lang-sierra = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" } + [dev-dependencies] insta = { version = "1.30", features = ["glob"] } diff --git a/src/analysis/taint.rs b/src/analysis/taint.rs index d8e8124..5a61c17 100644 --- a/src/analysis/taint.rs +++ b/src/analysis/taint.rs @@ -1,8 +1,5 @@ -use cairo_lang_sierra::{ - ids::VarId, - program::{GenStatement, Statement as SierraStatement}, -}; -use std::collections::{HashMap, HashSet}; +use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement}; +use fxhash::{FxHashMap, FxHashSet}; /// Wrapper around a VarId /// it's used to univocally identify variables @@ -11,11 +8,11 @@ pub struct WrapperVariable { /// The function where the variable appear function: String, /// The variable's id - variable: VarId, + variable: u64, } impl WrapperVariable { - pub fn new(function: String, variable: VarId) -> Self { + pub fn new(function: String, variable: u64) -> Self { WrapperVariable { function, variable } } @@ -25,8 +22,8 @@ impl WrapperVariable { } /// Return the variable - pub fn variable(&self) -> &VarId { - &self.variable + pub fn variable(&self) -> u64 { + self.variable } } @@ -35,26 +32,26 @@ pub struct Taint { /// Source WrapperVariable to set of sink WrapperVariable /// e.g. instruction reads variables 3, 4 and has 5 as output /// we will add (3, (5)) and (4, (5)); variable 5 is tainted by 3 and 4 - map: HashMap>, + map: FxHashMap>, } impl Taint { pub fn new(instructions: &[SierraStatement], function: String) -> Self { - let mut map = HashMap::new(); + let mut map = FxHashMap::default(); analyze(&mut map, instructions, function); Taint { map } } /// Returns variables tainted in a single step by source - pub fn single_step_taint(&self, source: &WrapperVariable) -> HashSet { + pub fn single_step_taint(&self, source: &WrapperVariable) -> FxHashSet { self.map.get(source).cloned().unwrap_or_default() } /// Returns variables tainted in zero or more steps by source - pub fn multi_step_taint(&self, source: &WrapperVariable) -> HashSet { - let mut result = HashSet::new(); - let mut update = HashSet::from([source.clone()]); + pub fn multi_step_taint(&self, source: &WrapperVariable) -> FxHashSet { + let mut result = FxHashSet::default(); + let mut update = FxHashSet::from_iter([source.clone()]); while !update.is_subset(&result) { result.extend(update.iter().cloned()); update = update @@ -69,7 +66,7 @@ impl Taint { pub fn taints_any_sinks_variable( &self, source: &WrapperVariable, - sinks: &HashSet, + sinks: &FxHashSet, ) -> Vec { self.multi_step_taint(source) .into_iter() @@ -81,7 +78,7 @@ impl Taint { pub fn taints_any_sinks( &self, source: &WrapperVariable, - sinks: &HashSet, + sinks: &FxHashSet, ) -> bool { self.multi_step_taint(source) .iter() @@ -91,7 +88,7 @@ impl Taint { /// Returns the sources that taint the sink pub fn taints_any_sources_variable( &self, - sources: &HashSet, + sources: &FxHashSet, sink: &WrapperVariable, ) -> Vec { sources @@ -104,7 +101,7 @@ impl Taint { /// Returns true if the sink is tainted by any source pub fn taints_any_sources( &self, - sources: &HashSet, + sources: &FxHashSet, sink: &WrapperVariable, ) -> bool { sources @@ -121,7 +118,7 @@ impl Taint { /// Analyze each instruction in the current function and populate the taint map fn analyze( - map: &mut HashMap>, + map: &mut FxHashMap>, instructions: &[SierraStatement], function: String, ) { @@ -140,12 +137,12 @@ fn analyze( let sinks = map .entry(WrapperVariable { function: function.clone(), - variable: source.clone(), + variable: source.id, }) .or_default(); sinks.insert(WrapperVariable { function: function.clone(), - variable: sink.clone(), + variable: sink.id, }); } } diff --git a/src/core/compilation_unit.rs b/src/core/compilation_unit.rs index 0202080..c234be8 100644 --- a/src/core/compilation_unit.rs +++ b/src/core/compilation_unit.rs @@ -1,5 +1,3 @@ -use std::collections::{HashMap, HashSet}; - use super::function::{Function, Type}; use crate::analysis::taint::Taint; use crate::analysis::taint::WrapperVariable; @@ -13,6 +11,8 @@ use cairo_lang_starknet::abi::{ Contract, Item::Function as AbiFunction, Item::Interface as AbiInterface, Item::L1Handler as AbiL1Handler, }; +use fxhash::FxHashSet; +use std::collections::{HashMap, HashSet}; pub struct CompilationUnit { /// The compiled sierra program @@ -85,18 +85,15 @@ impl CompilationUnit { /// Return true if the variable is tainted i.e. user inputs can control it in some way pub fn is_tainted(&self, function_name: String, variable: VarId) -> bool { - let wrapped_variable = WrapperVariable::new(function_name, variable); - let mut parameters = HashSet::new(); + let wrapped_variable = WrapperVariable::new(function_name, variable.id); + let mut parameters = FxHashSet::default(); for external_function in self .functions .iter() .filter(|f| matches!(f.ty(), Type::External | Type::L1Handler | Type::View)) { for param in external_function.params().skip(1) { - parameters.insert(WrapperVariable::new( - external_function.name(), - param.id.clone(), - )); + parameters.insert(WrapperVariable::new(external_function.name(), param.id.id)); } } // Get the taint for the function where the variable appear @@ -330,7 +327,7 @@ impl CompilationUnit { /// Propagate the taints from external/l1_handler functions to private functions fn propagate_taints(&mut self) { // Collect the arguments of all the external/l1_handler functions - let mut arguments_external_functions: HashSet = HashSet::new(); + let mut arguments_external_functions: FxHashSet = FxHashSet::default(); for function in self .functions .iter() @@ -338,7 +335,7 @@ impl CompilationUnit { { for param in function.params() { arguments_external_functions - .insert(WrapperVariable::new(function.name(), param.id.clone())); + .insert(WrapperVariable::new(function.name(), param.id.id)); } } @@ -395,12 +392,10 @@ impl CompilationUnit { let external_taint = taint_copy.get(&calling_function.name()).unwrap(); // Variables used as arguments in the call to the private function - let function_called_args: HashSet = invoc + let function_called_args: FxHashSet = invoc .args .iter() - .map(|arg| { - WrapperVariable::new(calling_function.name(), arg.clone()) - }) + .map(|arg| WrapperVariable::new(calling_function.name(), arg.id)) .collect(); // Calling function's parameters @@ -419,10 +414,7 @@ impl CompilationUnit { } // Check if the arguments used to call the private function are tainted by the calling function's parameters for sink in external_taint.taints_any_sinks_variable( - &WrapperVariable::new( - calling_function.name(), - param.id.clone(), - ), + &WrapperVariable::new(calling_function.name(), param.id.id), &function_called_args, ) { // If the sink is tainted by some parameters of external functions @@ -446,11 +438,11 @@ impl CompilationUnit { // so to convert the ID we have to iterate the arguments and use the index where we find // our sink VarId for (i, var) in invoc.args.iter().enumerate() { - if var.id == sink.variable().id { + if var.id == sink.variable() { // We convert the id to be the private function's formal parameter id and not the actual parameter id let sink_converted = WrapperVariable::new( function_called_name.clone(), - VarId::new(i.try_into().unwrap()), + i.try_into().unwrap(), ); // Add the source i.e. the variable of the external function diff --git a/src/core/core_unit.rs b/src/core/core_unit.rs index 9f6ddc1..e895f48 100644 --- a/src/core/core_unit.rs +++ b/src/core/core_unit.rs @@ -31,7 +31,6 @@ impl CoreUnit { compilation_unit }) .collect(); - Ok(CoreUnit { compilation_units }) } diff --git a/src/detectors/array_use_after_pop_front.rs b/src/detectors/array_use_after_pop_front.rs index c7edf30..52d213f 100644 --- a/src/detectors/array_use_after_pop_front.rs +++ b/src/detectors/array_use_after_pop_front.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use super::detector::{Confidence, Detector, Impact, Result}; use crate::analysis::taint::WrapperVariable; use crate::core::compilation_unit::CompilationUnit; @@ -8,6 +6,8 @@ use crate::core::function::{Function, Type}; use cairo_lang_sierra::extensions::array::ArrayConcreteLibfunc; use cairo_lang_sierra::extensions::core::{CoreConcreteLibfunc, CoreTypeConcrete}; use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement}; +use fxhash::FxHashSet; +use std::collections::HashSet; #[derive(Default)] pub struct ArrayUseAfterPopFront {} @@ -50,10 +50,7 @@ impl Detector for ArrayUseAfterPopFront { CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::PopFront(_)) => { Some(( index, - WrapperVariable::new( - function.name(), - invoc.args[0].clone(), - ), + WrapperVariable::new(function.name(), invoc.args[0].id), )) } _ => None, @@ -78,7 +75,7 @@ impl Detector for ArrayUseAfterPopFront { if !bad_array_used.is_empty() { let array_ids = bad_array_used .iter() - .map(|f| f.1.variable().id) + .map(|f| f.1.variable()) .collect::>(); let message = match array_ids.len() { 1 => format!( @@ -163,8 +160,8 @@ impl ArrayUseAfterPopFront { match libfunc { CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::Append(_)) => { - let mut sinks = HashSet::new(); - sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].clone())); + let mut sinks = FxHashSet::default(); + sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].id)); taint.taints_any_sinks(bad_array, &sinks) } @@ -192,10 +189,10 @@ impl ArrayUseAfterPopFront { .expect("Library function not found in the registry"); if let CoreConcreteLibfunc::FunctionCall(_) = lib_func { - let sinks: HashSet = invoc + let sinks: FxHashSet = invoc .args .iter() - .map(|v| WrapperVariable::new(function.name(), v.clone())) + .map(|v| WrapperVariable::new(function.name(), v.id)) .collect(); return taint.taints_any_sinks(bad_array, &sinks); @@ -271,7 +268,7 @@ impl ArrayUseAfterPopFront { .map(|i| { WrapperVariable::new( maybe_caller.name(), - invoc.branches[0].results[*i].clone(), + invoc.branches[0].results[*i].id, ) }) .collect(); @@ -325,16 +322,16 @@ impl ArrayUseAfterPopFront { return false; } - // Not sure if it is required because taint analysis adds all the arugments as + // Not sure if it is required because taint analysis adds all the arguments as // tainters of the all the return values. let returned_bad_arrays: Vec = function .get_statements() .iter() .flat_map(|s| { if let GenStatement::Return(return_vars) = s { - let sinks: HashSet = return_vars + let sinks: FxHashSet = return_vars .iter() - .map(|v| WrapperVariable::new(function.name(), v.clone())) + .map(|v| WrapperVariable::new(function.name(), v.id)) .collect(); return taint.taints_any_sinks_variable(bad_array, &sinks); diff --git a/src/detectors/unchecked_l1_handler_from.rs b/src/detectors/unchecked_l1_handler_from.rs index c34de6a..501ff74 100644 --- a/src/detectors/unchecked_l1_handler_from.rs +++ b/src/detectors/unchecked_l1_handler_from.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use super::detector::{Confidence, Detector, Impact, Result}; use crate::analysis::taint::WrapperVariable; use crate::core::compilation_unit::CompilationUnit; @@ -8,6 +6,8 @@ use crate::core::function::{Function, Type}; use cairo_lang_sierra::extensions::{core::CoreConcreteLibfunc, felt252::Felt252Concrete}; use cairo_lang_sierra::ids::VarId; use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement}; +use fxhash::FxHashSet; +use std::collections::HashSet; #[derive(Default)] pub struct UncheckedL1HandlerFrom {} @@ -42,8 +42,8 @@ impl Detector for UncheckedL1HandlerFrom { for f in l1_handler_funcs { let from_address = f.params().map(|p| p.id.clone()).collect::>()[1].clone(); - let mut sources = HashSet::new(); - sources.insert(WrapperVariable::new(f.name(), from_address)); + let mut sources = FxHashSet::default(); + sources.insert(WrapperVariable::new(f.name(), from_address.id)); // Used to avoid infinite recursion in case of recursive private function calls let mut checked_private_functions = HashSet::new(); @@ -78,7 +78,7 @@ impl Detector for UncheckedL1HandlerFrom { impl UncheckedL1HandlerFrom { fn is_from_checked_in_function( &self, - from_tainted_args: &HashSet, + from_tainted_args: &FxHashSet, compilation_unit: &CompilationUnit, function: &Function, checked_private_functions: &mut HashSet, @@ -98,7 +98,7 @@ impl UncheckedL1HandlerFrom { match libfunc { CoreConcreteLibfunc::Felt252(Felt252Concrete::IsZero(_)) => self - .is_felt252_is_zero_arg_taintaed_by_from_address( + .is_felt252_is_zero_arg_tainted_by_from_address( from_tainted_args, invoc.args.clone(), compilation_unit, @@ -126,19 +126,19 @@ impl UncheckedL1HandlerFrom { let taint = compilation_unit.get_taint(&function.name()).unwrap(); - let sinks: HashSet = invoc + let sinks: FxHashSet = invoc .args .iter() - .map(|v| WrapperVariable::new(function.name(), v.clone())) + .map(|v| WrapperVariable::new(function.name(), v.id)) .collect(); - let from_tainted_args: HashSet = from_tainted_args + let from_tainted_args: FxHashSet = from_tainted_args .iter() .flat_map(|source| taint.taints_any_sinks_variable(source, &sinks)) .map(|sink| { WrapperVariable::new( private_function.name(), - VarId::new(sink.variable().id - invoc.args[0].id), + sink.variable() - invoc.args[0].id, ) }) .collect(); @@ -158,14 +158,14 @@ impl UncheckedL1HandlerFrom { from_checked_in_private_functions } - fn is_felt252_is_zero_arg_taintaed_by_from_address( + fn is_felt252_is_zero_arg_tainted_by_from_address( &self, - sources: &HashSet, + sources: &FxHashSet, felt252_is_zero_args: Vec, compilation_unit: &CompilationUnit, function_name: &str, ) -> bool { - let sink = WrapperVariable::new(function_name.to_string(), felt252_is_zero_args[0].clone()); + let sink = WrapperVariable::new(function_name.to_string(), felt252_is_zero_args[0].id); let taint = compilation_unit.get_taint(function_name).unwrap(); // returns true If the felt252_is_zero arguments are tainted by the from_address taint.taints_any_sources(sources, &sink)