Skip to content

Commit

Permalink
Improve taint (#58)
Browse files Browse the repository at this point in the history
* update taint to use fxhash

* remove debug + timing

* clippy and fmt

* remove file
  • Loading branch information
technovision99 authored Jan 23, 2024
1 parent 72f3999 commit fb8c6cb
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 71 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ graphviz-rust = "0.6.2"
cairo-felt = "0.8.7"
thiserror = "1.0.47"
rayon = "1.8"
fxhash = "0.2.1"

cairo-lang-compiler = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
cairo-lang-defs = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
Expand All @@ -33,6 +34,7 @@ cairo-lang-utils = { git = "https://github.com/starkware-libs/cairo.git", tag =
cairo-lang-sierra-generator = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
cairo-lang-sierra = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }


[dev-dependencies]
insta = { version = "1.30", features = ["glob"] }

Expand Down
41 changes: 19 additions & 22 deletions src/analysis/taint.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use cairo_lang_sierra::{
ids::VarId,
program::{GenStatement, Statement as SierraStatement},
};
use std::collections::{HashMap, HashSet};
use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement};
use fxhash::{FxHashMap, FxHashSet};

/// Wrapper around a VarId
/// it's used to univocally identify variables
Expand All @@ -11,11 +8,11 @@ pub struct WrapperVariable {
/// The function where the variable appear
function: String,
/// The variable's id
variable: VarId,
variable: u64,
}

impl WrapperVariable {
pub fn new(function: String, variable: VarId) -> Self {
pub fn new(function: String, variable: u64) -> Self {
WrapperVariable { function, variable }
}

Expand All @@ -25,8 +22,8 @@ impl WrapperVariable {
}

/// Return the variable
pub fn variable(&self) -> &VarId {
&self.variable
pub fn variable(&self) -> u64 {
self.variable
}
}

Expand All @@ -35,26 +32,26 @@ pub struct Taint {
/// Source WrapperVariable to set of sink WrapperVariable
/// e.g. instruction reads variables 3, 4 and has 5 as output
/// we will add (3, (5)) and (4, (5)); variable 5 is tainted by 3 and 4
map: HashMap<WrapperVariable, HashSet<WrapperVariable>>,
map: FxHashMap<WrapperVariable, FxHashSet<WrapperVariable>>,
}

impl Taint {
pub fn new(instructions: &[SierraStatement], function: String) -> Self {
let mut map = HashMap::new();
let mut map = FxHashMap::default();
analyze(&mut map, instructions, function);

Taint { map }
}

/// Returns variables tainted in a single step by source
pub fn single_step_taint(&self, source: &WrapperVariable) -> HashSet<WrapperVariable> {
pub fn single_step_taint(&self, source: &WrapperVariable) -> FxHashSet<WrapperVariable> {
self.map.get(source).cloned().unwrap_or_default()
}

/// Returns variables tainted in zero or more steps by source
pub fn multi_step_taint(&self, source: &WrapperVariable) -> HashSet<WrapperVariable> {
let mut result = HashSet::new();
let mut update = HashSet::from([source.clone()]);
pub fn multi_step_taint(&self, source: &WrapperVariable) -> FxHashSet<WrapperVariable> {
let mut result = FxHashSet::default();
let mut update = FxHashSet::from_iter([source.clone()]);
while !update.is_subset(&result) {
result.extend(update.iter().cloned());
update = update
Expand All @@ -69,7 +66,7 @@ impl Taint {
pub fn taints_any_sinks_variable(
&self,
source: &WrapperVariable,
sinks: &HashSet<WrapperVariable>,
sinks: &FxHashSet<WrapperVariable>,
) -> Vec<WrapperVariable> {
self.multi_step_taint(source)
.into_iter()
Expand All @@ -81,7 +78,7 @@ impl Taint {
pub fn taints_any_sinks(
&self,
source: &WrapperVariable,
sinks: &HashSet<WrapperVariable>,
sinks: &FxHashSet<WrapperVariable>,
) -> bool {
self.multi_step_taint(source)
.iter()
Expand All @@ -91,7 +88,7 @@ impl Taint {
/// Returns the sources that taint the sink
pub fn taints_any_sources_variable(
&self,
sources: &HashSet<WrapperVariable>,
sources: &FxHashSet<WrapperVariable>,
sink: &WrapperVariable,
) -> Vec<WrapperVariable> {
sources
Expand All @@ -104,7 +101,7 @@ impl Taint {
/// Returns true if the sink is tainted by any source
pub fn taints_any_sources(
&self,
sources: &HashSet<WrapperVariable>,
sources: &FxHashSet<WrapperVariable>,
sink: &WrapperVariable,
) -> bool {
sources
Expand All @@ -121,7 +118,7 @@ impl Taint {

/// Analyze each instruction in the current function and populate the taint map
fn analyze(
map: &mut HashMap<WrapperVariable, HashSet<WrapperVariable>>,
map: &mut FxHashMap<WrapperVariable, FxHashSet<WrapperVariable>>,
instructions: &[SierraStatement],
function: String,
) {
Expand All @@ -140,12 +137,12 @@ fn analyze(
let sinks = map
.entry(WrapperVariable {
function: function.clone(),
variable: source.clone(),
variable: source.id,
})
.or_default();
sinks.insert(WrapperVariable {
function: function.clone(),
variable: sink.clone(),
variable: sink.id,
});
}
}
Expand Down
32 changes: 12 additions & 20 deletions src/core/compilation_unit.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::{HashMap, HashSet};

use super::function::{Function, Type};
use crate::analysis::taint::Taint;
use crate::analysis::taint::WrapperVariable;
Expand All @@ -13,6 +11,8 @@ use cairo_lang_starknet::abi::{
Contract, Item::Function as AbiFunction, Item::Interface as AbiInterface,
Item::L1Handler as AbiL1Handler,
};
use fxhash::FxHashSet;
use std::collections::{HashMap, HashSet};

pub struct CompilationUnit {
/// The compiled sierra program
Expand Down Expand Up @@ -85,18 +85,15 @@ impl CompilationUnit {

/// Return true if the variable is tainted i.e. user inputs can control it in some way
pub fn is_tainted(&self, function_name: String, variable: VarId) -> bool {
let wrapped_variable = WrapperVariable::new(function_name, variable);
let mut parameters = HashSet::new();
let wrapped_variable = WrapperVariable::new(function_name, variable.id);
let mut parameters = FxHashSet::default();
for external_function in self
.functions
.iter()
.filter(|f| matches!(f.ty(), Type::External | Type::L1Handler | Type::View))
{
for param in external_function.params().skip(1) {
parameters.insert(WrapperVariable::new(
external_function.name(),
param.id.clone(),
));
parameters.insert(WrapperVariable::new(external_function.name(), param.id.id));
}
}
// Get the taint for the function where the variable appear
Expand Down Expand Up @@ -330,15 +327,15 @@ impl CompilationUnit {
/// Propagate the taints from external/l1_handler functions to private functions
fn propagate_taints(&mut self) {
// Collect the arguments of all the external/l1_handler functions
let mut arguments_external_functions: HashSet<WrapperVariable> = HashSet::new();
let mut arguments_external_functions: FxHashSet<WrapperVariable> = FxHashSet::default();
for function in self
.functions
.iter()
.filter(|f| matches!(f.ty(), Type::External | Type::L1Handler))
{
for param in function.params() {
arguments_external_functions
.insert(WrapperVariable::new(function.name(), param.id.clone()));
.insert(WrapperVariable::new(function.name(), param.id.id));
}
}

Expand Down Expand Up @@ -395,12 +392,10 @@ impl CompilationUnit {
let external_taint = taint_copy.get(&calling_function.name()).unwrap();

// Variables used as arguments in the call to the private function
let function_called_args: HashSet<WrapperVariable> = invoc
let function_called_args: FxHashSet<WrapperVariable> = invoc
.args
.iter()
.map(|arg| {
WrapperVariable::new(calling_function.name(), arg.clone())
})
.map(|arg| WrapperVariable::new(calling_function.name(), arg.id))
.collect();

// Calling function's parameters
Expand All @@ -419,10 +414,7 @@ impl CompilationUnit {
}
// Check if the arguments used to call the private function are tainted by the calling function's parameters
for sink in external_taint.taints_any_sinks_variable(
&WrapperVariable::new(
calling_function.name(),
param.id.clone(),
),
&WrapperVariable::new(calling_function.name(), param.id.id),
&function_called_args,
) {
// If the sink is tainted by some parameters of external functions
Expand All @@ -446,11 +438,11 @@ impl CompilationUnit {
// so to convert the ID we have to iterate the arguments and use the index where we find
// our sink VarId
for (i, var) in invoc.args.iter().enumerate() {
if var.id == sink.variable().id {
if var.id == sink.variable() {
// We convert the id to be the private function's formal parameter id and not the actual parameter id
let sink_converted = WrapperVariable::new(
function_called_name.clone(),
VarId::new(i.try_into().unwrap()),
i.try_into().unwrap(),
);

// Add the source i.e. the variable of the external function
Expand Down
1 change: 0 additions & 1 deletion src/core/core_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ impl CoreUnit {
compilation_unit
})
.collect();

Ok(CoreUnit { compilation_units })
}

Expand Down
27 changes: 12 additions & 15 deletions src/detectors/array_use_after_pop_front.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::HashSet;

use super::detector::{Confidence, Detector, Impact, Result};
use crate::analysis::taint::WrapperVariable;
use crate::core::compilation_unit::CompilationUnit;
Expand All @@ -8,6 +6,8 @@ use crate::core::function::{Function, Type};
use cairo_lang_sierra::extensions::array::ArrayConcreteLibfunc;
use cairo_lang_sierra::extensions::core::{CoreConcreteLibfunc, CoreTypeConcrete};
use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement};
use fxhash::FxHashSet;
use std::collections::HashSet;

#[derive(Default)]
pub struct ArrayUseAfterPopFront {}
Expand Down Expand Up @@ -50,10 +50,7 @@ impl Detector for ArrayUseAfterPopFront {
CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::PopFront(_)) => {
Some((
index,
WrapperVariable::new(
function.name(),
invoc.args[0].clone(),
),
WrapperVariable::new(function.name(), invoc.args[0].id),
))
}
_ => None,
Expand All @@ -78,7 +75,7 @@ impl Detector for ArrayUseAfterPopFront {
if !bad_array_used.is_empty() {
let array_ids = bad_array_used
.iter()
.map(|f| f.1.variable().id)
.map(|f| f.1.variable())
.collect::<Vec<u64>>();
let message = match array_ids.len() {
1 => format!(
Expand Down Expand Up @@ -163,8 +160,8 @@ impl ArrayUseAfterPopFront {

match libfunc {
CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::Append(_)) => {
let mut sinks = HashSet::new();
sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].clone()));
let mut sinks = FxHashSet::default();
sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].id));

taint.taints_any_sinks(bad_array, &sinks)
}
Expand Down Expand Up @@ -192,10 +189,10 @@ impl ArrayUseAfterPopFront {
.expect("Library function not found in the registry");

if let CoreConcreteLibfunc::FunctionCall(_) = lib_func {
let sinks: HashSet<WrapperVariable> = invoc
let sinks: FxHashSet<WrapperVariable> = invoc
.args
.iter()
.map(|v| WrapperVariable::new(function.name(), v.clone()))
.map(|v| WrapperVariable::new(function.name(), v.id))
.collect();

return taint.taints_any_sinks(bad_array, &sinks);
Expand Down Expand Up @@ -271,7 +268,7 @@ impl ArrayUseAfterPopFront {
.map(|i| {
WrapperVariable::new(
maybe_caller.name(),
invoc.branches[0].results[*i].clone(),
invoc.branches[0].results[*i].id,
)
})
.collect();
Expand Down Expand Up @@ -325,16 +322,16 @@ impl ArrayUseAfterPopFront {
return false;
}

// Not sure if it is required because taint analysis adds all the arugments as
// Not sure if it is required because taint analysis adds all the arguments as
// tainters of the all the return values.
let returned_bad_arrays: Vec<WrapperVariable> = function
.get_statements()
.iter()
.flat_map(|s| {
if let GenStatement::Return(return_vars) = s {
let sinks: HashSet<WrapperVariable> = return_vars
let sinks: FxHashSet<WrapperVariable> = return_vars
.iter()
.map(|v| WrapperVariable::new(function.name(), v.clone()))
.map(|v| WrapperVariable::new(function.name(), v.id))
.collect();

return taint.taints_any_sinks_variable(bad_array, &sinks);
Expand Down
Loading

0 comments on commit fb8c6cb

Please sign in to comment.