From a200cecdb79854a438089593b148e6e5b819f73f Mon Sep 17 00:00:00 2001 From: hkctkuy Date: Wed, 17 May 2023 18:16:41 +0300 Subject: [PATCH] add casr-ubsan --- README.md | 30 +- casr/Cargo.toml | 1 + casr/src/bin/casr-cli.rs | 14 + casr/src/bin/casr-san.rs | 2 +- casr/src/bin/casr-ubsan.rs | 434 ++++++++++++++++++ casr/src/util.rs | 2 +- casr/tests/casr_tests/ubsan/input1/input | 0 .../casr_tests/ubsan/input1/input3/input | 0 casr/tests/casr_tests/ubsan/input2/input | 0 casr/tests/casr_tests/ubsan/test_ubsan.cpp | 8 + casr/tests/tests.rs | 97 ++++ docs/usage.md | 53 ++- libcasr/src/lib.rs | 2 + libcasr/src/report.rs | 13 + libcasr/src/ubsan.rs | 261 +++++++++++ 15 files changed, 901 insertions(+), 16 deletions(-) create mode 100644 casr/src/bin/casr-ubsan.rs create mode 100644 casr/tests/casr_tests/ubsan/input1/input create mode 100644 casr/tests/casr_tests/ubsan/input1/input3/input create mode 100644 casr/tests/casr_tests/ubsan/input2/input create mode 100644 casr/tests/casr_tests/ubsan/test_ubsan.cpp create mode 100644 libcasr/src/ubsan.rs diff --git a/README.md b/README.md index c449ba5e..85b39f38 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,10 @@ CASR is maintained by: ## Overview CASR is a set of tools that allows you to collect crash reports in different -ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze ASAN -reports. Try `casr-gdb` to get reports from gdb. Use `casr-python` to analyze python reports and get report from [Atheris](https://github.com/google/atheris). +ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze +ASAN reports or `casr-ubsan` to analyze UBSAN reports. Try `casr-gdb` to get +reports from gdb. Use `casr-python` to analyze python reports and get report +from [Atheris](https://github.com/google/atheris). 
Crash report contains many useful information: severity (like [exploitable](https://github.com/jfoote/exploitable)) for x86, x86\_64, arm32, aarch64, rv32g, rv64g architectures, @@ -52,6 +54,7 @@ crashes. It can analyze crashes from different sources: * AddressSanitizer +* UndefinedBehaviorSanitizer * Gdb output and program languages: @@ -99,11 +102,17 @@ Create report from coredump: $ casr-core -f casr/tests/casr_tests/bin/core.test_destAv -e casr/tests/casr_tests/bin/test_destAv -o destAv.casrep -Create report from sanitizers output: +Create report from AddressSanitizer output: $ clang++ -fsanitize=address -O0 -g casr/tests/casr_tests/test_asan_df.cpp -o test_asan_df $ casr-san -o asan.casrep -- ./test_asan_df +Create report from UndefinedBehaviorSanitizer output: + + $ clang++ -fsanitize=undefined -O0 -g casr/tests/casr_tests/ubsan/test_ubsan.cpp -o test_ubsan + $ casr-ubsan -i input -o output -- ./test_ubsan @@ + $ casr-cli output + Create report from gdb: $ casr-gdb -o destAv.gdb.casrep -- casr/tests/casr_tests/bin/test_destAv $(printf 'A%.s' {1..200}) @@ -167,18 +176,21 @@ When you have crashes from fuzzing you may do the following steps: 1. Create reports for all crashes via `casr-san`, `casr-gdb` (if no sanitizers are present), or `casr-python`. -2. Deduplicate collected reports via `casr-cluster -d`. -3. Cluster deduplicated reports via `casr-cluster -c`. -4. View reports from clusters using `casr-cli` or upload them to +2. Deduplicate collected crash reports via `casr-cluster -d`. +3. Cluster deduplicated crash reports via `casr-cluster -c`. +4. Create reports and deduplicate them for all UBSAN errors via `casr-ubsan`. +5. View reports from clusters using `casr-cli` or upload them to [DefectDojo](https://github.com/DefectDojo/django-DefectDojo) with `casr-dojo`. -If you use [AFL++](https://github.com/AFLplusplus/AFLplusplus), whole pipeline -could be done automatically by `casr-afl`. 
+If you use [AFL++](https://github.com/AFLplusplus/AFLplusplus), the pipeline +(without `casr-ubsan` and `casr-dojo`) could be done automatically by +`casr-afl`. If you use [libFuzzer](https://www.llvm.org/docs/LibFuzzer.html) based fuzzer (C/C++/[go-fuzz](https://github.com/dvyukov/go-fuzz)/[Atheris](https://github.com/google/atheris)), -whole pipeline could be done automatically by `casr-libfuzzer`. +the pipeline (without `casr-ubsan` and `casr-dojo`) could be done automatically +by `casr-libfuzzer`. ## Contributing diff --git a/casr/Cargo.toml b/casr/Cargo.toml index 5e918f10..3694ccd8 100644 --- a/casr/Cargo.toml +++ b/casr/Cargo.toml @@ -33,6 +33,7 @@ walkdir = "2" reqwest = { version = "0.11", features = ["json", "multipart", "rustls-tls"], default_features = false, optional = true } tokio = { version = "1", features = ["rt", "macros"], optional = true } toml = { version = "0.7", optional = true } +wait-timeout = "0.2" libcasr = { path = "../libcasr", version = "2.6.0", features = ["serde", "exploitable"] } diff --git a/casr/src/bin/casr-cli.rs b/casr/src/bin/casr-cli.rs index dc2dccb7..0ce9adfe 100644 --- a/casr/src/bin/casr-cli.rs +++ b/casr/src/bin/casr-cli.rs @@ -342,6 +342,16 @@ fn build_tree_report( tree.expand_item(row); } + if !report.ubsan_report.is_empty() { + row = tree + .insert_container_item("UbsanReport".to_string(), Placement::After, row) + .unwrap(); + report.ubsan_report.iter().for_each(|e| { + tree.insert_item(e.clone(), Placement::LastChild, row); + }); + tree.expand_item(row); + } + if !report.python_report.is_empty() { row = tree .insert_container_item("PythonReport".to_string(), Placement::After, row) @@ -522,6 +532,10 @@ fn build_slider_report( select.add_item("AsanReport", report.asan_report.join("\n")); } + if !report.ubsan_report.is_empty() { + select.add_item("UbsanReport", report.ubsan_report.join("\n")); + } + if !report.python_report.is_empty() { select.add_item("PythonReport", report.python_report.join("\n")); } diff --git 
a/casr/src/bin/casr-san.rs b/casr/src/bin/casr-san.rs index 4b2e157f..e2ae3b69 100644 --- a/casr/src/bin/casr-san.rs +++ b/casr/src/bin/casr-san.rs @@ -28,7 +28,7 @@ use std::process::Command; fn main() -> Result<()> { let matches = clap::Command::new("casr-san") .version(clap::crate_version!()) - .about("Create CASR reports (.casrep) from sanitizer reports") + .about("Create CASR reports (.casrep) from AddressSanitizer reports") .term_width(90) .arg( Arg::new("output") diff --git a/casr/src/bin/casr-ubsan.rs b/casr/src/bin/casr-ubsan.rs new file mode 100644 index 00000000..b7e3511b --- /dev/null +++ b/casr/src/bin/casr-ubsan.rs @@ -0,0 +1,434 @@ +use casr::util; +use libcasr::report::CrashReport; +use libcasr::severity::Severity; +use libcasr::stacktrace::{CrashLine, CrashLineExt}; +use libcasr::ubsan; +use libcasr::ubsan::UbsanWarning; + +use anyhow::{bail, Context, Result}; +use clap::{ + error::{ContextKind, ContextValue, ErrorKind}, + Arg, ArgAction, +}; +use log::{debug, info, warn}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use regex::Regex; +use wait_timeout::ChildExt; +use walkdir::WalkDir; + +use std::collections::HashSet; +use std::env; +use std::fs; +use std::fs::OpenOptions; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Duration; + +/// Extract ubsan warnings for specified input file +/// +/// # Arguments +/// +/// * `input` - input file path +/// +/// * `argv` - target program argument vector +/// +/// * `timeout` - target program timeout +/// +/// # Returns value +/// +/// Vector of extracted ubsan warnings with crashlines +fn extract_warnings( + input: &PathBuf, + argv: &[&str], + timeout: u64, +) -> Result> { + // Get command line argv + let mut argv = argv.to_owned(); + let arg: String; + let stdin = if let Some(index) = argv.iter().position(|&arg| arg.contains("@@")) { + arg = argv[index].replace("@@", input.to_str().unwrap()); + argv[index] = &arg; + false 
+ } else { + true + }; + // Run program. + let mut cmd = Command::new(argv[0]); + cmd.stdout(Stdio::null()).stderr(Stdio::piped()); + if stdin { + let Ok(file) = fs::File::open(input) else { + bail!("Can't open file {:?}", input); + }; + cmd.stdin(file); + } + if argv.len() > 1 { + cmd.args(&argv[1..]); + } + debug!("Run: {:?}", cmd); + + // Get stderr + let stderr: String = + // If timeout is specified, spawn and check timeout + // Else get output + if timeout != 0 { + let mut child = cmd + .spawn() + .with_context(|| "Failed to start command: {cmd:?}")?; + if child + .wait_timeout(Duration::from_secs(timeout)) + .unwrap() + .is_none() + { + child.kill()?; + warn!("Timeout: {:?}", cmd); + } + let mut buf = vec![]; + let _ = child.stderr.unwrap().read_to_end(&mut buf); + String::from_utf8_lossy(&buf).to_string() + } else { + let output = cmd + .output() + .with_context(|| "Failed to start command: {cmd:?}")?; + String::from_utf8_lossy(&output.stderr).to_string() + }; + + // Extract ubsan warnings + let extracted_warnings = ubsan::extract_ubsan_warnings(&stderr)?; + // Update warning vector + // Get position by input + let mut warnings: Vec<(UbsanWarning, CrashLine)> = vec![]; + for warning in extracted_warnings { + // Get crashline + if let Ok(crashline) = warning.crash_line() { + warnings.push((warning, crashline)); + } else { + bail!("Cannot get warning crashline {:?}", warning); + } + } + + Ok(warnings) +} + +/// Generate ubsan report for specified input file +/// +/// # Arguments +/// +/// * `input` - input file path +/// +/// * `warning` - target warning +/// +/// * `crashline` - warning crashile +/// +/// * `argv` - target program argument vector +/// +/// * `pre_report` - report template containing identic values +/// +/// # Returns value +/// +/// Generated report +fn gen_report( + input: &Path, + warning: &UbsanWarning, + crashline: &CrashLine, + argv: &[&str], + pre_report: &CrashReport, +) -> Result { + // Get command line argv + let mut argv = 
argv.to_owned(); + let arg: String; + let stdin = if let Some(index) = argv.iter().position(|&arg| arg.contains("@@")) { + arg = argv[index].replace("@@", input.to_str().unwrap()); + argv[index] = &arg; + false + } else { + true + }; + let args = argv.join(" "); + debug!("Generating reports for {:?}", args); + // Create report + let mut report = pre_report.clone(); + report.proc_cmdline = args; + report.ubsan_report = warning.ubsan_report(); + if stdin { + report.stdin = input.to_str().unwrap().to_string(); + } + // Get stacktrace + if let Ok(stacktrace) = warning.extract_stacktrace() { + report.stacktrace = stacktrace; + } + // Get execution class + if let Ok(execution_class) = warning.severity() { + report.execution_class = execution_class; + } + // Get crashline and source + report.crashline = crashline.to_string(); + if let CrashLine::Source(debug) = crashline { + if let Some(sources) = CrashReport::sources(debug) { + report.source = sources; + } + } + Ok(report) +} + +/// Save ubsan report +/// +/// # Arguments +/// +/// * `report` - saving report +/// +/// * `output_dir` - report saving directory +/// +/// * `input` - input file path (need for unique name) +/// +/// * `crashline` - crashline (need for unique name) +fn save_report(report: CrashReport, output_dir: &Path, input: &Path) -> Result<()> { + // Convert report to string. 
+ let repstr = serde_json::to_string_pretty(&report).unwrap(); + + // Get input dir name + let dir_name = input.parent().unwrap().file_name().unwrap(); + let input_name = input.file_name().unwrap(); + // Get crashline file name and line num + let re = Regex::new(r#"(.+)\+0x([0-9a-f]+)"#).unwrap(); + let Some(cap) = re.captures(&report.crashline) else { + bail!("Couldn't parse error crashline: {}", report.crashline); + }; + let file_name = Path::new(cap.get(1).unwrap().as_str()).file_name().unwrap(); + let line = i64::from_str_radix(cap.get(2).unwrap().as_str(), 16).unwrap(); + let mut report_path = PathBuf::new(); + report_path.push(output_dir); + report_path.push(format!( + "{}_{}_{}_{}.casrep", + dir_name.to_str().unwrap(), + input_name.to_str().unwrap(), + file_name.to_str().unwrap(), + line + )); + if let Ok(mut file) = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&report_path) + { + file.write_all(repstr.as_bytes()).with_context(|| { + format!( + "Couldn't write data to report file `{}`", + &report_path.display() + ) + })?; + } else { + bail!("Couldn't save report to file: {}", &report_path.display()); + } + Ok(()) +} + +fn main() -> Result<()> { + let matches = clap::Command::new("casr-ubsan") + .version(clap::crate_version!()) + .about("Triage errors found by UndefinedBehaviorSanitizer and create CASR reports (.casrep)") + .term_width(90) + .arg( + Arg::new("log-level") + .long("log-level") + .short('l') + .action(ArgAction::Set) + .default_value("info") + .value_parser(["info", "debug"]) + .help("Logging level") + ) + .arg( + Arg::new("jobs") + .long("jobs") + .short('j') + .action(ArgAction::Set) + .help("Number of parallel jobs for generating CASR reports [default: half of cpu cores]") + .value_parser(clap::value_parser!(u32).range(1..)) + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .action(ArgAction::Set) + .value_name("SECONDS") + .help("Timeout (in seconds) for target execution [default: disabled]") 
+ .value_parser(clap::value_parser!(u64).range(1..)) + ) + .arg( + Arg::new("input") + .short('i') + .long("input") + .action(ArgAction::Set) + .required(true) + .num_args(1..) + .value_name("INPUT_DIRS") + .help("Target input directory list") + .value_parser(move |arg: &str| { + let i_dir = Path::new(arg); + if !i_dir.exists() { + let mut err = clap::Error::new(ErrorKind::ValueValidation); + err.insert(ContextKind::InvalidValue, ContextValue::String("Input directory doesn't exist.".to_owned())); + return Err(err); + } + if !i_dir.is_dir() { + let mut err = clap::Error::new(ErrorKind::ValueValidation); + err.insert(ContextKind::InvalidValue, ContextValue::String("Input path should be a directory.".to_owned())); + return Err(err); + } + Ok(i_dir.to_path_buf()) + }) + ) + .arg( + Arg::new("output") + .short('o') + .long("output") + .action(ArgAction::Set) + .required(true) + .value_name("OUTPUT_DIR") + .value_parser(clap::value_parser!(PathBuf)) + .help("Output directory with triaged reports") + ) + .arg( + Arg::new("ARGS") + .action(ArgAction::Set) + .required(false) + .num_args(1..) + .last(true) + .help("Add \"-- \" to run"), + ) + .get_matches(); + + // Init log. + util::initialize_logging(&matches); + + debug!("Get args"); + // Get input dict list + let input_dirs: Vec<_> = matches.get_many::("input").unwrap().collect(); + // Get output dir + let output_dir = matches.get_one::("output").unwrap(); + if !output_dir.exists() { + fs::create_dir_all(output_dir).with_context(|| { + format!("Couldn't create output directory {}", output_dir.display()) + })?; + } else if !output_dir.is_dir() { + bail!("Output directory must be a directory"); + } else if output_dir.read_dir()?.next().is_some() { + bail!("Output directory is not empty."); + } + // Get program args. 
+ let argv: Vec<&str> = if let Some(argvs) = matches.get_many::("ARGS") { + argvs.map(|s| s.as_str()).collect() + } else { + bail!("Wrong arguments for starting program"); + }; + + // Get timeout + let timeout = if let Some(timeout) = matches.get_one::("timeout") { + *timeout + } else { + 0 + }; + + // Get input path list + debug!("Get input path list"); + let mut inputs: Vec = vec![]; + // Do without paralleling to preserve the specified order + for input_dir in input_dirs { + for path in WalkDir::new(input_dir) + .sort_by_file_name() + .into_iter() + .filter_map(|file| file.ok()) + .filter(|file| file.metadata().unwrap().is_file()) + .map(|file| file.path().to_path_buf()) + { + inputs.push(path); + } + } + + // Get number of threads + debug!("Get number of threads"); + let jobs = if let Some(jobs) = matches.get_one::("jobs") { + *jobs as usize + } else { + std::cmp::max(1, num_cpus::get() / 2) + }; + let num_of_threads = jobs.min(inputs.len()).max(1); + let custom_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_of_threads) + .build() + .unwrap(); + + // Set ubsan env options + debug!("Set environment"); + if let Ok(mut ubsan_options) = env::var("UBSAN_OPTIONS") { + if ubsan_options.starts_with(',') { + ubsan_options.remove(0); + } + ubsan_options = ubsan_options.replace("print_stacktrace=0", "print_stacktrace=1"); + ubsan_options = ubsan_options.replace("report_error_type=0", "report_error_type=1"); + env::set_var("UBSAN_OPTIONS", ubsan_options); + } else { + env::set_var("UBSAN_OPTIONS", "print_stacktrace=1,report_error_type=1"); + } + + // Extract ubsan warnings + info!("Extracting ubsan warnings..."); + info!("Using {} threads", num_of_threads); + let warnings: Vec<(&PathBuf, Vec<(UbsanWarning, CrashLine)>)> = custom_pool.install(|| { + inputs + .par_iter() + .map(|input| (input, extract_warnings(input, &argv, timeout))) + .filter(|(_input, input_warnings)| input_warnings.is_ok()) + .map(|(input, input_warnings)| (input, input_warnings.unwrap())) + 
.collect() + }); + + info!( + "Number of ubsan warnings: {}", + warnings + .iter() + .map(|(_input, input_warnings)| input_warnings.len()) + .sum::() + ); + + // Create report with equal parts for all reports + let mut pre_report = CrashReport::new(); + pre_report.executable_path = argv[0].to_string(); + let _ = pre_report.add_os_info(); + let _ = pre_report.add_proc_environ(); + + info!("Deduplicating CASR reports..."); + // Init dedup crashline list + let mut crashlines: HashSet = HashSet::new(); + let mut to_gen: Vec<(PathBuf, UbsanWarning, CrashLine)> = vec![]; + // Dedup warnings by crashline + // Do without paralleling to preserve the specified order + for (input, input_warnings) in warnings { + for (warning, crashline) in input_warnings { + if crashlines.insert(crashline.to_string()) { + to_gen.push((input.clone(), warning, crashline)); + } + } + } + + info!( + "Number of ubsan warnings after deduplication: {}", + crashlines.len() + ); + + // Generate CASR reports + info!("Generating CASR reports..."); + custom_pool.install(|| { + to_gen + .par_iter() + .try_for_each(|(input, warning, crashline)| { + let Ok(report) = gen_report(input, warning, crashline, &argv, &pre_report) else { + bail!("Can't generate report"); + }; + // Save report + save_report(report, output_dir, input) + }) + })?; + + Ok(()) +} diff --git a/casr/src/util.rs b/casr/src/util.rs index fc3de0a5..fd6edf8d 100644 --- a/casr/src/util.rs +++ b/casr/src/util.rs @@ -27,7 +27,7 @@ pub fn output_report(report: &CrashReport, matches: &ArgMatches, argv: &[&str]) // Convert report to string. 
let repstr = serde_json::to_string_pretty(&report).unwrap(); - if matches.get_flag("stdout") { + if matches.contains_id("stdout") && matches.get_flag("stdout") { println!("{repstr}\n"); } diff --git a/casr/tests/casr_tests/ubsan/input1/input b/casr/tests/casr_tests/ubsan/input1/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/input1/input3/input b/casr/tests/casr_tests/ubsan/input1/input3/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/input2/input b/casr/tests/casr_tests/ubsan/input2/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/test_ubsan.cpp b/casr/tests/casr_tests/ubsan/test_ubsan.cpp new file mode 100644 index 00000000..fec2f06e --- /dev/null +++ b/casr/tests/casr_tests/ubsan/test_ubsan.cpp @@ -0,0 +1,8 @@ +#include + +int main() { + (void)(uint16_t(0xffff) * uint16_t(0x8001)); + int x = 1; + x / 0; + return 0; +} diff --git a/casr/tests/tests.rs b/casr/tests/tests.rs index 7a0ac9ea..fe0470d5 100644 --- a/casr/tests/tests.rs +++ b/casr/tests/tests.rs @@ -17,6 +17,7 @@ lazy_static::lazy_static! 
{ static ref EXE_CASR_LIBFUZZER: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-libfuzzer")); static ref EXE_CASR_CLUSTER: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-cluster")); static ref EXE_CASR_SAN: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-san")); + static ref EXE_CASR_UBSAN: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-ubsan")); static ref EXE_CASR_PYTHON: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-python")); static ref EXE_CASR_GDB: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-gdb")); static ref PROJECT_DIR: RwLock<&'static str> = RwLock::new(env!("CARGO_MANIFEST_DIR")); @@ -3500,6 +3501,102 @@ fn test_casr_afl() { let _ = fs::remove_file("/tmp/load_afl"); } +#[test] +#[cfg(target_arch = "x86_64")] +fn test_casr_ubsan() { + // Copy files to tmp dir + let work_dir = abs_path("tests/casr_tests/ubsan"); + let test_dir = abs_path("tests/tmp_tests_casr/test_casr_ubsan"); + + let output = Command::new("cp") + .args(["-r", &work_dir, &test_dir]) + .output() + .expect("failed to copy dir"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let paths = [ + abs_path("tests/tmp_tests_casr/test_casr_ubsan/test_ubsan.cpp"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/test_ubsan"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/input1"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/input2"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/out"), + ]; + + // Create out dir + let output = Command::new("mkdir") + .arg(&paths[4]) + .output() + .expect("failed to create dir"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let clang = Command::new("bash") + .arg("-c") + .arg(format!( + "clang++ -fsanitize=undefined -O0 -g {} -o {}", + &paths[0], 
&paths[1] + )) + .status() + .expect("failed to execute clang++"); + + assert!(clang.success()); + + let output = Command::new(*EXE_CASR_UBSAN.read().unwrap()) + .args(["--input", &paths[2], &paths[3]]) + .args(["--output", &paths[4]]) + .args(["--", &paths[1], "@@"]) + .output() + .expect("failed to start casr-ubsan"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let res = String::from_utf8_lossy(&output.stderr); + + assert!(!res.is_empty()); + + let re = Regex::new(r"Number of ubsan warnings: (?P\d+)").unwrap(); + let casrep_cnt = re + .captures(&res) + .unwrap() + .name("casrep") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(casrep_cnt, 6, "Invalid number of warnings"); + + let re = Regex::new(r"Number of ubsan warnings after deduplication: (?P\d+)").unwrap(); + let unique_cnt = re + .captures(&res) + .unwrap() + .name("unique") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(unique_cnt, 2, "Invalid number of deduplicated reports"); + + let _ = fs::remove_dir_all(&test_dir); +} + #[test] #[cfg(target_arch = "x86_64")] fn test_casr_libfuzzer() { diff --git a/docs/usage.md b/docs/usage.md index 408614d1..a9f88e34 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,10 +1,17 @@ # Usage CASR is a set of tools that allows you to collect crash reports in different -ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze ASAN -reports. Try `casr-gdb` to get reports from gdb. `casr-cli` is meant to provide -TUI for viewing reports. Reports triage (deduplication, clustering) is done by -`casr-cluster`. +ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze +ASAN reports or `casr-ubsan` to analyze UBSAN reports. Try `casr-gdb` to get +reports from gdb. 
Use `casr-python` to analyze python reports and get report +from [Atheris](https://github.com/google/atheris). `casr-afl` is used to triage +crashes found by [AFL++](https://github.com/AFLplusplus/AFLplusplus). +`casr-libfuzzer` can triage crashes found by +[libFuzzer](https://www.llvm.org/docs/LibFuzzer.html). `casr-dojo` allows to +upload new and unique CASR reports to +[DefectDojo](https://github.com/DefectDojo/django-DefectDojo). `casr-cli` is +meant to provide TUI for viewing reports. Reports triage (deduplication, +clustering) is done by `casr-cluster`. ## casr-gdb @@ -31,7 +38,7 @@ Example: ## casr-san -Create CASR reports (.casrep) from sanitizer reports +Create CASR reports (.casrep) from AddressSanitizer reports Usage: casr-san [OPTIONS] <--stdout|--output > [-- ...] @@ -56,6 +63,42 @@ Run casr-san: $ casr-san -o asan.casrep -- ./test_asan_df +## casr-ubsan + +Triage errors found by UndefinedBehaviorSanitizer and create CASR reports (.casrep) + + Usage: casr-ubsan [OPTIONS] --input ... --output [-- ...] + + Arguments: + [ARGS]... Add "-- " to run + + Options: + -l, --log-level Logging level [default: info] [possible values: info, + debug] + -j, --jobs Number of parallel jobs for generating CASR reports + [default: half of cpu cores] + -t, --timeout Timeout (in seconds) for target execution [default: + disabled] + -i, --input ... Target input directory list + -o, --output Output directory with triaged reports + -h, --help Print help + -V, --version Print version + +Compile binary with UBSAN: + + $ clang++ -fsanitize=undefined -O0 -g casr/tests/casr_tests/ubsan/test_ubsan.cpp -o test_ubsan + +Run casr-ubsan: + + $ casr-ubsan -i input -o output -- ./test_ubsan @@ + +Get summary + + $ casr-cli output + +Ubsan error deduplication is based on crashline comparison. The idea is to run +deduplication to remove equal ubsan errors, then run report generation. 
+ ## casr-python Create CASR reports (.casrep) from python reports diff --git a/libcasr/src/lib.rs b/libcasr/src/lib.rs index 55d1270a..951916dc 100644 --- a/libcasr/src/lib.rs +++ b/libcasr/src/lib.rs @@ -6,6 +6,7 @@ //! It can analyze crashes from different sources: //! //! * AddressSanitizer +//! * UndefinedBehaviorSanitizer //! * Gdb output //! //! and program languages: @@ -31,3 +32,4 @@ pub mod report; pub mod rust; pub mod severity; pub mod stacktrace; +pub mod ubsan; diff --git a/libcasr/src/report.rs b/libcasr/src/report.rs index 652f233b..0f2f2492 100644 --- a/libcasr/src/report.rs +++ b/libcasr/src/report.rs @@ -184,6 +184,13 @@ pub struct CrashReport { )] #[cfg_attr(feature = "serde", serde(default))] pub asan_report: Vec, + /// Ubsan report. + #[cfg_attr( + feature = "serde", + serde(rename(serialize = "UbsanReport", deserialize = "UbsanReport")) + )] + #[cfg_attr(feature = "serde", serde(default))] + pub ubsan_report: Vec, /// Python report. #[cfg_attr( feature = "serde", @@ -666,6 +673,12 @@ impl fmt::Display for CrashReport { report += &(self.asan_report.join("\n") + "\n"); } + // UBSANreport + if !self.ubsan_report.is_empty() { + report += "\n===UbsanReport===\n"; + report += &(self.ubsan_report.join("\n") + "\n"); + } + // PythonReport if !self.python_report.is_empty() { report += "\n===PythonReport===\n"; diff --git a/libcasr/src/ubsan.rs b/libcasr/src/ubsan.rs new file mode 100644 index 00000000..819b5482 --- /dev/null +++ b/libcasr/src/ubsan.rs @@ -0,0 +1,261 @@ +//! UndefinedBehaviorSanitizer module implements `Severity` and `CrashLineExt` traits for UndefinedBehaviorSanitizer warnings. +use crate::asan::AsanStacktrace; +use crate::severity::Severity; +use crate::stacktrace::{CrashLine, CrashLineExt}; +use crate::stacktrace::{ParseStacktrace, Stacktrace}; + +use crate::error::*; +use crate::execution_class::ExecutionClass; +use regex::Regex; + +use std::fmt; + +/// Structure provides an interface for parsing ubsan runtime error message. 
+#[derive(Clone)] +pub struct UbsanWarning { + message: String, +} + +impl fmt::Debug for UbsanWarning { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl UbsanWarning { + /// Return all ubsan error messages as a vector + pub fn extract_stacktrace(&self) -> Result> { + AsanStacktrace::extract_stacktrace(&self.message) + } + /// Return stacktrace by specified vector of lines + pub fn parse_stacktrace(entries: &[String]) -> Result { + AsanStacktrace::parse_stacktrace(entries) + } + /// Get ubsan runtime error message as a vector of lines + pub fn ubsan_report(&self) -> Vec { + self.message.split('\n').map(|s| s.to_string()).collect() + } +} + +impl Severity for UbsanWarning { + fn severity(&self) -> Result { + let message: Vec = self + .message + .split('\n') + .map(|l| l.trim_end().to_string()) + .collect(); + if message.len() <= 1 { + return Err(Error::Casr("Malformed ubsan message".to_string())); + } + // Get description (from first line) + let description = message.first().unwrap(); + let re = Regex::new(r#".+: runtime error: (.+)"#).unwrap(); + let Some(cap) = re.captures(description) else { + return Err(Error::Casr(format!( + "Couldn't parse error description: {description}" + ))); + }; + let description = cap.get(1).unwrap().as_str().to_string(); + // Get short description (from last line) + let short_description = message.last().unwrap(); + let re = Regex::new(r#"SUMMARY: UndefinedBehaviorSanitizer: (\S+)"#).unwrap(); + let Some(cap) = re.captures(short_description) else { + return Err(Error::Casr(format!( + "Couldn't parse ubsan summary: {short_description}" + ))); + }; + let short_description = cap.get(1).unwrap().as_str().to_string(); + + Ok(ExecutionClass::new(( + "NOT_EXPLOITABLE", + &short_description, + &description, + "", + ))) + } +} + +impl CrashLineExt for UbsanWarning { + fn crash_line(&self) -> Result { + let message: Vec = self + .message + .split('\n') + .map(|l| l.trim_end().to_string()) + 
.collect(); + let mut re = Regex::new(r#".+ (.+):(\d+):\d+"#).unwrap(); + // If there is no stacktrace use crashline from first string + // May be not absolute + // Else use first string from stacktrace + let crashline = + if message.len() == 2 || message.len() == 3 && message[2].contains(" note: ") { + re = Regex::new(r#"(.+):(\d+):\d+ runtime error: "#).unwrap(); + &message[0] + } else if message[1].contains(" note: ") { + &message[2] + } else { + &message[1] + }; + + // Get file path and offset from crashline + let Some(cap) = re.captures(crashline) else { + return Err(Error::Casr(format!( + "Couldn't parse error crashline: {crashline}" + ))); + }; + let file = cap.get(1).unwrap().as_str().to_string(); + let offset = cap.get(2).unwrap().as_str().parse::(); + let Ok(offset) = offset else { + return Err(Error::Casr(format!( + "Couldn't parse crashline offset: {crashline}" + ))); + }; + Ok(CrashLine::Module { file, offset }) + } +} + +/// Extract ubsan warnings form stderr +/// +/// # Arguments +/// +/// * `stderr` - output containing ubsan warnings +/// +/// # Return value +/// +/// Ubsan warning struct vector +pub fn extract_ubsan_warnings(stderr: &str) -> Result> { + let mut ubsan_warnings: Vec = vec![]; + let re = + Regex::new(r#"(.+: runtime error: (?:.*\n)*?SUMMARY: UndefinedBehaviorSanitizer: .*)"#) + .unwrap(); + for cap in re.captures_iter(stderr) { + let message = cap[0].to_string(); + ubsan_warnings.push(UbsanWarning { message }); + } + Ok(ubsan_warnings) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ubsan_parse() { + let stderr = + "/tarantool/src/box/sql/vdbeaux.c:1417:6: runtime error: implicit conversion from type 'int' of value -8 (32-bit, signed) to type 'unsigned long' changed the value to 18446744073709551608 (64-bit, unsigned) + #0 0x14529af in sqlVdbeMakeReady /tarantool/src/box/sql/vdbeaux.c:1417:6 + #1 0xd94ff7 in sql_finish_coding /tarantool/src/box/sql/build.c:109:3 + #2 0x1291e28 in sql_code_ast 
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds libFuzzer output holding two ubsan warnings through the parser and checks,
    /// for each warning, the report length, stack trace depth, severity and crash line.
    #[test]
    fn test_ubsan_parse() {
        let stderr = "/tarantool/src/box/sql/vdbeaux.c:1417:6: runtime error: implicit conversion from type 'int' of value -8 (32-bit, signed) to type 'unsigned long' changed the value to 18446744073709551608 (64-bit, unsigned)
    #0 0x14529af in sqlVdbeMakeReady /tarantool/src/box/sql/vdbeaux.c:1417:6
    #1 0xd94ff7 in sql_finish_coding /tarantool/src/box/sql/build.c:109:3
    #2 0x1291e28 in sql_code_ast /tarantool/src/box/sql/tokenize.c:506:3
    #3 0x128f24c in sqlRunParser /tarantool/src/box/sql/tokenize.c:585:2
    #4 0x10d6e5b in sql_stmt_compile /tarantool/src/box/sql/prepare.c:79:4
    #5 0xd01caf in sql_fuzz /tarantool/src/box/sql.c:1730:6
    #6 0x8ced0e in TestOneProtoInput(sql_query::SQLQuery const&) /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:50:2
    #7 0x8ce0d9 in LLVMFuzzerTestOneInput /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:38:1
    #8 0x7f4131 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:611:15
    #9 0x7de03c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:324:6
    #10 0x7e3d8b in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:860:9
    #11 0x80d342 in main /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
    #12 0x7f296f4d7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)
    #13 0x7d895d in _start (/sql_fuzzer+0x7d895d)

SUMMARY: UndefinedBehaviorSanitizer: implicit-integer-sign-change /tarantool/src/box/sql/vdbeaux.c:1417:6 in
Executed sql-out/corpus/7daf7545bad605f9ea192f6523d5427c757e56a4 in 66 ms
***
*** NOTE: fuzzing was not performed, you have only
*** executed the target code on a fixed set of inputs.
***
/tarantool/src/lib/small/include/small/lf_lifo.h:86:59: runtime error: applying non-zero offset 1 to null pointer
    #0 0x3f6a87e in lf_lifo_push /tarantool/src/lib/small/include/small/lf_lifo.h:86:59
    #1 0x3f6a162 in slab_unmap /tarantool/src/lib/small/small/slab_arena.c:275:2
    #2 0x3ebb1da in slab_cache_destroy /tarantool/src/lib/small/small/slab_cache.c:213:4
    #3 0x3c1773d in cord_destroy /tarantool/src/lib/core/fiber.c:1704:2
    #4 0x3c26a42 in fiber_free /tarantool/src/lib/core/fiber.c:2040:2
    #5 0x8cd6fa in teardown() /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:34:2
    #6 0x7f296fe8df6a  (/lib64/ld-linux-x86-64.so.2+0x11f6a) (BuildId: 4587364908de169dec62ffa538170118c1c3a078)
    #7 0x7f296f4f98a6  (/lib/x86_64-linux-gnu/libc.so.6+0x468a6) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)
    #8 0x7f296f4f9a5f in exit (/lib/x86_64-linux-gnu/libc.so.6+0x46a5f) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)
    #9 0x7e3f43 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
    #10 0x80d342 in main /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
    #11 0x7f296f4d7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)
    #12 0x7d895d in _start (/sql_fuzzer+0x7d895d)

SUMMARY: UndefinedBehaviorSanitizer: nullptr-with-nonzero-offset /tarantool/src/lib/small/include/small/lf_lifo.h:86:59 in";

        // Both warnings must be picked up; the fuzzer chatter between them is ignored
        let warnings = extract_ubsan_warnings(stderr).unwrap_or_else(|err| panic!("{}", err));
        assert_eq!(warnings.len(), 2);

        // First warning: implicit integer sign change
        {
            let first = &warnings[0];
            assert_eq!(first.ubsan_report().len(), 17);

            // Stack trace depth
            let frames = first
                .extract_stacktrace()
                .unwrap_or_else(|err| panic!("{}", err));
            assert_eq!(frames.len(), 14);

            // Severity classification
            let class = first.severity().unwrap_or_else(|err| panic!("{}", err));
            assert_eq!(class.severity, "NOT_EXPLOITABLE");
            assert_eq!(class.short_description, "implicit-integer-sign-change");
            assert_eq!(
                class.description,
                "implicit conversion from type 'int' of value -8 (32-bit, signed) to type 'unsigned long' changed the value to 18446744073709551608 (64-bit, unsigned)"
            );
            assert_eq!(class.explanation, "");

            // Crash line taken from the top stack frame
            match first.crash_line() {
                Ok(CrashLine::Module { file, offset }) => {
                    assert_eq!(file, "/tarantool/src/box/sql/vdbeaux.c");
                    assert_eq!(offset, 1417);
                }
                other => panic!("{}", other.err().unwrap()),
            }
        }

        // Second warning: non-zero offset applied to a null pointer
        {
            let second = &warnings[1];
            assert_eq!(second.ubsan_report().len(), 16);

            // Stack trace depth
            let frames = second
                .extract_stacktrace()
                .unwrap_or_else(|err| panic!("{}", err));
            assert_eq!(frames.len(), 13);

            // Severity classification
            let class = second.severity().unwrap_or_else(|err| panic!("{}", err));
            assert_eq!(class.severity, "NOT_EXPLOITABLE");
            assert_eq!(class.short_description, "nullptr-with-nonzero-offset");
            assert_eq!(
                class.description,
                "applying non-zero offset 1 to null pointer"
            );
            assert_eq!(class.explanation, "");

            // Crash line taken from the top stack frame
            match second.crash_line() {
                Ok(CrashLine::Module { file, offset }) => {
                    assert_eq!(file, "/tarantool/src/lib/small/include/small/lf_lifo.h");
                    assert_eq!(offset, 86);
                }
                other => panic!("{}", other.err().unwrap()),
            }
        }
    }
}