diff --git a/README.md b/README.md index 7fcd6c4c..dd579e0a 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ # CASR: Crash Analysis and Severity Report -CASR – collect crash reports, triage, and estimate severity. -It is based on ideas from [exploitable](https://github.com/jfoote/exploitable) and +CASR – collect crash reports, triage, and estimate severity. It is based +on ideas from [exploitable](https://github.com/jfoote/exploitable) and [apport](https://github.com/canonical/apport). CASR is maintained by: @@ -18,8 +18,10 @@ CASR is maintained by: ## Overview CASR is a set of tools that allows you to collect crash reports in different -ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze ASAN -reports. Try `casr-gdb` to get reports from gdb. Use `casr-python` to analyze python reports and get report from [Atheris](https://github.com/google/atheris). +ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze +ASAN reports or `casr-ubsan` to analyze UBSAN reports. Try `casr-gdb` to get +reports from gdb. Use `casr-python` to analyze python reports and get report +from [Atheris](https://github.com/google/atheris). Crash report contains many useful information: severity (like [exploitable](https://github.com/jfoote/exploitable)) for x86, x86\_64, arm32, aarch64, rv32g, rv64g architectures, @@ -52,6 +54,7 @@ crashes. 
It can analyze crashes from different sources: * AddressSanitizer +* UndefinedBehaviorSanitizer * Gdb output and program languages: @@ -95,11 +98,16 @@ Create report from coredump: $ casr-core -f casr/tests/casr_tests/bin/core.test_destAv -e casr/tests/casr_tests/bin/test_destAv -o destAv.casrep -Create report from sanitizers output: +Create report from AddressSanitizer output: $ clang++ -fsanitize=address -O0 -g casr/tests/casr_tests/test_asan_df.cpp -o test_asan_df $ casr-san -o asan.casrep -- ./test_asan_df +Create report from UndefinedBehaviorSanitizer output: + + $ clang++ -fsanitize=undefined,fuzzer-no-link -O0 -g test_ubsan.cpp -o test_ubsan + $ casr-ubsan -i input -o output -- /test_ubsan @@ + Create report from gdb: $ casr-gdb -o destAv.gdb.casrep -- casr/tests/casr_tests/bin/test_destAv $(printf 'A%.s' {1..200}) @@ -163,18 +171,20 @@ When you have crashes from fuzzing you may do the following steps: 1. Create reports for all crashes via `casr-san`, `casr-gdb` (if no sanitizers are present), or `casr-python`. -2. Deduplicate collected reports via `casr-cluster -d`. -3. Cluster deduplicated reports via `casr-cluster -c`. -4. View reports from clusters using `casr-cli` or upload them to +2. Deduplicate collected crash reports via `casr-cluster -d`. +3. Cluster deduplicated crash reports via `casr-cluster -c`. +4. Create reports and deduplicate them for all UBSAN errors via `casr-ubsan`. +5. View reports from clusters using `casr-cli` or upload them to [DefectDojo](https://github.com/DefectDojo/django-DefectDojo) with `casr-dojo`. If you use [AFL++](https://github.com/AFLplusplus/AFLplusplus), whole pipeline -could be done automatically by `casr-afl`. +(without step 4 and `casr-dojo`) could be done automatically by `casr-afl`. If you use [libFuzzer](https://www.llvm.org/docs/LibFuzzer.html) based fuzzer (C/C++/[go-fuzz](https://github.com/dvyukov/go-fuzz)/[Atheris](https://github.com/google/atheris)), -whole pipeline could be done automatically by `casr-libfuzzer`. 
+whole pipeline (without step 4 and `casr-dojo`) could be done automatically by +`casr-libfuzzer`. ## Contributing diff --git a/casr/Cargo.toml b/casr/Cargo.toml index 5e918f10..0cc8d5de 100644 --- a/casr/Cargo.toml +++ b/casr/Cargo.toml @@ -33,6 +33,7 @@ walkdir = "2" reqwest = { version = "0.11", features = ["json", "multipart", "rustls-tls"], default_features = false, optional = true } tokio = { version = "1", features = ["rt", "macros"], optional = true } toml = { version = "0.7", optional = true } +wait-timeout = "0.1.5" libcasr = { path = "../libcasr", version = "2.6.0", features = ["serde", "exploitable"] } diff --git a/casr/src/bin/casr-cli.rs b/casr/src/bin/casr-cli.rs index dc2dccb7..0ce9adfe 100644 --- a/casr/src/bin/casr-cli.rs +++ b/casr/src/bin/casr-cli.rs @@ -342,6 +342,16 @@ fn build_tree_report( tree.expand_item(row); } + if !report.ubsan_report.is_empty() { + row = tree + .insert_container_item("UbsanReport".to_string(), Placement::After, row) + .unwrap(); + report.ubsan_report.iter().for_each(|e| { + tree.insert_item(e.clone(), Placement::LastChild, row); + }); + tree.expand_item(row); + } + if !report.python_report.is_empty() { row = tree .insert_container_item("PythonReport".to_string(), Placement::After, row) @@ -522,6 +532,10 @@ fn build_slider_report( select.add_item("AsanReport", report.asan_report.join("\n")); } + if !report.ubsan_report.is_empty() { + select.add_item("UbsanReport", report.ubsan_report.join("\n")); + } + if !report.python_report.is_empty() { select.add_item("PythonReport", report.python_report.join("\n")); } diff --git a/casr/src/bin/casr-san.rs b/casr/src/bin/casr-san.rs index 4b2e157f..e2ae3b69 100644 --- a/casr/src/bin/casr-san.rs +++ b/casr/src/bin/casr-san.rs @@ -28,7 +28,7 @@ use std::process::Command; fn main() -> Result<()> { let matches = clap::Command::new("casr-san") .version(clap::crate_version!()) - .about("Create CASR reports (.casrep) from sanitizer reports") + .about("Create CASR reports (.casrep) from 
AddressSanitizer reports") .term_width(90) .arg( Arg::new("output") diff --git a/casr/src/bin/casr-ubsan.rs b/casr/src/bin/casr-ubsan.rs new file mode 100644 index 00000000..4f10db3b --- /dev/null +++ b/casr/src/bin/casr-ubsan.rs @@ -0,0 +1,442 @@ +use casr::util; +use libcasr::report::CrashReport; +use libcasr::severity::Severity; +use libcasr::stacktrace::{CrashLine, CrashLineExt}; +use libcasr::ubsan; +use libcasr::ubsan::UbsanWarning; + +use anyhow::{bail, Context, Result}; +use clap::{ + error::{ContextKind, ContextValue, ErrorKind}, + Arg, ArgAction, +}; +use log::{debug, error, info, warn}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use regex::Regex; +use wait_timeout::ChildExt; +use walkdir::WalkDir; + +use std::collections::HashMap; +use std::fs; +use std::fs::OpenOptions; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +/// Extract ubsan warnings for specified input file +/// +/// # Arguments +/// +/// * `input` - input file path +/// +/// * `argv` - target program argument vector +/// +/// * `warnings` - input-warning tuple vector +/// +/// * `warning_counter` - report amount counter +/// +/// * `timeout` - target program timeout +/// +/// * `timeout_dir` - timeout input file dir +/// +/// * `timeout_counter` - total timeout input file counter +fn extract_warnings( + input: &PathBuf, + argv: &[&str], + warnings: &Arc>>, + warning_counter: &Arc>, + timeout: u64, + timeout_dir: &Path, + timeout_counter: &Arc>, +) -> Result<()> { + // Get command line argv + let mut argv = argv.to_owned(); + let input_arg = &input.clone().into_os_string().into_string().unwrap(); + let stdin = !argv.contains(&"@@"); + if !stdin { + let input_index = argv.iter().position(|&val| val == "@@").unwrap(); + argv[input_index] = input_arg; + } + // Run program. 
+ let mut cmd = Command::new(argv[0]); + cmd.stdout(Stdio::null()).stderr(Stdio::piped()); + if stdin { + if let Ok(file) = fs::File::open(input) { + cmd.stdin(file); + } + } + if argv.len() > 1 { + cmd.args(&argv[1..]); + } + debug!("Run {:?}", cmd); + let mut child = cmd + .spawn() + .unwrap_or_else(|_| panic!("Failed to start command: {cmd:?}")); + + // Check timeout + if timeout != 0 + && child + .wait_timeout(Duration::from_secs(timeout)) + .unwrap() + .is_none() + { + let _ = child.kill(); + let mut timeout_counter = timeout_counter.lock().unwrap(); + *timeout_counter += 1; + let timeout_name = format!("{}-timeout", input.file_name().unwrap().to_str().unwrap()); + let timeout_path = Path::new(timeout_dir).join(timeout_name); + if fs::copy(input, timeout_path.clone()).is_err() { + error!("Error occurred while copying the file: {:?}", input); + } + warn!("Timeout: {:?}", timeout_path); + return Ok(()); + } + + // Get stderr + let mut stderr = vec![]; + let _ = child.stderr.unwrap().read_to_end(&mut stderr); + let stderr = String::from_utf8_lossy(&stderr); + + // Extract ubsan warnings + let extracted_warnings = ubsan::extract_ubsan_warnings(&stderr)?; + for warning in &extracted_warnings { + // Add report to hashmap + let mut warnings = warnings.lock().unwrap(); + warnings.push((input.clone(), warning.clone())); + } + // Update counter + let mut warning_counter = warning_counter.lock().unwrap(); + *warning_counter += extracted_warnings.len(); + Ok(()) +} + +/// Generate ubsan report for specified input file +/// +/// # Arguments +/// +/// * `input` - input file path +/// +/// * `warning` - target warning +/// +/// * `reports` - report list +/// +/// * `argv` - target program argument vector +/// +/// * `pre_report` - report template containing identic values +fn gen_report( + input: &PathBuf, + warning: &UbsanWarning, + reports: &Arc>>>, + argv: &[&str], + pre_report: &CrashReport, +) -> Result<()> { + // Get command line argv + let mut argv = argv.to_owned(); 
+ let input_arg = &input.clone().into_os_string().into_string().unwrap(); + if argv.contains(&"@@") { + let input_index = argv.iter().position(|&val| val == "@@").unwrap(); + argv[input_index] = input_arg; + } + let args = argv.join(" "); + debug!("Generating reports for {:?}", args); + // Create report + let mut report = pre_report.clone(); + report.proc_cmdline = args; + report.ubsan_report = warning.ubsan_report(); + // Get stacktrace + if let Ok(stacktrace) = warning.extract_stacktrace() { + report.stacktrace = stacktrace; + } + // Get execution class + if let Ok(execution_class) = warning.severity() { + report.execution_class = execution_class; + } + // Get crash line + if let Ok(crash_line) = warning.crash_line() { + report.crashline = crash_line.to_string(); + if let CrashLine::Source(debug) = crash_line { + if let Some(sources) = CrashReport::sources(&debug) { + report.source = sources; + } + } + } + // Add report to hashmap + if let Some(reports_by_input) = reports.lock().unwrap().get_mut(input) { + reports_by_input.push(report); + } + Ok(()) +} + +/// Save ubsan report +/// +/// # Arguments +/// +/// * `report` - saving report +/// +/// * `output_dir` - report saving directory +/// +/// * `input` - input file path (need for unique name) +/// +/// * `crashline` - crashline (need for unique name) +fn save_report( + report: &CrashReport, + output_dir: &Path, + input: &Path, + crashline: &str, +) -> Result<()> { + // Convert report to string. 
+ let repstr = serde_json::to_string_pretty(&report).unwrap(); + + // Get input dir name + let dir_name = input.parent().unwrap().file_name().unwrap(); + let input_name = input.file_name().unwrap(); + // Get crashline file name and line num + let re = Regex::new(r#"(.+)\+0x([0-9a-f]+)"#).unwrap(); + let Some(cap) = re.captures(crashline) else { + bail!("Couldn't parse error crashline: {crashline}"); + }; + let file_name = Path::new(cap.get(1).unwrap().as_str()).file_name().unwrap(); + let line = i64::from_str_radix(cap.get(2).unwrap().as_str(), 16).unwrap(); + let mut report_path = PathBuf::new(); + report_path.push(output_dir); + report_path.push(format!( + "{}_{}_{}_{}.casrep", + dir_name.to_str().unwrap(), + input_name.to_str().unwrap(), + file_name.to_str().unwrap(), + line + )); + if let Ok(mut file) = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&report_path) + { + file.write_all(repstr.as_bytes()).with_context(|| { + format!( + "Couldn't write data to report file `{}`", + &report_path.display() + ) + })?; + } else { + bail!("Couldn't save report to file: {}", &report_path.display()); + } + Ok(()) +} + +fn main() -> Result<()> { + let matches = clap::Command::new("casr-ubsan") + .version(clap::crate_version!()) + .about("Triage error found by UndefinedBehaviorSanitizer and create CASR reports (.casrep)") + .term_width(90) + .arg( + Arg::new("log-level") + .long("log-level") + .short('l') + .action(ArgAction::Set) + .default_value("info") + .value_parser(["info", "debug"]) + .help("Logging level") + ) + .arg( + Arg::new("jobs") + .long("jobs") + .short('j') + .action(ArgAction::Set) + .help("Number of parallel jobs for generating CASR reports [default: half of cpu cores]") + .value_parser(clap::value_parser!(u32).range(1..)) + ) + .arg( + Arg::new("timeout") + .short('t') + .long("timeout") + .action(ArgAction::Set) + .value_name("SECONDS") + .help("Timeout (in seconds) for target execution [default: disabled]") + 
.value_parser(clap::value_parser!(u64).range(0..)) + ) + .arg( + Arg::new("input") + .short('i') + .long("input") + .action(ArgAction::Set) + .required(true) + .num_args(1..) + .value_name("INPUT_DIRS") + .help("Target input directory list") + .value_parser(move |arg: &str| { + let i_dir = Path::new(arg); + if !i_dir.exists() { + let mut err = clap::Error::new(ErrorKind::ValueValidation); + err.insert(ContextKind::InvalidValue, ContextValue::String("Input directory doesn't exist.".to_owned())); + return Err(err); + } + if !i_dir.is_dir() { + let mut err = clap::Error::new(ErrorKind::ValueValidation); + err.insert(ContextKind::InvalidValue, ContextValue::String("Input path should be a directory.".to_owned())); + return Err(err); + } + Ok(i_dir.to_path_buf()) + }) + ) + .arg( + Arg::new("output") + .short('o') + .long("output") + .action(ArgAction::Set) + .required(true) + .value_name("OUTPUT_DIR") + .value_parser(clap::value_parser!(PathBuf)) + .help("Output directory with triaged reports") + ) + .arg( + Arg::new("ARGS") + .action(ArgAction::Set) + .required(false) + .num_args(1..) + .last(true) + .help("Add \"-- \" to run"), + ) + .get_matches(); + + // Init log. + util::initialize_logging(&matches); + + debug!("Get args"); + // Get input dict list + let input_dirs: Vec<_> = matches.get_many::("input").unwrap().collect(); + // Get output dir + let output_dir = matches.get_one::("output").unwrap(); + if !output_dir.exists() { + fs::create_dir_all(output_dir).with_context(|| { + format!("Couldn't create output directory {}", output_dir.display()) + })?; + } else if !output_dir.is_dir() { + bail!("Output directory must be a directory"); + } else if output_dir.read_dir()?.next().is_some() { + bail!("Output directory is not empty."); + } + // Get program args. 
+ let argv: Vec<&str> = if let Some(argvs) = matches.get_many::("ARGS") { + argvs.map(|s| s.as_str()).collect() + } else { + bail!("Wrong arguments for starting program"); + }; + + // Get timeout + let timeout = if let Some(timeout) = matches.get_one::("timeout") { + *timeout + } else { + 0 + }; + + // Get timeout dir + let timeout_dir = Path::new(output_dir).join("timeout"); + if fs::create_dir_all(&timeout_dir).is_err() { + bail!("Failed to create dir {}", &timeout_dir.to_str().unwrap()); + } + + // Init reports by inputs hashmap + let reports = Arc::new(Mutex::new(HashMap::new())); + // Get input file list + debug!("Get input file list"); + let mut inputs: Vec = vec![]; + // Do without paralleling to preserve the specified order + for input_dir in input_dirs { + for file in WalkDir::new(input_dir) + .into_iter() + .filter_map(|file| file.ok()) + { + if file.metadata().unwrap().is_file() { + let path = file.path().to_path_buf(); + inputs.push(path.clone()); + reports.lock().unwrap().insert(path, vec![]); + } + } + } + + // Get thread number + debug!("Get thread number"); + let jobs = if let Some(jobs) = matches.get_one::("jobs") { + *jobs as usize + } else { + std::cmp::max(1, num_cpus::get() / 2) + }; + let num_of_threads = jobs.min(inputs.len()).max(1); + let custom_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_of_threads) + .build() + .unwrap(); + + // Set ubsan env options + debug!("Set environment"); + std::env::set_var("UBSAN_OPTIONS", "print_stacktrace=1,report_error_type=1"); + + // Extract ubsan warnings + info!("Extracting ubsan warnings..."); + info!("Using {} threads", num_of_threads); + let warnings = Arc::new(Mutex::new(Vec::new())); + let warning_counter = Arc::new(Mutex::new(0)); + let timeout_counter = Arc::new(Mutex::new(0)); + custom_pool.install(|| { + inputs.par_iter().try_for_each(|input| { + extract_warnings( + input, + &argv, + &warnings, + &warning_counter, + timeout, + &timeout_dir, + &timeout_counter, + ) + }) + })?; + + 
info!( + "Number of ubsan warnings: {}", + warning_counter.lock().unwrap() + ); + + // Create a report template holding the fields shared by all reports + let mut pre_report = CrashReport::new(); + pre_report.executable_path = argv[0].to_string(); + let _ = pre_report.add_os_info(); + let _ = pre_report.add_proc_environ(); + + // Generate CASR reports + info!("Generating CASR reports..."); + let warnings = warnings.lock().unwrap(); + custom_pool.install(|| { + warnings.par_iter().try_for_each(|(input, warning)| { + gen_report(input, warning, &reports, &argv, &pre_report) + }) + })?; + + info!("Deduplicating CASR reports..."); + + // Init the list of crash lines that have already been saved + let mut crashlines: Vec = Vec::new(); + // Go through inputs + // Dedup reports by crashline + // Run sequentially (not in parallel) to preserve the specified input order + for input in &inputs { + if let Some(report_by_input) = reports.lock().unwrap().get(input) { + for report in report_by_input { + if crashlines.contains(&report.crashline) { + continue; + } + crashlines.push(report.crashline.clone()); + // Save report + save_report(report, output_dir, input, &report.crashline)?; + } + } + } + info!( + "Number of ubsan reports after deduplication: {}", + crashlines.len() + ); + + Ok(()) +} diff --git a/casr/src/util.rs b/casr/src/util.rs index fc3de0a5..fd6edf8d 100644 --- a/casr/src/util.rs +++ b/casr/src/util.rs @@ -27,7 +27,7 @@ pub fn output_report(report: &CrashReport, matches: &ArgMatches, argv: &[&str]) // Convert report to string. 
let repstr = serde_json::to_string_pretty(&report).unwrap(); - if matches.get_flag("stdout") { + if matches.contains_id("stdout") && matches.get_flag("stdout") { println!("{repstr}\n"); } diff --git a/casr/tests/casr_tests/ubsan/input1/input b/casr/tests/casr_tests/ubsan/input1/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/input1/input3/input b/casr/tests/casr_tests/ubsan/input1/input3/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/input2/input b/casr/tests/casr_tests/ubsan/input2/input new file mode 100644 index 00000000..e69de29b diff --git a/casr/tests/casr_tests/ubsan/test_ubsan b/casr/tests/casr_tests/ubsan/test_ubsan new file mode 100755 index 00000000..a157b022 Binary files /dev/null and b/casr/tests/casr_tests/ubsan/test_ubsan differ diff --git a/casr/tests/casr_tests/ubsan/test_ubsan.cpp b/casr/tests/casr_tests/ubsan/test_ubsan.cpp new file mode 100644 index 00000000..fec2f06e --- /dev/null +++ b/casr/tests/casr_tests/ubsan/test_ubsan.cpp @@ -0,0 +1,8 @@ +#include + +int main() { + (void)(uint16_t(0xffff) * uint16_t(0x8001)); + int x = 1; + x / 0; + return 0; +} diff --git a/casr/tests/tests.rs b/casr/tests/tests.rs index 7a0ac9ea..06228404 100644 --- a/casr/tests/tests.rs +++ b/casr/tests/tests.rs @@ -17,6 +17,7 @@ lazy_static::lazy_static! 
{ static ref EXE_CASR_LIBFUZZER: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-libfuzzer")); static ref EXE_CASR_CLUSTER: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-cluster")); static ref EXE_CASR_SAN: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-san")); + static ref EXE_CASR_UBSAN: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-ubsan")); static ref EXE_CASR_PYTHON: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-python")); static ref EXE_CASR_GDB: RwLock<&'static str> = RwLock::new(env!("CARGO_BIN_EXE_casr-gdb")); static ref PROJECT_DIR: RwLock<&'static str> = RwLock::new(env!("CARGO_MANIFEST_DIR")); @@ -3500,6 +3501,101 @@ fn test_casr_afl() { let _ = fs::remove_file("/tmp/load_afl"); } +#[test] +fn test_casr_ubsan() { + // Copy files to tmp dir + let work_dir = abs_path("tests/casr_tests/ubsan"); + let test_dir = abs_path("tests/tmp_tests_casr/test_casr_ubsan"); + + let output = Command::new("cp") + .args(["-r", &work_dir, &test_dir]) + .output() + .expect("failed to copy dir"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let paths = [ + abs_path("tests/tmp_tests_casr/test_casr_ubsan/test_ubsan.cpp"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/test_ubsan"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/input1"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/input2"), + abs_path("tests/tmp_tests_casr/test_casr_ubsan/out"), + ]; + + // Create out dir + let output = Command::new("mkdir") + .arg(&paths[4]) + .output() + .expect("failed to create dir"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let clang = Command::new("bash") + .arg("-c") + .arg(format!( + "clang++ -fsanitize=undefined,fuzzer-no-link -O0 -g {} -o {}", + &paths[0], &paths[1] + )) + 
.status() + .expect("failed to execute clang++"); + + assert!(clang.success()); + + let output = Command::new(*EXE_CASR_UBSAN.read().unwrap()) + .args(["--input", &paths[2], &paths[3]]) + .args(["--output", &paths[4]]) + .args(["--", &paths[1], "@@"]) + .output() + .expect("failed to start casr-ubsan"); + + assert!( + output.status.success(), + "Stdout {}.\n Stderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let res = String::from_utf8_lossy(&output.stderr); + + assert!(!res.is_empty()); + + let re = Regex::new(r"Number of ubsan warnings: (?P\d+)").unwrap(); + let casrep_cnt = re + .captures(&res) + .unwrap() + .name("casrep") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(casrep_cnt, 6, "Invalid number of warnings"); + + let re = Regex::new(r"Number of ubsan reports after deduplication: (?P\d+)").unwrap(); + let unique_cnt = re + .captures(&res) + .unwrap() + .name("unique") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(unique_cnt, 2, "Invalid number of deduplicated reports"); + + let _ = fs::remove_dir_all(&test_dir); +} + #[test] #[cfg(target_arch = "x86_64")] fn test_casr_libfuzzer() { diff --git a/docs/usage.md b/docs/usage.md index 408614d1..29b2ebe6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,10 +1,10 @@ # Usage CASR is a set of tools that allows you to collect crash reports in different -ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze ASAN -reports. Try `casr-gdb` to get reports from gdb. `casr-cli` is meant to provide -TUI for viewing reports. Reports triage (deduplication, clustering) is done by -`casr-cluster`. +ways. Use `casr-core` binary to deal with coredumps. Use `casr-san` to analyze +ASAN reports or `casr-ubsan` to analyze UBSAN reports. Try `casr-gdb` to get +reports from gdb. `casr-cli` is meant to provide TUI for viewing reports. 
+Reports triage (deduplication, clustering) is done by `casr-cluster`. ## casr-gdb @@ -31,7 +31,7 @@ Example: ## casr-san -Create CASR reports (.casrep) from sanitizer reports +Create CASR reports (.casrep) from AddressSanitizer reports Usage: casr-san [OPTIONS] <--stdout|--output > [-- ...] @@ -56,6 +56,35 @@ Run casr-san: $ casr-san -o asan.casrep -- ./test_asan_df +## casr-ubsan + +Triage error found by UndefinedBehaviorSanitizer and create CASR reports (.casrep) + +Usage: casr-ubsan [OPTIONS] --input ... --output [-- ...] + +Arguments: + [ARGS]... Add "-- " to run + +Options: + -l, --log-level Logging level [default: info] [possible values: info, + debug] + -j, --jobs Number of parallel jobs for generating CASR reports + [default: half of cpu cores] + -t, --timeout Timeout (in seconds) for target execution [default: + disabled] + -i, --input ... Target input directory list + -o, --output Output directory with triaged reports + -h, --help Print help + -V, --version Print version + +Compile binary with UBSAN: + + $ clang++ -fsanitize=undefined,fuzzer-no-link -O0 -g test_ubsan.cpp -o test_ubsan + +Run casr-ubsan: + + $ casr-ubsan -i input -o output -- /test_ubsan @@ + ## casr-python Create CASR reports (.casrep) from python reports diff --git a/libcasr/src/lib.rs b/libcasr/src/lib.rs index 55d1270a..4e569278 100644 --- a/libcasr/src/lib.rs +++ b/libcasr/src/lib.rs @@ -31,3 +31,4 @@ pub mod report; pub mod rust; pub mod severity; pub mod stacktrace; +pub mod ubsan; diff --git a/libcasr/src/report.rs b/libcasr/src/report.rs index 652f233b..0f2f2492 100644 --- a/libcasr/src/report.rs +++ b/libcasr/src/report.rs @@ -184,6 +184,13 @@ pub struct CrashReport { )] #[cfg_attr(feature = "serde", serde(default))] pub asan_report: Vec, + /// Ubsan report. + #[cfg_attr( + feature = "serde", + serde(rename(serialize = "UbsanReport", deserialize = "UbsanReport")) + )] + #[cfg_attr(feature = "serde", serde(default))] + pub ubsan_report: Vec, /// Python report. 
#[cfg_attr( feature = "serde", @@ -666,6 +673,12 @@ impl fmt::Display for CrashReport { report += &(self.asan_report.join("\n") + "\n"); } + // UBSANreport + if !self.ubsan_report.is_empty() { + report += "\n===UbsanReport===\n"; + report += &(self.ubsan_report.join("\n") + "\n"); + } + // PythonReport if !self.python_report.is_empty() { report += "\n===PythonReport===\n"; diff --git a/libcasr/src/ubsan.rs b/libcasr/src/ubsan.rs new file mode 100644 index 00000000..5024ef19 --- /dev/null +++ b/libcasr/src/ubsan.rs @@ -0,0 +1,282 @@ +//! UndefinedBehaviorSanitizer module implements `Severity` and `CrashLineExt` traits for UndefinedBehaviorSanitizer warnings. +use crate::asan::AsanStacktrace; +use crate::severity::Severity; +use crate::stacktrace::{CrashLine, CrashLineExt}; +use crate::stacktrace::{ParseStacktrace, Stacktrace}; + +use crate::error::*; +use crate::execution_class::ExecutionClass; +use regex::Regex; + +use std::fmt; + +/// Structure provides an interface for parsing ubsan runtime error message. 
+#[derive(Clone)] +pub struct UbsanWarning { + message: String, +} + +impl fmt::Debug for UbsanWarning { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl UbsanWarning { + pub fn extract_stacktrace(&self) -> Result> { + AsanStacktrace::extract_stacktrace(&self.message) + } + pub fn parse_stacktrace(entries: &[String]) -> Result { + AsanStacktrace::parse_stacktrace(entries) + } + /// Get ubsan runtime error message as a line vector + pub fn ubsan_report(&self) -> Vec { + self.message.split('\n').map(|s| s.to_string()).collect() + } +} + +impl Severity for UbsanWarning { + fn severity(&self) -> Result { + let message: Vec = self + .message + .split('\n') + .map(|l| l.trim_end().to_string()) + .collect(); + // Get description (from first line) + let description = message.first().unwrap(); + let re = Regex::new(r#".+: runtime error: (.+)"#).unwrap(); + let Some(cap) = re.captures(description) else { + return Err(Error::Casr(format!( + "Couldn't parse error description: {description}" + ))); + }; + let description = cap.get(1).unwrap().as_str().to_string(); + // Get short description (from last line) + let short_description = message.last().unwrap(); + let re = Regex::new(r#"SUMMARY: UndefinedBehaviorSanitizer: (\S+)"#).unwrap(); + let Some(cap) = re.captures(short_description) else { + return Err(Error::Casr(format!( + "Couldn't parse ubsan summary: {short_description}" + ))); + }; + let short_description = cap.get(1).unwrap().as_str().to_string(); + + Ok(ExecutionClass::new(( + "NOT_EXPLOITABLE", + &short_description, + &description, + "", + ))) + } +} + +impl CrashLineExt for UbsanWarning { + fn crash_line(&self) -> Result { + let message: Vec = self + .message + .split('\n') + .map(|l| l.trim_end().to_string()) + .collect(); + let crashline: String; + let mut re = Regex::new(r#".+ (.+):(\d+):\d+"#).unwrap(); + // If there is no stacktrace use crashline from first string + // May be not absolute + // Else use 
first string from stacktrace + if message.len() == 2 || message.len() == 3 && message[2].contains(" note: ") { + crashline = message[0].clone(); + re = Regex::new(r#"(.+):(\d+):\d+: runtime error: "#).unwrap(); + } else if message[1].contains(" note: ") { + crashline = message[2].clone(); + } else { + crashline = message[1].clone(); + } + + // Get file path and line number (stored as the module offset) from crashline + let Some(cap) = re.captures(&crashline) else { + return Err(Error::Casr(format!( + "Couldn't parse error crashline: {crashline}" + ))); + }; + let file = cap.get(1).unwrap().as_str().to_string(); + let offset = cap.get(2).unwrap().as_str().parse::<u64>(); + let Ok(offset) = offset else { + return Err(Error::Casr(format!( + "Couldn't parse crashline offset: {crashline}" + ))); + }; + Ok(CrashLine::Module { file, offset }) + } +} + +/// Extract ubsan warnings from stderr +/// +/// # Arguments +/// +/// * `stderr` - output containing ubsan warnings +/// +/// # Return value +/// +/// Ubsan warning struct vector; an error if a warning has no `SUMMARY:` line or is shorter than two lines +pub fn extract_ubsan_warnings(stderr: &str) -> Result<Vec<UbsanWarning>> { + let mut ubsan_warnings: Vec<UbsanWarning> = vec![]; + for index in stderr.match_indices("runtime error: ").map(|(i, _)| i) { + // Get ubsan warning start (first byte of the line containing "runtime error: ") + let start = if let Some(start) = stderr[..index].rfind('\n') { + start + 1 + } else { + // No newline before the match: first line (stderr may contain no newline at all, so don't unwrap) + if stderr.find('\n').map_or(true, |nl| nl > index) { + 0 + } else { + return Err(Error::Casr("Couldn't find ubsan warning start".to_string())); + } + }; + let Some(end) = stderr[index..].find("SUMMARY: UndefinedBehaviorSanitizer: ").map(|i| i + index) else { + return Err(Error::Casr( + "Couldn't find ubsan warning end line".to_string() + )); + }; + let end = if let Some(end) = stderr[end..].find('\n').map(|i| i + end) { + end + } else { + // No newline after the summary: last line (stderr may contain no newline at all, so don't unwrap) + if stderr.rfind('\n').map_or(true, |nl| nl < end) { + stderr.len() + } else { + return Err(Error::Casr("Couldn't find ubsan warning end".to_string())); + } + }; + let message = stderr[start..end].to_string(); + if 
message.split('\n').collect::>().len() < 2 { + return Err(Error::Casr(format!("Corrupted ubsan warning: {message}"))); + } + ubsan_warnings.push(UbsanWarning { message }); + } + Ok(ubsan_warnings) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ubsan_parse() { + let stderr = + "/tarantool/src/box/sql/vdbeaux.c:1417:6: runtime error: implicit conversion from type 'int' of value -8 (32-bit, signed) to type 'unsigned long' changed the value to 18446744073709551608 (64-bit, unsigned) + #0 0x14529af in sqlVdbeMakeReady /tarantool/src/box/sql/vdbeaux.c:1417:6 + #1 0xd94ff7 in sql_finish_coding /tarantool/src/box/sql/build.c:109:3 + #2 0x1291e28 in sql_code_ast /tarantool/src/box/sql/tokenize.c:506:3 + #3 0x128f24c in sqlRunParser /tarantool/src/box/sql/tokenize.c:585:2 + #4 0x10d6e5b in sql_stmt_compile /tarantool/src/box/sql/prepare.c:79:4 + #5 0xd01caf in sql_fuzz /tarantool/src/box/sql.c:1730:6 + #6 0x8ced0e in TestOneProtoInput(sql_query::SQLQuery const&) /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:50:2 + #7 0x8ce0d9 in LLVMFuzzerTestOneInput /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:38:1 + #8 0x7f4131 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:611:15 + #9 0x7de03c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:324:6 + #10 0x7e3d8b in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:860:9 + #11 0x80d342 in main /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 + #12 0x7f296f4d7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee) + #13 0x7d895d in _start (/sql_fuzzer+0x7d895d) + +SUMMARY: UndefinedBehaviorSanitizer: implicit-integer-sign-change 
/tarantool/src/box/sql/vdbeaux.c:1417:6 in +Executed sql-out/corpus/7daf7545bad605f9ea192f6523d5427c757e56a4 in 66 ms +*** +*** NOTE: fuzzing was not performed, you have only +*** executed the target code on a fixed set of inputs. +*** +/tarantool/src/lib/small/include/small/lf_lifo.h:86:59: runtime error: applying non-zero offset 1 to null pointer + #0 0x3f6a87e in lf_lifo_push /tarantool/src/lib/small/include/small/lf_lifo.h:86:59 + #1 0x3f6a162 in slab_unmap /tarantool/src/lib/small/small/slab_arena.c:275:2 + #2 0x3ebb1da in slab_cache_destroy /tarantool/src/lib/small/small/slab_cache.c:213:4 + #3 0x3c1773d in cord_destroy /tarantool/src/lib/core/fiber.c:1704:2 + #4 0x3c26a42 in fiber_free /tarantool/src/lib/core/fiber.c:2040:2 + #5 0x8cd6fa in teardown() /tarantool/test/fuzz/sql_fuzzer/sql_fuzzer.cc:34:2 + #6 0x7f296fe8df6a (/lib64/ld-linux-x86-64.so.2+0x11f6a) (BuildId: 4587364908de169dec62ffa538170118c1c3a078) + #7 0x7f296f4f98a6 (/lib/x86_64-linux-gnu/libc.so.6+0x468a6) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee) + #8 0x7f296f4f9a5f in exit (/lib/x86_64-linux-gnu/libc.so.6+0x46a5f) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee) + #9 0x7e3f43 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp + #10 0x80d342 in main /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 + #11 0x7f296f4d7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee) + #12 0x7d895d in _start (/sql_fuzzer+0x7d895d) + +SUMMARY: UndefinedBehaviorSanitizer: nullptr-with-nonzero-offset /tarantool/src/lib/small/include/small/lf_lifo.h:86:59 in"; + // Check warning extract + let warnings = extract_ubsan_warnings(stderr); + let Ok(warnings) = warnings else { + panic!("{}", warnings.err().unwrap()); + }; + assert_eq!(warnings.len(), 2); + + // Check warning + let warning = &warnings[0]; + 
assert_eq!(warning.ubsan_report().len(), 17); + + // Check stacktrace + let stacktrace = warning.extract_stacktrace(); + let Ok(stacktrace) = stacktrace else { + panic!("{}", stacktrace.err().unwrap()); + }; + assert_eq!(stacktrace.len(), 14); + + // Check severity + let execution_class = warning.severity(); + let Ok(execution_class) = execution_class else { + panic!("{}", execution_class.err().unwrap()); + }; + assert_eq!(execution_class.severity, "NOT_EXPLOITABLE"); + assert_eq!( + execution_class.short_description, + "implicit-integer-sign-change" + ); + assert_eq!( + execution_class.description, + "implicit conversion from type 'int' of value -8 (32-bit, signed) to type 'unsigned long' changed the value to 18446744073709551608 (64-bit, unsigned)" + ); + assert_eq!(execution_class.explanation, ""); + + // Check crashline + let crash_line = warning.crash_line(); + if let Ok(CrashLine::Module { file, offset }) = crash_line { + assert_eq!(file, "/tarantool/src/box/sql/vdbeaux.c"); + assert_eq!(offset, 1417); + } else { + panic!("{}", crash_line.err().unwrap()); + } + + // Check warning + let warning = &warnings[1]; + assert_eq!(warning.ubsan_report().len(), 16); + + // Check stacktrace + let stacktrace = warning.extract_stacktrace(); + let Ok(stacktrace) = stacktrace else { + panic!("{}", stacktrace.err().unwrap()); + }; + assert_eq!(stacktrace.len(), 13); + + // Check severity + let execution_class = warning.severity(); + let Ok(execution_class) = execution_class else { + panic!("{}", execution_class.err().unwrap()); + }; + assert_eq!(execution_class.severity, "NOT_EXPLOITABLE"); + assert_eq!( + execution_class.short_description, + "nullptr-with-nonzero-offset" + ); + assert_eq!( + execution_class.description, + "applying non-zero offset 1 to null pointer" + ); + assert_eq!(execution_class.explanation, ""); + + // Check crashline + let crash_line = warning.crash_line(); + if let Ok(CrashLine::Module { file, offset }) = crash_line { + assert_eq!(file, 
"/tarantool/src/lib/small/include/small/lf_lifo.h"); + assert_eq!(offset, 86); + } else { + panic!("{}", crash_line.err().unwrap()); + } + } +}