From 126c3617077dcce9fa38dd6f7ac523acba98a933 Mon Sep 17 00:00:00 2001
From: fedotoff
Date: Wed, 26 Jul 2023 20:39:26 +0300
Subject: [PATCH] sarif module to convert CrashReport into SARIF format.

---
 libcasr/src/lib.rs   |   2 +
 libcasr/src/sarif.rs | 346 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 348 insertions(+)
 create mode 100644 libcasr/src/sarif.rs

diff --git a/libcasr/src/lib.rs b/libcasr/src/lib.rs
index 0ec4b438..f4622873 100644
--- a/libcasr/src/lib.rs
+++ b/libcasr/src/lib.rs
@@ -32,6 +32,8 @@ pub mod java;
 pub mod python;
 pub mod report;
 pub mod rust;
+#[cfg(feature = "serde")]
+pub mod sarif;
 pub mod severity;
 pub mod stacktrace;
 pub mod ubsan;
diff --git a/libcasr/src/sarif.rs b/libcasr/src/sarif.rs
new file mode 100644
index 00000000..5a6ae81b
--- /dev/null
+++ b/libcasr/src/sarif.rs
@@ -0,0 +1,346 @@
+//! Sarif module contains `SarifReport` struct that contains multiple
+//! `CrashReport` structs in SARIF format.
+
+use crate::constants::*;
+use crate::error::{Error, Result};
+use crate::execution_class::{ExecutionClass, CLASSES};
+use crate::init_ignored_frames;
+use crate::report::CrashReport;
+use crate::stacktrace::{
+    STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES,
+};
+
+use serde_json::{json, Map, Value};
+
+use std::path::Path;
+
+/// CASR CrashReports in SARIF format.
+#[derive(Clone, Debug)]
+pub struct SarifReport {
+    /// SARIF json.
+    pub json: Value,
+    /// Current rule id for generated ExecutionClasses.
+    id: u64,
+}
+
+impl Default for SarifReport {
+    /// Same as [`SarifReport::new`].
+    ///
+    /// A derived `Default` would leave `json` as `Value::Null`, and every
+    /// method that indexes into the SARIF skeleton would panic on it.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SarifReport {
+    /// Create new `SarifReport` with blank required fields.
+    ///
+    /// The report holds the schema reference, the SARIF version and a single
+    /// run whose tool driver is named `CASR` and whose rule and result lists
+    /// are empty.
+    pub fn new() -> Self {
+        let json = json!({
+            "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
+            "version": "2.1.0",
+            "runs": [{
+                "tool": {
+                    "driver": {
+                        "name": "CASR",
+                        "rules": [],
+                    },
+                },
+                "results": [],
+            }],
+        });
+        Self { json, id: 0 }
+    }
+
+    /// Set name for SARIF tool:driver.
+    /// NOTE: before using this method,
+    /// use SarifReport::new() to get report.
+    ///
+    /// # Arguments
+    ///
+    /// * 'name' - tool:driver name (default is CASR)
+    ///
+    /// # Panics
+    ///
+    /// Panics if `json` has no `runs[0].tool.driver` object, which can only
+    /// happen when the public `json` field was replaced from outside.
+    pub fn set_name(&mut self, name: &str) {
+        let driver = self
+            .json
+            .pointer_mut("/runs/0/tool/driver")
+            .and_then(Value::as_object_mut)
+            .expect("SARIF report has no runs[0].tool.driver object");
+        driver.insert("name".to_string(), Value::String(name.to_string()));
+    }
+
+    /// SARIF rule from ExecutionClass.
+    /// NOTE: before using this method,
+    /// use SarifReport::new() to get report.
+    ///
+    /// # Arguments
+    ///
+    /// * 'class' - ExecutionClass from CrashReport
+    ///
+    /// # Return
+    ///
+    /// If the rule is new, its json object is returned together with the
+    /// ruleId, otherwise only the ruleId is returned.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.json` has no `runs[0].tool.driver.rules` array.
+    fn rule(&self, class: &ExecutionClass) -> (Option<Value>, String) {
+        let rules = self
+            .json
+            .pointer("/runs/0/tool/driver/rules")
+            .and_then(Value::as_array)
+            .expect("SARIF report has no runs[0].tool.driver.rules array");
+
+        // A rule for this ExecutionClass may already be registered. Return the
+        // id stored with it instead of computing one: for generated ("G") ids
+        // the counter has advanced since the rule was added, so a freshly
+        // computed id would not match the registered rule.
+        let registered_id = rules
+            .iter()
+            .find(|rule| rule["name"].as_str() == Some(class.short_description.as_str()))
+            .and_then(|rule| rule["id"].as_str());
+        if let Some(id) = registered_id {
+            return (None, id.to_string());
+        }
+
+        // Classes listed in CLASSES get an "F<index in CLASSES>" id, any other
+        // class gets a generated "G<counter>" id.
+        let rule_id = match CLASSES
+            .iter()
+            .position(|item| item.1 == class.short_description)
+        {
+            Some(pos) => format!("F{pos}"),
+            None => format!("G{}", self.id),
+        };
+
+        // `json!` only borrows the interpolated values, so `rule_id` is still
+        // available for the return value.
+        let rule = serde_json::json!({
+            "id": rule_id,
+            "name": class.short_description,
+            "shortDescription": { "text": class.description },
+            "fullDescription": { "text": class.explanation },
+            "properties": { "problem.severity": class.severity },
+        });
+
+        (Some(rule), rule_id)
+    }
+
+    /// Add CASR CrashReport to SARIF report.
+    /// NOTE: before using this method,
+    /// use SarifReport::new() to get report.
+    ///
+    /// # Arguments
+    ///
+    /// * 'report' - CrashReport
+    ///
+    /// * 'source_root' - Path to source root directory.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::Casr` when the crash line is not of the form
+    /// `file:line` or `file:line:column` with numeric line and column, and
+    /// forwards any error of `CrashReport::filtered_stacktrace`. The report is
+    /// left unchanged in both cases.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.json` lacks the `runs[0].tool.driver.rules` or
+    /// `runs[0].results` arrays.
+    pub fn add_casr_report<T: AsRef<Path>>(
+        &mut self,
+        report: &CrashReport,
+        source_root: T,
+    ) -> Result<()> {
+        let source_root = source_root.as_ref();
+
+        // Everything fallible is done before the report is modified, so that
+        // an error cannot leave a rule registered without a matching result.
+        let parts: Vec<&str> = report.crashline.split(':').collect();
+        if parts.len() != 2 && parts.len() != 3 {
+            return Err(Error::Casr(format!(
+                "Unable to parse crashline: {}",
+                report.crashline
+            )));
+        }
+        // SARIF regions carry line and column as integers, not as strings.
+        let number = |text: &str| {
+            text.parse::<u64>().map_err(|_| {
+                Error::Casr(format!("Unable to parse crashline: {}", report.crashline))
+            })
+        };
+        let line = number(parts[1])?;
+        let column = parts.get(2).copied().map(number).transpose()?;
+        let locations = vec![Self::location(parts[0], source_root, line, column)];
+
+        init_ignored_frames!("cpp", "rust", "python", "go", "java");
+        let frames: Vec<Value> = report
+            .filtered_stacktrace()?
+            .into_iter()
+            // Frames without a file or line cannot be mapped to a source location.
+            .filter(|entry| !entry.debug.file.is_empty() && entry.debug.line != 0)
+            .map(|entry| {
+                // A zero column means "no column": the property is omitted.
+                let column = (entry.debug.column != 0).then(|| entry.debug.column);
+                Self::location(&entry.debug.file, source_root, entry.debug.line, column)
+            })
+            .collect();
+
+        let (rule, rule_id) = self.rule(&report.execution_class);
+        if let Some(rule) = rule {
+            self.json
+                .pointer_mut("/runs/0/tool/driver/rules")
+                .and_then(Value::as_array_mut)
+                .expect("SARIF report has no runs[0].tool.driver.rules array")
+                .push(rule);
+            // Only generated ("G") ids consume the counter.
+            if rule_id.starts_with('G') {
+                self.id += 1;
+            }
+        }
+
+        // The message is the command line, with the stdin redirection appended
+        // when the report has one.
+        let text = if report.stdin.is_empty() {
+            report.proc_cmdline.clone()
+        } else {
+            format!("{} < {}", report.proc_cmdline, report.stdin)
+        };
+
+        let result = serde_json::json!({
+            "ruleId": rule_id,
+            "level": "error",
+            "message": { "text": text },
+            "locations": locations,
+            "frames": frames,
+        });
+        self.json
+            .pointer_mut("/runs/0/results")
+            .and_then(Value::as_array_mut)
+            .expect("SARIF report has no runs[0].results array")
+            .push(result);
+        Ok(())
+    }
+
+    /// Build a SARIF `location` object for `file` at `line` and, when given,
+    /// `column`.
+    ///
+    /// The file is reported relative to `source_root` when it lies under it,
+    /// otherwise the path is kept as is.
+    fn location<N: Into<Value>>(
+        file: &str,
+        source_root: &Path,
+        line: N,
+        column: Option<N>,
+    ) -> Value {
+        let path = Path::new(file);
+        let path = path.strip_prefix(source_root).unwrap_or(path);
+
+        let mut region = Map::new();
+        region.insert("startLine".to_string(), line.into());
+        if let Some(column) = column {
+            region.insert("startColumn".to_string(), column.into());
+        }
+
+        serde_json::json!({
+            "physicalLocation": {
+                "artifactLocation": {
+                    "uri": path.display().to_string(),
+                    "uriBaseId": "%SRCROOT%",
+                },
+                "region": region,
+            },
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_sarif() {
+        let mut sarif = SarifReport::new();
+        let data = "{\n\
+            \"ProcCmdline\": \"/home/avgor46/test_threads_casr/xlnt/load_sydr /home/avgor46/test_threads_casr/xlnt/out/crashes/crash-b15d6963751a2d36f401d36abaaba7e1874f6f63\",\n\
+            \"CrashSeverity\": {\n\
+            \"Type\": \"NOT_EXPLOITABLE\",\n\
+            \"ShortDescription\": \"SourceAv\",\n\
+            \"Description\": \"Access violation on source operand\",\n\
+            \"Explanation\": \"The target crashed on an access violation at an address matching the source operand of the current instruction. This likely indicates a read access violation.\"\n\
+            },\n\
+            \"Stacktrace\": [\n\
+            \"#0 0x00000000005e3099 in xlnt::detail::compound_document::read_directory (this=0x7fffffffcee0) at /xlnt/source/detail/cryptography/compound_document.cpp:975\",\n\
+            \"#1 0x00000000005e2956 in xlnt::detail::compound_document::compound_document (this=0x7fffffffcee0, in=...) at /xlnt/source/detail/cryptography/compound_document.cpp:517\",\n\
+            \"#3 0x000000000048a2d9 in xlnt::detail::decrypt_xlsx (data=std::vector of length 3995, capacity 4096 = {...}, password=) at /xlnt/source/detail/cryptography/xlsx_crypto_consumer.cpp:339\",\n\
+            \"#4 0x000000000048a7f6 in xlnt::detail::xlsx_consumer::read (this=0x7fffffffd8f0, source=..., password=) at /xlnt/source/detail/cryptography/xlsx_crypto_consumer.cpp:345\",\n\
+            \"#5 0x000000000040ddd6 in xlnt::workbook::load (this=0x7fffffffdbc8, stream=...) at /xlnt/source/workbook/workbook.cpp:901\",\n\
+            \"#6 0x00000000004142af in xlnt::workbook::load (this=0x7fffffffdbc8, data=std::vector of length 3995, capacity 3995 = {...}) at /xlnt/source/workbook/workbook.cpp:919\"\n\
+            ],\n\
+            \"CrashLine\": \"/xlnt/source/detail/cryptography/compound_document.cpp:975\"\n\
+            }\n";
+
+        let report: CrashReport = serde_json::from_str(data).unwrap();
+        assert!(sarif.add_casr_report(&report, "/xlnt").is_ok());
+
+        let rule = sarif.json.pointer("/runs/0/tool/driver/rules/0").unwrap();
+        assert_eq!(rule["name"].as_str().unwrap(), "SourceAv");
+        assert_eq!(rule["id"].as_str().unwrap(), "F11");
+
+        let location = sarif
+            .json
+            .pointer("/runs/0/results/0/locations/0/physicalLocation")
+            .unwrap();
+        assert_eq!(
+            location["artifactLocation"]["uri"].as_str().unwrap(),
+            "source/detail/cryptography/compound_document.cpp"
+        );
+        assert_eq!(location["region"]["startLine"].as_u64().unwrap(), 975);
+
+        let location = sarif
+            .json
+            .pointer("/runs/0/results/0/frames/4/physicalLocation")
+            .unwrap();
+        assert_eq!(
+            location["artifactLocation"]["uri"].as_str().unwrap(),
+            "source/workbook/workbook.cpp"
+        );
+        assert_eq!(location["region"]["startLine"].as_u64().unwrap(), 901);
+    }
+}