From c40f319a4c65154e3fa42b5ef75ccb85310079e9 Mon Sep 17 00:00:00 2001 From: hkctkuy Date: Thu, 7 Dec 2023 15:43:13 +0300 Subject: [PATCH] Add strategy options --- casr/src/bin/casr-cluster.rs | 50 +++++++++++++++++++++++++++++++----- docs/usage.md | 8 ++++++ libcasr/src/stacktrace.rs | 10 ++++---- 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 75acd107..49113dc3 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -327,6 +327,8 @@ fn update_clusters( oldpath: &Path, jobs: usize, dedup: bool, + inner_strategy: AccumStrategy, + outer_strategy: AccumStrategy, ) -> Result<(usize, usize, usize, usize, usize, usize)> { // Get new casreps let casreps = util::get_reports(newpath)?; @@ -399,8 +401,8 @@ fn update_clusters( let relation = relation( stacktrace, cluster, - AccumStrategy::Dist, - AccumStrategy::Dist, + inner_strategy.clone(), + outer_strategy.clone(), ); match relation { Relation::Dup => { @@ -422,7 +424,7 @@ fn update_clusters( if dup { continue; } - // Get cluster with min measure + // Get cluster with min measure, a.k.a. "closest" one let number = if !inners.is_empty() { inners.iter().min_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0 } else if !outers.is_empty() { @@ -556,6 +558,24 @@ fn main() -> Result<()> { "Update clusters from OLD_DIR using CASR reports from NEW_DIR.", ), ) + .arg( + Arg::new("inner-strategy") + .long("inner-strategy") + .value_name("STRATEGY") + .action(ArgAction::Set) + .value_parser(["Diam", "Dist"]) + .default_value("Dist") + .help("Strategy for inner cluster choosing when updating"), + ) + .arg( + Arg::new("outer-strategy") + .long("outer-strategy") + .value_name("STRATEGY") + .action(ArgAction::Set) + .value_parser(["Delta", "Diam", "Dist"]) + .default_value("Dist") + .help("Strategy for outer cluster choosing when updating"), + ) .arg( Arg::new("ignore") .long("ignore") @@ -633,12 +653,30 @@ fn main() -> Result<()> { } else if matches.contains_id("update") { let paths: Vec<&PathBuf> = matches.get_many::("update").unwrap().collect(); - let (added, duplicates, deduplicated, result, before, after) = - update_clusters(paths[0], paths[1], jobs, dedup_crashlines)?; + let inner_strategy = matches.get_one::("inner-strategy").unwrap(); + let inner_strategy = match inner_strategy.as_str() { + "Diam" => AccumStrategy::Diam, + _ => AccumStrategy::Dist, + }; + let outer_strategy = matches.get_one::("outer-strategy").unwrap(); + let outer_strategy = match outer_strategy.as_str() { + "Delta" => AccumStrategy::Delta, + "Diam" => AccumStrategy::Diam, + _ => AccumStrategy::Dist, + }; + + let (added, duplicates, deduplicated, result, before, after) = update_clusters( + paths[0], + paths[1], + jobs, + dedup_crashlines, + inner_strategy, + outer_strategy, + )?; println!("Number of casreps added to old clusters: {added}"); println!("Number of duplicates: {duplicates}"); if deduplicated != 0 { - println!("Number of casreps deduplicated by crashline"); + println!("Number of casreps deduplicated by crashline: {deduplicated}"); } if result != 0 { println!("Number of new clusters: {result}"); diff --git a/docs/usage.md b/docs/usage.md index 2646d9c2..5451d7d2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -233,6 +233,14 @@ Tool for clustering CASR reports -m, --merge Merge INPUT_DIR into OUTPUT_DIR. Only new CASR reports from INPUT_DIR will be added to OUTPUT_DIR. + -u, --update + Update clusters from OLD_DIR using CASR reports from NEW_DIR. + --inner-strategy + Strategy for inner cluster choosing when updating [default: Dist] [possible + values: Diam, Dist] + --outer-strategy + Strategy for outer cluster choosing when updating [default: Dist] [possible + values: Delta, Diam, Dist] --ignore File with regular expressions for functions and file paths that should be ignored diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index ea05f40e..f05e4fee 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -39,7 +39,6 @@ lazy_static::lazy_static! { const THRESHOLD: f64 = 0.3; /// Relation between a CASR report and a cluster -#[derive(Clone, Debug)] pub enum Relation { /// The CASR report is a duplicate of one from cluster Dup, @@ -52,11 +51,12 @@ pub enum Relation { } /// Cluster accumulation strategy +#[derive(Clone, Debug)] pub enum AccumStrategy { + /// Argmin (diam (cluster + {new}) - diam (cluster)) + Delta, /// Argmin diam (cluster + {new}) Diam, - /// Argmin (diam (cluster + {new}) - diam (cluster)) - DiamDelta, /// Argmin dist (cluster, {new}) Dist, } @@ -394,7 +394,7 @@ pub fn relation( if diam >= max { // Inner let rel = match inner_strategy { - // DiamDelta is a nonsensical strategy in this case + // Delta is a nonsensical strategy in this case AccumStrategy::Diam => diam, _ => min, }; @@ -403,7 +403,7 @@ pub fn relation( // Outer let rel = match outer_strategy { AccumStrategy::Diam => max, - AccumStrategy::DiamDelta => max - diam, + AccumStrategy::Delta => max - diam, AccumStrategy::Dist => min, }; Relation::Outer(rel)