Skip to content

Commit

Permalink
Add strategy options
Browse files Browse the repository at this point in the history
  • Loading branch information
hkctkuy authored and hkctkuy committed Dec 7, 2023
1 parent 243708d commit c40f319
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 11 deletions.
50 changes: 44 additions & 6 deletions casr/src/bin/casr-cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ fn update_clusters(
oldpath: &Path,
jobs: usize,
dedup: bool,
inner_strategy: AccumStrategy,
outer_strategy: AccumStrategy,
) -> Result<(usize, usize, usize, usize, usize, usize)> {
// Get new casreps
let casreps = util::get_reports(newpath)?;
Expand Down Expand Up @@ -399,8 +401,8 @@ fn update_clusters(
let relation = relation(
stacktrace,
cluster,
AccumStrategy::Dist,
AccumStrategy::Dist,
inner_strategy.clone(),
outer_strategy.clone(),
);
match relation {
Relation::Dup => {
Expand All @@ -422,7 +424,7 @@ fn update_clusters(
if dup {
continue;
}
// Get cluster with min measure
// Get cluster with min measure, a.k.a. "closest" one
let number = if !inners.is_empty() {
inners.iter().min_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0
} else if !outers.is_empty() {
Expand Down Expand Up @@ -556,6 +558,24 @@ fn main() -> Result<()> {
"Update clusters from OLD_DIR using CASR reports from NEW_DIR.",
),
)
.arg(
Arg::new("inner-strategy")
.long("inner-strategy")
.value_name("STRATEGY")
.action(ArgAction::Set)
.value_parser(["Diam", "Dist"])
.default_value("Dist")
.help("Strategy for inner cluster choosing when updating"),
)
.arg(
Arg::new("outer-strategy")
.long("outer-strategy")
.value_name("STRATEGY")
.action(ArgAction::Set)
.value_parser(["Delta", "Diam", "Dist"])
.default_value("Dist")
.help("Strategy for outer cluster choosing when updating"),
)
.arg(
Arg::new("ignore")
.long("ignore")
Expand Down Expand Up @@ -633,12 +653,30 @@ fn main() -> Result<()> {
} else if matches.contains_id("update") {
let paths: Vec<&PathBuf> = matches.get_many::<PathBuf>("update").unwrap().collect();

let (added, duplicates, deduplicated, result, before, after) =
update_clusters(paths[0], paths[1], jobs, dedup_crashlines)?;
let inner_strategy = matches.get_one::<String>("inner-strategy").unwrap();
let inner_strategy = match inner_strategy.as_str() {
"Diam" => AccumStrategy::Diam,
_ => AccumStrategy::Dist,
};
let outer_strategy = matches.get_one::<String>("outer-strategy").unwrap();
let outer_strategy = match outer_strategy.as_str() {
"Delta" => AccumStrategy::Delta,
"Diam" => AccumStrategy::Diam,
_ => AccumStrategy::Dist,
};

let (added, duplicates, deduplicated, result, before, after) = update_clusters(
paths[0],
paths[1],
jobs,
dedup_crashlines,
inner_strategy,
outer_strategy,
)?;
println!("Number of casreps added to old clusters: {added}");
println!("Number of duplicates: {duplicates}");
if deduplicated != 0 {
println!("Number of casreps deduplicated by crashline");
println!("Number of casreps deduplicated by crashline: {deduplicated}");
}
if result != 0 {
println!("Number of new clusters: {result}");
Expand Down
8 changes: 8 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ Tool for clustering CASR reports
-m, --merge <INPUT_DIR> <OUTPUT_DIR>
Merge INPUT_DIR into OUTPUT_DIR. Only new CASR reports from INPUT_DIR will be
added to OUTPUT_DIR.
-u, --update <NEW_DIR> <OLD_DIR>
Update clusters from OLD_DIR using CASR reports from NEW_DIR.
--inner-strategy <STRATEGY>
Strategy for inner cluster choosing when updating [default: Dist] [possible
values: Diam, Dist]
--outer-strategy <STRATEGY>
Strategy for outer cluster choosing when updating [default: Dist] [possible
values: Delta, Diam, Dist]
--ignore <FILE>
File with regular expressions for functions and file paths that should be
ignored
Expand Down
10 changes: 5 additions & 5 deletions libcasr/src/stacktrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ lazy_static::lazy_static! {
const THRESHOLD: f64 = 0.3;

/// Relation between a CASR report and a cluster
#[derive(Clone, Debug)]
pub enum Relation {
/// The CASR report is a duplicate of one from cluster
Dup,
Expand All @@ -52,11 +51,12 @@ pub enum Relation {
}

/// Cluster accumulation strategy.
///
/// Each variant names the measure that is minimized ("argmin") over the
/// candidate clusters; `{new}` in the formulas below stands for the new
/// CASR report being considered for a cluster.
#[derive(Clone, Debug)]
pub enum AccumStrategy {
/// Argmin (diam (cluster + {new}) - diam (cluster)),
/// i.e. the growth of the cluster diameter caused by adding the report.
Delta,
/// Argmin diam (cluster + {new}),
/// i.e. the cluster diameter after adding the report.
Diam,
/// Argmin (diam (cluster + {new}) - diam (cluster))
// NOTE(review): same formula as `Delta`; this looks like the earlier name of
// that variant — confirm whether both are meant to coexist.
DiamDelta,
/// Argmin dist (cluster, {new}),
/// i.e. the distance between the cluster and the new report.
Dist,
}
Expand Down Expand Up @@ -394,7 +394,7 @@ pub fn relation(
if diam >= max {
// Inner
let rel = match inner_strategy {
// DiamDelta is a nonsensical strategy in this case
// Delta is a nonsensical strategy in this case
AccumStrategy::Diam => diam,
_ => min,
};
Expand All @@ -403,7 +403,7 @@ pub fn relation(
// Outer
let rel = match outer_strategy {
AccumStrategy::Diam => max,
AccumStrategy::DiamDelta => max - diam,
AccumStrategy::Delta => max - diam,
AccumStrategy::Dist => min,
};
Relation::Outer(rel)
Expand Down

0 comments on commit c40f319

Please sign in to comment.