From 6b0eb622396b46502a492b9980092f1df7564c78 Mon Sep 17 00:00:00 2001 From: hkctkuy Date: Fri, 22 Dec 2023 19:01:01 +0300 Subject: [PATCH] Add Soft level --- casr/src/bin/casr-cluster.rs | 75 +++++++++++++++++++++++++++++++++++- libcasr/src/stacktrace.rs | 46 +++++++++++++++++++--- 2 files changed, 114 insertions(+), 7 deletions(-) diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 1077f8d4..fabe04d6 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -451,7 +451,18 @@ fn update_clusters( // Handle deviant casreps let (result, before, after) = if !deviants.is_empty() { // Get clusters from deviants - let (deviant_clusters, before, after) = gen_clusters(&deviants, max, dedup)?; + let (mut deviant_clusters, before, mut after) = gen_clusters(&deviants, max, dedup)?; + if let ToleranceLevel::Soft = tolerance_level { + // Merge old and new clusters + let removed = if let Ok(removed) = + merge_clusters(&mut clusters, &mut deviant_clusters, oldpath, dedup) + { + removed + } else { + 0 + }; + after -= removed; + } // Save deviant clusters util::save_clusters(&deviant_clusters, oldpath)?; (deviant_clusters.len(), before, after) @@ -461,6 +472,68 @@ fn update_clusters( Ok((added, duplicates, deduplicated, result, before, after)) } +/// Try to merge new clusters to old clusters +/// +/// # Arguments +/// +/// * `olds` - list of old clusters represented as `HashMap` of `Cluster` +/// +/// * `news` - list of new clusters represented as `HashMap` of `Cluster` +/// +/// * `dir` - out directory +/// +/// * `dedup` - deduplicate crashline, if true +/// +/// # Return value +/// +/// Number of removed by crashline deduplication CASR reports +pub fn merge_clusters( + olds: &mut HashMap, + news: &mut HashMap, + dir: &Path, + dedup: bool, +) -> Result { + let mut duplicate = 0usize; + for old in olds.values_mut() { + let mut merged = Vec::new(); + for new in news.values() { + if !old.may_merge(new) { + continue; + } + // Copy casreps from new to old + for (casrep, stacktrace, crashline) in new.reports() { + // Update cluster (and dedup crashline) + if !old.insert( + casrep.to_path_buf(), + stacktrace.to_vec(), + crashline.to_string(), + dedup, + ) { + duplicate += 1; + continue; + } + // Save report + fs::copy( + &casrep, + format!( + "{}/cl{}/{}", + &dir.display(), + old.number, + &casrep.file_name().unwrap().to_str().unwrap() + ), + )?; + } + // Mark merged cluster for drop + merged.push(new.number); + } + // Drop marked cluster + for number in merged { + news.remove(&number); + } + } + Ok(duplicate) +} + /// Calculate silhouette coefficient /// /// # Arguments diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index 65446363..793e9b63 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -85,7 +85,7 @@ pub struct Cluster { /// Cluster diameter diam: Option, /// Cluster report crashlines - crashlines: HashSet, + crashlines: HashMap, } impl Cluster { @@ -96,8 +96,10 @@ impl Cluster { stacktraces: Vec, crashlines: Vec, ) -> Self { - let mut unique_crashlines: HashSet = HashSet::new(); - unique_crashlines.extend(crashlines); + let mut unique_crashlines: HashMap = HashMap::new(); + for (i, crashline) in crashlines.iter().enumerate().take(crashlines.len()) { + unique_crashlines.insert(crashline.clone(), i); + } Cluster { number, paths, @@ -135,12 +137,14 @@ impl Cluster { crashline: String, dedup: bool, ) -> bool { - if dedup && !crashline.is_empty() && !self.crashlines.insert(crashline.to_string()) { + if dedup && !crashline.is_empty() && self.crashlines.contains_key(&crashline) { return false; } self.paths.push(path); self.stacktraces.push(stacktrace); self.diam = None; + self.crashlines + .insert(crashline.to_string(), self.paths.len()); true } /// Get cluster diameter @@ -204,10 +208,40 @@ impl Cluster { Relation::Outer(rel) } } + /// Check if cluster may be merged with another one + pub fn may_merge(&self, cluster: &Cluster) -> bool { + let mut stacktraces1 = self.stacktraces.clone(); + let mut stacktraces2 = cluster.stacktraces().clone(); + stacktraces1.append(&mut stacktraces2); + diam(&stacktraces1) < THRESHOLD + } + // TODO: change type + /// Convert cluster to iterator + pub fn reports(&self) -> Vec<(PathBuf, Stacktrace, String)> { + let mut reports: Vec<(PathBuf, Stacktrace, String)> = Vec::new(); + let mut crashlines = self.crashlines.clone(); + for i in 0..self.paths.len() { + // Get crashline for cur casrep + let mut crashline = String::new(); + for (line, &number) in &crashlines { + if number == i { + crashline = line.to_string(); + break; + } + } + // Drop cur crashline from crashlines + crashlines.remove(&crashline); + // Update results + reports.push(( + self.paths[i].clone(), + self.stacktraces[i].clone(), + crashline, + )); + } + reports + } } -// TODO: Write a better description... -// NOTE: It's just interlayer between `Cluster` and `cluster_stacktrace` fn /// Generate clusters from CASR report info /// /// # Arguments