From 85d7b7fa06118a72b86aa7381f1ef4a080e2a169 Mon Sep 17 00:00:00 2001 From: hkctkuy Date: Wed, 13 Dec 2023 14:53:13 +0300 Subject: [PATCH] Fixes --- casr/src/bin/casr-cluster.rs | 33 +++++++++++++-------------------- casr/src/util.rs | 2 +- libcasr/src/stacktrace.rs | 20 ++++++++++---------- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index a3f96e4c..23da7615 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -61,7 +61,7 @@ fn make_clusters( } // Get casreps with stacktraces and crashlines - let (casreps, stacktraces, crashlines, badreports) = util::reports_from_dirs(casreps, jobs); + let (casreps, stacktraces, crashlines, badreports) = util::reports_from_paths(casreps, jobs); if !badreports.is_empty() { fs::create_dir_all(format!("{}/clerr", &outpath.display()))?; @@ -336,7 +336,7 @@ fn update_clusters( ) -> Result<(usize, usize, usize, usize, usize, usize)> { // Get new casreps let casreps = util::get_reports(newpath)?; - let (casreps, stacktraces, crashlines, _) = util::reports_from_dirs(casreps, jobs); + let (casreps, stacktraces, crashlines, _) = util::reports_from_paths(casreps, jobs); let casreps = casreps .iter() .zip(stacktraces.iter().zip(crashlines.iter())); @@ -346,12 +346,8 @@ fn update_clusters( .unwrap() .map(|path| path.unwrap().path()) .filter(|path| { - path.clone() - .file_name() - .unwrap() - .to_str() - .unwrap() - .starts_with("cl") + let name = path.file_name().unwrap().to_str().unwrap(); + name.starts_with("cl") && !name.starts_with("clerr") }) .collect(); cluster_dirs.sort(); @@ -363,15 +359,13 @@ fn update_clusters( // Get casreps from each existing cluster for cluster in &cluster_dirs { // Get cluster number - let Ok(i) = cluster.clone().file_name().unwrap().to_str().unwrap()[2..] + let i = cluster.clone().file_name().unwrap().to_str().unwrap()[2..] .to_string() .parse::() - else { - continue; - }; + .unwrap(); // Get casreps from cluster let casreps = util::get_reports(cluster)?; - let (_, stacktraces, crashlines, _) = util::reports_from_dirs(casreps, jobs); + let (_, stacktraces, crashlines, _) = util::reports_from_paths(casreps, jobs); // Fill cluster info structures clusters.push(Cluster::new(i, stacktraces)); if dedup { @@ -496,12 +490,8 @@ fn avg_sil(dir: &Path, jobs: usize) -> Result { .unwrap() .map(|path| path.unwrap().path()) .filter(|path| { - path.clone() - .file_name() - .unwrap() - .to_str() - .unwrap() - .starts_with("cl") + let name = path.file_name().unwrap().to_str().unwrap(); + name.starts_with("cl") && !name.starts_with("clerr") }) .collect(); dirs.sort(); @@ -519,12 +509,15 @@ fn avg_sil(dir: &Path, jobs: usize) -> Result { // Get casreps from cluster let casreps = util::get_reports(dir)?; // Get stacktraces from cluster - let (_, stacktraces, _, _) = util::reports_from_dirs(casreps, jobs); + let (_, stacktraces, _, _) = util::reports_from_paths(casreps, jobs); // Update size size += stacktraces.len(); // Add stacktraces clusters.push(stacktraces); } + if size == 0 { + bail!("{} valid reports, nothing to calculate...", size); + } // Init sil sum let mut sum = 0f64; // Calculate silhouette coefficient for each casrep diff --git a/casr/src/util.rs b/casr/src/util.rs index 8d0e5c38..d7646896 100644 --- a/casr/src/util.rs +++ b/casr/src/util.rs @@ -445,7 +445,7 @@ pub fn get_reports(dir: &Path) -> Result> { /// * A vector of reports stacktraces /// * A vector of reports crashlines /// * A vector of bad reports -pub fn reports_from_dirs( +pub fn reports_from_paths( casreps: Vec, jobs: usize, ) -> (Vec, Vec, Vec, Vec) { diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index c2f5d3e9..fc3a9e84 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -445,11 +445,11 @@ fn diam(stacktraces: &[Stacktrace]) -> f64 { /// "a" subcoefficient silhouette coefficient fn sil_subcoef_a(num: usize, stacktraces: &[Stacktrace]) -> f64 { let mut sum = 0f64; - for i in 0..stacktraces.len() { + for (i, stacktrace) in stacktraces.iter().enumerate() { if i == num { continue; } - sum += 1.0 - similarity(&stacktraces[num], &stacktraces[i]); + sum += 1.0 - similarity(&stacktraces[num], stacktrace); } sum / (stacktraces.len() - 1) as f64 } @@ -461,24 +461,24 @@ fn sil_subcoef_a(num: usize, stacktraces: &[Stacktrace]) -> f64 { /// /// * `num` - given stacktrace number /// -/// * `cl` - cluster number of given stacktrace +/// * `i` - cluster number of given stacktrace /// /// * `clusters` - a vector of clusters represented as slice of `Stacktrace` structures /// /// # Return value /// /// "b" subcoefficient silhouette coefficient -fn sil_subcoef_b(num: usize, cl: usize, clusters: &[Vec]) -> f64 { +fn sil_subcoef_b(num: usize, i: usize, clusters: &[Vec]) -> f64 { let mut min = MAX; - for j in 0..clusters.len() { - if j == cl { + for (j, cluster) in clusters.iter().enumerate() { + if j == i { continue; } let mut sum = 0f64; - for i in 0..clusters[j].len() { - sum += 1.0 - similarity(&clusters[cl][num], &clusters[j][i]); + for stacktrace in cluster { + sum += 1.0 - similarity(&clusters[i][num], stacktrace); } - let res = sum / clusters[j].len() as f64; + let res = sum / cluster.len() as f64; if res < min { min = res; } @@ -499,7 +499,7 @@ fn sil_subcoef_b(num: usize, cl: usize, clusters: &[Vec]) -> f64 { /// /// # Return value /// -/// "b" subcoefficient silhouette coefficient +/// Silhouette coefficient pub fn sil_coef(num: usize, i: usize, clusters: &[Vec]) -> f64 { if clusters[i].len() != 1 { let a = sil_subcoef_a(num, &clusters[i]);