Skip to content

Commit

Permalink
Fix sil
Browse files Browse the repository at this point in the history
  • Loading branch information
hkctkuy authored and hkctkuy committed Dec 8, 2023
1 parent a5b58a8 commit b80750d
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 22 deletions.
21 changes: 12 additions & 9 deletions casr/src/bin/casr-cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ fn update_clusters(
.zip(stacktraces.iter().zip(crashlines.iter()));

// Get casreps from existing clusters
let cluster_dirs: Vec<PathBuf> = fs::read_dir(oldpath)
let mut cluster_dirs: Vec<PathBuf> = fs::read_dir(oldpath)
.unwrap()
.map(|path| path.unwrap().path())
.filter(|path| {
Expand All @@ -350,6 +350,7 @@ fn update_clusters(
.starts_with("cl")
})
.collect();
cluster_dirs.sort();
let len = cluster_dirs.len();
// Init clusters vector
let mut clusters: Vec<Cluster> = Vec::new();
Expand Down Expand Up @@ -490,7 +491,7 @@ fn update_clusters(
/// Silhouette coefficient
fn get_sil(dir: &Path, jobs: usize) -> Result<f64> {
// Get cluster dirs
let dirs: Vec<PathBuf> = fs::read_dir(dir)
let mut dirs: Vec<PathBuf> = fs::read_dir(dir)
.unwrap()
.map(|path| path.unwrap().path())
.filter(|path| {
Expand All @@ -502,6 +503,7 @@ fn get_sil(dir: &Path, jobs: usize) -> Result<f64> {
.starts_with("cl")
})
.collect();
dirs.sort();

if dirs.len() < 2 {
bail!("{} valid cluster, nothing to calculate...", dirs.len());

Check warning on line 509 in casr/src/bin/casr-cluster.rs

View check run for this annotation

Codecov / codecov/patch

casr/src/bin/casr-cluster.rs#L509

Added line #L509 was not covered by tests
Expand All @@ -525,9 +527,10 @@ fn get_sil(dir: &Path, jobs: usize) -> Result<f64> {
// Init sil sum
let mut sum = 0f64;
// Calculate silhouette coefficient for each casrep
for i in 0..clusters.len() - 1 {
for num in 0..clusters[i].len() - 1 {
sum += sil_coef(num, i, &clusters);
for i in 0..clusters.len() {
for num in 0..clusters[i].len() {
let sil = sil_coef(num, i, &clusters);
sum += sil;
}
}
Ok(sum / size as f64)
Expand Down Expand Up @@ -629,8 +632,8 @@ fn main() -> Result<()> {
.help("Strategy for outer cluster choosing when updating"),
)
.arg(
Arg::new("estimation")
.long("estimation")
Arg::new("estimate")
.long("estimate")
.value_name("DIR")
.action(ArgAction::Set)
.value_parser(clap::value_parser!(PathBuf))
Expand Down Expand Up @@ -748,8 +751,8 @@ fn main() -> Result<()> {
}
let sil = get_sil(paths[1], jobs)?;
println!("Cluster silhouette index: {sil}");
} else if matches.contains_id("estimation") {
let path: &PathBuf = matches.get_one::<PathBuf>("estimation").unwrap();
} else if matches.contains_id("estimate") {
let path: &PathBuf = matches.get_one::<PathBuf>("estimate").unwrap();
let sil = get_sil(path, jobs)?;
println!("Cluster silhouette index: {sil}");
}
Expand Down
87 changes: 87 additions & 0 deletions casr/tests/casr_tests/casrep/test_clustering_small/40.casrep
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{
"Date": "2021-07-14T19:56:09.276635+03:00",
"Uname": "Linux titanfall 5.8.0-59-generic #66~20.04.1-Ubuntu SMP Thu Jun 17 11:14:10 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux",
"OS": "Ubuntu",
"OSRelease": "20.04",
"Architecture": "amd64",
"ExecutablePath": "/usr/local/bin/tiff2pdf",
"ProcCmdline": "tiff2pdf ./fuz3tiff2pdf/main/crashes/id:000009,sig:06,src:000040+000049,time:43718,op:splice,rep:4",
"ProcMaps": [
" 0x555555554000 0x555555556000 0x2000 0x0 /usr/local/bin/tiff2pdf",
" 0x555555556000 0x555555561000 0xb000 0x2000 /usr/local/bin/tiff2pdf",
" 0x555555561000 0x555555565000 0x4000 0xd000 /usr/local/bin/tiff2pdf",
" 0x555555565000 0x555555566000 0x1000 0x10000 /usr/local/bin/tiff2pdf",
" 0x555555566000 0x555555567000 0x1000 0x11000 /usr/local/bin/tiff2pdf",
" 0x555555567000 0x555555588000 0x21000 0x0 [heap]",
" 0x7ffff7945000 0x7ffff7949000 0x4000 0x0 ",
" 0x7ffff7949000 0x7ffff7958000 0xf000 0x0 /usr/lib/x86_64-linux-gnu/libm-2.31.so",
" 0x7ffff7958000 0x7ffff79ff000 0xa7000 0xf000 /usr/lib/x86_64-linux-gnu/libm-2.31.so",
" 0x7ffff79ff000 0x7ffff7a96000 0x97000 0xb6000 /usr/lib/x86_64-linux-gnu/libm-2.31.so",
" 0x7ffff7a96000 0x7ffff7a97000 0x1000 0x14c000 /usr/lib/x86_64-linux-gnu/libm-2.31.so",
" 0x7ffff7a97000 0x7ffff7a98000 0x1000 0x14d000 /usr/lib/x86_64-linux-gnu/libm-2.31.so",
" 0x7ffff7a98000 0x7ffff7a9a000 0x2000 0x0 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7a9a000 0x7ffff7aab000 0x11000 0x2000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7aab000 0x7ffff7ab1000 0x6000 0x13000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7ab1000 0x7ffff7ab2000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7ab2000 0x7ffff7ab3000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7ab3000 0x7ffff7ab4000 0x1000 0x1a000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11",
" 0x7ffff7ab4000 0x7ffff7ab8000 0x4000 0x0 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7ab8000 0x7ffff7afc000 0x44000 0x4000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7afc000 0x7ffff7b36000 0x3a000 0x48000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7b36000 0x7ffff7b37000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7b37000 0x7ffff7b38000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7b38000 0x7ffff7b39000 0x1000 0x83000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2",
" 0x7ffff7b39000 0x7ffff7b44000 0xb000 0x0 /usr/lib/x86_64-linux-gnu/libjbig.so.0",
" 0x7ffff7b44000 0x7ffff7d43000 0x1ff000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0",
" 0x7ffff7d43000 0x7ffff7d44000 0x1000 0xa000 /usr/lib/x86_64-linux-gnu/libjbig.so.0",
" 0x7ffff7d44000 0x7ffff7d47000 0x3000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0",
" 0x7ffff7d47000 0x7ffff7d6c000 0x25000 0x0 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7d6c000 0x7ffff7ee4000 0x178000 0x25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7ee4000 0x7ffff7f2e000 0x4a000 0x19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7f2e000 0x7ffff7f2f000 0x1000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7f2f000 0x7ffff7f32000 0x3000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7f32000 0x7ffff7f35000 0x3000 0x1ea000 /usr/lib/x86_64-linux-gnu/libc-2.31.so",
" 0x7ffff7f35000 0x7ffff7f39000 0x4000 0x0 ",
" 0x7ffff7f39000 0x7ffff7f41000 0x8000 0x0 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7f41000 0x7ffff7f76000 0x35000 0x8000 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7f76000 0x7ffff7f9f000 0x29000 0x3d000 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7f9f000 0x7ffff7fa0000 0x1000 0x66000 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7fa0000 0x7ffff7fa2000 0x2000 0x66000 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7fa2000 0x7ffff7fa3000 0x1000 0x68000 /usr/local/lib/libtiff.so.3.9.6",
" 0x7ffff7fa3000 0x7ffff7fa5000 0x2000 0x0 ",
" 0x7ffff7fc8000 0x7ffff7fc9000 0x1000 0x0 ",
" 0x7ffff7fc9000 0x7ffff7fcd000 0x4000 0x0 [vvar]",
" 0x7ffff7fcd000 0x7ffff7fcf000 0x2000 0x0 [vdso]",
" 0x7ffff7fcf000 0x7ffff7fd0000 0x1000 0x0 /usr/lib/x86_64-linux-gnu/ld-2.31.so",
" 0x7ffff7fd0000 0x7ffff7ff3000 0x23000 0x1000 /usr/lib/x86_64-linux-gnu/ld-2.31.so",
" 0x7ffff7ff3000 0x7ffff7ffb000 0x8000 0x24000 /usr/lib/x86_64-linux-gnu/ld-2.31.so",
" 0x7ffff7ffb000 0x7ffff7ffc000 0x1000 0x0 /home/avgor46/testdoc/fuz3tiff2pdf/main/crashes/id:000009,sig:06,src:000040+000049,time:43718,op:splice,rep:4",
" 0x7ffff7ffc000 0x7ffff7ffd000 0x1000 0x2c000 /usr/lib/x86_64-linux-gnu/ld-2.31.so",
" 0x7ffff7ffd000 0x7ffff7ffe000 0x1000 0x2d000 /usr/lib/x86_64-linux-gnu/ld-2.31.so",
" 0x7ffff7ffe000 0x7ffff7fff000 0x1000 0x0 ",
" 0x7ffffffde000 0x7ffffffff000 0x21000 0x0 [stack]",
" 0xffffffffff600000 0xffffffffff601000 0x1000 0x0 [vsyscall]"
],
"CrashSeverity": {
"Type": "NOT_CRITICAL",
"ShortDescription": "SafeFunctionCheck",
"Description": "Buffer overflow in safe function",
"Explanation": "The target stopped while handling a signal that was generated by libc due to detection of buffer overflow in safe copy function."
},
"Stacktrace": [
"#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50",
"#1 0x00007ffff7d6c859 in __GI_abort () at abort.c:79",
"#2 0x00007ffff7dd73ee in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f0107c \"*** %s ***: terminated\\n\") at ../sysdeps/posix/libc_fatal.c:155",
"#3 0x00007ffff7e79b4a in __GI___fortify_fail (msg=msg@entry=0x7ffff7f01012 \"buffer overflow detected\") at fortify_fail.c:26",
"#4 0x00007ffff7e783e6 in __GI___chk_fail () at chk_fail.c:28",
"#5 0x00007ffff7dcf1cf in _IO_str_chk_overflow (fp=<optimized out>, c=<optimized out>) at iovsprintf.c:35",
"#6 0x00007ffff7da7db0 in __GI___printf_fp_l (fp=<optimized out>, loc=<optimized out>, info=<optimized out>, args=<optimized out>) at printf_fp.c:1246",
"#7 0x00007ffff7dc163a in __vfprintf_internal (s=s@entry=0x7fffffffe070, format=format@entry=0x5555555613df \"%.4f\", ap=ap@entry=0x7fffffffe1b0, mode_flags=mode_flags@entry=6) at vfprintf-internal.c:1687",
"#8 0x00007ffff7dcf279 in __vsprintf_internal (string=0x7fffffffe2a0 \"79725330432.000\", maxlen=<optimized out>, format=0x5555555613df \"%.4f\", args=args@entry=0x7fffffffe1b0, mode_flags=6) at iovsprintf.c:95",
"#9 0x00007ffff7e77edb in ___sprintf_chk (s=<optimized out>, flag=<optimized out>, slen=<optimized out>, format=<optimized out>) at sprintf_chk.c:40",
"#10 0x000055555555c7a1 in sprintf (__fmt=0x5555555613df \"%.4f\", __s=0x7fffffffe2a0 \"79725330432.000\") at /usr/include/x86_64-linux-gnu/bits/stdio2.h:36",
"#12 0x00005555555601b8 in t2p_write_pdf (output=0x555555568f80, input=0x555555567ea0, t2p=0x5555555672a0) at tiff2pdf.c:5175",
"#13 t2p_write_pdf (t2p=0x5555555672a0, input=0x555555567ea0, output=0x555555568f80) at tiff2pdf.c:5133",
"#14 0x00005555555568d4 in main (argc=<optimized out>, argv=<optimized out>) at tiff2pdf.c:763"
]
}
52 changes: 43 additions & 9 deletions casr/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2425,7 +2425,7 @@ fn test_casr_cluster_c() {
.parse::<u32>()
.unwrap();

assert_eq!(before_cnt, 11, "Before count mismatch.");
assert_eq!(before_cnt, 12, "Before count mismatch.");

let re =
Regex::new(r"Number of reports after crashline deduplication: (?P<after>\d+)").unwrap();
Expand All @@ -2438,15 +2438,16 @@ fn test_casr_cluster_c() {
.parse::<u32>()
.unwrap();

assert_eq!(after_cnt, 10, "After count mismatch.");
assert_eq!(after_cnt, 11, "After count mismatch.");

// 2.casrep and 20.casrep without crashlines => no dedup
// 3.casrep and 30.casrep with crashlines => dedup
// Thus, cluster (cl8) with 2.casrep has 2 casreps and others have 1 casrep
// Thus, cluster (cl7) with 2.casrep has 2 casreps and cl9 too
// But others have 1 casrep
for i in 1..clusters_cnt + 1 {
let cluster_path = paths[1].to_owned() + "/cl" + &i.to_string();
let size = std::fs::read_dir(cluster_path.clone()).unwrap().count();
let num = if i == 8 { 2 } else { 1 };
let num = if i == 7 || i == 9 { 2 } else { 1 };
assert_eq!(size, num);
}

Expand Down Expand Up @@ -2654,8 +2655,10 @@ fn test_casr_cluster_u() {
let paths = [
abs_path("tests/casr_tests/casrep/test_clustering_small"),
abs_path("tests/tmp_tests_casr/clustering_out"),
abs_path("tests/tmp_tests_casr/clustering_out/cl8/20.casrep"),
abs_path("tests/tmp_tests_casr/clustering_out/cl7/20.casrep"),
abs_path("tests/tmp_tests_casr/clustering_out/cl8"),
abs_path("tests/tmp_tests_casr/clustering_out/cl9"),
abs_path("tests/tmp_tests_casr/clustering_out/cl9/40.casrep"),
];

let _ = fs::remove_dir_all(&paths[1]);
Expand Down Expand Up @@ -2690,7 +2693,9 @@ fn test_casr_cluster_u() {
assert_eq!(clusters_cnt, 9, "Clusters count mismatch.");

let _ = std::fs::remove_file(&paths[2]);
let _ = std::fs::remove_file(&paths[5]);
let _ = std::fs::remove_dir_all(&paths[3]);
let _ = std::fs::rename(&paths[4], &paths[3]);

let output = Command::new(*EXE_CASR_CLUSTER.read().unwrap())
.args(["-u", &paths[0], &paths[1]])
Expand Down Expand Up @@ -2719,7 +2724,7 @@ fn test_casr_cluster_u() {
.parse::<u32>()
.unwrap();

assert_eq!(added_cnt, 0, "Added count mismatch.");
assert_eq!(added_cnt, 1, "Added count mismatch.");

let re = Regex::new(r"Number of duplicates: (?P<duplicates>\d+)").unwrap();
let duplicates_cnt = re
Expand Down Expand Up @@ -2775,17 +2780,46 @@ fn test_casr_cluster_u() {

assert_eq!(after_cnt, 1, "After count mismatch.");

let re = Regex::new(r"Cluster silhouette index: (?P<sil>\d+)").unwrap();
let re = Regex::new(r"Cluster silhouette index: (?P<sil>\d+.\d+)").unwrap();
let sil = re
.captures(&res)
.unwrap()
.name("sil")
.map(|x| x.as_str())
.unwrap()
.parse::<u32>()
.parse::<f64>()
.unwrap();

assert_eq!(sil, 0.15436556855344655, "Silhouette index mismatch.");

// Test estimation
let output = Command::new(*EXE_CASR_CLUSTER.read().unwrap())
.args(["--estimate", &paths[1]])
.output()
.expect("failed to start casr-cluster");

assert!(
output.status.success(),
"Stdout {}.\n Stderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);

let res = String::from_utf8_lossy(&output.stdout);

assert!(!res.is_empty());

let re = Regex::new(r"Cluster silhouette index: (?P<sil>\d+.\d+)").unwrap();
let sil = re
.captures(&res)
.unwrap()
.name("sil")
.map(|x| x.as_str())
.unwrap()
.parse::<f64>()
.unwrap();

assert_eq!(sil, 0, "Silhouette index mismatch.");
assert_eq!(sil, 0.15436556855344655, "Silhouette index mismatch.");

let _ = std::fs::remove_dir_all(&paths[1]);
}
Expand Down
7 changes: 3 additions & 4 deletions libcasr/src/stacktrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ pub enum AccumStrategy {
}

/// Structure that provides an interface for working with CASR report clusters
#[derive(Clone, Debug)]
pub struct Cluster {
/// Cluster number
pub number: usize,
Expand Down Expand Up @@ -449,7 +448,7 @@ pub fn relation(
/// "a" subcoefficient of the silhouette coefficient
fn sil_subcoef_a(num: usize, stacktraces: &[Stacktrace]) -> f64 {
let mut sum = 0f64;
for i in 0..stacktraces.len() - 1 {
for i in 0..stacktraces.len() {
if i == num {
continue;
}
Expand All @@ -474,12 +473,12 @@ fn sil_subcoef_a(num: usize, stacktraces: &[Stacktrace]) -> f64 {
/// "b" subcoefficient of the silhouette coefficient
fn sil_subcoef_b(num: usize, cl: usize, clusters: &[Vec<Stacktrace>]) -> f64 {
let mut min = MAX;
for j in 0..clusters.len() - 1 {
for j in 0..clusters.len() {
if j == cl {
continue;
}
let mut sum = 0f64;
for i in 0..clusters[j].len() - 1 {
for i in 0..clusters[j].len() {
sum += 1.0 - similarity(&clusters[cl][num], &clusters[j][i]);
}
let res = sum / clusters[j].len() as f64;
Expand Down

0 comments on commit b80750d

Please sign in to comment.