diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 017c5e4d..db60dcd5 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -58,7 +58,7 @@ fn make_clusters( } // Get casreps with stacktraces and crashlines - let (casreps, stacktraces, crashlines, badreports) = util::reports_from_paths(casreps, jobs); + let (casreps, badreports) = util::reports_from_paths(casreps, jobs); if !badreports.is_empty() { fs::create_dir_all(format!("{}/clerr", &outpath.display()))?; @@ -74,44 +74,16 @@ fn make_clusters( } } - if stacktraces.len() < 2 { - bail!("{} valid reports, nothing to cluster...", stacktraces.len()); + if casreps.len() < 2 { + bail!("{} valid reports, nothing to cluster...", casreps.len()); } // Get clusters - let mut clusters = cluster_stacktraces(&stacktraces)?; + let (clusters, before, after) = gen_clusters(&casreps, 0, dedup)?; + // Save clusters + util::save_clusters(&clusters, outpath)?; - // Cluster formation - let cluster_cnt: usize = *clusters.iter().max().unwrap(); - for i in 1..=cluster_cnt { - fs::create_dir_all(format!("{}/cl{}", &outpath.display(), i))?; - } - - // Init before and after dedup counters - let before_cnt = casreps.len(); - let mut after_cnt = before_cnt; - - // Get clusters with crashline deduplication - if dedup { - after_cnt = dedup_crashlines(&crashlines, &mut clusters); - } - - for i in 0..clusters.len() { - // Skip casreps with duplicate crashlines - if clusters[i] == 0 { - continue; - } - fs::copy( - &casreps[i], - format!( - "{}/cl{}/{}", - &outpath.display(), - clusters[i], - &casreps[i].file_name().unwrap().to_str().unwrap() - ), - )?; - } - Ok((cluster_cnt, before_cnt, after_cnt)) + Ok((clusters.len(), before, after)) } /// Remove duplicate casreps @@ -336,10 +308,7 @@ fn update_clusters( ) -> Result<(usize, usize, usize, usize, usize, usize)> { // Get new casreps let casreps = util::get_reports(newpath)?; - let (casreps, stacktraces, crashlines, _) = 
util::reports_from_paths(casreps, jobs); - let casreps = casreps - .iter() - .zip(stacktraces.iter().zip(crashlines.iter())); + let (casreps, _) = util::reports_from_paths(casreps, jobs); // Get casreps from existing clusters let mut cluster_dirs: Vec = fs::read_dir(oldpath) @@ -367,7 +336,7 @@ fn update_clusters( } // Init list of casreps, which aren't suitable for any cluster - let mut deviants: Vec<(&PathBuf, (Stacktrace, String))> = Vec::new(); + let mut deviants: Vec = Vec::new(); // Init added casreps counter let mut added = 0usize; // Init duplicates counter @@ -383,7 +352,7 @@ fn update_clusters( // Checker if casrep is duplicate of someone else let mut dup = false; for cluster in clusters.values_mut() { - let relation = cluster.relation(stacktrace, inner_strategy, outer_strategy); + let relation = cluster.relation(&stacktrace, inner_strategy, outer_strategy); match relation { Relation::Dup => { dup = true; @@ -430,7 +399,7 @@ fn update_clusters( // Save casrep added += 1; fs::copy( - casrep, + &casrep, format!( "{}/{}", &cluster_dirs[number - 1].display(), @@ -563,7 +532,8 @@ fn avg_sil(dir: &Path, jobs: usize) -> Result { // Get casreps from cluster let casreps = util::get_reports(dir)?; // Get stacktraces from cluster - let (_, stacktraces, _, _) = util::reports_from_paths(casreps, jobs); + let (casreps, _) = util::reports_from_paths(casreps, jobs); + let (_, (stacktraces, _)): (Vec<_>, (Vec<_>, Vec<_>)) = casreps.iter().cloned().unzip(); // Update size size += stacktraces.len(); // Add stacktraces diff --git a/casr/src/util.rs b/casr/src/util.rs index 1cb3dee6..809505a5 100644 --- a/casr/src/util.rs +++ b/casr/src/util.rs @@ -3,7 +3,7 @@ extern crate libcasr; use libcasr::report::CrashReport; use libcasr::stacktrace::{ - Cluster, Stacktrace, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES, + Cluster, ReportInfo, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES, }; use anyhow::{bail, Context, Result}; @@ 
-441,14 +441,9 @@ pub fn get_reports(dir: &Path) -> Result<Vec<PathBuf>> { /// /// # Return value /// -/// * A vector of paths to correctly parsed reports -/// * A vector of reports stacktraces -/// * A vector of reports crashlines +/// * A vector of correctly parsed report info: paths, stacktraces and crashlines /// * A vector of bad reports -pub fn reports_from_paths( - casreps: Vec<PathBuf>, - jobs: usize, -) -> (Vec<PathBuf>, Vec<Stacktrace>, Vec<String>, Vec<PathBuf>) { +pub fn reports_from_paths(casreps: Vec<PathBuf>, jobs: usize) -> (Vec<ReportInfo>, Vec<PathBuf>) { // Get len let len = casreps.len(); // Start thread pool. @@ -457,7 +452,7 @@ pub fn reports_from_paths( .build() .unwrap(); // Report info from casreps: (casrep, (trace, crashline)) - let mut casrep_info: RwLock<Vec<(PathBuf, (Stacktrace, String))>> = RwLock::new(Vec::new()); + let mut casrep_info: RwLock<Vec<ReportInfo>> = RwLock::new(Vec::new()); // Casreps with stacktraces, that we cannot parse let mut badreports: RwLock<Vec<PathBuf>> = RwLock::new(Vec::new()); custom_pool.install(|| { @@ -487,11 +482,7 @@ pub fn reports_from_paths( .cmp(b.0.file_name().unwrap().to_str().unwrap()) }); - // Unzip casrep info - let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) = - casrep_info.iter().cloned().unzip(); - - (casreps, stacktraces, crashlines, badreports) + (casrep_info.to_vec(), badreports) } /// Get `Cluster` structure from specified directory path. 
@@ -513,7 +504,9 @@ pub fn cluster_from_dir(dir: &Path, jobs: usize) -> Result { .unwrap(); // Get casreps from cluster let casreps = get_reports(dir)?; - let (_, stacktraces, crashlines, _) = reports_from_paths(casreps, jobs); + let (casreps, _) = reports_from_paths(casreps, jobs); + let (_, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) = + casreps.iter().cloned().unzip(); // Create cluster // NOTE: We don't care about paths of casreps from existing clusters Ok(Cluster::new(i, Vec::new(), stacktraces, crashlines)) diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index 793e9b63..946e8159 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -27,6 +27,9 @@ pub type DebugInfo = gdb_command::stacktrace::DebugInfo; /// Represents the information about one line of the stack trace. pub type StacktraceEntry = gdb_command::stacktrace::StacktraceEntry; +/// Represents the information about CASR report +pub type ReportInfo = (PathBuf, (Stacktrace, String)); + lazy_static::lazy_static! { /// Regular expressions for functions to be ignored. pub static ref STACK_FRAME_FUNCTION_IGNORE_REGEXES: RwLock> = RwLock::new( @@ -215,7 +218,6 @@ impl Cluster { stacktraces1.append(&mut stacktraces2); diam(&stacktraces1) < THRESHOLD } - // TODO: change type /// Convert cluster to iterator pub fn reports(&self) -> Vec<(PathBuf, Stacktrace, String)> { let mut reports: Vec<(PathBuf, Stacktrace, String)> = Vec::new(); @@ -258,7 +260,7 @@ impl Cluster { /// * Number of valid casreps before crashiline deduplication /// * Number of valid casreps after crashiline deduplication pub fn gen_clusters( - reports: &[(&PathBuf, (Stacktrace, String))], + reports: &[ReportInfo], offset: usize, dedup: bool, ) -> Result<(HashMap, usize, usize)> {