diff --git a/src/search.rs b/src/search.rs index 7013cf8..065f39c 100644 --- a/src/search.rs +++ b/src/search.rs @@ -4,6 +4,7 @@ use grep::matcher::{Match, Matcher}; use grep::regex::RegexMatcher; use grep::searcher::{Searcher, sinks::UTF8}; use std::path::PathBuf; +use std::sync::{Arc, Mutex}; #[derive(Clone, Debug, Eq, PartialEq)] pub struct LineSearchResult { @@ -26,83 +27,156 @@ pub struct ProjectSearchResult { pub files: Vec, } +// Buffer to accumulate results per worker and merge once at the end. +struct WorkerBuffer { + shared: Arc>>, + local: Vec, +} + +impl WorkerBuffer { + fn new(shared: Arc>>) -> Self { + Self { + shared, + local: Vec::with_capacity(256), + } + } +} + +impl Drop for WorkerBuffer { + fn drop(&mut self) { + if self.local.is_empty() { + return; + } + if let Ok(mut shared_vec) = self.shared.lock() { + shared_vec.extend(self.local.drain(..)); + } + } +} + impl ProjectSearchResult { pub fn search_projects(&mut self, projects: Vec<(String, PathBuf)>) { - //TODO: support literal search - //TODO: use ignore::WalkParallel? + // Build a single matcher up front. Clone or Arc it into workers. match RegexMatcher::new(&self.value) { Ok(matcher) => { - let mut searcher = Searcher::new(); + // Collect walk roots (deduplicated) let mut walk_builder_opt: Option = None; - for (_, project_path) in projects.iter() { - walk_builder_opt = match walk_builder_opt.take() { - Some(mut walk_builder) => { - walk_builder.add(project_path); - Some(walk_builder) - } - None => Some(ignore::WalkBuilder::new(project_path)), - }; - } - - if let Some(walk_builder) = walk_builder_opt { - for entry_res in walk_builder.build() { - let entry = match entry_res { - Ok(ok) => ok, - Err(err) => { - log::error!("failed to walk projects {:?}: {}", projects, err); - continue; + { + use std::collections::HashSet; + let mut uniq: HashSet = HashSet::new(); + for (_, project_path) in projects.iter() { + uniq.insert(project_path.clone()); + } + for project_path in uniq.into_iter() { + walk_builder_opt = match walk_builder_opt.take() { + Some(mut walk_builder) => { + walk_builder.add(&project_path); + Some(walk_builder) } + None => Some(ignore::WalkBuilder::new(&project_path)), }; + } + } - if let Some(file_type) = entry.file_type() { - if file_type.is_dir() { - continue; - } - } + // Share matcher between workers + let matcher = Arc::new(matcher); - let entry_path = entry.path(); + if let Some(mut walk_builder) = walk_builder_opt { + // Align walker flags with estimator/search + walk_builder + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .follow_links(false); + // Tune threads to available parallelism + let threads = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1); + walk_builder.threads(threads); - let mut lines = Vec::new(); - match searcher.search_path( - &matcher, - entry_path, - UTF8(|number_u64, text| { - match usize::try_from(number_u64) { - Ok(number) => match matcher.find(text.as_bytes()) { - Ok(Some(first)) => { - lines.push(LineSearchResult { - number, - text: text.to_string(), - first, - }); - }, - Ok(None) => { - log::error!("first match in file {:?} line {} not found", entry_path, number); + // Shared results collected via per-worker buffering + let shared_results: Arc>> = + Arc::new(Mutex::new(Vec::new())); + let walker = walk_builder.build_parallel(); + let matcher_outer = matcher.clone(); + walker.run(|| { + // One Searcher and buffer per worker + let mut searcher = Searcher::new(); + let matcher = matcher_outer.clone(); + let mut buffer = WorkerBuffer::new(shared_results.clone()); + Box::new(move |entry_res| { + match entry_res { + Ok(entry) => { + if let Some(file_type) = entry.file_type() { + if file_type.is_dir() { + return ignore::WalkState::Continue; + } + } + + let entry_path = entry.path().to_path_buf(); + let mut lines: Vec = Vec::new(); + + match searcher.search_path( + &*matcher, + &entry_path, + UTF8(|number_u64, text| { + match usize::try_from(number_u64) { + Ok(number) => match matcher.find(text.as_bytes()) { + Ok(Some(first)) => { + lines.push(LineSearchResult { + number, + text: text.to_string(), + first, + }); + } + Ok(None) => { + log::error!( + "first match in file {:?} line {} not found", + entry_path, number + ); + } + Err(err) => { + log::error!( + "failed to find first match in file {:?} line {}: {}", + entry_path, number, err + ); + } + }, + Err(err) => { + log::error!( + "failed to convert file {:?} line {} to usize: {}", + entry_path, number_u64, err + ); + } + } + Ok(true) + }), + ) { + Ok(()) => { + if !lines.is_empty() { + // Buffer result locally; merged once at worker end + buffer.local.push(FileSearchResult { path: entry_path, lines }); + } } Err(err) => { - log::error!("failed to find first match in file {:?} line {}: {}", entry_path, number, err); + log::error!("failed to search file {:?}: {}", entry_path, err); } - }, - Err(err) => { - log::error!("failed to convert file {:?} line {} to usize: {}", entry_path, number_u64, err); } } - Ok(true) - }), - ) { - Ok(()) => { - if !lines.is_empty() { - self.files.push(FileSearchResult { - path: entry_path.to_path_buf(), - lines, - }); + Err(err) => { + log::error!("failed to walk project entry: {}", err); } } - Err(err) => { - log::error!("failed to search file {:?}: {}", entry_path, err); - } - } - } + ignore::WalkState::Continue + }) + }); + + // Replace existing results with merged contents + self.files.clear(); + let merged: Vec = match shared_results.lock() { + Ok(guard) => guard.clone(), + Err(poisoned) => poisoned.into_inner().clone(), + }; + self.files.extend(merged.into_iter()); } } Err(err) => {