From 11b92de296896bae3e6e221f59acb81826d7d252 Mon Sep 17 00:00:00 2001 From: Alexandre Pasmantier <47638216+alexpasmantier@users.noreply.github.com> Date: Sat, 31 Aug 2024 01:00:36 +0200 Subject: [PATCH] perf: better mutualization of data structures across threads (#1) --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/cli.rs | 6 ------ src/main.rs | 20 +++++++++++++------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1fb162..f1767ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -297,7 +297,7 @@ dependencies = [ [[package]] name = "grip-grab" -version = "0.2.26" +version = "0.2.27" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 8152c0d..8a6bf8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "grip-grab" -version = "0.2.26" +version = "0.2.27" edition = "2021" authors = ["Alexandre Pasmantier "] license = "Apache-2.0" diff --git a/src/cli.rs b/src/cli.rs index ab7e2b1..8f8c19c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -32,10 +32,6 @@ pub struct Cli { #[clap(short = 'G', long, default_value_t = false)] pub disregard_gitignore: bool, - /// upper boundary for the number of results to expect (will panic if #results > max_results) - #[clap(short = 'M', long, default_value_t = 5000)] - pub max_results: usize, - /// number of threads to use #[clap(short = 'T', long, default_value_t = 4)] pub n_threads: usize, @@ -99,7 +95,6 @@ pub struct PostProcessedCli { pub patterns: Vec, pub paths: Vec, pub ignored_paths: Vec, - pub max_results: usize, pub n_threads: usize, pub disregard_gitignore: bool, pub multiline: bool, @@ -123,7 +118,6 @@ pub fn process_cli_args(mut cli: Cli) -> anyhow::Result { }, paths: utils::resolve_paths(cli.paths), ignored_paths: utils::resolve_paths(cli.ignore_paths), - max_results: cli.max_results, n_threads: cli.n_threads, disregard_gitignore: cli.disregard_gitignore, multiline: cli.multiline, diff --git a/src/main.rs b/src/main.rs index 290b061..8b636f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,9 @@ +use std::sync::Arc; + use clap::Parser; -use crossbeam::queue::ArrayQueue; +use crossbeam::queue::SegQueue; +use grep::regex::RegexMatcher; use ignore::DirEntry; use printer::PrinterConfig; use search::build_searcher; @@ -18,9 +21,8 @@ mod utils; pub fn main() -> anyhow::Result<()> { let cli_args = process_cli_args(Cli::parse())?; - let queue: ArrayQueue = ArrayQueue::new(cli_args.max_results); + let queue: Arc> = Arc::new(SegQueue::new()); - let matcher = build_matcher(&cli_args.patterns)?; let haystack_builder = walk_builder( cli_args.paths.iter().map(|p| p.as_path()).collect(), &cli_args.ignored_paths, @@ -28,17 +30,20 @@ pub fn main() -> anyhow::Result<()> { !cli_args.disregard_gitignore, cli_args.filter_filetypes, ); + let matcher: Arc = Arc::new(build_matcher(&cli_args.patterns)?); haystack_builder.build_parallel().run(|| { - Box::new(|entry: Result| match entry { + let matcher = Arc::clone(&matcher); + let mut searcher = build_searcher(cli_args.multiline); + let queue = Arc::clone(&queue); + Box::new(move |entry: Result| match entry { Ok(entry) => { let file_type = entry.file_type().unwrap(); if !file_type.is_dir() { let path = entry.path().to_path_buf(); - let mut searcher = build_searcher(cli_args.multiline); match search_file(path, &matcher, &mut searcher) { Ok(file_results) => { if !file_results.is_empty() { - queue.push(file_results).unwrap(); + queue.push(file_results); } } Err(_err) => (), @@ -61,7 +66,8 @@ pub fn main() -> anyhow::Result<()> { ..Default::default() }; let mut printer = Printer::new(printer_config); - queue + let printer_queue = Arc::into_inner(queue).unwrap(); + printer_queue .into_iter() .for_each(|file_results| printer.write(file_results).unwrap());