From bd7c86f7d40ee221b9e5a2ac040bb09079aa6246 Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Sat, 6 Jul 2024 16:11:54 +0200 Subject: [PATCH] feat: add compiler profile to prioritize memory usage Add a compiler profile option to CompilerBuilder. This will allow picking between: - speed: prioritize scanning speed (current impl & default) - memory: prioritize memory usage - automatic: pick between the two depending on the size of the rules. The memory profile is only implemented for the aho-corasick automaton for the moment. Changing the regex engine used for validators could be an option, however it isn't trivial to do, and it would make implementing an "automatic" profile harder, since "automatic" only works if compilation is done when converting the compiler to the scanner. --- boreal/src/compiler/builder.rs | 14 +++++++++++++- boreal/src/compiler/mod.rs | 34 +++++++++++++++++++++++++++++++++- boreal/src/compiler/tests.rs | 3 ++- boreal/src/scanner/ac_scan.rs | 13 ++++++++----- boreal/src/scanner/mod.rs | 3 ++- 5 files changed, 58 insertions(+), 9 deletions(-) diff --git a/boreal/src/compiler/builder.rs b/boreal/src/compiler/builder.rs index aad36af1..5593811d 100644 --- a/boreal/src/compiler/builder.rs +++ b/boreal/src/compiler/builder.rs @@ -7,6 +7,9 @@ use super::{AvailableModule, ModuleLocation}; pub struct CompilerBuilder { /// Modules that can be imported when compiling rules. modules: HashMap<&'static str, AvailableModule>, + + /// Profile to use when compiling rules. + profile: super::CompilerProfile, } impl CompilerBuilder { @@ -75,10 +78,19 @@ impl CompilerBuilder { self } + /// Set the profile to use when compiling rules. + /// + /// By default, [`CompilerProfile::Speed`] is used. + #[must_use] + pub fn set_profile(mut self, profile: super::CompilerProfile) -> Self { + self.profile = profile; + self + } + /// Build a [`Compiler`] object with the configuration set on this builder. 
#[must_use] pub fn build(self) -> super::Compiler { - super::Compiler::build(self.modules) + super::Compiler::build(self.modules, self.profile) } } diff --git a/boreal/src/compiler/mod.rs b/boreal/src/compiler/mod.rs index 4e753b1a..eec59f6b 100644 --- a/boreal/src/compiler/mod.rs +++ b/boreal/src/compiler/mod.rs @@ -69,6 +69,9 @@ pub struct Compiler { /// Compilation parameters params: CompilerParams, + + /// Profile to use when compiling rules. + pub(crate) profile: CompilerProfile, } #[derive(Debug)] @@ -98,6 +101,30 @@ struct ImportedModule { module_index: usize, } +/// Profile to use when compiling rules. +#[derive(Copy, Clone, Debug)] +pub enum CompilerProfile { + /// Prioritize scan speed. + /// + /// This profile will strive to get the best possible scan speed by using more memory + /// when possible. + // TODO: give a few memory consumption examples. + Speed, + /// Prioritize memory usage. + /// + /// This profile will strive to reduce memory usage as much as possible, even if it means + /// a slower scan speed overall. + // TODO: give a few memory consumption examples. + Memory, + // TODO: add Automatic profile +} + +impl Default for CompilerProfile { + fn default() -> Self { + Self::Speed + } +} + impl Default for Compiler { fn default() -> Self { Self { @@ -113,6 +140,7 @@ impl Default for Compiler { external_symbols: Vec::new(), bytes_pool: BytesPoolBuilder::default(), params: CompilerParams::default(), + profile: CompilerProfile::default(), } } } @@ -143,9 +171,13 @@ impl Compiler { /// /// Returns false if a module with the same name is already registered, and the module /// was not added. 
- fn build(available_modules: HashMap<&'static str, AvailableModule>) -> Self { + fn build( + available_modules: HashMap<&'static str, AvailableModule>, + profile: CompilerProfile, + ) -> Self { Self { available_modules, + profile, ..Default::default() } } diff --git a/boreal/src/compiler/tests.rs b/boreal/src/compiler/tests.rs index 0a6d03c2..6d9c23d9 100644 --- a/boreal/src/compiler/tests.rs +++ b/boreal/src/compiler/tests.rs @@ -5,7 +5,7 @@ use super::module::compile_module; use super::rule::RuleCompiler; use super::{ AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler, - CompilerParams, ImportedModule, ModuleLocation, Namespace, + CompilerParams, CompilerProfile, ImportedModule, ModuleLocation, Namespace, }; use crate::bytes_pool::BytesPoolBuilder; use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable}; @@ -289,6 +289,7 @@ fn test_types_traits() { }), }); test_type_traits(CompilerParams::default()); + test_type_traits(CompilerProfile::default()); test_type_traits_non_clonable(AddRuleStatus { warnings: Vec::new(), statistics: Vec::new(), diff --git a/boreal/src/scanner/ac_scan.rs b/boreal/src/scanner/ac_scan.rs index 982b1c6b..1b579a3a 100644 --- a/boreal/src/scanner/ac_scan.rs +++ b/boreal/src/scanner/ac_scan.rs @@ -7,6 +7,7 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind}; use super::{ScanError, ScanParams, StringMatch}; use crate::atoms::pick_atom_in_literal; use crate::compiler::variable::Variable; +use crate::compiler::CompilerProfile; use crate::matcher::{AcMatchStatus, Matcher}; use crate::memory::Region; use crate::{statistics, timeout}; @@ -79,7 +80,7 @@ impl ScanData<'_> { } impl AcScan { - pub(crate) fn new(variables: &[Variable]) -> Self { + pub(crate) fn new(variables: &[Variable], profile: CompilerProfile) -> Self { let mut lits = Vec::new(); let mut known_lits = HashMap::new(); let mut aho_index_to_literal_info = Vec::new(); @@ -128,9 +129,11 @@ impl AcScan { // 
optimizations are done. let mut builder = AhoCorasickBuilder::new(); - let builder = builder - .ascii_case_insensitive(true) - .kind(Some(AhoCorasickKind::DFA)); + let builder = builder.ascii_case_insensitive(true); + let builder = builder.kind(Some(match profile { + CompilerProfile::Speed => AhoCorasickKind::DFA, + CompilerProfile::Memory => AhoCorasickKind::ContiguousNFA, + })); // First try with a smaller size to reduce memory use and improve performances, otherwise // use the default version. @@ -312,7 +315,7 @@ mod tests { #[test] fn test_types_traits() { - test_type_traits_non_clonable(AcScan::new(&[])); + test_type_traits_non_clonable(AcScan::new(&[], CompilerProfile::Speed)); test_type_traits_non_clonable(LiteralInfo { variable_index: 0, literal_index: 0, diff --git a/boreal/src/scanner/mod.rs b/boreal/src/scanner/mod.rs index c00e1c8e..90bc99d2 100644 --- a/boreal/src/scanner/mod.rs +++ b/boreal/src/scanner/mod.rs @@ -110,11 +110,12 @@ impl Scanner { imported_modules, external_symbols, bytes_pool, + profile, .. } = compiler; let namespaces = namespaces.into_iter().map(|v| v.name).collect(); - let ac_scan = ac_scan::AcScan::new(&variables); + let ac_scan = ac_scan::AcScan::new(&variables, profile); let mut external_symbols_values = Vec::new(); let mut external_symbols_map = HashMap::new();