Skip to content

Commit

Permalink
feat: add compiler profile to prioritize memory usage
Browse files Browse the repository at this point in the history
Add a compiler profile option to CompilerBuilder. This will allow
picking between:

- speed: prioritize scanning speed (current impl & default)
- memory: prioritize memory usage
- automatic: pick between the two depending on the size of the rules.

The memory profile is only implemented for the Aho-Corasick automaton for
the moment. Changing the regex engine used for validators could be an
option; however, it isn't trivial to do, and it would make implementing
an "automatic" profile harder, since "automatic" only works if
compilation is done when converting the compiler to the scanner.
  • Loading branch information
vthib committed Jul 31, 2024
1 parent 132b91f commit bd7c86f
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 9 deletions.
14 changes: 13 additions & 1 deletion boreal/src/compiler/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ use super::{AvailableModule, ModuleLocation};
pub struct CompilerBuilder {
/// Modules that can be imported when compiling rules.
modules: HashMap<&'static str, AvailableModule>,

/// Profile to use when compiling rules.
profile: super::CompilerProfile,
}

impl CompilerBuilder {
Expand Down Expand Up @@ -75,10 +78,19 @@ impl CompilerBuilder {
self
}

/// Set the profile to use when compiling rules.
///
/// By default, [`CompilerProfile::Speed`] is used.
#[must_use]
pub fn set_profile(mut self, profile: super::CompilerProfile) -> Self {
self.profile = profile;
self
}

/// Build a [`Compiler`](super::Compiler) object with the configuration set on this builder.
///
/// The builder is consumed by this call.
#[must_use]
pub fn build(self) -> super::Compiler {
super::Compiler::build(self.modules)
super::Compiler::build(self.modules, self.profile)
}
}

Expand Down
34 changes: 33 additions & 1 deletion boreal/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ pub struct Compiler {

/// Compilation parameters
params: CompilerParams,

/// Profile to use when compiling rules.
pub(crate) profile: CompilerProfile,
}

#[derive(Debug)]
Expand Down Expand Up @@ -98,6 +101,30 @@ struct ImportedModule {
module_index: usize,
}

/// Profile to use when compiling rules.
///
/// The profile selects which of scan speed or memory usage is prioritized.
/// The default profile is [`CompilerProfile::Speed`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub enum CompilerProfile {
    /// Prioritize scan speed.
    ///
    /// This profile will strive to get the best possible scan speed by using more memory
    /// when possible.
    // TODO: give a few memory consumption examples.
    #[default]
    Speed,
    /// Prioritize memory usage.
    ///
    /// This profile will strive to reduce memory usage as much as possible, even if it means
    /// a slower scan speed overall.
    // TODO: give a few memory consumption examples.
    Memory,
    // TODO: add Automatic profile
}

impl Default for Compiler {
fn default() -> Self {
Self {
Expand All @@ -113,6 +140,7 @@ impl Default for Compiler {
external_symbols: Vec::new(),
bytes_pool: BytesPoolBuilder::default(),
params: CompilerParams::default(),
profile: CompilerProfile::default(),
}
}
}
Expand Down Expand Up @@ -143,9 +171,13 @@ impl Compiler {
///
/// Returns false if a module with the same name is already registered, and the module
/// was not added.
fn build(available_modules: HashMap<&'static str, AvailableModule>) -> Self {
fn build(
available_modules: HashMap<&'static str, AvailableModule>,
profile: CompilerProfile,
) -> Self {
Self {
available_modules,
profile,
..Default::default()
}
}
Expand Down
3 changes: 2 additions & 1 deletion boreal/src/compiler/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use super::module::compile_module;
use super::rule::RuleCompiler;
use super::{
AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler,
CompilerParams, ImportedModule, ModuleLocation, Namespace,
CompilerParams, CompilerProfile, ImportedModule, ModuleLocation, Namespace,
};
use crate::bytes_pool::BytesPoolBuilder;
use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable};
Expand Down Expand Up @@ -289,6 +289,7 @@ fn test_types_traits() {
}),
});
test_type_traits(CompilerParams::default());
test_type_traits(CompilerProfile::default());
test_type_traits_non_clonable(AddRuleStatus {
warnings: Vec::new(),
statistics: Vec::new(),
Expand Down
13 changes: 8 additions & 5 deletions boreal/src/scanner/ac_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind};
use super::{ScanError, ScanParams, StringMatch};
use crate::atoms::pick_atom_in_literal;
use crate::compiler::variable::Variable;
use crate::compiler::CompilerProfile;
use crate::matcher::{AcMatchStatus, Matcher};
use crate::memory::Region;
use crate::{statistics, timeout};
Expand Down Expand Up @@ -79,7 +80,7 @@ impl ScanData<'_> {
}

impl AcScan {
pub(crate) fn new(variables: &[Variable]) -> Self {
pub(crate) fn new(variables: &[Variable], profile: CompilerProfile) -> Self {
let mut lits = Vec::new();
let mut known_lits = HashMap::new();
let mut aho_index_to_literal_info = Vec::new();
Expand Down Expand Up @@ -128,9 +129,11 @@ impl AcScan {
// optimizations are done.

let mut builder = AhoCorasickBuilder::new();
let builder = builder
.ascii_case_insensitive(true)
.kind(Some(AhoCorasickKind::DFA));
let builder = builder.ascii_case_insensitive(true);
let builder = builder.kind(Some(match profile {
CompilerProfile::Speed => AhoCorasickKind::DFA,
CompilerProfile::Memory => AhoCorasickKind::ContiguousNFA,
}));

// First try with a smaller size to reduce memory use and improve performances, otherwise
// use the default version.
Expand Down Expand Up @@ -312,7 +315,7 @@ mod tests {

#[test]
fn test_types_traits() {
test_type_traits_non_clonable(AcScan::new(&[]));
test_type_traits_non_clonable(AcScan::new(&[], CompilerProfile::Speed));
test_type_traits_non_clonable(LiteralInfo {
variable_index: 0,
literal_index: 0,
Expand Down
3 changes: 2 additions & 1 deletion boreal/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ impl Scanner {
imported_modules,
external_symbols,
bytes_pool,
profile,
..
} = compiler;
let namespaces = namespaces.into_iter().map(|v| v.name).collect();

let ac_scan = ac_scan::AcScan::new(&variables);
let ac_scan = ac_scan::AcScan::new(&variables, profile);

let mut external_symbols_values = Vec::new();
let mut external_symbols_map = HashMap::new();
Expand Down

0 comments on commit bd7c86f

Please sign in to comment.