Skip to content

Commit

Permalink
feat: move only_literals optimization to regex compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
vthib committed Jul 16, 2023
1 parent e16f8fa commit 6b9e899
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
15 changes: 1 addition & 14 deletions boreal/src/compiler/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,20 +89,7 @@ pub(crate) fn compile_variable(
regex::compile_regex(&ast.into(), dot_all, &modifiers)
}
VariableDeclarationValue::HexString(hex_string) => {
// Nocase, fullword and wide are not compatible with hex strings
modifiers.nocase = false;
modifiers.fullword = false;
modifiers.wide = false;

let hir = hex_string.into();
match only_literals::hir_to_only_literals(&hir) {
Some(literals) => Ok(CompiledVariable {
literals,
matcher_kind: matcher::MatcherKind::Literals,
non_wide_regex: None,
}),
None => regex::compile_regex(&hir, true, &modifiers),
}
regex::compile_regex(&hex_string.into(), true, &modifiers)
}
};

Expand Down
8 changes: 4 additions & 4 deletions boreal/src/compiler/variable/only_literals.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::regex::{visit, Hir, VisitAction, Visitor};

/// Express the HIR as a list of literals, if it can be expressed using only literals.
pub(super) fn hir_to_only_literals(hir: &Hir) -> Option<Vec<Vec<u8>>> {
pub(super) fn hir_to_only_literals(hir: &Hir, dot_all: bool) -> Option<Vec<Vec<u8>>> {
// TODO: move this count into the HirStatistics visitor
match visit(hir, CountLiterals::new(true)) {
match visit(hir, CountLiterals::new(dot_all)) {
Some(count) if count < 100 => visit(hir, Literals::new()),
Some(_) | None => None,
}
Expand Down Expand Up @@ -297,7 +297,7 @@ mod tests {
let regex = parse_regex_string(regex);
let hir = regex.ast.into();

let res = hir_to_only_literals(&hir);
let res = hir_to_only_literals(&hir, true);
match &res {
Some(v) => assert_eq!(v, expected.unwrap()),
None => assert!(expected.is_none()),
Expand Down Expand Up @@ -342,7 +342,7 @@ mod tests {
let hex_string = parse_hex_string(hex_string);
let hir = hex_string.into();

let res = hir_to_only_literals(&hir);
let res = hir_to_only_literals(&hir, true);
match &res {
Some(v) => assert_eq!(v, expected.unwrap()),
None => assert!(expected.is_none()),
Expand Down
14 changes: 13 additions & 1 deletion boreal/src/compiler/variable/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::regex::{regex_hir_to_string, visit, Hir, Regex, VisitAction, Visitor}

use super::literals::LiteralsDetails;
use super::matcher::MatcherKind;
use super::{CompiledVariable, VariableCompilationError};
use super::{only_literals, CompiledVariable, VariableCompilationError};

/// Build a matcher for the given regex and string modifiers.
///
Expand All @@ -18,6 +18,18 @@ pub(super) fn compile_regex(
dot_all: bool,
modifiers: &VariableModifiers,
) -> Result<CompiledVariable, VariableCompilationError> {
// Try to convert into only literals if possible
// TODO: handle more modifiers
if !modifiers.nocase && !modifiers.wide {
if let Some(literals) = only_literals::hir_to_only_literals(hir, dot_all) {
return Ok(CompiledVariable {
literals,
matcher_kind: MatcherKind::Literals,
non_wide_regex: None,
});
}
}

let LiteralsDetails {
mut literals,
pre_hir,
Expand Down

0 comments on commit 6b9e899

Please sign in to comment.