From 64b85918bc1bb0ac0f785f45ac153738f6fdc6a2 Mon Sep 17 00:00:00 2001 From: Vincent Thiberville Date: Wed, 16 Aug 2023 00:27:53 +0200 Subject: [PATCH] test: tweak tests to improve coverage Add and adapt a few tests to properly use the right matcher and improve coverage. --- boreal/src/evaluator/ac_scan.rs | 10 +++----- boreal/src/matcher/literals.rs | 9 +++++++ boreal/tests/it/evaluation.rs | 40 ++++++++++++++++++++++++++++- boreal/tests/it/variables.rs | 45 ++++++++++++++++++++++++++++++--- 4 files changed, 92 insertions(+), 12 deletions(-) diff --git a/boreal/src/evaluator/ac_scan.rs b/boreal/src/evaluator/ac_scan.rs index 86fc3419..39bb1a70 100644 --- a/boreal/src/evaluator/ac_scan.rs +++ b/boreal/src/evaluator/ac_scan.rs @@ -166,9 +166,8 @@ impl AcScan { None => continue, }; let end = match mat.end().checked_add(end_offset) { - Some(v) if v > scan_data.mem.len() => continue, - Some(v) => v, - None => continue, + Some(v) if v <= scan_data.mem.len() => v, + _ => continue, }; let m = start..end; @@ -189,10 +188,7 @@ impl AcScan { // To avoid this, ensure the mem given to check_ac_match starts one byte after the last // saved match. 
let start_position = match &matches[variable_index] { - AcResult::Matches(v) => match v.last() { - Some(m) => m.start + 1, - None => 0, - }, + AcResult::Matches(v) if !v.is_empty() => v[v.len() - 1].start + 1, _ => 0, }; diff --git a/boreal/src/matcher/literals.rs b/boreal/src/matcher/literals.rs index 605ce9a4..5443bf86 100644 --- a/boreal/src/matcher/literals.rs +++ b/boreal/src/matcher/literals.rs @@ -874,6 +874,15 @@ mod tests { test("[ab]d[ef]", &[b"ade", b"adf", b"bde", b"bdf"], "", ""); test::<&str>("( () | () )", &[], "", ""); + + // Between a list of nul bytes and a single char, the single char is preferred + test("\x00\x00\x00\x00.*a", &[b"a"], r"\x00\x00\x00\x00.*a", ""); + test( + "(\x00\x00\x00\x00|abcd)a", + &[b"a"], + r"(\x00\x00\x00\x00|abcd)a", + "", + ); } #[test] diff --git a/boreal/tests/it/evaluation.rs b/boreal/tests/it/evaluation.rs index eff70a29..8aca0ed6 100644 --- a/boreal/tests/it/evaluation.rs +++ b/boreal/tests/it/evaluation.rs @@ -330,7 +330,7 @@ fn test_eval_var_length_regex() { r#" rule a { strings: - $a = /a.*b/ + $a = /^a.*b/ condition: !a == 3 }"#, @@ -390,6 +390,24 @@ rule a { checker.check(b" ab", false); checker.check(b"abab", false); + // Force the use of a raw matcher + let checker = Checker::new( + r#" +rule a { + strings: + $a = /a+b+/ + condition: + @a == 2 +}"#, + ); + checker.check(b"", false); + checker.check(b"ab", false); + checker.check(b" ab", false); + checker.check(b" ab", true); + checker.check(b" aab", true); + checker.check(b" ab", false); + checker.check(b"abab", false); + let checker = Checker::new( r#" rule a { @@ -706,6 +724,26 @@ rule a { checker.check(b" aaabb", true); checker.check(b" aaabb", false); + // Same but with a variable that uses a raw matcher. 
+ let checker = Checker::new( + r#" +rule a { + strings: + $a = /a{1,2}.*b{1,1}/ + condition: + #a in (2..5) == 3 +}"#, + ); + checker.check(b"", false); + checker.check(b" abaabb", true); + checker.check(b" ababab", false); + checker.check(b" abab", false); + checker.check(b" aaaab", false); + checker.check(b" aaabb", false); + checker.check(b" aaabb", true); + checker.check(b" aaabb", true); + checker.check(b" aaabb", false); + let checker = Checker::new( r#" rule a { diff --git a/boreal/tests/it/variables.rs b/boreal/tests/it/variables.rs index a0447cdb..8e8a1e43 100644 --- a/boreal/tests/it/variables.rs +++ b/boreal/tests/it/variables.rs @@ -331,8 +331,8 @@ fn test_variable_regex_wide_fullword() { test_regex_wide_fullword("b.{1,5}?123", b"bb123"); test_regex_wide_fullword("b.{1,5}123", b"bb123"); - // Test the raw regex matcher, no literals of at least length 2 extracted. - test_regex_wide_fullword(r"a.{5}1", b"abcdef1"); + // Test the raw regex matcher, no literals possible to extract. + test_regex_wide_fullword(r"(a.{5}1|z+)", b"abcdef1"); // Same thing, but with a tricky regex: the literal looks "wide", but is considered ascii test_regex_wide_fullword(r"b\x00.{1,5}?i\x00j\x00k\x00", &to_wide(b"bbijk")); @@ -371,6 +371,20 @@ fn test_variable_regex_wide_fullword_raw() { checker.check(&join(mem_wide, b"<\0", b""), true); checker.check(&join(mem_wide, b"a\0", b""), false); + // Do the same with word boundaries instead of fullword modifier + let checker = build_checker(r"\bab$", "ascii wide"); + checker.check(&join(mem_ascii, b"", b""), true); + checker.check(&join(mem_ascii, b"a", b""), false); + checker.check(&join(mem_ascii, b"<", b""), true); + checker.check(&join(mem_ascii, b"<\0", b""), true); + checker.check(&join(mem_ascii, b"a\0", b""), true); + + checker.check(&join(mem_wide, b"", b""), true); + checker.check(&join(mem_wide, b"a", b""), true); + checker.check(&join(mem_wide, b"<", b""), true); + checker.check(&join(mem_wide, b"<\0", b""), true); + 
checker.check(&join(mem_wide, b"a\0", b""), false); + let checker = build_checker(r"^a\x00b\x00", "ascii wide fullword"); let mem_ascii = b"a\0b\0"; let mem_wide = &to_wide(mem_ascii); @@ -413,7 +427,7 @@ fn test_variable_regex_wide_word_boundaries() { checker.check_libyara(b"\0a", false); // Check word boundary at start - let checker = build_checker(r"\ba", "wide"); + let checker = build_checker(r"\ba+", "wide"); checker.check(b"", false); checker.check(b"a", false); checker.check(b"a\0", true); @@ -439,7 +453,7 @@ fn test_variable_regex_wide_word_boundaries() { checker.check(b"b\ra\0", false); // Check word boundary at end - let checker = build_checker(r"a\b", "wide"); + let checker = build_checker(r"a+\b", "wide"); checker.check(b"", false); checker.check(b"a", false); checker.check(b"a\0", true); @@ -1893,3 +1907,26 @@ rule a { )], ); } + +#[test] +fn test_variable_no_literals() { + // Test a var with no literals extracted + let checker = Checker::new( + r#" +rule a { + strings: + $a = /a+(b|)c+/ + condition: + any of them +}"#, + ); + checker.check(b"a", false); + checker.check(b"b", false); + checker.check(b"ab", false); + checker.check(b"bc", false); + checker.check(b"abc", true); + checker.check(b"ac", true); + checker.check(b"aac", true); + checker.check(b"abccc", true); + checker.check(b"aabbc", false); +}