From 6b31d576df2f0dc1b9b0fec460b4af35fe340d51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?=
Date: Thu, 4 Jan 2024 15:44:54 +0100
Subject: [PATCH] Progress

Move the checker name lookup of the ASan and UBSan report parsers into a
deduce_checker_name() method, relax the message patterns from `.+` to `.*`
and escape the regex metacharacters that are meant literally. Add a
catch-all `generic-error` ASan check and a unit test for the deduction.

---
 .../analyzers/sanitizers/address/parser.py    |  89 ++++++++++++-------
 .../analyzers/sanitizers/ub/parser.py         | 101 +++++++++++----------
 .../asan_output_test_files/asan.plist         |   2 +-
 .../tests/unit/analyzers/test_asan_parser.py  |  19 +++-
 4 files changed, 127 insertions(+), 84 deletions(-)

diff --git a/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/address/parser.py b/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/address/parser.py
index 338b33607c..46f927b999 100644
--- a/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/address/parser.py
+++ b/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/address/parser.py
@@ -29,64 +29,86 @@ class Parser(SANParser):
         # Checker message.
         r'(?P<message>[\S \t]+)')
 
+    # Sources: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/
+    #          sanitizer_common/sanitizer_allocator_report.cpp
+    #          asan/asan_errors.h
+    #          asan/asan_errors.cpp
+    #          -> both inside ErrorGeneric::ErrorGeneric() and
+    #             ErrorStringFunction.*::Print()
     check_msg_pairs = {
         "double-free": re.compile(
-            "attempting double-free on .+ in thread .+:"),
+            "attempting double-free on .* in thread .*:"),
         "new-delete-type-mismatch": re.compile(
-            "new-delete-type-mismatch on .+ in thread .+:"),
+            "new-delete-type-mismatch on .* in thread .*:"),
         "bad-free": re.compile(
-            "attempting free on address which was not malloc()-ed: .+ in "
-            "thread .+"),
+            r"attempting free on address which was not malloc\(\)-ed: .* in "
+            r"thread .*"),
         "alloc-dealloc-mismatch": re.compile(
-            "alloc-dealloc-mismatch (.+ vs .+) on .+"),
+            r"alloc-dealloc-mismatch \(.* vs .*\) on .*"),
         "bad-malloc_usable_size": re.compile(
-            "attempting to call malloc_usable_size() for pointer which is not "
-            "not owned: .+"),
+            r"attempting to call malloc_usable_size\(\) for pointer which is "
+            r"not owned: .*"),
         "bad-__sanitizer_get_allocated_size": re.compile(
-            "attempting to call __sanitizer_get_allocated_size() for pointer "
-            "which is not owned: .+"),
+            r"attempting to call __sanitizer_get_allocated_size\(\) for "
+            r"pointer which is not owned: .*"),
         "calloc-overflow": re.compile(
-            "calloc parameters overflow: count * size (.+ * .+) cannot be "
-            "represented in type size_t (thread .+)"),
+            r"calloc parameters overflow: count \* size \(.* \* .*\) cannot "
+            r"be represented in type size_t \(thread .*\)"),
         "reallocarray-overflow": re.compile(
-            "reallocarray parameters overflow: count * size (.+ * .+) cannot "
-            "be represented in type size_t (thread .+)"),
+            r"reallocarray parameters overflow: count \* size \(.* \* .*\) "
+            r"cannot be represented in type size_t \(thread .*\)"),
         "pvalloc-overflow": re.compile(
-            "pvalloc parameters overflow: size 0x.+ rounded up to system page "
-            "size 0x.+ cannot be represented in type size_t (thread .+)"),
+            r"pvalloc parameters overflow: size 0x.* rounded up to system "
+            r"page size 0x.* cannot be represented in type size_t "
+            r"\(thread .*\)"),
         "invalid-allocation-alignment": re.compile(
-            "invalid allocation alignment: .+, alignment must be a power of "
-            "two (thread .+)"),
+            r"invalid allocation alignment: .*, alignment must be a power of "
+            r"two \(thread .*\)"),
         "invalid-aligned-alloc-alignment": re.compile(
-            "invalid alignment requested in aligned_alloc: .+ the requested "
-            "size 0x.+ must be a multiple of alignment (thread .+)"),
+            r"invalid alignment requested in aligned_alloc: .* the requested "
+            r"size 0x.* must be a multiple of alignment \(thread .*\)"),
         "invalid-posix-memalign-alignment": re.compile(
-            "invalid alignment requested in posix_memalign: .+, alignment "
-            "must be a power of two and a multiple of sizeof(void*) == .+ "
-            "(thread .+)"),
+            r"invalid alignment requested in posix_memalign: .*, alignment "
+            r"must be a power of two and a multiple of sizeof\(void\*\) == .* "
+            r"\(thread .*\)"),
         "allocation-size-too-big": re.compile(
-            "requested allocation size 0x.+ (0x.+ after adjustments for "
-            "alignment, red zones etc.) exceeds maximum supported size of "
-            "0x.+ (thread .+)"),
+            r"requested allocation size 0x.* \(0x.* after adjustments for "
+            r"alignment, red zones etc\.\) exceeds maximum supported size of "
+            r"0x.* \(thread .*\)"),
         "rss-limit-exceeded": re.compile(
             "specified RSS limit exceeded, currently set to "
-            "soft_rss_limit_mb=.+"),
+            "soft_rss_limit_mb=.*"),
         "out-of-memory": re.compile(
-            "allocator is trying to allocate 0x.+ bytes"),
+            "allocator is trying to allocate 0x.* bytes"),
+        # This is not a check name in LLVM.
         "string-function-memory-ranges-overlap": re.compile(
-            ".+: memory ranges .+ and .+ overlap"),
+            ".*: memory ranges .* and .* overlap"),
         "negative-size-param": re.compile(
-            "negative-size-param: (size=.+)"),
+            r"negative-size-param: \(size=.*\)"),
         "bad-__sanitizer_annotate_contiguous_container": re.compile(
             "bad parameters to __sanitizer_annotate_contiguous_container:"),
         "bad-__sanitizer_annotate_double_ended_contiguous_container":
             re.compile(
                 "bad parameters to "
                 "__sanitizer_annotate_double_ended_contiguous_container:"),
-        "odr-violation": re.compile("odr-violation (.+):"),
-        "invalid-pointer-pair": re.compile("invalid-pointer-pair: .+ .+")
+        "odr-violation": re.compile(r"odr-violation \(.*\):"),
+        "invalid-pointer-pair": re.compile("invalid-pointer-pair: .* .*"),
+        # Catch-all for ErrorGeneric reports (e.g. heap-use-after-free).
+        # Keep it last: deduce_checker_name() returns the first match.
+        "generic-error": re.compile(".* on address .* at pc .* bp .* sp .*")
     }
 
+    def deduce_checker_name(self, message: str) -> str:
+        """Return the checker name deduced from a sanitizer message.
+
+        The first matching check_msg_pairs entry wins; self.checker_name is
+        returned on its own when no pattern matches.
+        """
+        for check, pattern in self.check_msg_pairs.items():
+            if pattern.search(message):
+                return f"{self.checker_name}.{check}"
+        return self.checker_name
+
     def parse_sanitizer_message(
         self,
         it: Iterator[str],
@@ -97,9 +119,6 @@ def parse_sanitizer_message(
         if not report:
             return report, line
 
-        for check, pattern in self.check_msg_pairs.items():
-            if pattern.search(report.message):
-                report.checker_name = f"{self.checker_name}.{check}"
-                break
+        report.checker_name = self.deduce_checker_name(report.message)
 
         return report, line
diff --git a/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/ub/parser.py b/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/ub/parser.py
index 65bec1b221..403f20ad00 100644
--- a/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/ub/parser.py
+++ b/tools/report-converter/codechecker_report_converter/analyzers/sanitizers/ub/parser.py
@@ -49,107 +49,107 @@ class Parser(SANParser):
     checks = {
         "alignment": [
             re.compile(
-                ".+ misaligned address .+ for type .+, which requires .+ byte "
+                ".* misaligned address .* for type .*, which requires .* byte "
                 "alignment"),
             re.compile(
-                "assumption of .+ byte alignment for pointer of type .+ "
+                "assumption of .* byte alignment for pointer of type .* "
                 "failed"),
             re.compile(
-                "assumption of .+ byte alignment (with offset of .+ byte) for "
-                "pointer of type .+ failed")],
+                r"assumption of .* byte alignment \(with offset of .* byte\) "
+                r"for pointer of type .* failed")],
         "bool": [
             re.compile(
-                "load of value .+, which is not a valid value for type "
+                "load of value .*, which is not a valid value for type "
                 ".*bool.*")],
         "builtin": [
-            re.compile("passing zero to .+, which is not a valid argument")],
+            re.compile("passing zero to .*, which is not a valid argument")],
         "bounds": [
-            re.compile("index .+ out of bounds for type .+")],
+            re.compile("index .* out of bounds for type .*")],
         "enum": [
             re.compile(
-                "load of value .+, which is not a valid value for type .+")],
+                "load of value .*, which is not a valid value for type .*")],
         "float-cast-overflow": [
             re.compile(
-                ".+ is outside the range of representable values of type .+")],
+                ".* is outside the range of representable values of type .*")],
         "integer-divide-by-zero-or-float-divide-by-zero": [
             re.compile("division by zero")],
         "implicit-signed-integer-truncation": [
             re.compile(
-                "implicit conversion from type .+ of value .+ (.+-bit, "
-                "signed) to type .+ changed the value to .+ (.+-bit, "
-                "signed)")],
+                r"implicit conversion from type .* of value .* \(.*-bit, "
+                r"signed\) to type .* changed the value to .* \(.*-bit, "
+                r"signed\)")],
         "implicit-unsigned-integer-truncation": [
             re.compile(
-                "implicit conversion from type .+ of value .+ (.+-bit, "
-                "unsigned) to type .+ changed the value to .+ (.+-bit, "
-                "unsigned)")],
+                r"implicit conversion from type .* of value .* \(.*-bit, "
+                r"unsigned\) to type .* changed the value to .* \(.*-bit, "
+                r"unsigned\)")],
         "implicit-integer-sign-change": [
             re.compile(
-                "implicit conversion from type .+ of value .+ (.+-bit, "
-                ".*signed) to type .+ changed the value to .+ (.+-bit, "
-                ".*signed)")],
+                r"implicit conversion from type .* of value .* \(.*-bit, "
+                r".*signed\) to type .* changed the value to .* \(.*-bit, "
+                r".*signed\)")],
         "nonnull-attribute-or-nullability-arg": [
             re.compile(
-                "null pointer passed as argument .+, which is declared to "
+                "null pointer passed as argument .*, which is declared to "
                 "never be null")],
         "null-or-nullability-assign": [
-            re.compile(".+ null pointer of type .+")],
+            re.compile(".* null pointer of type .*")],
         "nullability-return-or-returns-nonnull-attribute": [
             re.compile(
                 "null pointer returned from function declared to never return "
                 "null")],
         "objc-cast": [
             re.compile(
-                "invalid ObjC cast, object is a '.+', but expected a .+")],
+                "invalid ObjC cast, object is a '.*', but expected a .*")],
         "object-size": [
             re.compile(
-                ".+ address .+ with insufficient space for an object of type "
-                ".+")],
+                ".* address .* with insufficient space for an object of type "
+                ".*")],
         "pointer-overflow": [
             re.compile("applying zero offset to null pointer"),
-            re.compile("applying non-zero offset .+ to null pointer"),
+            re.compile("applying non-zero offset .* to null pointer"),
             re.compile(
-                "applying non-zero offset to non-null pointer .+ produced "
+                "applying non-zero offset to non-null pointer .* produced "
                 "null pointer"),
-            re.compile("addition of unsigned offset to .+ overflowed to .+"),
+            re.compile("addition of unsigned offset to .* overflowed to .*"),
             re.compile(
-                "subtraction of unsigned offset from .+ overflowed to .+"),
+                "subtraction of unsigned offset from .* overflowed to .*"),
             re.compile(
-                "pointer index expression with base .+ overflowed to .+")],
+                "pointer index expression with base .* overflowed to .*")],
         "return": [
             re.compile(
                 "execution reached the end of a value-returning function "
                 "without returning a value")],
         "shift": [
-            re.compile("shift exponent .+ is negative"),
-            re.compile("shift exponent .+ is too large for .+-bit type .+"),
-            re.compile("left shift of negative value .+")],
+            re.compile("shift exponent .* is negative"),
+            re.compile("shift exponent .* is too large for .*-bit type .*"),
+            re.compile("left shift of negative value .*")],
         "signed-integer-overflow": [
             re.compile(
-                "signed integer overflow: .+ .+ .+ cannot be represented in "
-                "type .+"),
+                "signed integer overflow: .* .* .* cannot be represented in "
+                "type .*"),
             re.compile(
-                "negation of .+ cannot be represented in type .+; cast to an "
+                "negation of .* cannot be represented in type .*; cast to an "
                 "unsigned type to negate this value to itself"),
             re.compile(
-                "division of .+ by -1 cannot be represented in type .+")],
+                "division of .* by -1 cannot be represented in type .*")],
         "unreachable": [
             re.compile("execution reached an unreachable program point")],
         "unsigned-integer-overflow": [
             re.compile(
-                "unsigned integer overflow: .+ .+ .+ cannot be represented "
-                "in type .+"),
-            re.compile("negation of .+ cannot be represented in type .+"),
+                "unsigned integer overflow: .* .* .* cannot be represented "
+                "in type .*"),
+            re.compile("negation of .* cannot be represented in type .*"),
             re.compile(
-                "left shift of .+ by .+ places cannot be represented in type "
-                ".+")],
+                "left shift of .* by .* places cannot be represented in type "
+                ".*")],
         "vla-bound": [
             re.compile(
                 "variable length array bound evaluates to non-positive value "
-                ".+")],
+                ".*")],
         "vptr": [
             re.compile(
-                ".+ address .+ which does not point to an object of type .+")]
+                ".* address .* which does not point to an object of type .*")]
     }
 
     def parse_stack_trace(self, it, line):
@@ -169,6 +169,17 @@ def parse_stack_trace(self, it, line):
 
         return stack_traces, events, line
 
+    def deduce_checker_name(self, message: str) -> str:
+        """Return the checker name deduced from a sanitizer message.
+
+        The first checks entry with any matching pattern wins;
+        self.checker_name is returned on its own when nothing matches.
+        """
+        for check, patterns in self.checks.items():
+            if any(pattern.search(message) for pattern in patterns):
+                return f"{self.checker_name}.{check}"
+        return self.checker_name
+
     def parse_sanitizer_message(
         self,
         it: Iterable[str],
@@ -188,11 +199,7 @@ def parse_sanitizer_message(
         stack_traces, events, line = self.parse_stack_trace(it, line)
 
         message = match.group('message').strip()
-        checker_name = None
-        for check, patterns in self.checks.items():
-            if any(pattern.search(message) for pattern in patterns):
-                checker_name = f"{self.checker_name}.{check}"
-                break
+        checker_name = self.deduce_checker_name(message)
 
         report = self.create_report(
             events, report_file, report_line, report_col,
diff --git a/tools/report-converter/tests/unit/analyzers/asan_output_test_files/asan.plist b/tools/report-converter/tests/unit/analyzers/asan_output_test_files/asan.plist
index 264a13f421..85a4dc8d50 100644
--- a/tools/report-converter/tests/unit/analyzers/asan_output_test_files/asan.plist
+++ b/tools/report-converter/tests/unit/analyzers/asan_output_test_files/asan.plist
@@ -8,7 +8,7 @@
 			<key>category</key>
 			<string>unknown</string>
 			<key>check_name</key>
-			<string>AddressSanitizer</string>
+			<string>AddressSanitizer.generic-error</string>
 			<key>description</key>
 			<string>heap-use-after-free on address 0x614000000044 at pc 0x0000004f4b45 bp 0x7ffd40559120 sp 0x7ffd40559118</string>
 			<key>issue_hash_content_of_line_in_context</key>
diff --git a/tools/report-converter/tests/unit/analyzers/test_asan_parser.py b/tools/report-converter/tests/unit/analyzers/test_asan_parser.py
index d8b1af2b63..d6888bbe8d 100644
--- a/tools/report-converter/tests/unit/analyzers/test_asan_parser.py
+++ b/tools/report-converter/tests/unit/analyzers/test_asan_parser.py
@@ -18,7 +18,7 @@ import unittest
 
 
 from codechecker_report_converter.analyzers.sanitizers.address import \
-    analyzer_result
+    analyzer_result, parser
 from codechecker_report_converter.report.parser import plist
 
 OLD_PWD = None
@@ -43,6 +43,7 @@ class ASANAnalyzerResultTestCase(unittest.TestCase):
     def setUp(self):
         """ Setup the test. """
         self.analyzer_result = analyzer_result.AnalyzerResult()
+        self.parser = parser.Parser()
         self.cc_result_dir = tempfile.mkdtemp()
 
     def tearDown(self):
@@ -69,3 +70,19 @@ def test_asan(self):
         res['metadata']['generated_by']['version'] = "x.y.z"
 
         self.assertEqual(res, exp)
+
+    def test_asan_checker_deduction(self):
+        """ Test deducing the checker name from the report message. """
+        self.assertEqual(
+            self.parser.deduce_checker_name(
+                "heap-use-after-free on address 0x614000000044 at "
+                "pc 0x0000004f4b45 bp 0x7ffd40559120 sp 0x7ffd40559118"),
+            "AddressSanitizer.generic-error")
+        self.assertEqual(
+            self.parser.deduce_checker_name(
+                "calloc parameters overflow: count * size (-1 * 1000) "
+                "cannot be represented in type size_t (thread T0)"),
+            "AddressSanitizer.calloc-overflow")
+        self.assertEqual(
+            self.parser.deduce_checker_name("something unrecognised"),
+            "AddressSanitizer")