-
Notifications
You must be signed in to change notification settings - Fork 11.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ASan][libc++] Turn on ASan annotations for short strings (#79049)
Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp #ifndef NDEBUG // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); #endif ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short strings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Requires the following to pass CI without failures: - #75845 - #75858 Annotating `std::basic_string` with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. 
The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equal` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
- Loading branch information
Tacet
authored
Jan 23, 2024
1 parent
c416b2e
commit cb528ec
Showing
5 changed files
with
429 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
182 changes: 182 additions & 0 deletions
182
libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// REQUIRES: asan | ||
// UNSUPPORTED: c++03 | ||
|
||
#include <cassert> | ||
#include <string> | ||
#include <array> | ||
#include <deque> | ||
#include "test_macros.h" | ||
#include "asan_testing.h" | ||
#include "min_allocator.h" | ||
|
||
// This test exists to check if strings work well with deque. Since those | ||
// may be partially annotated, we cannot simply call | ||
// is_double_ended_contiguous_container_asan_correct, as it assumes that | ||
// object memory inside is not annotated, so we check everything in a more careful way. | ||
|
||
// Asserts is_string_asan_correct() for every element d[i] of the container, | ||
// with i running from 0 up to d.size(). | ||
template <typename D> | ||
void verify_inside(D const& d) { | ||
for (size_t i = 0; i < d.size(); ++i) { | ||
assert(is_string_asan_correct(d[i])); | ||
} | ||
} | ||
|
||
// Returns a default-constructed S onto which the character c has been | ||
// appended N times with push_back. | ||
template <typename S, size_t N> | ||
S get_s(char c) { | ||
S s; | ||
for (size_t i = 0; i < N; ++i) | ||
s.push_back(c); | ||
|
||
return s; | ||
} | ||
|
||
// Exercises container type C holding strings of type S: construction with an | ||
// element count, push/pop at both ends, resize of the elements, erase and | ||
// insert. The annotation checks are asserted after the mutations. | ||
template <class C, class S> | ||
void test_string() { | ||
// NOTE(review): this formula presumably mirrors the number of elements per | ||
// deque block in libc++ -- confirm against the deque implementation. | ||
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16; | ||
|
||
{ | ||
// Containers constructed with an element count of 1, N, N + 1 and 5 * N. | ||
C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N); | ||
verify_inside(d1a); | ||
verify_inside(d1b); | ||
verify_inside(d1c); | ||
verify_inside(d1d); | ||
} | ||
{ | ||
// Each iteration pushes strings of length 1 and 22 at the back and pops one | ||
// element from the front, so the container grows by one element per pass. | ||
C d2; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
d2.push_back(get_s<S, 1>(i % 10 + 'a')); | ||
verify_inside(d2); | ||
d2.push_back(get_s<S, 22>(i % 10 + 'b')); | ||
verify_inside(d2); | ||
|
||
d2.pop_front(); | ||
verify_inside(d2); | ||
} | ||
} | ||
{ | ||
// Mirror of the d2 case: strings of length 1 and 28 are pushed at the front | ||
// and one element is popped from the back. | ||
C d3; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
d3.push_front(get_s<S, 1>(i % 10 + 'a')); | ||
verify_inside(d3); | ||
d3.push_front(get_s<S, 28>(i % 10 + 'b')); | ||
verify_inside(d3); | ||
|
||
d3.pop_back(); | ||
verify_inside(d3); | ||
} | ||
} | ||
{ | ||
C d4; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
// When there is no SSO, all elements inside should not be poisoned, | ||
// so we can verify deque poisoning. | ||
d4.push_front(get_s<S, 33>(i % 10 + 'a')); | ||
verify_inside(d4); | ||
assert(is_double_ended_contiguous_container_asan_correct(d4)); | ||
d4.push_back(get_s<S, 28>(i % 10 + 'b')); | ||
verify_inside(d4); | ||
assert(is_double_ended_contiguous_container_asan_correct(d4)); | ||
} | ||
} | ||
{ | ||
C d5; | ||
for (size_t i = 0; i < 3 * N + 2; ++i) { | ||
// In d4 we never had poisoned memory inside deque. | ||
// Here we start with SSO, so part of the inside of the container, | ||
// will be poisoned. | ||
d5.push_front(S()); | ||
verify_inside(d5); | ||
} | ||
for (size_t i = 0; i < d5.size(); ++i) { | ||
// We change the size to have long string. | ||
// Memory owned by deque should not be poisoned by string. | ||
d5[i].resize(100); | ||
verify_inside(d5); | ||
} | ||
|
||
assert(is_double_ended_contiguous_container_asan_correct(d5)); | ||
|
||
// Erase one element near each end of the container. | ||
d5.erase(d5.begin() + 2); | ||
verify_inside(d5); | ||
|
||
d5.erase(d5.end() - 2); | ||
verify_inside(d5); | ||
|
||
assert(is_double_ended_contiguous_container_asan_correct(d5)); | ||
} | ||
{ | ||
// An empty container is checked first; then containers filled with N + 2 | ||
// copies of a string are drained with pop_back, checking the container | ||
// annotations after every pop. | ||
C d6a; | ||
assert(is_double_ended_contiguous_container_asan_correct(d6a)); | ||
|
||
C d6b(N + 2, get_s<S, 100>('a')); | ||
d6b.push_front(get_s<S, 101>('b')); | ||
while (!d6b.empty()) { | ||
d6b.pop_back(); | ||
assert(is_double_ended_contiguous_container_asan_correct(d6b)); | ||
} | ||
|
||
C d6c(N + 2, get_s<S, 102>('c')); | ||
while (!d6c.empty()) { | ||
d6c.pop_back(); | ||
assert(is_double_ended_contiguous_container_asan_correct(d6c)); | ||
} | ||
} | ||
{ | ||
// Single-element and multi-element inserts at positions near both ends and | ||
// in the interior, followed by an erase near each end. | ||
C d7(9 * N + 2); | ||
|
||
d7.insert(d7.begin() + 1, S()); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.end() - 3, S()); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a')); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b')); | ||
verify_inside(d7); | ||
|
||
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c')); | ||
verify_inside(d7); | ||
|
||
// It may not be short for big element types, but it will be checked correctly: | ||
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d')); | ||
verify_inside(d7); | ||
|
||
d7.erase(d7.begin() + 2); | ||
verify_inside(d7); | ||
|
||
d7.erase(d7.end() - 2); | ||
verify_inside(d7); | ||
} | ||
} | ||
|
||
// Runs test_string for std::deque<S> instantiated with three allocators: | ||
// std::allocator, min_allocator and safe_allocator. | ||
template <class S> | ||
void test_container() { | ||
test_string<std::deque<S, std::allocator<S>>, S>(); | ||
test_string<std::deque<S, min_allocator<S>>, S>(); | ||
test_string<std::deque<S, safe_allocator<S>>, S>(); | ||
} | ||
|
||
// Test entry point: runs test_container for each std::basic_string | ||
// specialization listed below and returns 0. | ||
int main(int, char**) { | ||
// Those tests support only types based on std::basic_string. | ||
test_container<std::string>(); | ||
test_container<std::wstring>(); | ||
// NOTE(review): the file is marked UNSUPPORTED: c++03, so this guard looks | ||
// redundant -- confirm before removing. | ||
#if TEST_STD_VER >= 11 | ||
test_container<std::u16string>(); | ||
test_container<std::u32string>(); | ||
#endif | ||
#if TEST_STD_VER >= 20 | ||
test_container<std::u8string>(); | ||
#endif | ||
|
||
return 0; | ||
} |
56 changes: 56 additions & 0 deletions
56
libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// REQUIRES: asan | ||
// UNSUPPORTED: c++03 | ||
|
||
// <string> | ||
|
||
// Basic test if ASan annotations work for short strings. | ||
|
||
#include <string> | ||
#include <cassert> | ||
#include <cstdlib> | ||
|
||
#include "asan_testing.h" | ||
#include "min_allocator.h" | ||
#include "test_iterators.h" | ||
#include "test_macros.h" | ||
|
||
extern "C" void __sanitizer_set_death_callback(void (*callback)(void)); | ||
|
||
// Callback that ends the process with exit status 0. | ||
void do_exit() { exit(0); } | ||
|
||
// Test entry point. The first scope performs an insert that must not be | ||
// reported by ASan; the second scope performs a read that is expected to be | ||
// reported. | ||
int main(int, char**) { | ||
{ | ||
typedef cpp17_input_iterator<char*> MyInputIter; | ||
// Should not trigger ASan. | ||
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v; | ||
char i[] = {'a', 'b', 'c', 'd'}; | ||
|
||
v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4)); | ||
assert(v[0] == 'a'); | ||
assert(is_string_asan_correct(v)); | ||
} | ||
|
||
// Register do_exit (which calls exit(0)) with the sanitizer runtime. | ||
// NOTE(review): presumably this makes the expected ASan report below end the | ||
// process with status 0 instead of a failure -- confirm against the | ||
// sanitizer interface documentation. | ||
__sanitizer_set_death_callback(do_exit); | ||
{ | ||
using T = char; | ||
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>; | ||
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'}; | ||
C c(std::begin(t), std::end(t)); | ||
assert(is_string_asan_correct(c)); | ||
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != | ||
0); | ||
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away. | ||
assert(false); // if we got here, ASAN didn't trigger | ||
((void)foo); | ||
} | ||
|
||
return 0; | ||
} |
Oops, something went wrong.
cb528ec
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reverted in a16f81f due to buildbot breakage (https://lab.llvm.org/buildbot/#/builders/5/builds/40364):