-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc++] Explicitly convert to masks in SIMD code #107983
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
philnik777
force-pushed
the
explicit_mask_conversion
branch
from
September 11, 2024 06:54
b40d7cd
to
b81af54
Compare
philnik777
changed the title
[libc++] Explicitly convert to masks
[libc++] Explicitly convert to masks in SIMD code
Sep 11, 2024
philnik777
force-pushed
the
explicit_mask_conversion
branch
2 times, most recently
from
September 13, 2024 11:11
7007f4d
to
3ea421a
Compare
philnik777
force-pushed
the
explicit_mask_conversion
branch
from
September 14, 2024 08:30
3ea421a
to
01b1db7
Compare
llvmbot
added
the
libc++
libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.
label
Sep 17, 2024
@llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) Changes: This makes it clearer when we use masks and avoids MSan complaining. Full diff: https://github.com/llvm/llvm-project/pull/107983.diff 2 Files Affected:
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 0fae7f6e3fe323..043901d32798aa 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -77,7 +77,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
}
for (size_t __i = 0; __i != __unroll_count; ++__i) {
- if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
+ if (auto __cmp_res = std::__as_mask(__lhs[__i] == __rhs[__i]); !std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -89,7 +89,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
- if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+ if (auto __cmp_res = std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -106,8 +106,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
- auto __offset =
- std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+ auto __offset = std::__find_first_not_set(
+ std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)));
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index 56518dafa3193b..ec9840f60d87c0 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -116,42 +116,65 @@ template <class _VecT, class _Iter>
}(make_index_sequence<__simd_vector_size_v<_VecT>>{});
}
-template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
- return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<bool, _Np> __vec) noexcept {
+ return __builtin_reduce_and(__vec);
}
template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
- using __mask_vec = __simd_vector<bool, _Np>;
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __as_mask(__simd_vector<_Tp, _Np> __vec) noexcept {
+ static_assert(!is_same<_Tp, bool>::value, "vector type should not be a bool!");
+ return __builtin_convertvector(__vec, __simd_vector<bool, _Np>);
+}
- // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
- auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
-# if defined(_LIBCPP_BIG_ENDIAN)
- return std::min<size_t>(
- _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
-# else
- return std::min<size_t>(
- _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
-# endif
- };
-
- if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {
- return __impl(uint8_t{});
- } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) {
- return __impl(uint16_t{});
- } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) {
- return __impl(uint32_t{});
- } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) {
- return __impl(uint64_t{});
+// This uses __builtin_convertvector around the __builtin_shufflevector to work around #107981.
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __simd_vector<bool, 8>
+__extend_vector(__simd_vector<bool, _Np> __vec) noexcept {
+ using _VecT = __simd_vector<bool, _Np>;
+ if constexpr (_Np == 4) {
+ return __builtin_convertvector(
+ __builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3, 4, 5, 6, 7), __simd_vector<bool, 8>);
+ } else if constexpr (_Np == 2) {
+ return std::__extend_vector(
+ __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3), __simd_vector<bool, 4>));
+ } else if constexpr (_Np == 1) {
+ return std::__extend_vector(
+ __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1), __simd_vector<bool, 2>));
} else {
- static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type");
+ static_assert(sizeof(_VecT) == 0, "Unexpected vector size");
+ }
+}
+
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __to_int_mask(__simd_vector<bool, _Np> __vec) {
+ if constexpr (_Np < 8) {
+ return std::__bit_cast<uint8_t>(std::__extend_vector(__vec));
+ } else if constexpr (_Np == 8) {
+ return std::__bit_cast<uint8_t>(__vec);
+ } else if constexpr (_Np == 16) {
+ return std::__bit_cast<uint16_t>(__vec);
+ } else if constexpr (_Np == 32) {
+ return std::__bit_cast<uint32_t>(__vec);
+ } else if constexpr (_Np == 64) {
+ return std::__bit_cast<uint64_t>(__vec);
+ } else {
+ static_assert(sizeof(__simd_vector<bool, _Np>) == 0, "Unexpected vector size");
return 0;
}
}
-template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<bool, _Np> __vec) noexcept {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ return std::min<size_t>(_Np, std::__countl_zero(std::__to_int_mask(__vec)));
+# else
+ return std::min<size_t>(_Np, std::__countr_zero(std::__to_int_mask(__vec)));
+# endif
+}
+
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<bool, _Np> __vec) noexcept {
return std::__find_first_set(~__vec);
}
|
Probably unrelated, but this is the only algorithm patch in the range: https://lab.llvm.org/buildbot/#/builders/85/builds/1657/steps/11/logs/stdio |
thurstond
added a commit
that referenced
this pull request
Sep 17, 2024
This reverts commit 1603f99. Reason: buildbot breakage, e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/2061 — failing tests include: llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/ranges_mismatch.pass.cpp; ... (Buildbot re-run passed with the previous revision, 1fc288b.)
hamphet
pushed a commit
to hamphet/llvm-project
that referenced
this pull request
Sep 18, 2024
This reverts commit 1603f99. Reason: buildbot breakage, e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/2061 — failing tests include: llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/ranges_mismatch.pass.cpp; ... (Buildbot re-run passed with the previous revision, 1fc288b.)
tmsri
pushed a commit
to tmsri/llvm-project
that referenced
this pull request
Sep 19, 2024
This makes it clearer when we use masks and avoids MSan complaining.
tmsri
pushed a commit
to tmsri/llvm-project
that referenced
this pull request
Sep 19, 2024
This reverts commit 1603f99. Reason: buildbot breakage, e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/2061 — failing tests include: llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/ranges_mismatch.pass.cpp; ... (Buildbot re-run passed with the previous revision, 1fc288b.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This makes it clearer when we use masks and avoids MSan complaining.