Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc++] Explicitly convert to masks in SIMD code #107983

Merged
merged 1 commit into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions libcxx/include/__algorithm/mismatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
}

for (size_t __i = 0; __i != __unroll_count; ++__i) {
if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
if (auto __cmp_res = std::__as_mask(__lhs[__i] == __rhs[__i]); !std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
Expand All @@ -89,7 +89,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {

// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
if (auto __cmp_res = std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
Expand All @@ -106,8 +106,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
auto __offset =
std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
auto __offset = std::__find_first_not_set(
std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)));
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
Expand Down
77 changes: 50 additions & 27 deletions libcxx/include/__algorithm/simd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,42 +116,65 @@ template <class _VecT, class _Iter>
}(make_index_sequence<__simd_vector_size_v<_VecT>>{});
}

template <class _Tp, size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
// Returns true only when every element of the boolean mask vector is set.
// The mask is collapsed with an AND-reduction (__builtin_reduce_and), so this overload
// accepts bool vectors only; callers must pass an already-converted mask.
template <size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<bool, _Np> __vec) noexcept {
return __builtin_reduce_and(__vec);
}

template <class _Tp, size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
using __mask_vec = __simd_vector<bool, _Np>;
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __as_mask(__simd_vector<_Tp, _Np> __vec) noexcept {
static_assert(!is_same<_Tp, bool>::value, "vector type should not be a bool!");
return __builtin_convertvector(__vec, __simd_vector<bool, _Np>);
}

// This has MSan disabled due to https://github.com/llvm/llvm-project/issues/85876
auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
# if defined(_LIBCPP_BIG_ENDIAN)
return std::min<size_t>(
_Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
# else
return std::min<size_t>(
_Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
# endif
};

if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {
return __impl(uint8_t{});
} else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) {
return __impl(uint16_t{});
} else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) {
return __impl(uint32_t{});
} else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) {
return __impl(uint64_t{});
// Widens a boolean mask of 1, 2 or 4 elements to a mask of exactly 8 elements.
//
// Each step doubles the element count by shuffling __vec together with a value-initialized
// vector of the same type (_VecT{}): indices [0, _Np) select from __vec and the remaining
// indices select from _VecT{}. The 1- and 2-element cases recurse until 8 elements are reached.
// Any other _Np is rejected at compile time.
//
// This uses __builtin_convertvector around the __builtin_shufflevector to work around #107981.
template <size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __simd_vector<bool, 8>
__extend_vector(__simd_vector<bool, _Np> __vec) noexcept {
using _VecT = __simd_vector<bool, _Np>;
if constexpr (_Np == 4) {
// 4 -> 8: final step, no further recursion needed.
return __builtin_convertvector(
__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3, 4, 5, 6, 7), __simd_vector<bool, 8>);
} else if constexpr (_Np == 2) {
// 2 -> 4, then recurse to reach 8.
return std::__extend_vector(
__builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3), __simd_vector<bool, 4>));
} else if constexpr (_Np == 1) {
// 1 -> 2, then recurse to reach 8.
return std::__extend_vector(
__builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1), __simd_vector<bool, 2>));
} else {
// NOTE(review): __mask_vec is not declared anywhere in this function as shown; the next line
// looks like the removed side of the diff rendering rather than part of this definition — confirm.
static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type");
// sizeof(_VecT) == 0 is a type-dependent always-false condition, so it only fires when this branch is instantiated.
static_assert(sizeof(_VecT) == 0, "Unexpected vector size");
}
}

// Reinterprets a boolean mask vector as an unsigned integer via std::__bit_cast.
//
// The return type is deduced and depends on _Np:
//   _Np <  8 -> uint8_t (the mask is first widened to 8 elements with std::__extend_vector)
//   _Np == 8 -> uint8_t
//   _Np == 16 -> uint16_t
//   _Np == 32 -> uint32_t
//   _Np == 64 -> uint64_t
// Any other element count is rejected at compile time.
template <size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __to_int_mask(__simd_vector<bool, _Np> __vec) {
if constexpr (_Np < 8) {
// Masks narrower than 8 elements are widened first so they can be cast to uint8_t.
return std::__bit_cast<uint8_t>(std::__extend_vector(__vec));
} else if constexpr (_Np == 8) {
return std::__bit_cast<uint8_t>(__vec);
} else if constexpr (_Np == 16) {
return std::__bit_cast<uint16_t>(__vec);
} else if constexpr (_Np == 32) {
return std::__bit_cast<uint32_t>(__vec);
} else if constexpr (_Np == 64) {
return std::__bit_cast<uint64_t>(__vec);
} else {
static_assert(sizeof(__simd_vector<bool, _Np>) == 0, "Unexpected vector size");
// Presumably present so the deduced return type stays well-formed if this branch is instantiated — TODO confirm.
return 0;
}
}

template <class _Tp, size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
// Computes the position of the first set element of a boolean mask vector.
//
// The mask is converted to an integer with std::__to_int_mask and the zero bits before the
// first set bit are counted: leading zeros on big-endian targets, trailing zeros otherwise.
// The count is clamped to _Np, so the result never exceeds the number of elements in the mask
// (the integer form can be wider than _Np when the mask has fewer than 8 elements).
template <size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<bool, _Np> __vec) noexcept {
# if defined(_LIBCPP_BIG_ENDIAN)
return std::min<size_t>(_Np, std::__countl_zero(std::__to_int_mask(__vec)));
# else
return std::min<size_t>(_Np, std::__countr_zero(std::__to_int_mask(__vec)));
# endif
}

// Computes the position of the first element of a boolean mask vector that is NOT set.
// Implemented by inverting the mask with operator~ and delegating to std::__find_first_set,
// so the result follows that function's conventions (including the clamp to _Np).
template <size_t _Np>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<bool, _Np> __vec) noexcept {
return std::__find_first_set(~__vec);
}

Expand Down
Loading