[libc++] Explicitly convert to masks in SIMD code #107983

philnik777 · 2024-09-10T08:21:22Z

This makes it clearer when we use masks and avoids MSan complaining.

llvmbot · 2024-09-17T10:05:26Z

@llvm/pr-subscribers-libcxx

Author: Nikolas Klauser (philnik777)

Changes

This makes it clearer when we use masks and avoids MSan complaining.

Full diff: https://github.com/llvm/llvm-project/pull/107983.diff

2 Files Affected:

(modified) libcxx/include/__algorithm/mismatch.h (+4-4)
(modified) libcxx/include/__algorithm/simd_utils.h (+50-27)

diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 0fae7f6e3fe323..043901d32798aa 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -77,7 +77,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
       }
 
       for (size_t __i = 0; __i != __unroll_count; ++__i) {
-        if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
+        if (auto __cmp_res = std::__as_mask(__lhs[__i] == __rhs[__i]); !std::__all_of(__cmp_res)) {
           auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
           return {__first1 + __offset, __first2 + __offset};
         }
@@ -89,7 +89,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
 
     // check the remaining 0-3 vectors
     while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
-      if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+      if (auto __cmp_res = std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
           !std::__all_of(__cmp_res)) {
         auto __offset = std::__find_first_not_set(__cmp_res);
         return {__first1 + __offset, __first2 + __offset};
@@ -106,8 +106,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
     if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
       __first1 = __last1 - __vec_size;
       __first2 = __last2 - __vec_size;
-      auto __offset =
-          std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+      auto __offset = std::__find_first_not_set(
+          std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)));
       return {__first1 + __offset, __first2 + __offset};
     } // else loop over the elements individually
   }
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index 56518dafa3193b..ec9840f60d87c0 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -116,42 +116,65 @@ template <class _VecT, class _Iter>
   }(make_index_sequence<__simd_vector_size_v<_VecT>>{});
 }
 
-template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
-  return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<bool, _Np> __vec) noexcept {
+  return __builtin_reduce_and(__vec);
 }
 
 template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
-  using __mask_vec = __simd_vector<bool, _Np>;
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __as_mask(__simd_vector<_Tp, _Np> __vec) noexcept {
+  static_assert(!is_same<_Tp, bool>::value, "vector type should not be a bool!");
+  return __builtin_convertvector(__vec, __simd_vector<bool, _Np>);
+}
 
-  // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
-  auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
-#  if defined(_LIBCPP_BIG_ENDIAN)
-    return std::min<size_t>(
-        _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
-#  else
-    return std::min<size_t>(
-        _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
-#  endif
-  };
-
-  if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {
-    return __impl(uint8_t{});
-  } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) {
-    return __impl(uint16_t{});
-  } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) {
-    return __impl(uint32_t{});
-  } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) {
-    return __impl(uint64_t{});
+// This uses __builtin_convertvector around the __builtin_shufflevector to work around #107981.
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __simd_vector<bool, 8>
+__extend_vector(__simd_vector<bool, _Np> __vec) noexcept {
+  using _VecT = __simd_vector<bool, _Np>;
+  if constexpr (_Np == 4) {
+    return __builtin_convertvector(
+        __builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3, 4, 5, 6, 7), __simd_vector<bool, 8>);
+  } else if constexpr (_Np == 2) {
+    return std::__extend_vector(
+        __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3), __simd_vector<bool, 4>));
+  } else if constexpr (_Np == 1) {
+    return std::__extend_vector(
+        __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1), __simd_vector<bool, 2>));
   } else {
-    static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type");
+    static_assert(sizeof(_VecT) == 0, "Unexpected vector size");
+  }
+}
+
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __to_int_mask(__simd_vector<bool, _Np> __vec) {
+  if constexpr (_Np < 8) {
+    return std::__bit_cast<uint8_t>(std::__extend_vector(__vec));
+  } else if constexpr (_Np == 8) {
+    return std::__bit_cast<uint8_t>(__vec);
+  } else if constexpr (_Np == 16) {
+    return std::__bit_cast<uint16_t>(__vec);
+  } else if constexpr (_Np == 32) {
+    return std::__bit_cast<uint32_t>(__vec);
+  } else if constexpr (_Np == 64) {
+    return std::__bit_cast<uint64_t>(__vec);
+  } else {
+    static_assert(sizeof(__simd_vector<bool, _Np>) == 0, "Unexpected vector size");
     return 0;
   }
 }
 
-template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<bool, _Np> __vec) noexcept {
+#  if defined(_LIBCPP_BIG_ENDIAN)
+  return std::min<size_t>(_Np, std::__countl_zero(std::__to_int_mask(__vec)));
+#  else
+  return std::min<size_t>(_Np, std::__countr_zero(std::__to_int_mask(__vec)));
+#  endif
+}
+
+template <size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<bool, _Np> __vec) noexcept {
   return std::__find_first_set(~__vec);
 }

vitalybuka · 2024-09-17T16:01:02Z

Probably unrelated, but this is the only algorithm patch in the range: https://lab.llvm.org/buildbot/#/builders/85/builds/1657/steps/11/logs/stdio

This reverts commit 1603f99. Reason: buildbot breakage, e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/2061 — tests listed: llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/ranges_mismatch.pass.cpp; ... (The buildbot re-run passed with the previous revision, 1fc288b.)

This makes it clearer when we use masks and avoids MSan complaining.

This reverts commit 1603f99. Reason: buildbot breakage, e.g., https://lab.llvm.org/buildbot/#/builders/55/builds/2061 — tests listed: llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp; llvm-libc++-shared.cfg.in :: std/algorithms/alg.nonmodifying/mismatch/ranges_mismatch.pass.cpp; ... (The buildbot re-run passed with the previous revision, 1fc288b.)

philnik777 force-pushed the explicit_mask_conversion branch from b40d7cd to b81af54 Compare September 11, 2024 06:54

philnik777 changed the title from ~~[libc++] Explicitly convert to masks~~ to [libc++] Explicitly convert to masks in SIMD code on Sep 11, 2024

philnik777 force-pushed the explicit_mask_conversion branch 2 times, most recently from 7007f4d to 3ea421a Compare September 13, 2024 11:11

[libc++] Explicitly convert to masks

01b1db7

philnik777 force-pushed the explicit_mask_conversion branch from 3ea421a to 01b1db7 Compare September 14, 2024 08:30

philnik777 marked this pull request as ready for review September 17, 2024 10:04

philnik777 requested a review from a team as a code owner September 17, 2024 10:04

philnik777 merged commit 1603f99 into llvm:main Sep 17, 2024
63 checks passed

philnik777 deleted the explicit_mask_conversion branch September 17, 2024 10:04

llvmbot added the "libc++" label (libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.) on Sep 17, 2024

tmsri pushed a commit to tmsri/llvm-project that referenced this pull request Sep 19, 2024

[libc++] Explicitly convert to masks in SIMD code (llvm#107983)

c68e40b

This makes it clearer when we use masks and avoids MSan complaining.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[libc++] Explicitly convert to masks in SIMD code #107983

[libc++] Explicitly convert to masks in SIMD code #107983

philnik777 commented Sep 10, 2024 •

edited

Loading

llvmbot commented Sep 17, 2024

vitalybuka commented Sep 17, 2024

[libc++] Explicitly convert to masks in SIMD code #107983

[libc++] Explicitly convert to masks in SIMD code #107983

Conversation

philnik777 commented Sep 10, 2024 • edited Loading

llvmbot commented Sep 17, 2024

vitalybuka commented Sep 17, 2024

philnik777 commented Sep 10, 2024 •

edited

Loading