Passing all LIT tests
AntonRydahl committed Sep 27, 2023
1 parent 9447481 commit 96adadf
Showing 5 changed files with 23 additions and 18 deletions.
10 changes: 6 additions & 4 deletions libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
@@ -30,16 +30,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
 _LIBCPP_HIDE_FROM_ABI void
 __pstl_fill(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
-  // It is only safe to execute for_each on the GPU, if the execution policy is
+  // It is only safe to execute fill on the GPU, if the execution policy is
   // parallel unsequenced, as it is the only execution policy prohibiting throwing
   // exceptions and allowing SIMD instructions
   if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
                 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                 __libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
-    return std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
+    std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
   }
+  // Otherwise, we execute fill on the CPU instead
+  else {
+    std::__pstl_fill<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __value);
+  }
-  // Otherwise, we execute for_each on the CPU instead
-  return std::__pstl_fill<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __value);
 }
 
 _LIBCPP_END_NAMESPACE_STD
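In practice this gate means only exception-free, contiguous workloads ever reach the device. A minimal usage sketch from the caller's side (not part of the commit; it assumes a libc++ built with this experimental GPU backend and OpenMP offloading enabled):

#include <algorithm>
#include <execution>
#include <vector>

int main() {
  std::vector<int> v(1 << 20);
  // vector's iterators are contiguous and par_unseq forbids exceptions,
  // so this call satisfies the if constexpr gate and may be offloaded.
  std::fill(std::execution::par_unseq, v.begin(), v.end(), 42);
  // par is not an unsequenced policy, so this call takes the
  // __cpu_backend_tag fallback inside __pstl_fill.
  std::fill(std::execution::par, v.begin(), v.end(), 7);
}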
libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
@@ -35,10 +35,12 @@ __pstl_for_each(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __
   if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
                 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                 __libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
-    return std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
+    std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
   }
+  // Else we fall back to the CPU backend
+  else {
+    std::__pstl_for_each<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __func);
+  }
-  // Else if the execution policy is parallel, we execute for_each on the CPU instead
-  return std::__pstl_for_each<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __func);
 }
 
 _LIBCPP_END_NAMESPACE_STD
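The same three-part condition guards every algorithm in this backend, so it is worth reading once in isolation. A condensed sketch (the alias __can_offload_v is hypothetical and not in the commit; the three traits it combines are the real ones used above):

// Hypothetical summary of the offload gate used by fill and for_each:
template <class _ExecutionPolicy, class _Iterator>
inline constexpr bool __can_offload_v =
    // only par_unseq: it forbids exceptions and permits SIMD reordering
    __is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
    // constant-time iterator arithmetic, needed for __last - __first
    __has_random_access_iterator_category_or_concept<_Iterator>::value &&
    // contiguous memory, so the range can be mapped to the device as one buffer
    __libcpp_is_contiguous_iterator<_Iterator>::value;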
libcxx/include/__algorithm/pstl_backends/gpu_backends/stable_sort.h
@@ -28,7 +28,7 @@ template <class _ExecutionPolicy, class _RandomAccessIterator, class _Comp>
 _LIBCPP_HIDE_FROM_ABI void
 __pstl_stable_sort(__gpu_backend_tag, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
   // TODO: Implement GPU backend.
-  return __pstl_stable_sort<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __comp);
+  __pstl_stable_sort<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __comp);
 }
 
 _LIBCPP_END_NAMESPACE_STD
libcxx/include/__algorithm/pstl_backends/gpu_backends/transform.h
@@ -38,11 +38,12 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform(
   if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
                 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                 __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value &&
-                __libcpp_is_contiguous_iterator<_ForwardIterator>::value) {
-    // While the CPU backend captures by reference, [&], that is not valid when
-    // offloading to the GPU. Therefore we must capture by value, [=].
-    return std::__par_backend::__parallel_for_simd_2(__first, __last - __first, __result, __op);
+                __libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
+                __libcpp_is_contiguous_iterator<_ForwardOutIterator>::value) {
+    std::__par_backend::__parallel_for_simd_2(__first, __last - __first, __result, __op);
+    return __result + (__last - __first);
   }
+  // If it is not safe to offload to the GPU, we rely on the CPU backend.
   return std::__pstl_transform<_ExecutionPolicy>(__cpu_backend_tag{}, __first, __last, __result, __op);
 }
 
@@ -66,10 +67,10 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform(
                 __libcpp_is_contiguous_iterator<_ForwardIterator1>::value &&
                 __libcpp_is_contiguous_iterator<_ForwardIterator2>::value &&
                 __libcpp_is_contiguous_iterator<_ForwardOutIterator>::value) {
-    // While the CPU backend captures by reference, [&], that is not valid when
-    // offloading to the GPU. Therefore we must capture by value, [=].
-    return std::__par_backend::__parallel_for_simd_3(__first1, __last1 - __first1, __first2, __result, __op);
+    std::__par_backend::__parallel_for_simd_3(__first1, __last1 - __first1, __first2, __result, __op);
+    return __result + (__last1 - __first1);
   }
+  // If it is not safe to offload to the GPU, we rely on the CPU backend.
   return std::__pstl_transform<_ExecutionPolicy>(__cpu_backend_tag{}, __first1, __last1, __first2, __result, __op);
 }

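The substantive fix in this file is the return value: __pstl_transform is declared to return a _ForwardOutIterator, but the old GPU branch returned the result of the SIMD-loop helper rather than an iterator. Because the gate now also requires a contiguous output iterator, __result + (__last - __first) is exactly one past the last element written, which is what the std::transform contract requires. An illustrative caller-side check (not from the commit):

#include <algorithm>
#include <cassert>
#include <execution>
#include <vector>

int main() {
  std::vector<int> in(100, 1), out(100);
  // The operation captures nothing by reference; the GPU backend copies it
  // by value when offloading.
  auto end = std::transform(std::execution::par_unseq, in.begin(), in.end(),
                            out.begin(), [](int x) { return 2 * x; });
  // std::transform must return one past the last element written.
  assert(end == out.begin() + 100);
  return 0;
}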
@@ -31,13 +31,13 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 template <class _T1, class _T2, class _T3>
-struct __is_supported_reduction : std::false_type {};
+_LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction : std::false_type {};
 
 # define __PSTL_IS_SUPPORTED_REDUCTION(funname)                                                   \
   template <class _Tp>                                                                            \
-  struct __is_supported_reduction<std::funname<_Tp>, _Tp, _Tp> : std::true_type {};               \
+  _LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction<std::funname<_Tp>, _Tp, _Tp> : std::true_type {}; \
   template <class _Tp, class _Up>                                                                 \
-  struct __is_supported_reduction<std::funname<>, _Tp, _Up> : std::true_type {};
+  _LIBCPP_HIDE_FROM_ABI struct __is_supported_reduction<std::funname<>, _Tp, _Up> : std::true_type {};
 
 // __is_trivial_plus_operation already exists
 __PSTL_IS_SUPPORTED_REDUCTION(plus)
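The primary template defaults to std::false_type, and the macro stamps out two std::true_type specializations per functor: one for the typed form (std::plus<_Tp> combining _Tp with _Tp) and one for the transparent form (std::plus<> over any pair of types). Illustrative checks of how the trait resolves (assuming only plus has been registered, as above; __unregistered_op is a made-up functor):

static_assert(__is_supported_reduction<std::plus<int>, int, int>::value, "typed form");
static_assert(__is_supported_reduction<std::plus<>, int, long>::value, "transparent form");
struct __unregistered_op {};
static_assert(!__is_supported_reduction<__unregistered_op, int, int>::value, "primary template: false_type");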
