From b347d20d20b583deaefd784604bf94dc87ae55a5 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 29 Oct 2024 10:49:36 +0100 Subject: [PATCH] Reapply "[libc++] Simplify the implementation of std::sort a bit (#104902)" This reverts commit ef44e4659878f256a46476efcf0398f3dd510f54. --- libcxx/include/__algorithm/comp.h | 3 + libcxx/include/__algorithm/ranges_minmax.h | 2 +- libcxx/include/__algorithm/sort.h | 285 ++++++++---------- libcxx/include/__functional/operations.h | 12 + .../include/__functional/ranges_operations.h | 6 + libcxx/include/__type_traits/desugars_to.h | 6 + .../__type_traits/is_trivially_copyable.h | 4 +- libcxx/src/algorithm.cpp | 3 +- 8 files changed, 150 insertions(+), 171 deletions(-) diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 1f38f5d2d99b43..ab3c598418828a 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -42,6 +42,9 @@ struct __less { } }; +template +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 4f2b2bf26382da..5f2e5cb2a1eeab 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -89,7 +89,7 @@ struct __minmax { // vectorize the code. if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value && - __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) { + __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { minmax_result<_ValueT> __result = {__r[0], __r[0]}; for (auto __e : __r) { if (__e < __result.min) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 0b2137dee2f77e..39868b8b6a30ae 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -27,11 +27,13 @@ #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -47,110 +49,11 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - _Ops::iter_swap(__y, __z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - _Ops::iter_swap(__x, __y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - _Ops::iter_swap(__x, __z); // x < y && y < z - __r = 1; - return __r; - } - _Ops::iter_swap(__x, __y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - _Ops::iter_swap(__y, __z); // x <= y && y < z - __r = 2; - } - return __r; -} // x <= y && y <= z - -// stable, 3-6 compares, 0-5 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__c(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__c(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } -} - -// stable, 4-10 compares, 0-9 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort5(_ForwardIterator __x1, - _ForwardIterator __x2, - _ForwardIterator __x3, - _ForwardIterator __x4, - _ForwardIterator __x5, - _Comp __comp) { - using _Ops = _IterOps<_AlgPolicy>; - - std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp); - if (__comp(*__x5, *__x4)) { - _Ops::iter_swap(__x4, __x5); - if (__comp(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__comp(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__comp(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } - } -} - -// The comparator being simple is a prerequisite for using the branchless optimization. -template -struct __is_simple_comparator : false_type {}; -template <> -struct __is_simple_comparator<__less<>&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -#if _LIBCPP_STD_VER >= 20 -template <> -struct __is_simple_comparator : true_type {}; -template <> -struct __is_simple_comparator : true_type {}; -#endif - template ::value_type> -using __use_branchless_sort = - integral_constant::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; +inline const bool __use_branchless_sort = + __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && + (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || + __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); namespace __detail { @@ -161,59 +64,88 @@ enum { __block_size = sizeof(uint64_t) * 8 }; // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template -inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; *__y = __r ? *__y : *__x; *__x = __tmp; + return !__r; } // Ensures that *__x, *__y and *__z are ordered according to the comparator __c, // under the assumption that *__y and *__z are already ordered. template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; - bool __r = __c(*__z, *__x); - value_type __tmp = __r ? *__z : *__x; - *__z = __r ? *__x : *__z; - __r = __c(__tmp, *__y); - *__x = __r ? *__x : *__y; - *__y = __r ? *__y : __tmp; + bool __r1 = __c(*__z, *__x); + value_type __tmp = __r1 ? *__z : *__x; + *__z = __r1 ? *__x : *__z; + bool __r2 = __c(__tmp, *__y); + *__x = __r2 ? *__x : *__y; + *__y = __r2 ? *__y : __tmp; + return !__r1 || !__r2; } +// stable, 2-3 compares, 0-2 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__cond_swap<_Compare>(__x2, __x3, __c); - std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { + bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c); + bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + return __swapped1 || __swapped2; } template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); -} + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return false; // x <= y && y <= z + // x <= y && y > z + _Ops::iter_swap(__y, __z); // x <= z && y < z + if (__c(*__y, *__x)) // if x > y + _Ops::iter_swap(__x, __y); // x < y && y <= z + return true; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + _Ops::iter_swap(__x, __z); // x < y && y < z + return true; + } + _Ops::iter_swap(__x, __y); // x > y && y <= z + // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + _Ops::iter_swap(__y, __z); // x <= y && y < z + return true; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x3, __c); std::__cond_swap<_Compare>(__x2, __x4, __c); std::__cond_swap<_Compare>(__x1, __x2, __c); @@ -224,27 +156,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { - std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__c(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__c(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } } +// stable, 4-10 compares, 0-9 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -256,16 +200,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { - std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>( - std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp); + if (__comp(*__x5, *__x4)) { + _Ops::iter_swap(__x4, __x5); + if (__comp(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__comp(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__comp(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } + } } // Assumes size > 0 @@ -355,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator _Ops::iter_swap(__first, __last); return true; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort4<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort5<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), @@ -373,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { @@ -780,14 +737,14 @@ void __introsort(_RandomAccessIterator __first, _Ops::iter_swap(__first, __last); return; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort4<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort5<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), @@ -928,10 +885,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no // branches in the comparator. - std::__introsort<_AlgPolicy, - _Comp&, - _RandomAccessIterator, - __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >( + __first, __last, __comp, __depth_limit); } template diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 6022bd679ed3e3..67d9da289aead3 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -362,6 +362,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); +template +inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value; @@ -377,6 +380,9 @@ struct _LIBCPP_TEMPLATE_VIS less { typedef void is_transparent; }; +template +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value; #endif @@ -446,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater); +template +inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS greater { @@ -457,6 +466,9 @@ struct _LIBCPP_TEMPLATE_VIS greater { } typedef void is_transparent; }; + +template +inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true; #endif // Logical operations diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index f023d765a6c8ab..df95843e7c9af6 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -102,6 +102,12 @@ inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = tru template inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true; +template +inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; + +template +inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index b0ce7c414e5d77..452c70bfbad66d 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -25,6 +25,12 @@ struct __equal_tag {}; // syntactically, the operation is equivalent to calling `a + b` struct __plus_tag {}; +// syntactically, the operation is equivalent to calling `a < b` +struct __less_tag {}; + +// syntactically, the operation is equivalent to calling `a > b` +struct __greater_tag {}; + // syntactically, the operation is equivalent to calling `a < b`, and these expressions // have to be true for any `a` and `b`: // - `(a < b) == (b > a)` diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h index e92af126ee94d9..8eb3ba7581af15 100644 --- a/libcxx/include/__type_traits/is_trivially_copyable.h +++ b/libcxx/include/__type_traits/is_trivially_copyable.h @@ -27,10 +27,8 @@ template inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp); #endif -#if _LIBCPP_STD_VER >= 20 template -inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t); -#endif +inline const bool __is_cheap_to_copy = __is_trivially_copyable(_Tp) && sizeof(_Tp) <= sizeof(std::intmax_t); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp index af9d60a8e271e8..a7c39b5e5183a4 100644 --- a/libcxx/src/algorithm.cpp +++ b/libcxx/src/algorithm.cpp @@ -21,8 +21,7 @@ void __sort(RandomAccessIterator first, RandomAccessIterator last, Comp comp) { std::__introsort<_ClassicAlgPolicy, ranges::less, RandomAccessIterator, - __use_branchless_sort::value>( - first, last, ranges::less{}, depth_limit); + __use_branchless_sort>(first, last, ranges::less{}, depth_limit); } // clang-format off