From bfac501403960ebb0cedacf4a8a06a4b6d5d67e1 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 4 Aug 2024 10:02:43 +0200 Subject: [PATCH] [libc++] Optimize lexicographical_compare (#65279) If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. Benchmarks: ``` ------------------------------------------------------------------------------------- Benchmark old new ------------------------------------------------------------------------------------- bm_lexicographical_compare/1 1.17 ns 2.34 ns bm_lexicographical_compare/2 1.64 ns 2.57 ns bm_lexicographical_compare/3 2.23 ns 2.58 ns bm_lexicographical_compare/4 2.82 ns 2.57 ns bm_lexicographical_compare/5 3.34 ns 2.11 ns bm_lexicographical_compare/6 3.94 ns 2.21 ns bm_lexicographical_compare/7 4.56 ns 2.11 ns bm_lexicographical_compare/8 5.25 ns 2.11 ns bm_lexicographical_compare/16 9.88 ns 2.11 ns bm_lexicographical_compare/64 38.9 ns 2.36 ns bm_lexicographical_compare/512 317 ns 6.54 ns bm_lexicographical_compare/4096 2517 ns 41.4 ns bm_lexicographical_compare/32768 20052 ns 488 ns bm_lexicographical_compare/262144 159579 ns 4409 ns bm_lexicographical_compare/1048576 640456 ns 20342 ns bm_lexicographical_compare/1 1.18 ns 2.37 ns bm_lexicographical_compare/2 1.65 ns 2.60 ns bm_lexicographical_compare/3 2.23 ns 2.83 ns bm_lexicographical_compare/4 2.81 ns 3.06 ns bm_lexicographical_compare/5 3.35 ns 3.30 ns bm_lexicographical_compare/6 3.90 ns 3.99 ns bm_lexicographical_compare/7 4.56 ns 3.78 ns 
bm_lexicographical_compare/8 5.20 ns 4.02 ns bm_lexicographical_compare/16 9.80 ns 6.21 ns bm_lexicographical_compare/64 39.0 ns 3.16 ns bm_lexicographical_compare/512 318 ns 7.58 ns bm_lexicographical_compare/4096 2514 ns 47.4 ns bm_lexicographical_compare/32768 20096 ns 504 ns bm_lexicographical_compare/262144 156617 ns 4146 ns bm_lexicographical_compare/1048576 624265 ns 19810 ns bm_lexicographical_compare/1 1.15 ns 2.12 ns bm_lexicographical_compare/2 1.60 ns 2.36 ns bm_lexicographical_compare/3 2.21 ns 2.59 ns bm_lexicographical_compare/4 2.74 ns 2.83 ns bm_lexicographical_compare/5 3.26 ns 3.06 ns bm_lexicographical_compare/6 3.81 ns 4.53 ns bm_lexicographical_compare/7 4.41 ns 4.72 ns bm_lexicographical_compare/8 5.08 ns 2.36 ns bm_lexicographical_compare/16 9.54 ns 3.08 ns bm_lexicographical_compare/64 37.8 ns 4.71 ns bm_lexicographical_compare/512 309 ns 24.6 ns bm_lexicographical_compare/4096 2422 ns 204 ns bm_lexicographical_compare/32768 19362 ns 1947 ns bm_lexicographical_compare/262144 155727 ns 19793 ns bm_lexicographical_compare/1048576 623614 ns 80180 ns bm_ranges_lexicographical_compare/1 1.07 ns 2.35 ns bm_ranges_lexicographical_compare/2 1.72 ns 2.13 ns bm_ranges_lexicographical_compare/3 2.46 ns 2.12 ns bm_ranges_lexicographical_compare/4 3.17 ns 2.12 ns bm_ranges_lexicographical_compare/5 3.86 ns 2.12 ns bm_ranges_lexicographical_compare/6 4.55 ns 2.12 ns bm_ranges_lexicographical_compare/7 5.25 ns 2.12 ns bm_ranges_lexicographical_compare/8 5.95 ns 2.13 ns bm_ranges_lexicographical_compare/16 11.7 ns 2.13 ns bm_ranges_lexicographical_compare/64 45.5 ns 2.36 ns bm_ranges_lexicographical_compare/512 366 ns 6.35 ns bm_ranges_lexicographical_compare/4096 2886 ns 40.9 ns bm_ranges_lexicographical_compare/32768 23054 ns 489 ns bm_ranges_lexicographical_compare/262144 185302 ns 4339 ns bm_ranges_lexicographical_compare/1048576 741576 ns 19430 ns bm_ranges_lexicographical_compare/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare/2 1.66 ns 2.35 ns 
bm_ranges_lexicographical_compare/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare/4 2.82 ns 2.82 ns bm_ranges_lexicographical_compare/5 3.34 ns 3.06 ns bm_ranges_lexicographical_compare/6 3.92 ns 3.99 ns bm_ranges_lexicographical_compare/7 4.64 ns 4.10 ns bm_ranges_lexicographical_compare/8 5.21 ns 4.61 ns bm_ranges_lexicographical_compare/16 9.79 ns 7.42 ns bm_ranges_lexicographical_compare/64 38.9 ns 2.93 ns bm_ranges_lexicographical_compare/512 317 ns 7.31 ns bm_ranges_lexicographical_compare/4096 2500 ns 47.5 ns bm_ranges_lexicographical_compare/32768 19940 ns 496 ns bm_ranges_lexicographical_compare/262144 159166 ns 4393 ns bm_ranges_lexicographical_compare/1048576 638206 ns 19786 ns bm_ranges_lexicographical_compare/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare/2 1.64 ns 3.04 ns bm_ranges_lexicographical_compare/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare/4 2.81 ns 2.81 ns bm_ranges_lexicographical_compare/5 3.35 ns 3.05 ns bm_ranges_lexicographical_compare/6 3.94 ns 4.60 ns bm_ranges_lexicographical_compare/7 4.60 ns 4.81 ns bm_ranges_lexicographical_compare/8 5.19 ns 2.35 ns bm_ranges_lexicographical_compare/16 9.85 ns 2.87 ns bm_ranges_lexicographical_compare/64 38.9 ns 4.70 ns bm_ranges_lexicographical_compare/512 318 ns 24.5 ns bm_ranges_lexicographical_compare/4096 2494 ns 202 ns bm_ranges_lexicographical_compare/32768 20000 ns 1939 ns bm_ranges_lexicographical_compare/262144 160433 ns 19730 ns bm_ranges_lexicographical_compare/1048576 642636 ns 80760 ns ``` --- libcxx/docs/ReleaseNotes/20.rst | 3 +- libcxx/include/__algorithm/comp.h | 3 +- .../__algorithm/lexicographical_compare.h | 93 ++++++++++++++++--- .../ranges_lexicographical_compare.h | 27 +++--- libcxx/include/__algorithm/ranges_minmax.h | 2 +- libcxx/include/__functional/operations.h | 5 +- .../include/__functional/ranges_operations.h | 2 +- .../include/__string/constexpr_c_functions.h | 4 +- libcxx/include/__type_traits/desugars_to.h | 15 ++- 
...s_trivially_lexicographically_comparable.h | 20 +++- libcxx/test/benchmarks/CMakeLists.txt | 1 + .../lexicographical_compare.bench.cpp | 44 +++++++++ .../test/libcxx/transitive_includes/cxx03.csv | 8 ++ .../test/libcxx/transitive_includes/cxx11.csv | 8 ++ .../test/libcxx/transitive_includes/cxx14.csv | 8 ++ .../test/libcxx/transitive_includes/cxx17.csv | 8 ++ .../test/libcxx/transitive_includes/cxx20.csv | 8 ++ .../test/libcxx/transitive_includes/cxx23.csv | 6 ++ .../test/libcxx/transitive_includes/cxx26.csv | 6 ++ .../lexicographical_compare.pass.cpp | 90 +++++++----------- .../sequences/array/compare.verify.cpp | 4 - 21 files changed, 269 insertions(+), 96 deletions(-) create mode 100755 libcxx/test/benchmarks/algorithms/lexicographical_compare.bench.cpp diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 960fdd7ce05626..b319067394099a 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -44,7 +44,8 @@ Implemented Papers Improvements and New Features ----------------------------- -- TODO +- The ``lexicographical_compare`` and ``ranges::lexicographical_compare`` algorithms have been optimized for trivially + equality comparable types, resulting in a performance improvement of up to 40x. 
Deprecations and Removals diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index a0fa88d6d2acd3..1f38f5d2d99b43 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -11,6 +11,7 @@ #include <__config> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_integral.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -42,7 +43,7 @@ struct __less { }; template -inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; +inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/lexicographical_compare.h b/libcxx/include/__algorithm/lexicographical_compare.h index edc29e269c88ca..df23e6a612c1a5 100644 --- a/libcxx/include/__algorithm/lexicographical_compare.h +++ b/libcxx/include/__algorithm/lexicographical_compare.h @@ -10,32 +10,93 @@ #define _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H #include <__algorithm/comp.h> -#include <__algorithm/comp_ref_type.h> +#include <__algorithm/min.h> +#include <__algorithm/mismatch.h> +#include <__algorithm/simd_utils.h> +#include <__algorithm/unwrap_iter.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> +#include <__string/constexpr_c_functions.h> +#include <__type_traits/desugars_to.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_equality_comparable.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_trivially_lexicographically_comparable.h> +#include <__type_traits/is_volatile.h> + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# include +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __lexicographical_compare( - 
_InputIterator1 __first1, - _InputIterator1 __last1, - _InputIterator2 __first2, - _InputIterator2 __last2, - _Compare __comp) { - for (; __first2 != __last2; ++__first1, (void)++__first2) { - if (__first1 == __last1 || __comp(*__first1, *__first2)) + _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) { + while (__first2 != __last2) { + if (__first1 == __last1 || + std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) return true; - if (__comp(*__first2, *__first1)) + if (std::__invoke(__comp, std::__invoke(__proj2, *__first2), std::__invoke(__proj1, *__first1))) return false; + ++__first1; + ++__first2; } return false; } +#if _LIBCPP_STD_VER >= 14 + +// If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison +// on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the +// user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, +// which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically +// comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. 
+template && !is_volatile<_Tp>::value && + __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value && + __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, + int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +__lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Comp&, _Proj1&, _Proj2&) { + if constexpr (__is_trivially_lexicographically_comparable_v<_Tp, _Tp>) { + auto __res = + std::__constexpr_memcmp(__first1, __first2, __element_count(std::min(__last1 - __first1, __last2 - __first2))); + if (__res == 0) + return __last1 - __first1 < __last2 - __first2; + return __res < 0; + } +# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + else if constexpr (is_same<__remove_cv_t<_Tp>, wchar_t>::value) { + auto __res = std::__constexpr_wmemcmp(__first1, __first2, std::min(__last1 - __first1, __last2 - __first2)); + if (__res == 0) + return __last1 - __first1 < __last2 - __first2; + return __res < 0; + } +# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS + else { + auto __res = std::mismatch(__first1, __last1, __first2, __last2); + if (__res.second == __last2) + return false; + if (__res.first == __last1) + return true; + return *__res.first < *__res.second; + } +} + +#endif // _LIBCPP_STD_VER >= 14 + template _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( _InputIterator1 __first1, @@ -43,7 +104,15 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo _InputIterator2 __first2, _InputIterator2 __last2, _Compare __comp) { - return std::__lexicographical_compare<__comp_ref_type<_Compare> >(__first1, __last1, __first2, __last2, __comp); + __identity __proj; + return std::__lexicographical_compare( + std::__unwrap_iter(__first1), + std::__unwrap_iter(__last1), + std::__unwrap_iter(__first2), + std::__unwrap_iter(__last2), + __comp, + __proj, + __proj); } template @@ -54,4 +123,6 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 boo _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H diff --git a/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/libcxx/include/__algorithm/ranges_lexicographical_compare.h index 024cc6b707cab1..ec12b0cc29acef 100644 --- a/libcxx/include/__algorithm/ranges_lexicographical_compare.h +++ b/libcxx/include/__algorithm/ranges_lexicographical_compare.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H #define _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H +#include <__algorithm/lexicographical_compare.h> +#include <__algorithm/unwrap_range.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -33,7 +35,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { struct __lexicographical_compare { template - _LIBCPP_HIDE_FROM_ABI constexpr static bool __lexicographical_compare_impl( + static _LIBCPP_HIDE_FROM_ABI constexpr bool __lexicographical_compare_unwrap( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -41,15 +43,16 @@ struct __lexicographical_compare { _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) { - while (__first2 != __last2) { - if (__first1 == __last1 || std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) - return true; - if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) - return false; - ++__first1; - ++__first2; - } - return false; + auto [__first1_un, __last1_un] = std::__unwrap_range(std::move(__first1), std::move(__last1)); + auto [__first2_un, __last2_un] = std::__unwrap_range(std::move(__first2), std::move(__last2)); + return std::__lexicographical_compare( + std::move(__first1_un), + std::move(__last1_un), + std::move(__first2_un), + std::move(__last2_un), + __comp, + __proj1, + __proj2); } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Range1&& __range1, _Range2&& __range2, _Comp 
__comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - return __lexicographical_compare_impl( + return __lexicographical_compare_unwrap( ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 1b43b1e19cdec9..9b8551d2213400 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -88,7 +88,7 @@ struct __minmax { // vectorize the code. if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value && - __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { + __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) { minmax_result<_ValueT> __result = {__r[0], __r[0]}; for (auto __e : __r) { if (__e < __result.min) diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 0a6320f19de3f3..6022bd679ed3e3 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -14,6 +14,7 @@ #include <__functional/binary_function.h> #include <__functional/unary_function.h> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_integral.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -362,7 +363,7 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); template -inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; +inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value; #if _LIBCPP_STD_VER >= 14 template <> @@ -377,7 +378,7 @@ struct _LIBCPP_TEMPLATE_VIS less { }; template -inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true; +inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value; #endif 
#if _LIBCPP_STD_VER >= 14 diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 27f06eadd0eb11..f023d765a6c8ab 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -100,7 +100,7 @@ template inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true; template -inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; +inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true; #endif // _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__string/constexpr_c_functions.h b/libcxx/include/__string/constexpr_c_functions.h index a978f816f18978..32fc06e121b362 100644 --- a/libcxx/include/__string/constexpr_c_functions.h +++ b/libcxx/include/__string/constexpr_c_functions.h @@ -64,13 +64,13 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 size_t __constexpr_st return __builtin_strlen(reinterpret_cast(__str)); } -// Because of __libcpp_is_trivially_lexicographically_comparable we know that comparing the object representations is +// Because of __is_trivially_lexicographically_comparable_v we know that comparing the object representations is // equivalent to a std::memcmp. Since we have multiple objects contiguously in memory, we can call memcmp once instead // of invoking it on every object individually. 
template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __constexpr_memcmp(const _Tp* __lhs, const _Up* __rhs, __element_count __n) { - static_assert(__libcpp_is_trivially_lexicographically_comparable<_Tp, _Up>::value, + static_assert(__is_trivially_lexicographically_comparable_v<_Tp, _Up>, "_Tp and _Up have to be trivially lexicographically comparable"); auto __count = static_cast(__n); diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index 97a2ee5448f203..b0ce7c414e5d77 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -17,10 +17,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// Tags to represent the canonical operations +// Tags to represent the canonical operations. + +// syntactically, the operation is equivalent to calling `a == b` struct __equal_tag {}; + +// syntactically, the operation is equivalent to calling `a + b` struct __plus_tag {}; -struct __less_tag {}; + +// syntactically, the operation is equivalent to calling `a < b`, and these expressions +// have to be true for any `a` and `b`: +// - `(a < b) == (b > a)` +// - `(!(a < b) && !(b < a)) == (a == b)` +// For example, this is satisfied for std::less on integral types, but also for ranges::less on all types due to +// additional semantic requirements on that operation. +struct __totally_ordered_less_tag {}; // This class template is used to determine whether an operation "desugars" // (or boils down) to a given canonical operation. 
diff --git a/libcxx/include/__type_traits/is_trivially_lexicographically_comparable.h b/libcxx/include/__type_traits/is_trivially_lexicographically_comparable.h index a310ea1b87e30c..337f878fea5c1d 100644 --- a/libcxx/include/__type_traits/is_trivially_lexicographically_comparable.h +++ b/libcxx/include/__type_traits/is_trivially_lexicographically_comparable.h @@ -16,6 +16,7 @@ #include <__type_traits/remove_cv.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -40,13 +41,22 @@ _LIBCPP_BEGIN_NAMESPACE_STD // unsigned integer types with sizeof(T) > 1: depending on the endianness, the LSB might be the first byte to be // compared. This means that when comparing unsigned(129) and unsigned(2) // using memcmp(), the result would be that 2 > 129. -// TODO: Do we want to enable this on big-endian systems? + +template +inline const bool __is_std_byte_v = false; + +#if _LIBCPP_STD_VER >= 17 +template <> +inline const bool __is_std_byte_v = true; +#endif template -struct __libcpp_is_trivially_lexicographically_comparable - : integral_constant, __remove_cv_t<_Up> >::value && sizeof(_Tp) == 1 && - is_unsigned<_Tp>::value> {}; +inline const bool __is_trivially_lexicographically_comparable_v = + is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value && +#ifdef _LIBCPP_LITTLE_ENDIAN + sizeof(_Tp) == 1 && +#endif + (is_unsigned<_Tp>::value || __is_std_byte_v<_Tp>); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/benchmarks/CMakeLists.txt b/libcxx/test/benchmarks/CMakeLists.txt index d61367a3677388..616cf0ff8d2374 100644 --- a/libcxx/test/benchmarks/CMakeLists.txt +++ b/libcxx/test/benchmarks/CMakeLists.txt @@ -114,6 +114,7 @@ set(BENCHMARK_TESTS algorithms/find.bench.cpp algorithms/fill.bench.cpp algorithms/for_each.bench.cpp + algorithms/lexicographical_compare.bench.cpp algorithms/lower_bound.bench.cpp algorithms/make_heap.bench.cpp 
algorithms/make_heap_then_sort_heap.bench.cpp diff --git a/libcxx/test/benchmarks/algorithms/lexicographical_compare.bench.cpp b/libcxx/test/benchmarks/algorithms/lexicographical_compare.bench.cpp new file mode 100755 index 00000000000000..0c545263109d29 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/lexicographical_compare.bench.cpp @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +// Benchmarks the worst case: check the whole range just to find out that they compare equal +template +static void bm_lexicographical_compare(benchmark::State& state) { + std::vector vec1(state.range(), '1'); + std::vector vec2(state.range(), '1'); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(vec2); + benchmark::DoNotOptimize(std::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end())); + } +} +BENCHMARK(bm_lexicographical_compare)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_lexicographical_compare)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_lexicographical_compare)->DenseRange(1, 8)->Range(16, 1 << 20); + +template +static void bm_ranges_lexicographical_compare(benchmark::State& state) { + std::vector vec1(state.range(), '1'); + std::vector vec2(state.range(), '1'); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(vec2); + benchmark::DoNotOptimize(std::ranges::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end())); + } +} +BENCHMARK(bm_ranges_lexicographical_compare)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_ranges_lexicographical_compare)->DenseRange(1, 
8)->Range(16, 1 << 20); +BENCHMARK(bm_ranges_lexicographical_compare)->DenseRange(1, 8)->Range(16, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 51e659f52000b4..3bf39ea17c9129 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -40,7 +40,9 @@ array algorithm array compare array concepts array cstddef +array cstdint array cstdlib +array cwchar array initializer_list array iterator array limits @@ -291,6 +293,7 @@ forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring +forward_list cwchar forward_list functional forward_list initializer_list forward_list iosfwd @@ -449,6 +452,7 @@ list cstddef list cstdint list cstdlib list cstring +list cwchar list functional list initializer_list list iosfwd @@ -489,7 +493,9 @@ locale version map compare map concepts map cstddef +map cstdint map cstdlib +map cwchar map functional map initializer_list map iterator @@ -723,7 +729,9 @@ semaphore version set compare set concepts set cstddef +set cstdint set cstdlib +set cwchar set functional set initializer_list set iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 17e85e982729cf..49125486cfcf6e 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -40,7 +40,9 @@ array algorithm array compare array concepts array cstddef +array cstdint array cstdlib +array cwchar array initializer_list array iterator array limits @@ -292,6 +294,7 @@ forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring +forward_list cwchar forward_list functional forward_list initializer_list forward_list iosfwd @@ -452,6 +455,7 @@ list cstddef list cstdint list cstdlib list cstring +list cwchar list functional list initializer_list list iosfwd @@ 
-493,7 +497,9 @@ locale version map compare map concepts map cstddef +map cstdint map cstdlib +map cwchar map functional map initializer_list map iterator @@ -729,7 +735,9 @@ semaphore version set compare set concepts set cstddef +set cstdint set cstdlib +set cwchar set functional set initializer_list set iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 8aed93da9e6ccf..28dfb320fe06c9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -41,7 +41,9 @@ array algorithm array compare array concepts array cstddef +array cstdint array cstdlib +array cwchar array initializer_list array iterator array limits @@ -295,6 +297,7 @@ forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring +forward_list cwchar forward_list functional forward_list initializer_list forward_list iosfwd @@ -455,6 +458,7 @@ list cstddef list cstdint list cstdlib list cstring +list cwchar list functional list initializer_list list iosfwd @@ -496,7 +500,9 @@ locale version map compare map concepts map cstddef +map cstdint map cstdlib +map cwchar map functional map initializer_list map iterator @@ -732,7 +738,9 @@ semaphore version set compare set concepts set cstddef +set cstdint set cstdlib +set cwchar set functional set initializer_list set iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index 2c028462144eee..5b7b6cecf73f89 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -41,7 +41,9 @@ array algorithm array compare array concepts array cstddef +array cstdint array cstdlib +array cwchar array initializer_list array iterator array limits @@ -295,6 +297,7 @@ forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring +forward_list cwchar forward_list functional 
forward_list initializer_list forward_list iosfwd @@ -455,6 +458,7 @@ list cstddef list cstdint list cstdlib list cstring +list cwchar list functional list initializer_list list iosfwd @@ -496,7 +500,9 @@ locale version map compare map concepts map cstddef +map cstdint map cstdlib +map cwchar map functional map initializer_list map iterator @@ -733,7 +739,9 @@ semaphore version set compare set concepts set cstddef +set cstdint set cstdlib +set cwchar set functional set initializer_list set iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 982c2013e34170..84ea6433fb12df 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -40,7 +40,9 @@ array algorithm array compare array concepts array cstddef +array cstdint array cstdlib +array cwchar array initializer_list array iterator array limits @@ -303,6 +305,7 @@ forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring +forward_list cwchar forward_list functional forward_list initializer_list forward_list iosfwd @@ -462,6 +465,7 @@ list cstddef list cstdint list cstdlib list cstring +list cwchar list functional list initializer_list list iosfwd @@ -503,7 +507,9 @@ locale version map compare map concepts map cstddef +map cstdint map cstdlib +map cwchar map functional map initializer_list map iterator @@ -741,7 +747,9 @@ semaphore version set compare set concepts set cstddef +set cstdint set cstdlib +set cwchar set functional set initializer_list set iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index 8ffb71d8b566b0..f341fb2c29d33e 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -23,6 +23,7 @@ any version array compare array cstddef array cstdint +array cwchar array initializer_list array limits array new @@ 
-199,6 +200,8 @@ format typeinfo format version forward_list compare forward_list cstddef +forward_list cstdint +forward_list cwchar forward_list initializer_list forward_list limits forward_list new @@ -312,6 +315,7 @@ list compare list cstddef list cstdint list cstring +list cwchar list initializer_list list limits list new @@ -340,6 +344,7 @@ locale version map compare map cstddef map cstdint +map cwchar map initializer_list map limits map new @@ -498,6 +503,7 @@ semaphore version set compare set cstddef set cstdint +set cwchar set initializer_list set limits set new diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index 8ffb71d8b566b0..f341fb2c29d33e 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -23,6 +23,7 @@ any version array compare array cstddef array cstdint +array cwchar array initializer_list array limits array new @@ -199,6 +200,8 @@ format typeinfo format version forward_list compare forward_list cstddef +forward_list cstdint +forward_list cwchar forward_list initializer_list forward_list limits forward_list new @@ -312,6 +315,7 @@ list compare list cstddef list cstdint list cstring +list cwchar list initializer_list list limits list new @@ -340,6 +344,7 @@ locale version map compare map cstddef map cstdint +map cwchar map initializer_list map limits map new @@ -498,6 +503,7 @@ semaphore version set compare set cstddef set cstdint +set cwchar set initializer_list set limits set new diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp index 10e45df7cf9aa3..2cf675476026cd 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp @@ 
-20,66 +20,48 @@ #include "test_macros.h" #include "test_iterators.h" -#if TEST_STD_VER > 17 -TEST_CONSTEXPR bool test_constexpr() { - int ia[] = {1, 2, 3}; - int ib[] = {1, 3, 5, 2, 4, 6}; +template +struct Test { + template + TEST_CONSTEXPR_CXX20 void operator()() { + T ia[] = {1, 2, 3, 4}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + T ib[] = {1, 2, 3}; + assert(!std::lexicographical_compare(Iter1(ia), Iter1(ia + sa), Iter2(ib), Iter2(ib + 2))); + assert(std::lexicographical_compare(Iter1(ib), Iter1(ib + 2), Iter2(ia), Iter2(ia + sa))); + assert(!std::lexicographical_compare(Iter1(ia), Iter1(ia + sa), Iter2(ib), Iter2(ib + 3))); + assert(std::lexicographical_compare(Iter1(ib), Iter1(ib + 3), Iter2(ia), Iter2(ia + sa))); + assert(std::lexicographical_compare(Iter1(ia), Iter1(ia + sa), Iter2(ib + 1), Iter2(ib + 3))); + assert(!std::lexicographical_compare(Iter1(ib + 1), Iter1(ib + 3), Iter2(ia), Iter2(ia + sa))); + } +}; - return std::lexicographical_compare(std::begin(ia), std::end(ia), std::begin(ib), std::end(ib)) - && !std::lexicographical_compare(std::begin(ib), std::end(ib), std::begin(ia), std::end(ia)) - ; - } -#endif - -template -void -test() -{ - int ia[] = {1, 2, 3, 4}; - const unsigned sa = sizeof(ia)/sizeof(ia[0]); - int ib[] = {1, 2, 3}; - assert(!std::lexicographical_compare(Iter1(ia), Iter1(ia+sa), Iter2(ib), Iter2(ib+2))); - assert( std::lexicographical_compare(Iter1(ib), Iter1(ib+2), Iter2(ia), Iter2(ia+sa))); - assert(!std::lexicographical_compare(Iter1(ia), Iter1(ia+sa), Iter2(ib), Iter2(ib+3))); - assert( std::lexicographical_compare(Iter1(ib), Iter1(ib+3), Iter2(ia), Iter2(ia+sa))); - assert( std::lexicographical_compare(Iter1(ia), Iter1(ia+sa), Iter2(ib+1), Iter2(ib+3))); - assert(!std::lexicographical_compare(Iter1(ib+1), Iter1(ib+3), Iter2(ia), Iter2(ia+sa))); -} +template +struct TestIter { + template + TEST_CONSTEXPR_CXX20 bool operator()() { + types::for_each(types::cpp17_input_iterator_list(), Test()); -int main(int, char**) -{ 
- test, cpp17_input_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, const int*>(); + return true; + } +}; - test, cpp17_input_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, const int*>(); - - test, cpp17_input_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, const int*>(); +TEST_CONSTEXPR_CXX20 bool test() { + types::for_each(types::cpp17_input_iterator_list(), TestIter()); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + types::for_each(types::cpp17_input_iterator_list(), TestIter()); +#endif + types::for_each(types::cpp17_input_iterator_list(), TestIter()); + types::for_each(types::cpp17_input_iterator_list(), TestIter()); - test, cpp17_input_iterator >(); - test, forward_iterator >(); - test, bidirectional_iterator >(); - test, random_access_iterator >(); - test, const int*>(); + return true; +} - test >(); - test >(); - test >(); - test >(); - test(); +int main(int, char**) { + test(); -#if TEST_STD_VER > 17 - static_assert(test_constexpr()); +#if TEST_STD_VER >= 20 + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/containers/sequences/array/compare.verify.cpp b/libcxx/test/std/containers/sequences/array/compare.verify.cpp index e03f001469344e..4b001601a4fe2c 100644 --- a/libcxx/test/std/containers/sequences/array/compare.verify.cpp +++ b/libcxx/test/std/containers/sequences/array/compare.verify.cpp @@ -28,10 +28,6 @@ template struct NoCompare {}; -#if TEST_STD_VER >= 14 && TEST_STD_VER <= 17 -// expected-error@*:* 3 {{no matching function for call to object of type 'std::__less'}} -#endif - int main(int, char**) { { typedef NoCompare<0> T;