-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm #104683
base: main
Are you sure you want to change the base?
Conversation
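Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment "Ping". The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.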
@llvm/pr-subscribers-libcxx Author: Дмитрий Изволов (izvolov) Changes: The radix sort (LSD) algorithm makes it possible to speed up std::stable_sort dramatically when sorting integers.
Patch is 28.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104683.diff 5 Files Affected:
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 32579272858a8e..95e4e3faf88671 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -74,6 +74,7 @@ set(files
__algorithm/prev_permutation.h
__algorithm/pstl.h
__algorithm/push_heap.h
+ __algorithm/radix_sort.h
__algorithm/ranges_adjacent_find.h
__algorithm/ranges_all_of.h
__algorithm/ranges_any_of.h
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
new file mode 100644
index 00000000000000..5e14dec9df0918
--- /dev/null
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -0,0 +1,410 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H
+#define _LIBCPP___ALGORITHM_RADIX_SORT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/for_each.h>
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/move_iterator.h>
+#include <__iterator/next.h>
+#include <__numeric/partial_sum.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_unsigned.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <climits>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+#include <stdexcept>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+// Sink for pack expansions: accepts a braced list of ints and discards it.
+inline void __variadic_expansion_dummy(initializer_list<int>) {}
+
+// Evaluates `expression` once per element of the enclosing parameter pack by expanding
+// `(expression, 0)...` into the initializer list handed to __variadic_expansion_dummy.
+// The macro is undefined again near the end of this header.
+# define EXPAND_VARIADIC(expression) __variadic_expansion_dummy({(expression, 0)...})
+
+// Overload chosen when the iterator's value type is move-assignable: wraps `__i` in a
+// move_iterator, so dereferencing the result yields an rvalue.
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value,
+                   move_iterator<_Iterator> > {
+  // Qualified on purpose: an unqualified make_move_iterator call is subject to ADL on _Iterator.
+  return std::make_move_iterator(std::move(__i));
+}
+
+// Overload chosen when the iterator's value type is not move-assignable: the iterator is
+// handed back unchanged.
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<!is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value, _Iterator> {
+  return __i;
+}
+
+// Counts how many times `__integer` can be shifted right by one bit while the shifted value
+// stays positive, and returns that count.
+template <typename _Integer>
+constexpr _Integer __intlog2_impl(_Integer __integer) {
+  _Integer __shifts{0};
+
+  for (__integer >>= 1; __integer > 0; __integer >>= 1)
+    ++__shifts;
+
+  return __shifts;
+}
+
+// Checked front end for __intlog2_impl: accepts integral types only and throws domain_error
+// for arguments that are not positive.
+template <typename _Integer>
+constexpr _Integer __intlog2(_Integer __integer) {
+  static_assert(is_integral<_Integer>::value, "Must be an integral type");
+
+  if (!(__integer > 0))
+    throw domain_error("The binary logarithm is not defined on non-positive numbers");
+
+  return __intlog2_impl(__integer);
+}
+
+// Writes the running sums of [__first, __last) to `__result` and, in the same pass, tracks the
+// largest input element. Returns {end of the written output, largest input element}; for an
+// empty range that is {__result, 0}.
+// Every input element is read before the matching output slot is written.
+template <typename _InputIterator, typename _OutputIterator>
+pair<_OutputIterator, typename iterator_traits<_InputIterator>::value_type>
+__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
+  using _ValueType = typename iterator_traits<_InputIterator>::value_type;
+
+  if (__first == __last)
+    return {__result, 0};
+
+  auto __largest       = *__first;
+  _ValueType __running = *__first;
+  *__result            = __running;
+
+  for (++__first; __first != __last; ++__first) {
+    if (__largest < *__first)
+      __largest = *__first;
+
+    __running = std::move(__running) + *__first;
+    ++__result;
+    *__result = __running;
+  }
+
+  ++__result;
+  return {__result, __largest};
+}
+
+// Compile-time parameters of a radix sort over `_Value` elements, where `_Map` turns an element
+// into its key and `_Radix` extracts one digit from a key.
+template <typename _Value, typename _Map, typename _Radix>
+struct __radix_sort_traits {
+  // Key type produced by the map; required to be an unsigned integer.
+  using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+  static_assert(is_integral<image_type>::value, "");
+  static_assert(is_unsigned<image_type>::value, "");
+
+  // Digit type produced by the radix functor; required to be an integer.
+  using radix_type = decay_t<invoke_result_t<_Radix, image_type> >;
+  static_assert(is_integral<radix_type>::value, "");
+
+  // One more than the largest value of radix_type.
+  static constexpr auto radix_value_range = numeric_limits<radix_type>::max() + 1;
+  // Binary logarithm of radix_value_range.
+  static constexpr auto radix_size = __intlog2<uint64_t>(radix_value_range);
+  // Bit width of image_type divided by radix_size.
+  static constexpr auto radix_count = sizeof(image_type) * CHAR_BIT / radix_size;
+};
+
+// Compile-time parameters of a counting sort over `_Value` elements keyed by `_Map`.
+template <typename _Value, typename _Map>
+struct __counting_sort_traits {
+  // Key type produced by the map; required to be an unsigned integer.
+  using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+  static_assert(is_integral<image_type>::value, "");
+  static_assert(is_unsigned<image_type>::value, "");
+
+  // One more than the largest value of image_type.
+  static constexpr auto value_range = numeric_limits<image_type>::max() + 1;
+  // Binary logarithm of value_range.
+  static constexpr auto radix_size = __intlog2<uint64_t>(value_range);
+};
+
+// Builds a callable that shifts an unsigned integer right by `__radix_number` digit widths and
+// then applies `__radix` to the shifted value.
+template <typename _Radix>
+auto __nth_radix(size_t __radix_number, _Radix __radix) {
+  return [__radix_number, __radix = std::move(__radix)](auto __n) {
+    using _Integer = decltype(__n);
+    static_assert(is_integral<_Integer>::value, "");
+    static_assert(is_unsigned<_Integer>::value, "");
+
+    // Width of one digit times the index of the requested digit.
+    const auto __shift = __counting_sort_traits<_Integer, _Radix>::radix_size * __radix_number;
+    return __radix(static_cast<_Integer>(__n >> __shift));
+  };
+}
+
+// Histogram pass: for every element of [__first, __last) increments the counter addressed by
+// the element's key, `__counters[__map(element)]`.
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __count(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  for (; __first != __last; ++__first) {
+    const auto& __element = *__first;
+    ++__counters[__map(__element)];
+  }
+}
+
+// Counts the keys of [__first, __last) into `__counters` and then replaces the counts with
+// their running sums over the whole key range (`value_range` slots).
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  using _ValueType = typename iterator_traits<_ForwardIterator>::value_type;
+  using _Traits    = __counting_sort_traits<_ValueType, _Map>;
+
+  // Calls are qualified so that ADL on user-provided iterator or functor types cannot hijack them.
+  std::__count(__first, __last, __map, __counters);
+
+  const auto __counters_end = __counters + _Traits::value_range;
+  std::partial_sum(__counters, __counters_end, __counters);
+}
+
+// Scatter pass, front to back: each element is stored at `__result[__counters[key]]`, and that
+// counter is incremented afterwards.
+template <typename _ForwardIterator, typename _RandomAccessIterator1, typename _Map, typename _RandomAccessIterator2>
+void __dispose(_ForwardIterator __first,
+               _ForwardIterator __last,
+               _RandomAccessIterator1 __result,
+               _Map __map,
+               _RandomAccessIterator2 __counters) {
+  for (; __first != __last; ++__first) {
+    auto&& __element      = *__first;
+    const auto __position = __counters[__map(__element)]++;
+    __result[__position]  = std::forward<decltype(__element)>(__element);
+  }
+}
+
+// Scatter pass, back to front: walks [__first, __last) in reverse, decrements the counter of
+// each element's key and stores the element at the resulting index of `__result`.
+// NOTE(review): this function has the same body as __dispose_backward further down, and its
+// name is not a reserved identifier; it looks like a leftover that could be removed. The name
+// is kept here so that existing references keep compiling.
+template <typename _BidirectionalIterator,
+          typename _RandomAccessIterator1,
+          typename _Map,
+          typename _RandomAccessIterator2>
+void dispose_backward(_BidirectionalIterator __first,
+                      _BidirectionalIterator __last,
+                      _RandomAccessIterator1 __result,
+                      _Map __map,
+                      _RandomAccessIterator2 __counters) {
+  // make_reverse_iterator is qualified to keep ADL on user iterator types out of the lookup.
+  std::for_each(std::make_reverse_iterator(__last),
+                std::make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+                });
+}
+
+// Single pass over [__first, __last) that
+//   * builds one histogram per radix index in `_Radices` (`__counters[i]` for radix `i`),
+//   * checks whether the mapped keys are already in non-descending order,
+// then turns every histogram into running sums and stores the largest bucket count of radix `i`
+// in `__maximums[i]`. Returns true when the keys were already ordered.
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2,
+          size_t... _Radices>
+bool __collect_impl(
+    _ForwardIterator __first,
+    _ForwardIterator __last,
+    _Map __map,
+    _Radix __radix,
+    _RandomAccessIterator1 __counters,
+    _RandomAccessIterator2 __maximums,
+    index_sequence<_Radices...>) {
+  using _ValueType                   = typename iterator_traits<_ForwardIterator>::value_type;
+  constexpr auto __radix_value_range = __radix_sort_traits<_ValueType, _Map, _Radix>::radix_value_range;
+
+  auto __previous  = numeric_limits<invoke_result_t<_Map, _ValueType> >::min();
+  auto __is_sorted = true;
+  // Calls are qualified so that ADL on user-provided types cannot select a different overload.
+  std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) {
+    auto __current = __map(__value);
+    __is_sorted &= (__current >= __previous);
+    __previous = __current;
+
+    // Comma fold (this code is C++17-only): bump one bucket per radix, left to right.
+    (++__counters[_Radices][std::__nth_radix(_Radices, __radix)(__current)], ...);
+  });
+
+  // In-place running sums per histogram; the maximum is taken from the raw counts.
+  ((__maximums[_Radices] =
+        std::__partial_sum_max(__counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices])
+            .second),
+   ...);
+
+  return __is_sorted;
+}
+
+// Entry point of the multi-radix collection step: forwards to __collect_impl with an index
+// sequence covering every radix of the key type and returns its result.
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2>
+bool __collect(_ForwardIterator __first,
+               _ForwardIterator __last,
+               _Map __map,
+               _Radix __radix,
+               _RandomAccessIterator1 __counters,
+               _RandomAccessIterator2 __maximums) {
+  using _ValueType             = typename iterator_traits<_ForwardIterator>::value_type;
+  constexpr auto __radix_count = __radix_sort_traits<_ValueType, _Map, _Radix>::radix_count;
+  // Qualified to avoid ADL on user-provided iterator and functor types.
+  return std::__collect_impl(
+      __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>());
+}
+
+// Scatter pass, back to front: walks [__first, __last) in reverse, decrements the counter of
+// each element's key and stores the element at the resulting index of `__result`.
+template <typename _BidirectionalIterator,
+          typename _RandomAccessIterator1,
+          typename _Map,
+          typename _RandomAccessIterator2>
+void __dispose_backward(_BidirectionalIterator __first,
+                        _BidirectionalIterator __last,
+                        _RandomAccessIterator1 __result,
+                        _Map __map,
+                        _RandomAccessIterator2 __counters) {
+  // Qualified calls keep ADL on user iterator types out of the lookup.
+  std::for_each(std::make_reverse_iterator(__last),
+                std::make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+                });
+}
+
+// Counting sort of [__first, __last) into `__result`, keyed by `__map`. Returns the end of the
+// written output.
+template <typename _ForwardIterator, typename _RandomAccessIterator, typename _Map>
+_RandomAccessIterator
+__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
+  using _ValueType = typename iterator_traits<_ForwardIterator>::value_type;
+  using _Traits    = __counting_sort_traits<_ValueType, _Map>;
+  using _DiffType  = typename iterator_traits<_RandomAccessIterator>::difference_type;
+
+  // One extra leading slot: keys are counted into slots [1, value_range], so after the running
+  // sums slot k holds the number of elements with a key below k (the first output index of
+  // key k) and the last slot holds the total element count.
+  _DiffType __counters[_Traits::value_range + 1] = {0};
+
+  // Qualified calls: unqualified ones would be subject to ADL on user-provided types.
+  std::__collect(__first, __last, __map, std::next(std::begin(__counters)));
+  std::__dispose(__first, __last, __result, __map, std::begin(__counters));
+
+  return __result + __counters[_Traits::value_range];
+}
+
+// Radix sort for keys that consist of a single radix: one counting sort into `__buffer`,
+// followed by a copy of the sorted elements back to [__first, __last).
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count == 1,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _Map __map,
+                  _Radix __radix) {
+  // Helper calls are qualified to avoid ADL on user-provided iterator types.
+  auto __buffer_end = std::__counting_sort_impl(
+      std::__move_assign_please(__first),
+      std::__move_assign_please(__last),
+      __buffer,
+      [&__map, &__radix](const auto& __value) { return __radix(__map(__value)); });
+
+  std::copy(std::__move_assign_please(__buffer), std::__move_assign_please(__buffer_end), __first);
+}
+
+// Radix sort for keys with an even number of radices. Radices are processed in pairs, starting
+// with radix 0: the first pass of a pair moves the elements from the input range to the buffer,
+// the second pass moves them back, so the result ends up in [__first, __last).
+// Nothing is moved when the collection step reports that the keys are already ordered.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count % 2 ==
+        0,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer_begin,
+                  _Map __map,
+                  _Radix __radix) {
+  using _ValueType = typename iterator_traits<_RandomAccessIterator1>::value_type;
+  using _Traits    = __radix_sort_traits<_ValueType, _Map, _Radix>;
+  using _DiffType  = typename iterator_traits<_RandomAccessIterator1>::difference_type;
+
+  _DiffType __counters[_Traits::radix_count][_Traits::radix_value_range] = {{0}};
+  _DiffType __maximums[_Traits::radix_count]                             = {0};
+  // All helper and algorithm calls below are qualified to avoid ADL on user-provided types.
+  const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums);
+  if (!__is_sorted) {
+    const auto __range_size = std::distance(__first, __last);
+    auto __buffer_end       = __buffer_begin + __range_size;
+    for (size_t __radix_number = 0; __radix_number < _Traits::radix_count; __radix_number += 2) {
+      // A radix is "single" when one bucket holds every element; sorting by it is a no-op.
+      const auto __n0th_is_single = __maximums[__radix_number] == __range_size;
+      const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size;
+
+      if (__n0th_is_single && __n1th_is_single) {
+        continue;
+      }
+
+      // First pass of the pair: input range -> buffer.
+      if (__n0th_is_single) {
+        std::copy(std::__move_assign_please(__first), std::__move_assign_please(__last), __buffer_begin);
+      } else {
+        auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number, __radix)(__map(__v));
+        };
+        std::__dispose_backward(
+            std::__move_assign_please(__first),
+            std::__move_assign_please(__last),
+            __buffer_begin,
+            __n0th,
+            __counters[__radix_number]);
+      }
+
+      // Second pass of the pair: buffer -> input range.
+      if (__n1th_is_single) {
+        std::copy(std::__move_assign_please(__buffer_begin), std::__move_assign_please(__buffer_end), __first);
+      } else {
+        auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number + 1, __radix)(__map(__v));
+        };
+        std::__dispose_backward(
+            std::__move_assign_please(__buffer_begin),
+            std::__move_assign_please(__buffer_end),
+            __first,
+            __n1th,
+            __counters[__radix_number + 1]);
+      }
+    }
+  }
+}
+
+// bool overload: the value is returned as is.
+constexpr bool __to_unsigned(bool __b) { return __b; }
+
+// XORs `__n` with the smallest value of its type and converts the result to the corresponding
+// unsigned type.
+template <typename _Ip>
+constexpr auto __to_unsigned(_Ip __n) {
+  using _Unsigned = make_unsigned_t<_Ip>;
+  return static_cast<_Unsigned>(__n ^ numeric_limits<_Ip>::min());
+}
+
+// Function object that perfectly forwards its argument back to the caller.
+struct __identity_fn {
+  template <typename _Tp>
+  constexpr _Tp&& operator()(_Tp&& __value) const {
+    return static_cast<_Tp&&>(__value);
+  }
+};
+
+// Function object returning the lowest eight bits of an unsigned integer as a uint8_t.
+struct __low_byte_fn {
+  template <typename _Ip>
+  constexpr uint8_t operator()(_Ip __integer) const {
+    static_assert(is_integral<_Ip>::value, "");
+    static_assert(is_unsigned<_Ip>::value, "");
+
+    const auto __masked = __integer & 0xff;
+    return static_cast<uint8_t>(__masked);
+  }
+};
+
+// Radix sort of [__first, __last) using `__buffer` as scratch space. `__map` produces the
+// integer key of an element and `__radix` extracts one digit of a key. Keys are passed through
+// __to_unsigned before the sort so that the implementation only sees unsigned keys.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+void __radix_sort(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _Map __map,
+                  _Radix __radix) {
+  auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__to_unsigned(__map(__x)); };
+  // Qualified to avoid ADL on user-provided iterator and functor types.
+  std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
+}
+
+// Convenience overload: sorts by the elements themselves (identity map), one byte per pass.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+void __radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) {
+  // Qualified to avoid ADL on user-provided iterator types.
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+}
+
+// Tag-dispatched variant selected by _BoolConstant<true>: performs the radix sort (identity
+// map, one byte per pass) and returns true.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(
+    _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer, _BoolConstant<true>) {
+  // Qualified to avoid ADL on user-provided iterator types.
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+  return true;
+}
+
+// Tag-dispatched variant selected by _BoolConstant<false>: does not touch the range and
+// returns false.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<false>) {
+ return false;
+}
+
+# undef EXPAND_VARIADIC
+
+#else // _LIBCPP_STD_VER > 14
+
+// Fallback used when the language mode is C++14 or older: radix sort is unavailable, so the
+// range is left untouched and false is returned for either tag value.
+// The bool parameter is spelled out because single-letter names such as `_B` are defined as
+// macros by some C libraries' <ctype.h>.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, bool _EnableRadixSort>
+bool __radix_sort(
+    _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<_EnableRadixSort>) {
+  return false;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H
diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h
index 9c7df80ae98722..96d84b208687fc 100644
--- a/libcxx/include/__algorithm/ranges_stable_sort.h
+++ b/libcxx/include/__algorithm/ranges_stable_sort.h
@@ -24,6 +24,8 @@
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__utility/forward.h>
#include <__utility/move.h>
@@ -45,7 +47,18 @@ struct __stable_sort {
auto __last_iter = ranges::next(__first, __last);
auto&& __projected_comp = std::__make_projected(__comp, __proj);
- std::__stable_sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp);
+ constexpr auto __default_comp = is_same_v<_Comp, ranges::less>;
+ constexpr auto __default_proj = is_same_v<_Proj, identity>;
+ constexpr auto __integral_value = is_integral_v<iter_value_t<_Iter>>;
+ constexpr auto __integral_projection = __default_proj && __integral_value;
+ // constexpr auto __integral_projection = is_integral_v<remove_reference_t<invoke_result_t<_Proj&,
+ // iter_value_t<_Iter>>>>;
+ // TODO: Support projection in stable_sort
+ std::__stable_sort_impl<_RangeAlgPolicy>(
+ std::move(__first),
+ __last_iter,
+ __projected_comp,
+ _BoolConstant < __default_comp && __integral_projection > {});
return __last_iter;
}
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 726e7e16b3564a..f8624726a4e323 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -13,6 +13,7 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/inplace_merge.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/radix_sort.h>
#include <__algorithm/sort.h>
#include <__config>
#include <__debug_utils/strict_weak_ordering_check.h>
@@ -20,6 +21,9 @@
#include <__memory/destruct_n.h>
#include <__memory/temporary_buffer.h>
#include <__memory/unique_ptr.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -133,20 +137,24 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_assign(
*__result = _Ops::__iter_move(__first2);
}
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
- ptrdiff_t __buff_size);
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+ _RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+ ptrdiff_t __buff_size,
+ _BoolConstant<_EnableRadixSort>);
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort_move(_RandomAccessIterator ...
[truncated]
|
Couldn't we simply forward to |
Radix sort is much faster than comparison sorts (including |
If that is the case, why wouldn't we use it in |
Oh, I didn't understand the question first time. The key moment is that radix sort needs a temporary buffer, and |
c401e86
to
671ac8f
Compare
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_unsigned(bool __b) { return __b; } | ||
|
||
template <typename _Ip> | ||
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_unsigned(_Ip __n) { | ||
constexpr const auto __min_value = numeric_limits<_Ip>::min(); | ||
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can reuse __to_unsigned_like
in <__type_traits/make_unsigned.h>
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can't. As I can see, __to_unsigned_like
is a static cast to corresponding unsigned. But it's not just enough to cast to unsigned. We need to shift the value range from [0, 2^n)
to [-2^(n-1), 2^(n-1))
, so that if x
is less than y
in the old range, then x
will also be less than y
in the new range.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to shift the value range from
[0, 2^n)
to[-2^(n-1), 2^(n-1))
, so that ifx
is less thany
in the old range, thenx
will also be less thany
in the new range.
Hmm, I think this function actually shifts from [-2^(n-1), 2^(n-1))
to [0, 2^n)
, which should be intended. How about changing the name to __shift_to_unsigned
or __plus_to_unsigned
as a plain to
has closer meaning to casting in the standard library?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you're right, I wanted to write that values are shifted from [-2^(n-1), 2^(n-1))
to [0, 2^n)
.
__shift_to_unsigned
is a good name, thanks. Changed it.
_RandomAccessIterator __first, | ||
_RandomAccessIterator __last, | ||
_Compare __comp, | ||
typename iterator_traits<_RandomAccessIterator>::difference_type __len, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typename iterator_traits<_RandomAccessIterator>::difference_type __len, | |
__iter_diff_t<_RandomAccessIterator> __len, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is code I didn't change, just reformatted. Should I change it to __iter_diff_t
and __iter_value_type
anyway?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. This code should already be formatted correctly. Do you maybe have a clang-format that's too old?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm using
$ clang-format --version
clang-format version 18.1.8
and the following command called from the repo root.
git diff -U0 --no-color my/main | clang/tools/clang-format/clang-format-diff.py -style=file:libcxx/.clang-format -p1 -i
template <class _Tp, class = void> | ||
struct __radix_sort_min_switch { | ||
static const unsigned value = (1 << 10); | ||
}; | ||
|
||
template <class _Int8> | ||
struct __radix_sort_min_switch<_Int8, __enable_if_t<is_integral<_Int8>::value && sizeof(_Int8) == 1> > { | ||
static const unsigned value = (1 << 8); | ||
}; | ||
|
||
template <class _Tp, class = void> | ||
struct __radix_sort_max_switch { | ||
static const unsigned value = (1 << 16); | ||
}; | ||
|
||
template <class _Int64> | ||
struct __radix_sort_max_switch<_Int64, __enable_if_t<is_integral<_Int64>::value && sizeof(_Int64) == 8> > { | ||
static const unsigned value = (1 << 15); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are these values not simply in the function and how did you determine these values?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was an analogy to __stable_sort_switch
.
Values were determined empirically based on benchmarks of raw comparison of radix sort vs std::sort
vs std::stable_sort
, and with respect to special benchmark cases such as ascending, descending, pipe-organ and others.
The raw comparison is here
- https://github.com/izvolov/burst?tab=readme-ov-file#%D1%86%D0%B5%D0%BB%D0%BE%D1%87%D0%B8%D1%81%D0%BB%D0%B5%D0%BD%D0%BD%D1%8B%D0%B5-%D1%81%D0%BE%D1%80%D1%82%D0%B8%D1%80%D0%BE%D0%B2%D0%BA%D0%B8
- https://github.com/izvolov/burst/blob/master/doc/README.md#intsort
Code: - https://github.com/izvolov/burst/blob/master/benchmark/burst/algorithm/integer_sort_comparison.py.in
- https://github.com/izvolov/burst/blob/master/benchmark/burst/algorithm/radix_sort.cpp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd make them constexpr
functions instead, since we can.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean something like this?
template <class _Tp>
constexpr unsigned __radix_sort_min_bound () {
static_assert(std::is_integral_v<_Tp>);
if constexpr (sizeof(_Tp) == 1) {
return 1 << 8;
}
return 1 << 10;
}
Is it legal to use C++11 and later features in stable_sort.h
since there aren't any version guards there?
If it is, I'd also rewrite the code to remove _BoolConstant<_EnableRadixSort>
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
My only concern is that we now require C++17 to enable radix sort. Is that OK?
@@ -0,0 +1,415 @@ | |||
// -*- C++ -*- | |||
//===----------------------------------------------------------------------===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not attached: Please add a release note about the improvement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please give an example of how to do it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simply something like "std::stable_sort
uses radix sort for integral types now, which can improve the performance by ".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
template <typename _UnsignedInteger> | ||
_LIBCPP_HIDE_FROM_ABI constexpr _UnsignedInteger __intlog2(_UnsignedInteger __n) { | ||
static_assert(is_integral<_UnsignedInteger>::value, "Must be an integral type"); | ||
static_assert(is_unsigned<_UnsignedInteger>::value, "Must be unsigned"); | ||
|
||
return numeric_limits<_UnsignedInteger>::digits - 1 - std::__countl_zero(__n); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, it can be done in this PR. It's simple enough.
_LIBCPP_HIDE_FROM_ABI typename enable_if< | ||
__radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::__radix_count == 1, | ||
void>::type |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You've missed a few enable_if
s here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
1b2dedc
to
8b5d583
Compare
@@ -0,0 +1,415 @@ | |||
// -*- C++ -*- | |||
//===----------------------------------------------------------------------===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simply something like "std::stable_sort
uses radix sort for integral types now, which can improve the performance by ".
// radix of the sorted numbers from low to high byte. | ||
// The algorithm uses a temporary buffer of size equal to size of the input range. Each `i`-th pass | ||
// of the algorithm sorts values by `i`-th radix and moves values to the temporary buffer (for each even `i`, counted | ||
// from zero), or moves them back to the initial range (for each odd `i`). It there is only one radix in sorted integers |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// from zero), or moves them back to the initial range (for each odd `i`). It there is only one radix in sorted integers | |
// from zero), or moves them back to the initial range (for each odd `i`). If there is only one radix in sorted integers |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
// The algorithm uses a temporary buffer of size equal to size of the input range. Each `i`-th pass | ||
// of the algorithm sorts values by `i`-th radix and moves values to the temporary buffer (for each even `i`, counted | ||
// from zero), or moves them back to the initial range (for each odd `i`). It there is only one radix in sorted integers | ||
// (e.g. int8), than sorted values are placed to the buffer, and then moved back to the initial range. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// (e.g. int8), than sorted values are placed to the buffer, and then moved back to the initial range. | |
// (e.g. int8), the sorted values are placed to the buffer, and then moved back to the initial range. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
_RandomAccessIterator __first, | ||
_RandomAccessIterator __last, | ||
_Compare __comp, | ||
typename iterator_traits<_RandomAccessIterator>::difference_type __len, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. This code should already be formatted correctly. Do you maybe have a clang-format that's too old?
template <typename _UnsignedInteger> | ||
_LIBCPP_HIDE_FROM_ABI constexpr _UnsignedInteger __intlog2(_UnsignedInteger __n) { | ||
static_assert(is_integral<_UnsignedInteger>::value, "Must be an integral type"); | ||
static_assert(is_unsigned<_UnsignedInteger>::value, "Must be unsigned"); | ||
|
||
return numeric_limits<_UnsignedInteger>::digits - 1 - std::__countl_zero(__n); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can simply reduce the requirements (e.g. static_assert
them). Also no need to consolidate it with other functions, that was simply a comment with no action requested.
constexpr auto __default_comp = | ||
__desugars_to_v<__totally_ordered_less_tag, _Comp, iter_value_t<_Iter>, iter_value_t<_Iter> >; | ||
constexpr auto __default_proj = __is_identity<_Proj>::value; | ||
constexpr auto __integral_value = is_integral_v<iter_value_t<_Iter>>; | ||
constexpr auto __integral_projection = __default_proj && __integral_value; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we do the dispatching in __stable_sort_impl
instead we don't need to do the dispatching twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
template <class _Tp, class = void> | ||
struct __radix_sort_min_switch { | ||
static const unsigned value = (1 << 10); | ||
}; | ||
|
||
template <class _Int8> | ||
struct __radix_sort_min_switch<_Int8, __enable_if_t<is_integral<_Int8>::value && sizeof(_Int8) == 1> > { | ||
static const unsigned value = (1 << 8); | ||
}; | ||
|
||
template <class _Tp, class = void> | ||
struct __radix_sort_max_switch { | ||
static const unsigned value = (1 << 16); | ||
}; | ||
|
||
template <class _Int64> | ||
struct __radix_sort_max_switch<_Int64, __enable_if_t<is_integral<_Int64>::value && sizeof(_Int64) == 8> > { | ||
static const unsigned value = (1 << 15); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd make them constexpr
functions instead, since we can.
c9192ec
to
8e8e007
Compare
12aa59c
to
dd3581e
Compare
@philnik777 ping |
The radix sort (LSD) algorithm makes it possible to speed up std::stable_sort dramatically when sorting integers.
The speedup varies from relatively small to as much as 10x, depending on the type of the sorted elements and the initial state of the sorted array.