Skip to content

Commit

Permalink
[libc++] Optimize std::min_element
Browse files Browse the repository at this point in the history
[libc++] Fix formatting and move min_element.bench.cpp

[libc++] Use __invoke instead of invoke

[libc++] Fix build issues with find
  • Loading branch information
mrdaybird committed Jul 25, 2024
1 parent f1b76c5 commit 01c71b4
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 6 deletions.
1 change: 1 addition & 0 deletions libcxx/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ set(BENCHMARK_TESTS
algorithms/make_heap_then_sort_heap.bench.cpp
algorithms/min.bench.cpp
algorithms/minmax.bench.cpp
algorithms/min_element.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/mismatch.bench.cpp
algorithms/pop_heap.bench.cpp
Expand Down
80 changes: 80 additions & 0 deletions libcxx/benchmarks/algorithms/min_element.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#include <algorithm>
#include <limits>
#include <vector>

#include <benchmark/benchmark.h>
#include <random>

// Benchmarks std::min_element over a vector of state.range(0) elements of type T.
// The input starts at the largest value of T and drops by one per element until it
// reaches the smallest value of T; every remaining element then holds that smallest value.
template <typename T>
static void BM_stdmin_element_decreasing(benchmark::State& state) {
  std::vector<T> data(state.range(0));
  const T lowest = std::numeric_limits<T>::min();
  T next         = std::numeric_limits<T>::max();

  // Fill front to back with a strictly decreasing run that saturates at `lowest`.
  for (T& slot : data) {
    if (next != lowest) {
      slot = next;
      --next;
    } else {
      slot = lowest;
    }
  }

  for (auto _ : state) {
    // Keep the input and the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(data);
    benchmark::DoNotOptimize(std::min_element(data.begin(), data.end()));
  }
}

// Register BM_stdmin_element_decreasing for the signed and unsigned variants of
// char, short, int and long long. Every instantiation is given the same argument
// list, which the benchmark uses as the input vector size: a dense sweep of 1-8,
// the ranges 32-128 and 256-4096, a dense sweep of 5000-10000 in steps of 1000,
// the range 2^14-2^16, and the single size 70000.
BENCHMARK(BM_stdmin_element_decreasing<char>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<short>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<int>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<long long>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);
BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
->DenseRange(1, 8)
->Range(32, 128)
->Range(256, 4096)
->DenseRange(5000, 10000, 1000)
->Range(1 << 14, 1 << 16)
->Arg(70000);

// Expands to the program entry point supplied by the benchmark library.
BENCHMARK_MAIN();
2 changes: 1 addition & 1 deletion libcxx/include/__algorithm/find.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
// do first partial word
if (__first.__ctz_ != 0) {
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
__storage_type __dn = std::min(__clz_f, __n);
__storage_type __dn = (__clz_f < __n) ? __clz_f : __n;
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
if (__b)
Expand Down
56 changes: 51 additions & 5 deletions libcxx/include/__algorithm/min_element.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/find.h>
#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
Expand All @@ -33,12 +35,56 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
if (__first == __last)
return __first;

_Iter __i = __first;
while (++__i != __last)
if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
__first = __i;
const size_t __n = static_cast<size_t>(std::distance(__first, __last));

return __first;
if (__n <= 64) {
_Iter __i = __first;
while (++__i != __last)
if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
__first = __i;
return __first;
}

size_t __block_size = 256;

size_t __n_blocked = __n - (__n % __block_size);
_Iter __block_start = __first, __block_end = __first;

typedef typename std::iterator_traits<_Iter>::value_type value_type;
value_type __min_val = std::__invoke(__proj, *__first);

_Iter __curr = __first;
for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
_Iter __start = __curr;
value_type __block_min = __min_val;
for (size_t j = 0; j < __block_size; j++) {
if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) {
__block_min = *__curr;
}
__curr++;
}
if (std::__invoke(__comp, __block_min, __min_val)) {
__min_val = __block_min;
__block_start = __start;
__block_end = __curr;
}
}

value_type __epilogue_min = __min_val;
_Iter __epilogue_start = __curr;
while (__curr != __last) {
if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __epilogue_min)) {
__epilogue_min = *__curr;
}
__curr++;
}
if (std::__invoke(__comp, __epilogue_min, __min_val)) {
__min_val = __epilogue_min;
__block_start = __epilogue_start;
__block_end = __last;
}

return std::__find(__block_start, __block_end, __min_val, __proj);
}

template <class _Comp, class _Iter, class _Sent>
Expand Down

0 comments on commit 01c71b4

Please sign in to comment.