diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index d96ccc1e49f66b..952777d5346f7a 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -121,6 +121,7 @@ set(BENCHMARK_TESTS algorithms/make_heap_then_sort_heap.bench.cpp algorithms/min.bench.cpp algorithms/minmax.bench.cpp + algorithms/min_element.bench.cpp algorithms/min_max_element.bench.cpp algorithms/mismatch.bench.cpp algorithms/pop_heap.bench.cpp diff --git a/libcxx/benchmarks/algorithms/min_element.bench.cpp b/libcxx/benchmarks/algorithms/min_element.bench.cpp new file mode 100644 index 00000000000000..7edb8ddbb15f29 --- /dev/null +++ b/libcxx/benchmarks/algorithms/min_element.bench.cpp @@ -0,0 +1,80 @@ +#include +#include +#include + +#include +#include + +template +static void BM_stdmin_element_decreasing(benchmark::State& state) { /* Benchmark body: fills a vector of state.range(0) elements with values that count down from numeric_limits max() and stay at numeric_limits min() once that is reached, then times std::min_element over it. NOTE(review): template argument lists and #include targets look stripped from this copy of the patch (bare template, std::vector v, std::numeric_limits::max, #include) -- confirm against the original change. */ + std::vector v(state.range(0)); + T start = std::numeric_limits::max(); + T end = std::numeric_limits::min(); + + for (size_t i = 0; i < v.size(); i++) /* descending fill: start is post-decremented per element until it equals end, after which end is stored */ + v[i] = ((start != end) ? 
start-- : end); + + for (auto _ : state) { /* timed loop: v and the iterator returned by std::min_element are both passed through benchmark::DoNotOptimize */ + benchmark::DoNotOptimize(v); + benchmark::DoNotOptimize(std::min_element(v.begin(), v.end())); + } +} + +BENCHMARK(BM_stdmin_element_decreasing) /* eight registrations follow, all with the same size sweep: 1..8 dense, Range(32, 128), Range(256, 4096), 5000..10000 in steps of 1000, Range(1 << 14, 1 << 16), and 70000. NOTE(review): as shown they are textually identical; presumably each originally named a different element type -- confirm */ + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); +BENCHMARK(BM_stdmin_element_decreasing) + ->DenseRange(1, 8) + ->Range(32, 128) + ->Range(256, 4096) + ->DenseRange(5000, 10000, 1000) + ->Range(1 << 14, 1 << 16) + ->Arg(70000); + +BENCHMARK_MAIN(); diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h index 7f58dbb13a5776..8ecd53e63af21b 100644 --- a/libcxx/include/__algorithm/find.h +++ b/libcxx/include/__algorithm/find.h @@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = 
static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = std::min(__clz_f, __n); + __storage_type __dn = /* smaller of __clz_f and __n, written as a conditional instead of the std::min call on the removed line */ (__clz_f < __n) ? __clz_f : __n; __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = std::__invert_if(*__first.__seg_) & __m; if (__b) diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h index 65f3594d630cef..25964a9e3a52fe 100644 --- a/libcxx/include/__algorithm/min_element.h +++ b/libcxx/include/__algorithm/min_element.h @@ -11,6 +11,8 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/find.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -33,12 +35,56 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) { if (__first == __last) return __first; - _Iter __i = __first; - while (++__i != __last) - if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first))) - __first = __i; + const size_t __n = /* element count, used to choose between the plain loop and the blocked scan. NOTE(review): std::distance is handed (_Iter, _Sent) and walks the range for non-random-access iterators -- confirm both are acceptable for every caller */ static_cast(std::distance(__first, __last)); - return __first; + if (__n <= 64) { /* ranges of at most 64 elements keep the single-pass loop that remembers the iterator of the smallest element */ + _Iter __i = __first; + while (++__i != __last) + if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first))) + __first = __i; + return __first; + } + + size_t __block_size = 256; /* longer ranges are scanned in fixed blocks of 256 elements; __n_blocked below is __n rounded down to a multiple of that */ + + size_t __n_blocked = __n - (__n % __block_size); + _Iter __block_start = __first, __block_end = __first; + + typedef typename std::iterator_traits<_Iter>::value_type value_type; + value_type __min_val = std::__invoke(__proj, *__first); /* running minimum, kept by value. NOTE(review): stores a projected value in the iterator value_type -- assumes the projection result converts to value_type; confirm */ + + _Iter __curr = __first; + for (size_t __i = 0; __i < __n_blocked; __i += __block_size) { + _Iter __start = __curr; + value_type __block_min = __min_val; + for (size_t j = 0; j < __block_size; j++) { /* per-block minimum tracked by value only, with no iterator update inside the loop. NOTE(review): the comparison below uses the projected element but the assignment stores the unprojected *__curr; also j is not a reserved __-prefixed name like the other locals -- confirm both are intended */ + if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) { + __block_min = *__curr; + } + __curr++; + } + 
if (std::__invoke(__comp, __block_min, __min_val)) { /* this block strictly improved the minimum: remember its bounds for the final lookup */ + __min_val = __block_min; + __block_start = __start; + __block_end = __curr; + } + } + + value_type __epilogue_min = __min_val; /* epilogue: the elements left after the last full block are scanned the same way as one final partial block. NOTE(review): __block_end = __last below assigns a _Sent to an _Iter -- confirm this is valid when the two types differ */ + _Iter __epilogue_start = __curr; + while (__curr != __last) { + if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __epilogue_min)) { + __epilogue_min = *__curr; + } + __curr++; + } + if (std::__invoke(__comp, __epilogue_min, __min_val)) { + __min_val = __epilogue_min; + __block_start = __epilogue_start; + __block_end = __last; + } + + return /* second pass over only the winning range to turn the minimum value back into an iterator; if nothing beat the first element that range is empty (__block_start == __block_end == __first). NOTE(review): presumably std::__find matches by equality, which the comparator-only loop this replaces did not require -- confirm */ std::__find(__block_start, __block_end, __min_val, __proj); } template