From bbdf0e2d8bdda477315228d68447b15186a40b19 Mon Sep 17 00:00:00 2001 From: Joana Cruz Date: Mon, 9 Sep 2024 13:42:21 +0000 Subject: [PATCH] Integrate Google benchmarks into SLEEF Added new benchmark tool to SLEEF project using googlebench framework. In theory this tool can benchmark any unary and binary function in SLEEF. Benchmark is enabled in all functions listed in benchsleef.cpp. This list uses macros in benchmark_callers.hpp, so that we can enable benchmarking in multiple precisions using single lines of code. It is also possible to list the exact function we want, as each of the macros in benchmark_callers.hpp can be called independently and connected like building blocks. The tool is integrated with SLEEF via CMake, meaning it can be built automatically when SLEEF is built. To enable that, pass CMake argument -DSLEEF_BUILD_BENCH=ON. This tool depends on C++17 standard. Tested on aarch64 for scalar, vector and SVE routines. Tested on x86 for different vector length extensions. Tested for llvm-17, gcc-11 and gcc-14. 
--- CMakeLists.txt | 1 + docs/1-user-guide/build-with-cmake/README.md | 4 + docs/4-tools/README.md | 56 +++++++++- src/CMakeLists.txt | 4 + src/benchmarks/CMakeLists.txt | 51 +++++++++ src/benchmarks/README.md | 107 +++++++++++++++++++ src/benchmarks/benchmark_callers.hpp | 105 ++++++++++++++++++ src/benchmarks/benchmark_templates.hpp | 47 ++++++++ src/benchmarks/benchsleef.cpp | 70 ++++++++++++ src/benchmarks/gen_input.hpp | 73 +++++++++++++ src/benchmarks/type_defs.hpp | 96 +++++++++++++++++ 11 files changed, 612 insertions(+), 2 deletions(-) create mode 100644 src/benchmarks/CMakeLists.txt create mode 100644 src/benchmarks/README.md create mode 100644 src/benchmarks/benchmark_callers.hpp create mode 100644 src/benchmarks/benchmark_templates.hpp create mode 100644 src/benchmarks/benchsleef.cpp create mode 100644 src/benchmarks/gen_input.hpp create mode 100644 src/benchmarks/type_defs.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f1bc05b..44c7345a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ option(SLEEF_BUILD_QUAD "libsleefquad will be built." OFF) option(SLEEF_BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(SLEEF_BUILD_SCALAR_LIB "libsleefscalar will be built." OFF) option(SLEEF_BUILD_TESTS "Tests will be built." ON) +option(SLEEF_BUILD_BENCH "Bench will be built." OFF) option(SLEEF_BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF) option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF) diff --git a/docs/1-user-guide/build-with-cmake/README.md b/docs/1-user-guide/build-with-cmake/README.md index a7ef060a..4ebe8861 100644 --- a/docs/1-user-guide/build-with-cmake/README.md +++ b/docs/1-user-guide/build-with-cmake/README.md @@ -162,6 +162,10 @@ optimized, or any other special set of flags. - `SLEEF_LLVM_AR_COMMAND` : Specify LLVM AR command when you build the library with thinLTO support with clang. 
- `SLEEF_ENABLE_LLVM_BITCODE` : Generate LLVM bitcode +### Benchmarks + +- `SLEEF_BUILD_BENCH` : Build benchmark tool if set to TRUE + ### Tests - `SLEEF_BUILD_TESTS` : Avoid building testing tools if set to FALSE diff --git a/docs/4-tools/README.md b/docs/4-tools/README.md index bb1f605d..861024a5 100644 --- a/docs/4-tools/README.md +++ b/docs/4-tools/README.md @@ -92,7 +92,7 @@ In some cases, it is desirable to fix the last few coefficients to values like Finding a set of good parameters is not a straightforward process. -

Benchmarking tool

+

Legacy Benchmarking tool

SLEEF has a tool for measuring and plotting execution time of each function in the library. It consists of an executable for measurements, a makefile for @@ -162,7 +162,7 @@ Install these with: ```sh sudo apt install openjdk-19-jdk-headless ``` -and +and ```sh sudo apt install gnuplot ``` @@ -171,3 +171,55 @@ Four graphs are generated : trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. Please see our [benchmark results](../5-performance/) for an example of generated graphs by this tool. +

Benchmarking tool

+ +This tool uses the [googlebench](https://github.com/google/benchmark) framework to benchmark SLEEF +functions. +It is integrated with SLEEF via CMake. +In order to build this tool automatically when SLEEF is +built, pass the `-DSLEEF_BUILD_BENCH=ON` CMake option when +setting up the build directory: +```sh +cmake -S . -B build -DSLEEF_BUILD_BENCH=ON +``` +After building SLEEF: +```sh +cmake --build build -j +``` +in the `build/bin` folder you will find an executable named +benchsleef128. +Run this executable with `./build/bin/benchsleef128` in +order to obtain microbenchmarks for the functions in the project. +A filter option can also be provided to the executable. +This feature is inherited from googlebench, and takes +a regular expression, and executes only the benchmarks +whose name matches the regular expression. +The set of all the benchmarks available can be obtained +when running the benchmark tool when no filter is set +and corresponds to all the benchmarks listed in +`benchsleef.cpp`. +```sh +# Examples: +# * This will benchmark Sleef_sinf_u10 on all intervals enabled in the tool. +./build/bin/benchsleef128 --benchmark_filter=sinf_u10 +# * This will benchmark all single precision sin functions (scalar, vector and sve if available): +./build/bin/benchsleef128 --benchmark_filter=sinf +# * This will benchmark all single precision vector functions: +./build/bin/benchsleef128 --benchmark_filter=vectorf +``` +Note: in this context, "all" means all functions available in SLEEF that are enabled in the benchmarks. +

Benchmarking on aarch64

+If you're running SLEEF on a machine with SVE support the executable generated will have SVE benchmarks +available for functions specified in `benchsleef.cpp`. +

Benchmarking on x86

+If you're running SLEEF on an x86 machine, two extra +executables may be built (according to feature detection): + +```sh +./build/bin/benchsleef256 +./build/bin/benchsleef512 +``` +These will benchmark 256bit and 512bit vector implementations +for vector functions respectively. +Note these executables can also be used to benchmark scalar +functions. \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0e60e536..b9fe6f96 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,10 @@ if (SLEEF_BUILD_TESTS AND NOT MINGW) endif() add_subdirectory("common") +if (SLEEF_BUILD_BENCH) + add_subdirectory("benchmarks") +endif() + if (SLEEF_BUILD_DFT) add_subdirectory("dft") if (SLEEF_BUILD_TESTS) diff --git a/src/benchmarks/CMakeLists.txt b/src/benchmarks/CMakeLists.txt new file mode 100644 index 00000000..2ffad2a6 --- /dev/null +++ b/src/benchmarks/CMakeLists.txt @@ -0,0 +1,51 @@ +# CMakeList.txt : Microbenchmarks with google bench +cmake_minimum_required (VERSION 3.11) + +project ("SLEEF Microbenchmarks") + +find_package(Threads) +# Apply CMake options in Installation guide in https://github.com/google/benchmark +include(ExternalProject) +find_package(Git REQUIRED) +ExternalProject_Add(googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG origin/main + CMAKE_ARGS -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/googlebench + -DBENCHMARK_ENABLE_GTEST_TESTS=OFF +) +include_directories(${CMAKE_BINARY_DIR}/googlebench/include) +link_directories(${CMAKE_BINARY_DIR}/googlebench/lib) + +# include headers +include_directories(${sleef_BINARY_DIR}/include) # sleef.h +# include libs +link_directories(${sleef_BINARY_DIR}/lib) # libsleef + + +set(Extra_CFLAGS -Wall -O3 -Wno-attributes) +set(BENCH_SRC_FILE "benchsleef.cpp" "benchmark_callers.hpp" "benchmark_templates.hpp" "gen_input.hpp" "type_defs.hpp") +set(BENCH_PROPERTIES C_STANDARD 99 CXX_STANDARD 
17) +set(BENCH_LIBS benchmark sleef Threads::Threads) # Link Google Benchmark and sleef to the project + +# Add source to this project's executable. +add_executable (benchsleef128 ${BENCH_SRC_FILE}) +set_target_properties(benchsleef128 PROPERTIES ${BENCH_PROPERTIES}) +target_compile_options(benchsleef128 PRIVATE ${Extra_CFLAGS} -march=native) +target_link_libraries(benchsleef128 ${BENCH_LIBS}) +add_dependencies(benchsleef128 googlebenchmark) + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + add_executable (benchsleef256 ${BENCH_SRC_FILE}) + set_target_properties(benchsleef256 PROPERTIES ${BENCH_PROPERTIES}) + target_compile_options(benchsleef256 PRIVATE ${Extra_CFLAGS} "-march=native" "-DARCH_VECT_LEN=256") + target_link_libraries(benchsleef256 ${BENCH_LIBS}) + add_dependencies(benchsleef256 googlebenchmark) + + add_executable (benchsleef512 ${BENCH_SRC_FILE}) + set_target_properties(benchsleef512 PROPERTIES ${BENCH_PROPERTIES}) + target_compile_options(benchsleef512 PRIVATE ${Extra_CFLAGS} "-mavx512f" "-DARCH_VECT_LEN=512") + target_link_libraries(benchsleef512 ${BENCH_LIBS}) + add_dependencies(benchsleef512 googlebenchmark) +endif() \ No newline at end of file diff --git a/src/benchmarks/README.md b/src/benchmarks/README.md new file mode 100644 index 00000000..06f43235 --- /dev/null +++ b/src/benchmarks/README.md @@ -0,0 +1,107 @@ +

Benchmarking tool

+ +This tool uses the [googlebench](https://github.com/google/benchmark) framework to benchmark SLEEF +functions. +It is integrated with SLEEF via CMake. +In order to build this tool automatically when SLEEF is +built, pass the `-DSLEEF_BUILD_BENCH=ON` CMake option when +setting up the build directory: +```sh +cmake -S . -B build -DSLEEF_BUILD_BENCH=ON +``` +After building SLEEF: +```sh +cmake --build build -j +``` +in the `build/bin` folder you will find an executable named +benchsleef128. +Run this executable with `./build/bin/benchsleef128` in +order to obtain microbenchmarks for the functions in the project. +A filter option can also be provided to the executable. +This feature is inherited from googlebench, and takes +a regular expression, and executes only the benchmarks +whose name matches the regular expression. +The set of all the benchmarks available can be obtained +when running the benchmark tool when no filter is set +and corresponds to all the benchmarks listed in +`benchsleef.cpp`. +```sh +# Examples: +# * This will benchmark Sleef_sinf_u10 on all intervals enabled in the tool. +./build/bin/benchsleef128 --benchmark_filter=sinf_u10 +# * This will benchmark all single precision sin functions (scalar, vector and sve if available): +./build/bin/benchsleef128 --benchmark_filter=sinf +# * This will benchmark all single precision vector functions: +./build/bin/benchsleef128 --benchmark_filter=vectorf +``` +Note: in this context, "all" means all functions available in SLEEF that are enabled in the benchmarks. +

Benchmarking on aarch64

+If you're running SLEEF on a machine with SVE support the executable generated will have SVE benchmarks +available for functions specified in `benchsleef.cpp`. +

Benchmarking on x86

+If you're running SLEEF on an x86 machine, two extra +executables may be built (according to feature detection): + +```sh + ./build/bin/benchsleef256 + ./build/bin/benchsleef512 +``` + +These will benchmark 256bit and 512bit vector implementations +for vector functions respectively. +Note these executables can also be used to benchmark scalar +functions. + +

Maintenance

+Some functions are still not enabled in the benchmarks. +In order to add a function which uses the types already +declared in `type_defs.hpp`, add a benchmark entry using +the macros declared in `benchmark_callers.hpp`. +These macros have been designed to group benchmarking +patterns observed in the previous benchmarking system, +and minimize the number of lines of code while preserving +readability as much as possible. + +Examples: + +(1) If a scalar float lower ulp precision version of +log10 gets implemented at some point in SLEEF one could +add benchmarks for it by adding a line to `benchsleef.cpp`: +```cpp +BENCH(Sleef_log10f_u35, scalarf, <min>, <max>) +``` +This line can be repeated to provide benchmarks on +multiple intervals. + +(2) If the double precision of the function above gets +implemented as well, then we can simply add: +```cpp +BENCH_SCALAR(log10, u35, <min>, <max>) +``` +which would be equivalent to adding: +```cpp +BENCH(Sleef_log10f_u35, scalarf, <min>, <max>) +BENCH(Sleef_log10_u35, scalard, <min>, <max>) +``` +If the function you want to add does not use the types in +`type_defs.hpp`, extend this file with the types required +(and ensure type detection is implemented correctly). +Most likely you will also have to make some changes to +`gen_input.hpp`: +* Add adequate declaration for `vector_len`: +```cpp +template <> const inline int vector_len<newtype> = *; +``` +* and add adequate template specialization for `gen_input()`: +```cpp +template <> newtype gen_input<newtype>(double lo, double hi) +{ your implementation } +``` +

Note

+This tool can also be built as a standalone project. +From `sleef/src/benchmarks` directory, run: +```sh +cmake -S . -B build -Dsleef_BINARY_DIR= +cmake --build build -j +./build/benchsleef128 +``` diff --git a/src/benchmarks/benchmark_callers.hpp b/src/benchmarks/benchmark_callers.hpp new file mode 100644 index 00000000..3afe7530 --- /dev/null +++ b/src/benchmarks/benchmark_callers.hpp @@ -0,0 +1,105 @@ +// Copyright Naoki Shibata and contributors 2024. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#pragma once +#include "benchmark_templates.hpp" + +// Define macros that can be used to generate benchmark calls (defined in +// benchmark_templates.hpp). +// Example to generate benchmarks for 1ULP sin(x) for x between 0 and 6.28: +// BENCH(Sleef_sin_u10, double, 0, 6.28); +// BENCHMARK_CAPTURE is a symbol from the google bench framework +// Note: type is only passed for name filtering reasons +#define BENCH(funname, typefilter, min, max) \ + BENCHMARK_CAPTURE(BM_Sleef_templated_function, #funname, funname, min, max) \ + ->Name("MB_" #funname "_" #typefilter "_" #min "_" #max); + +#define BENCH_SINGLE_SCALAR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##f_##ulp, scalarf, min, max); +#define BENCH_DOUBLE_SCALAR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##_##ulp, scalard, min, max); +// Generate benchmarks for scalar function implementations +#define BENCH_SCALAR(fun, ulp, min, max) \ + BENCH_SINGLE_SCALAR(fun, ulp, min, max); \ + BENCH_DOUBLE_SCALAR(fun, ulp, min, max); + +// Generate benchmarks for vector function implementations +#ifdef ENABLE_VECTOR_BENCHMARKS +#if !defined(ARCH_VECT_LEN) || ARCH_VECT_LEN == 128 +#define BENCH_SINGLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##f4_##ulp, vectorf128, min, max); +#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##d2_##ulp, vectord128, min, max); +#elif ARCH_VECT_LEN == 256 +#define 
BENCH_SINGLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##f8_##ulp, vectorf256, min, max); +#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##d4_##ulp, vectord256, min, max); +#elif ARCH_VECT_LEN == 512 +#define BENCH_SINGLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##f16_##ulp, vectorf512, min, max); +#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max) \ + BENCH(Sleef_##fun##d8_##ulp, vectord512, min, max); +#endif +#define BENCH_VECTOR(fun, ulp, min, max) \ + BENCH_SINGLE_VECTOR(fun, ulp, min, max); \ + BENCH_DOUBLE_VECTOR(fun, ulp, min, max); +#else +#define BENCH_SINGLE_VECTOR(fun, ulp, min, max) +#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max) +#define BENCH_VECTOR(fun, ulp, min, max) +#endif + +// Generate benchmarks for SVE function implementations +#ifdef ENABLE_SVECTOR_BENCHMARKS +#define BENCH_SINGLE_SVE(fun, ulp, min, max) \ + BENCH(Sleef_##fun##fx_##ulp##sve, scalarf, min, max); +#define BENCH_DOUBLE_SVE(fun, ulp, min, max) \ + BENCH(Sleef_##fun##dx_##ulp##sve, scalard, min, max); +#define BENCH_SVE(fun, ulp, min, max) \ + BENCH_SINGLE_SVE(fun, ulp, min, max); \ + BENCH_DOUBLE_SVE(fun, ulp, min, max); +#else +#define BENCH_SINGLE_SVE(fun, ulp, min, max) +#define BENCH_DOUBLE_SVE(fun, ulp, min, max) +#define BENCH_SVE(fun, ulp, min, max) +#endif + +// Given a function implemented meeting a specific ulp +// error (present in the name of the function), +// BENCH_ALL_W_FIX_ULP macro will +// generate benchmarks for +// * all vector extensions supported +// * all precisions +// * all vector lengths +#define BENCH_ALL_W_FIX_ULP(fun, ulp, min, max) \ + BENCH_SCALAR(fun, ulp, min, max); \ + BENCH_VECTOR(fun, ulp, min, max); \ + BENCH_SVE(fun, ulp, min, max); +#define BENCH_SINGLEP_W_FIX_ULP(fun, ulp, min, max) \ + BENCH_SINGLE_SCALAR(fun, ulp, min, max); \ + BENCH_SINGLE_VECTOR(fun, ulp, min, max); \ + BENCH_SINGLE_SVE(fun, ulp, min, max); +#define BENCH_DOUBLEP_W_FIX_ULP(fun, ulp, min, max) \ + BENCH_DOUBLE_SCALAR(fun, ulp, min, 
max); \ + BENCH_DOUBLE_VECTOR(fun, ulp, min, max); \ + BENCH_DOUBLE_SVE(fun, ulp, min, max); + +#define BENCH_ALL_SINGLEP(fun, min, max) \ + BENCH_SINGLEP_W_FIX_ULP(fun, u10, min, max); \ + BENCH_SINGLEP_W_FIX_ULP(fun, u35, min, max); +#define BENCH_ALL_DOUBLEP(fun, min, max) \ + BENCH_DOUBLEP_W_FIX_ULP(fun, u10, min, max); \ + BENCH_DOUBLEP_W_FIX_ULP(fun, u35, min, max); + +// Given a function, BENCH_ALL macro will +// generate benchmarks for +// * all ulp implementations available (u10 and u35) +// * all vector extensions supported +// * all precisions +// * all vector lengths +#define BENCH_ALL(fun, min, max) \ + BENCH_ALL_W_FIX_ULP(fun, u10, min, max); \ + BENCH_ALL_W_FIX_ULP(fun, u35, min, max); diff --git a/src/benchmarks/benchmark_templates.hpp b/src/benchmarks/benchmark_templates.hpp new file mode 100644 index 00000000..eaec9747 --- /dev/null +++ b/src/benchmarks/benchmark_templates.hpp @@ -0,0 +1,47 @@ +// Copyright Naoki Shibata and contributors 2024. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#pragma once +#include "gen_input.hpp" +#include + +/////////////////////////////////// +// Function Callers /////////////// +/////////////////////////////////// +template +__attribute__((noinline)) void call_fun(F f, In_T... 
x) { + f(x...); +} + +/////////////////////////////////// +// Benchmarkers /////////////////// +/////////////////////////////////// +template +static void BM_Sleef_templated_function(benchmark::State &state, Ret (*fun)(T), + double min, double max) { + T x = gen_input(min, max); + for (auto _ : state) { + call_fun(fun, x); + } + int num_els_processed = state.iterations() * vector_len; + state.counters["NSperEl"] = + benchmark::Counter(num_els_processed, benchmark::Counter::kIsRate | + benchmark::Counter::kInvert); +} + +template +static void BM_Sleef_templated_function(benchmark::State &state, + Ret (*fun)(T, T), double min, + double max) { + T x0 = gen_input(min, max); + T x1 = gen_input(min, max); + for (auto _ : state) { + call_fun(fun, x0, x1); + } + int num_els_processed = state.iterations() * vector_len; + state.counters["NSperEl"] = + benchmark::Counter(num_els_processed, benchmark::Counter::kIsRate | + benchmark::Counter::kInvert); +} diff --git a/src/benchmarks/benchsleef.cpp b/src/benchmarks/benchsleef.cpp new file mode 100644 index 00000000..8c7ae8a1 --- /dev/null +++ b/src/benchmarks/benchsleef.cpp @@ -0,0 +1,70 @@ +// Copyright Naoki Shibata and contributors 2024. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include "benchmark_callers.hpp" +#include + +// ======================TRIG========================== +// sin on different intervals +BENCH_ALL(sin, 0, 6.28); +BENCH_ALL(sin, 0, 1e+6); +BENCH_ALL_SINGLEP(sin, 0, 1e20); +BENCH_ALL_DOUBLEP(sin, 0, 1e+100); + +// cos on different intervals +BENCH_ALL(cos, 0, 6.28); +BENCH_ALL(cos, 0, 1e+6); +BENCH_ALL_SINGLEP(cos, 0, 1e20); +BENCH_ALL_DOUBLEP(cos, 0, 1e+100); + +// tan on different intervals +BENCH_ALL(tan, 0, 6.28); +BENCH_ALL(tan, 0, 1e+6); +BENCH_ALL_SINGLEP(tan, 0, 1e20); +BENCH_ALL_DOUBLEP(tan, 0, 1e+100); + +// sincos on different intervals +BENCH_ALL(sincos, 0, 6.28); +BENCH_ALL(sincos, 0, 1e+6); +BENCH_ALL_SINGLEP(sincos, 0, 1e20); +BENCH_ALL_DOUBLEP(sincos, 0, 1e+100); + +// inverse trig +BENCH_ALL(asin, -1.0, 1.0); +BENCH_ALL(acos, -1.0, 1.0); +BENCH_ALL(atan, -10, 10); +BENCH_ALL(atan2, -10, 10) + +// ======================NON TRIG========================== +// log +BENCH_ALL_SINGLEP(log, 0, 1e+38); +BENCH_ALL_DOUBLEP(log, 0, 1e+100); + +BENCH_ALL_SINGLEP(log2, 0, 1e+38); +BENCH_ALL_DOUBLEP(log2, 0, 1e+100); + +BENCH_SINGLEP_W_FIX_ULP(log10, u10, 0, 1e+38); +BENCH_DOUBLEP_W_FIX_ULP(log10, u10, 0, 1e+100); + +BENCH_SINGLEP_W_FIX_ULP(log1p, u10, 0, 1e+38); +BENCH_DOUBLEP_W_FIX_ULP(log1p, u10, 0, 1e+100); + +// exp +BENCH_SINGLEP_W_FIX_ULP(exp, u10, -700, 700); +BENCH_DOUBLEP_W_FIX_ULP(exp, u10, -700, 700); + +BENCH_ALL_SINGLEP(exp2, -100, 100); +BENCH_ALL_DOUBLEP(exp2, -700, 700); + +BENCH_ALL_SINGLEP(exp10, -100, 100); +BENCH_ALL_DOUBLEP(exp10, -700, 700); + +BENCH_SINGLEP_W_FIX_ULP(expm1, u10, -100, 100); +BENCH_DOUBLEP_W_FIX_ULP(expm1, u10, -700, 700); + +// pow +BENCH_ALL_W_FIX_ULP(pow, u10, -30, 30); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/src/benchmarks/gen_input.hpp b/src/benchmarks/gen_input.hpp new file mode 100644 index 00000000..ac3186d8 --- /dev/null +++ 
b/src/benchmarks/gen_input.hpp @@ -0,0 +1,73 @@ +// Copyright Naoki Shibata and contributors 2024. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#pragma once +#include "type_defs.hpp" +#include +#include + +/////////////////////////////////// +// Random Generators ////////////// +/////////////////////////////////// +static_assert(sizeof(uint64_t) == sizeof(double)); +inline double asdouble(uint64_t i) { + union { + uint64_t i; + double f; + } u = {i}; + return u.f; +} + +uint64_t seed; +void init_rng() { seed = 0x0123456789abcdef; } + +double gen_rand(double lo, double hi) { + seed = 6364136223846793005ULL * seed + 1; + return lo + (hi - lo) * (asdouble(seed >> 12 | 0x3ffULL << 52) - 1.0); +} +float gen_randf(double lo, double hi) { + return static_cast(gen_rand(lo, hi)); +} + +/////////////////////////////////// +// Input Generators /////////////// +/////////////////////////////////// +template T gen_input(double, double); +template <> float gen_input(double lo, double hi) { return gen_randf(lo, hi); } +template <> double gen_input(double lo, double hi) { return gen_rand(lo, hi); } +#ifdef ENABLE_VECTOR_BENCHMARKS +template <> vfloat gen_input(double lo, double hi) { + int vlen = vector_len; + vfloat in; + for (int i = 0; i < vlen; i++) { + in[i] = gen_randf(lo, hi); + } + return in; +} +template <> vdouble gen_input(double lo, double hi) { + int vlen = vector_len; + vdouble in; + for (int i = 0; i < vlen; i++) { + in[i] = gen_rand(lo, hi); + } + return in; +} +#endif +#ifdef ENABLE_SVECTOR_BENCHMARKS +template <> svfloat gen_input(double lo, double hi) { + int vlen = vector_len; + std::vector in(vlen); + std::generate(in.begin(), in.end(), [&]() { return gen_randf(lo, hi); }); + svbool_t pg = svptrue_b32(); + return (svfloat)svld1(pg, in.data()); +} +template <> svdouble gen_input(double lo, double hi) { + int vlen = vector_len; + std::vector in(vlen); 
+ std::generate(in.begin(), in.end(), [&]() { return gen_rand(lo, hi); }); + svbool_t pg = svptrue_b32(); + return (svdouble)svld1(pg, in.data()); +} +#endif \ No newline at end of file diff --git a/src/benchmarks/type_defs.hpp b/src/benchmarks/type_defs.hpp new file mode 100644 index 00000000..bd8af7ea --- /dev/null +++ b/src/benchmarks/type_defs.hpp @@ -0,0 +1,96 @@ +// Copyright Naoki Shibata and contributors 2024. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#pragma once +#include + +/////////////////////////////////// +// Library Includes and /////////// +/////// Type Definitions ////////// +/////////////////////////////////// +template const inline int vector_len = 1; +template <> const inline int vector_len = 1; +template <> const inline int vector_len = 1; + +#if defined(__i386__) || defined(__x86_64__) +#if defined(_MSC_VER) +#include +#else +#include +#endif +#if !defined(ARCH_VECT_LEN) || ARCH_VECT_LEN == 128 +#ifdef __SSE2__ +typedef __m128d vdouble; +typedef __m128 vfloat; +typedef Sleef___m128d_2 vdouble2; +typedef Sleef___m128_2 vfloat2; +template <> const inline int vector_len = 4; +template <> const inline int vector_len = 2; +#define ENABLE_VECTOR_BENCHMARKS +#endif +// * Bigger precisions: +#elif ARCH_VECT_LEN == 256 +#ifdef __AVX__ +typedef __m256d vdouble; +typedef __m256 vfloat; +typedef Sleef___m256d_2 vdouble2; +typedef Sleef___m256_2 vfloat2; +template <> const inline int vector_len = 8; +template <> const inline int vector_len = 4; +#define ENABLE_VECTOR_BENCHMARKS +#endif +#elif ARCH_VECT_LEN == 512 +#ifdef __AVX512F__ +typedef __m512d vdouble; +typedef __m512 vfloat; +typedef Sleef___m512d_2 vdouble2; +typedef Sleef___m512_2 vfloat2; +template <> const inline int vector_len = 16; +template <> const inline int vector_len = 8; +#define ENABLE_VECTOR_BENCHMARKS +#endif +#endif + +#elif defined(__ARM_NEON) +#include +typedef 
float64x2_t vdouble; +typedef float32x4_t vfloat; +typedef Sleef_float64x2_t_2 vdouble2; +typedef Sleef_float32x4_t_2 vfloat2; +template <> const inline int vector_len = 4; +template <> const inline int vector_len = 2; +#define ENABLE_VECTOR_BENCHMARKS + +#elif defined(__VSX__) +#include +typedef __vector double vdouble; +typedef __vector float vfloat; +typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; +typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; +template <> const inline int vector_len = 4; +template <> const inline int vector_len = 2; +#define ENABLE_VECTOR_BENCHMARKS + +#elif defined(__VX__) +#include +typedef __vector double vdouble; +typedef __vector float vfloat; +typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; +typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; +template <> const inline int vector_len = 4; +template <> const inline int vector_len = 2; +#define ENABLE_VECTOR_BENCHMARKS +#endif + +#if defined(__ARM_FEATURE_SVE) +#include +typedef svfloat64_t svdouble; +typedef svfloat32_t svfloat; +typedef svfloat64x2_t svdouble2; +typedef svfloat32x2_t svfloat2; +template <> const inline int vector_len = svcntw(); +template <> const inline int vector_len = svcntd(); +#define ENABLE_SVECTOR_BENCHMARKS +#endif \ No newline at end of file