-
Notifications
You must be signed in to change notification settings - Fork 131
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integrate Google benchmarks into SLEEF
Added new benchmark tool to SLEEF project using googlebench framework. In theory this tool can benchmark any unary and binary function in SLEEF. Benchmark is enabled in all functions listed in benchsleef.cpp. This list uses macros in benchmark_callers.cpp, so that we can enable benchmarking in multiple precisions using single lines of code. It is also possible to list the exact function we want, as each of the macros in benchmark_callers.cpp can be called independently and connected like building blocks. The tool is integrated with SLEEF via CMake, meaning it can be built automatically when SLEEF is built. To enable that, pass CMake argument -DSLEEF_BUILD_BENCH=ON. This tool depends on C++17 standard. Tested on aarch64 for scalar, vector and SVE routines Tested on x86 for different vector length extensions. Tested for llvm-17, gcc-11 and gcc-14.
- Loading branch information
1 parent
686f2ce
commit bbdf0e2
Showing
11 changed files
with
612 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# CMakeLists.txt : SLEEF microbenchmarks built on top of Google Benchmark.
#
# Builds benchsleef128 everywhere and, on x86 hosts, benchsleef256 and
# benchsleef512 as well. Google Benchmark is fetched and built through
# ExternalProject; sleef.h and libsleef are taken from sleef_BINARY_DIR.
cmake_minimum_required(VERSION 3.11)

project("SLEEF Microbenchmarks")

# Threads::Threads is linked into every benchmark below, so a missing thread
# library has to be reported here rather than as an unknown-target error later.
find_package(Threads REQUIRED)
find_package(Git REQUIRED)
include(ExternalProject)

# The include and library paths below are derived from sleef_BINARY_DIR.
# Without it (and without an in-tree sleef target) they would silently
# collapse to /include and /lib.
if(NOT DEFINED sleef_BINARY_DIR AND NOT TARGET sleef)
  message(FATAL_ERROR
    "sleef_BINARY_DIR is not set. "
    "Configure with -Dsleef_BINARY_DIR=<SLEEF build directory>.")
endif()

# Pin Google Benchmark to a release so builds are reproducible and the update
# step does not re-fetch a moving branch on every build. Override with
# -DSLEEF_BENCH_GOOGLEBENCH_TAG=<tag or commit> to test another version.
set(SLEEF_BENCH_GOOGLEBENCH_TAG "v1.9.0" CACHE STRING
  "Google Benchmark git tag or commit used to build the SLEEF benchmarks")

# Private install prefix for the Google Benchmark headers and library.
set(googlebench_prefix "${CMAKE_CURRENT_BINARY_DIR}/googlebench")

# Apply CMake options from the installation guide in
# https://github.com/google/benchmark
ExternalProject_Add(googlebenchmark
  GIT_REPOSITORY https://github.com/google/benchmark.git
  GIT_TAG ${SLEEF_BENCH_GOOGLEBENCH_TAG}
  CMAKE_ARGS
    -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_INSTALL_PREFIX=${googlebench_prefix}
    # Force lib/ so the link path below also holds on distributions whose
    # GNUInstallDirs default is lib64.
    -DCMAKE_INSTALL_LIBDIR=lib
    -DBENCHMARK_ENABLE_GTEST_TESTS=OFF
)

# target_link_directories() needs CMake 3.13, so the library search paths for
# libbenchmark and (in standalone builds) libsleef stay directory-scoped.
# They must be declared before the executables are created.
link_directories(
  "${googlebench_prefix}/lib"
  "${sleef_BINARY_DIR}/lib"
)

set(Extra_CFLAGS -Wall -O3 -Wno-attributes)
set(BENCH_SRC_FILE
  "benchsleef.cpp"
  "benchmark_callers.hpp"
  "benchmark_templates.hpp"
  "gen_input.hpp"
  "type_defs.hpp"
)
# The benchmark sources depend on C++17, so do not let CMake silently fall
# back to an older standard.
set(BENCH_PROPERTIES C_STANDARD 99 CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
# Google Benchmark, SLEEF and the thread library.
set(BENCH_LIBS benchmark sleef Threads::Threads)

# sleef_add_benchmark(<target> [<compile option>...])
#
# Creates one benchmark executable from BENCH_SRC_FILE. Every argument after
# the target name is appended to the common compile options, which is how the
# per-executable architecture flags and ARCH_VECT_LEN are selected.
function(sleef_add_benchmark target)
  add_executable(${target} ${BENCH_SRC_FILE})
  set_target_properties(${target} PROPERTIES ${BENCH_PROPERTIES})
  target_include_directories(${target} PRIVATE
    "${googlebench_prefix}/include"   # benchmark/benchmark.h
    "${sleef_BINARY_DIR}/include"     # sleef.h
  )
  target_compile_options(${target} PRIVATE ${Extra_CFLAGS} ${ARGN})
  target_link_libraries(${target} PRIVATE ${BENCH_LIBS})
  # Headers and library only exist once the external project is installed.
  add_dependencies(${target} googlebenchmark)
endfunction()

sleef_add_benchmark(benchsleef128 -march=native)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
  sleef_add_benchmark(benchsleef256 -march=native -DARCH_VECT_LEN=256)
  sleef_add_benchmark(benchsleef512 -mavx512f -DARCH_VECT_LEN=512)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
<h2 id="benchmark">Benchmarking tool</h2> | ||
|
||
This tool uses the [googlebench](https://github.com/google/benchmark) framework to benchmark SLEEF | ||
functions. | ||
It is integrated with SLEEF via CMake. | ||
In order to build this tool automatically when SLEEF is | ||
built, pass the `-DSLEEF_BUILD_BENCH=ON` CMake option when | ||
setting up the build directory: | ||
```sh | ||
cmake -S . -B build -DSLEEF_BUILD_BENCH=ON | ||
``` | ||
After building SLEEF: | ||
```sh | ||
cmake --build build -j | ||
``` | ||
in `build/bin` folder you will find an executable named | ||
benchsleef128. | ||
Run this executable with `./build/bin/benchsleef128` in | ||
order to obtain microbenchmarks for the functions in the project. | ||
A filter option can also be provided to the executable. | ||
This feature is inherited from googlebench, and takes | ||
a regular expression, and executes only the benchmarks | ||
whose name matches the regular expression. | ||
The full set of available benchmarks is what runs when | ||
the benchmark tool is invoked with no filter set, | ||
and corresponds to all the benchmarks listed in | ||
`benchsleef.cpp`. | ||
```sh | ||
# Examples: | ||
# * This will benchmark Sleef_sinf_u10 on all intervals enabled in the tool. | ||
./build/bin/benchsleef128 --benchmark_filter=sinf_u10 | ||
# * This will benchmark all single precision sin functions (scalar, vector and sve if available): | ||
./build/bin/benchsleef128 --benchmark_filter=sinf | ||
# * This will benchmark all single precision vector functions: | ||
./build/bin/benchsleef128 --benchmark_filter=vectorf | ||
``` | ||
Note: in this context, "all" means all functions that are available in SLEEF and enabled in the benchmarks. | ||
<h3 id="benchmark-aarch64">Benchmarking on aarch64</h3> | ||
If you're running SLEEF on a machine with SVE support the executable generated will have SVE benchmarks | ||
available for functions specified in `benchsleef.cpp`. | ||
<h3 id="benchmark-x86">Benchmarking on x86</h3> | ||
If you're running SLEEF on an x86 machine, two extra | ||
executables may be built (according to feature detection): | ||
|
||
```sh | ||
./build/bin/benchsleef256 | ||
./build/bin/benchsleef512 | ||
``` | ||
|
||
These will benchmark 256bit and 512bit vector implementations | ||
for vector functions respectively. | ||
Note these executables can also be used to benchmark scalar | ||
functions. | ||
|
||
<h3 id="benchmark-maintenance">Maintenance</h3> | ||
Some functions are still not enabled in the benchmarks. | ||
In order to add a function which uses the types already | ||
declared in `type_defs.hpp`, add a benchmark entry using | ||
the macros declared in `benchmark_callers.hpp`. | ||
These macros have been designed to group benchmarking | ||
patterns observed in the previous benchmarking system, | ||
and minimize the number of lines of code while preserving | ||
readability as much as possible. | ||
|
||
Examples: | ||
|
||
(1) If a scalar float lower ulp precision version of | ||
log10 gets implemented at some point in SLEEF, one could | ||
add benchmarks for it by adding a line to `benchsleef.cpp`: | ||
```cpp | ||
BENCH(Sleef_log10f_u35, scalarf, <min>, <max>) | ||
``` | ||
This line can be repeated to provide benchmarks on | ||
multiple intervals. | ||
(2) If the double precision version of the function above gets | ||
implemented as well, then we can instead simply add: | ||
```cpp | ||
BENCH_SCALAR(log10, u35, <min>, <max>) | ||
``` | ||
which would be equivalent to adding: | ||
```cpp | ||
BENCH(Sleef_log10f_u35, scalarf, <min>, <max>) | ||
BENCH(Sleef_log10_u35, scalard, <min>, <max>) | ||
``` | ||
If the function you want to add does not use the types in | ||
`type_defs.hpp`, extend this file with the types required | ||
(and ensure type detection is implemented correctly). | ||
Most likely you will also have to make some changes to | ||
`gen_input.hpp`: | ||
* Add adequate declaration for `vector_len`: | ||
```cpp | ||
template <> const inline int vector_len<new_type> = *; | ||
``` | ||
* and add adequate template specialization for `gen_input()`: | ||
```cpp | ||
template <> new_type gen_input (double lo, double hi) | ||
{ your implementation } | ||
``` | ||
<h3 id="benchmark-note">Note</h3> | ||
This tool can also be built as a standalone project. | ||
From `sleef/src/benchmarks` directory, run: | ||
```sh | ||
cmake -S . -B build -Dsleef_BINARY_DIR=<build_dir> | ||
cmake --build build -j | ||
./build/benchsleef128 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// Copyright Naoki Shibata and contributors 2024. | ||
// Distributed under the Boost Software License, Version 1.0. | ||
// (See accompanying file LICENSE.txt or copy at | ||
// http://www.boost.org/LICENSE_1_0.txt) | ||
|
||
#pragma once | ||
#include "benchmark_templates.hpp" | ||
|
||
// Benchmark-registration macros layered on the templates declared in
// benchmark_templates.hpp.
//
// BENCH(sleef_fn, filter_tag, lo, hi) registers BM_Sleef_templated_function
// through BENCHMARK_CAPTURE (a Google Benchmark macro), forwarding sleef_fn,
// lo and hi as captured arguments, and renames the benchmark to
//   "MB_<sleef_fn>_<filter_tag>_<lo>_<hi>".
// filter_tag is only stringized into that name; it exists so benchmarks can be
// selected by name and has no effect on what is measured.
//
// Example, 1ULP sin(x) for x between 0 and 6.28:
//   BENCH(Sleef_sin_u10, scalard, 0, 6.28);
#define BENCH(sleef_fn, filter_tag, lo, hi)                                    \
  BENCHMARK_CAPTURE(BM_Sleef_templated_function, #sleef_fn, sleef_fn, lo, hi)  \
      ->Name("MB_" #sleef_fn "_" #filter_tag "_" #lo "_" #hi);
|
||
// Scalar benchmarks. The function name is assembled by token pasting:
//   single precision -> Sleef_<op>f_<acc>   (tagged scalarf)
//   double precision -> Sleef_<op>_<acc>    (tagged scalard)
#define BENCH_SINGLE_SCALAR(op, acc, lo, hi)                                   \
  BENCH(Sleef_##op##f_##acc, scalarf, lo, hi);
#define BENCH_DOUBLE_SCALAR(op, acc, lo, hi)                                   \
  BENCH(Sleef_##op##_##acc, scalard, lo, hi);
// Both scalar precisions in one line.
#define BENCH_SCALAR(op, acc, lo, hi)                                          \
  BENCH_SINGLE_SCALAR(op, acc, lo, hi);                                        \
  BENCH_DOUBLE_SCALAR(op, acc, lo, hi);
|
||
// Fixed-width vector benchmarks.
//
// Only active when ENABLE_VECTOR_BENCHMARKS is defined; otherwise all three
// macros expand to nothing so benchmark lists can mention them
// unconditionally. ARCH_VECT_LEN selects the vector width (128 when it is
// not defined) and with it the function suffix and the name tag:
//   128 -> Sleef_<fun>f4_<ulp>  / Sleef_<fun>d2_<ulp>   (vectorf128/vectord128)
//   256 -> Sleef_<fun>f8_<ulp>  / Sleef_<fun>d4_<ulp>   (vectorf256/vectord256)
//   512 -> Sleef_<fun>f16_<ulp> / Sleef_<fun>d8_<ulp>   (vectorf512/vectord512)
#ifdef ENABLE_VECTOR_BENCHMARKS
#if !defined(ARCH_VECT_LEN) || ARCH_VECT_LEN == 128
#define BENCH_SINGLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##f4_##ulp, vectorf128, min, max);
#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##d2_##ulp, vectord128, min, max);
#elif ARCH_VECT_LEN == 256
#define BENCH_SINGLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##f8_##ulp, vectorf256, min, max);
#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##d4_##ulp, vectord256, min, max);
#elif ARCH_VECT_LEN == 512
#define BENCH_SINGLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##f16_##ulp, vectorf512, min, max);
#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max)                                \
  BENCH(Sleef_##fun##d8_##ulp, vectord512, min, max);
#else
// Without this, the per-precision macros would be left undefined while
// BENCH_VECTOR still expands to them, producing an obscure error at the first
// use instead of pointing at the bad setting.
#error "Unsupported ARCH_VECT_LEN: expected 128, 256 or 512"
#endif
// Both vector precisions in one line.
#define BENCH_VECTOR(fun, ulp, min, max)                                       \
  BENCH_SINGLE_VECTOR(fun, ulp, min, max);                                     \
  BENCH_DOUBLE_VECTOR(fun, ulp, min, max);
#else
#define BENCH_SINGLE_VECTOR(fun, ulp, min, max)
#define BENCH_DOUBLE_VECTOR(fun, ulp, min, max)
#define BENCH_VECTOR(fun, ulp, min, max)
#endif
|
||
// SVE benchmarks.
//
// Only active when ENABLE_SVECTOR_BENCHMARKS is defined; otherwise all three
// macros expand to nothing. Function names are assembled as
//   single precision -> Sleef_<fun>fx_<ulp>sve
//   double precision -> Sleef_<fun>dx_<ulp>sve
// The name tags are vectorf_sve / vectord_sve rather than the scalar tags:
// the tag ends up in the benchmark name used for filtering, so tagging these
// as scalar would make a "scalarf" filter pick up SVE benchmarks and a
// "vectorf" filter miss them.
#ifdef ENABLE_SVECTOR_BENCHMARKS
#define BENCH_SINGLE_SVE(fun, ulp, min, max)                                   \
  BENCH(Sleef_##fun##fx_##ulp##sve, vectorf_sve, min, max);
#define BENCH_DOUBLE_SVE(fun, ulp, min, max)                                   \
  BENCH(Sleef_##fun##dx_##ulp##sve, vectord_sve, min, max);
// Both SVE precisions in one line.
#define BENCH_SVE(fun, ulp, min, max)                                          \
  BENCH_SINGLE_SVE(fun, ulp, min, max);                                        \
  BENCH_DOUBLE_SVE(fun, ulp, min, max);
#else
#define BENCH_SINGLE_SVE(fun, ulp, min, max)
#define BENCH_DOUBLE_SVE(fun, ulp, min, max)
#define BENCH_SVE(fun, ulp, min, max)
#endif
|
||
// Fan-out macros for one function at one accuracy level (the ulp suffix that
// appears in the function name, e.g. u10).
//
// BENCH_ALL_W_FIX_ULP expands to the scalar, vector and SVE benchmarks in
// both precisions. Vector and SVE entries expand to nothing when the
// corresponding ENABLE_* macro is not defined.
#define BENCH_ALL_W_FIX_ULP(op, acc, lo, hi)                                   \
  BENCH_SCALAR(op, acc, lo, hi);                                               \
  BENCH_VECTOR(op, acc, lo, hi);                                               \
  BENCH_SVE(op, acc, lo, hi);
// Same fan-out restricted to single precision.
#define BENCH_SINGLEP_W_FIX_ULP(op, acc, lo, hi)                               \
  BENCH_SINGLE_SCALAR(op, acc, lo, hi);                                        \
  BENCH_SINGLE_VECTOR(op, acc, lo, hi);                                        \
  BENCH_SINGLE_SVE(op, acc, lo, hi);
// Same fan-out restricted to double precision.
#define BENCH_DOUBLEP_W_FIX_ULP(op, acc, lo, hi)                               \
  BENCH_DOUBLE_SCALAR(op, acc, lo, hi);                                        \
  BENCH_DOUBLE_VECTOR(op, acc, lo, hi);                                        \
  BENCH_DOUBLE_SVE(op, acc, lo, hi);
|
||
// Per-precision fan-out over both accuracy levels (u10 and u35): each macro
// expands to the matching *_W_FIX_ULP macro once per level.
#define BENCH_ALL_SINGLEP(op, lo, hi)                                          \
  BENCH_SINGLEP_W_FIX_ULP(op, u10, lo, hi);                                    \
  BENCH_SINGLEP_W_FIX_ULP(op, u35, lo, hi);
#define BENCH_ALL_DOUBLEP(op, lo, hi)                                          \
  BENCH_DOUBLEP_W_FIX_ULP(op, u10, lo, hi);                                    \
  BENCH_DOUBLEP_W_FIX_ULP(op, u35, lo, hi);
|
||
// BENCH_ALL(op, lo, hi) is the widest fan-out: it expands to
// BENCH_ALL_W_FIX_ULP for both accuracy levels (u10 and u35), i.e. the
// scalar, vector and SVE benchmarks in both precisions for each level.
#define BENCH_ALL(op, lo, hi)                                                  \
  BENCH_ALL_W_FIX_ULP(op, u10, lo, hi);                                        \
  BENCH_ALL_W_FIX_ULP(op, u35, lo, hi);
Oops, something went wrong.