From ccb1d873a2d64691ea45372c200b9ee36bac13c2 Mon Sep 17 00:00:00 2001 From: Matthias Werner Date: Sat, 4 Aug 2018 14:35:05 +0200 Subject: [PATCH] Better README and CMake (Boost >= 1.59. Adds Threads dependency). --- CMakeLists.txt | 7 +- README.md | 163 ++++++++++++++++++++++++++++++++++++--------- src/CMakeLists.txt | 10 +-- 3 files changed, 144 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46bbe41..5d48bd4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,11 +104,16 @@ if(GEARSHIFFT_FLOAT16_SUPPORT) include_directories(${DOWNLOAD_DIR}) endif() +#------------------------------------------------------------------------------ +# Threads +#------------------------------------------------------------------------------ +find_package( Threads ) + #------------------------------------------------------------------------------ # Boost #------------------------------------------------------------------------------ -find_package(Boost 1.56 QUIET COMPONENTS system unit_test_framework program_options REQUIRED) +find_package(Boost 1.59 QUIET COMPONENTS system unit_test_framework program_options REQUIRED) if(Boost_FOUND) include_directories(${Boost_INCLUDE_DIRS}) link_directories(${Boost_LIBRARY_DIRS}) diff --git a/README.md b/README.md index 8744ef5..67753b7 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ If you want to just browse our results, see the [raw benchmark data](https://www ## Build -Go to the gearshifft directory (created by git clone ...): +Go to the `gearshifft` directory (created by `git clone`): ```bash mkdir release && cd release cmake .. @@ -33,9 +33,34 @@ CMake tries to find the libraries and enables the corresponding make targets. After `make` have finished you can run e.g. `./gearshifft_cufft`. 
If the FFT library paths cannot be found, `CMAKE_PREFIX_PATH` has to be used, e.g.: + ```bash -export CMAKE_PREFIX_PATH=~/software/clFFT-cuda8.0-gcc5.4/:/opt/cuda:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${HOME}/software/clFFT-cuda8.0-gcc5.4/:/opt/cuda:$CMAKE_PREFIX_PATH +``` + +Enable float16 support for cuFFT: + +```bash +cmake -DGEARSHIFFT_FLOAT16_SUPPORT=ON .. +make half-code # for float16 data type +make gearshifft_cufft +``` + +Another example with Boost and FFTW dependencies: + +```bash +BOOST_VER=1.67.0 +FFTW_VER=3.3.8 +BOOST_ROOT=${HOME}/sw/boost-${BOOST_VER} +FFTW_ROOT=${HOME}/sw/fftw-${FFTW_VER}/ + +export CMAKE_PREFIX_PATH=${BOOST_ROOT}/lib:${BOOST_ROOT}/include:${FFTW_ROOT}:${CMAKE_PREFIX_PATH} +if [ -d "CMakeFiles" ]; then + make clean + rm -rf CMakeFiles CMakeCache.txt cmake_install.cmake Makefile +fi cmake .. +make ``` ## Install @@ -50,6 +75,73 @@ make -j 4 install ``` +### Build Boost from Source + +
+ +```bash +## compile_boost.sh +# boost version to download +BOOST_VERSION=1.67.0 +# where to download +BOOST_SRC="${HOME}/Downloads/boost" +# boost install dir +BOOST_ROOT="${HOME}/sw/boost-${BOOST_VERSION}" +# if install dir is empty, then download && build +if [[ -z "$(ls -A ${BOOST_ROOT})" ]]; then + mkdir -p ${BOOST_SRC} + wget http://sourceforge.net/projects/boost/files/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.bz2 -nc -O "${BOOST_SRC}/../boost.tar.bz2" + (cd ${BOOST_SRC}/../; tar jxf boost.tar.bz2 --strip-components=1 -C ${BOOST_SRC}) + (cd ${BOOST_SRC}; ./bootstrap.sh --with-libraries=program_options,filesystem,system,test) + (cd ${BOOST_SRC}; ./b2 --prefix="$BOOST_ROOT" -d0 install --variant=release) +fi +``` + +
+ +### Build FFTW from Source + +
+ +```bash +## compile_fftw.sh +# fftw version to download +VERS=3.3.8 +# we compile in separated directories, so binaries do not clash +VERS_single=${VERS}_single +VERS_double=${VERS}_double +# the install directory for fftw and fftwf +FFTW_ROOT=${HOME}/sw/fftw-${VERS}/ +# "./compile_fftw.sh clean" removes directories +if [ "$1" == "clean" ]; then + echo "clean sources" + rm -rf fftw-${VERS_single} fftw-${VERS_double} +fi +# if directories do not exist, create them and unpack fftw_**.tar.gz +if [ ! -d "fftw-${VERS_single}" ] && [ ! -d "fftw-${VERS_double}" ]; then + wget http://www.fftw.org/fftw-${VERS}.tar.gz + mkdir fftw-${VERS_single} + echo "unpacking" + tar xfz fftw-${VERS}.tar.gz -C fftw-${VERS_single} + cp -r fftw-${VERS_single} fftw-${VERS_double} +fi +IFLAG_STD="--enable-static=yes --enable-shared=yes --with-gnu-ld --enable-silent-rules --with-pic" +IFLAGS="--prefix=${FFTW_ROOT} --enable-openmp --enable-sse2 -q $IFLAG_STD" +# float +cd fftw-${VERS_single}/fftw-${VERS} +./configure $IFLAGS "--enable-float" +make -j8 +make install +# double +cd ../../fftw-${VERS_double}/fftw-${VERS} +./configure $IFLAGS +make -j8 +make install +``` + +
+ + ## Testing The tests can be executed by `make test` after you have compiled the binaries. @@ -82,9 +174,26 @@ See help message (pass `--help|-h`) for the command line options. --wisdom_dp arg Wisdom file for double-precision. --plan_timelimit arg (=-1) Timelimit in seconds for planning in FFTW. ``` -**Examples** -Runs complete benchmark for clFFT (also applies for cuFFT, fftw, ..) +### FFT Extents Presets + +`gearshifft` comes with preset files which contain a broad range of FFT extents and are located in: + +```bash +/gearshifft/share/gearshifft/*.conf + +``` + +For FFTW benchmarks with, e.g., `FFTW_MEASURE` runtimes grow very quickly. You can delete or comment out the unwanted lines in the configuration file with `#`. +The extents configurations are loaded by the `gearshifft` command-line interface: + +```bash +./gearshifft_fftw -f myextents.conf +``` + +### Examples + +Runs complete benchmark for clFFT (also applies for cuFFT, FFTW, ..) ```bash ./gearshifft_clfft # equals @@ -119,6 +228,19 @@ Select compute devices by id returned by `--list-devices|-l` ./gearshifft_clfft -r */double/1024x1024/Inplace_* ``` +#### Hints for FFTW Usage + +`gearshifft_fftw` offers some additional options, e.g., for the plan rigor or the time limit of plans, just check `./gearshifft_fftw -h`. +FFTW itself allows you to pregenerate FFT plans by the so-called wisdom files. These can be generated by FFTW tools, e.g.: +```bash +./fftw-3.3.5_single/tools/fftwf-wisdom -v -c -n -T 24 -o wisdomf # single precision +./fftw-3.3.5_double/tools/fftw-wisdom -v -c -n -T 24 -o wisdom # double precision +``` +It is recommended to compile double-precision (fftw-...) and single-precision (fftwf-...) to separate directories to avoid linker issues. +The wisdom settings must match the `gearshifft_fftw` configuration (number of cores, precision, extents). +Most of the time you do not benefit from a multi-core setting, because the FFT is already computed almost in no time. 
+FFTW spends a lot of time in planning, unless you use `FFTW_ESTIMATE` or time limits, which are also only lower borders. + ## Measurement The FFT scenario is a roundtrip FFT, i.e. forward and backward transformation. @@ -150,13 +272,13 @@ See CSV header for column titles and meta-information (memory, number of runs, e ## Tested on ... -- linux (CentOS, RHEL, ArchLinux, Ubuntu) -- gcc 5.3.0, gcc 6.2.0, gcc 7.1.1, clang 3.8 & 4.0.1 (fftw threads disabled) -- CUDA 8.0.*, CUDA 9.0.69-RC +- linux (CentOS, RHEL, ArchLinux, Ubuntu, Fedora) +- gcc 5.3.0, gcc 6.2.0, gcc 7.1.1, gcc 8.1.1, clang 3.8 & 4.0.1 (FFTW threads disabled) +- CUDA 8.0.x, CUDA 9.x - clFFT 2.12.0, 2.12.1, 2.12.2 -- FFTW 3.3.4, 3.3.5, 3.3.6pl1 +- FFTW 3.3.4, 3.3.5, 3.3.6pl1, 3.3.8 - OpenCL 1.2 (Nvidia, AMD, Intel) -- Nvidia Pascal P100, Kepler K80, K20Xm, GTX1080, Haswell and Sandybridge Xeon CPUs +- Nvidia Volta V100, Pascal P100, Kepler K80, K20Xm, GTX1080, Broadwell, Haswell and Sandybridge Xeon CPUs ## Issues @@ -164,26 +286,7 @@ See CSV header for column titles and meta-information (memory, number of runs, e - clFFT on CPU cannot transform the 4096-FFT and 4096x4096-FFTs (see [this issue](https://github.com/clMathLibraries/clFFT/issues/171)) - clFFT seems to have lower transform size limits on CPU than on GPU (a complex 16384x16384 segfaults on clfft CPU, while it works on GPU). gearshifft marks these cases as "Unsupported lengths" and skips them. - At the moment this is for single-GPUs, batches are not considered -- if gearshifft is killed before, no output is created, which might be an issue on a job scheduler system like slurm (exceeding memory assignment, out-of-memory killings) -- in case the boost version (e.g. 
1.62.0) you have is more recent than your cmake (say 2.8.12.2), use `cmake -DBoost_ADDITIONAL_VERSIONS=1.62.0 -DBOOST_ROOT=/path/to/boost/1.62.0 ` +- if `gearshifft` is killed before, no output is created, which might be an issue on a job scheduler system like slurm (exceeding memory assignment, out-of-memory killings) +- in case the Boost version (e.g. 1.62.0) you have is more recent than your `cmake` (say 2.8.12.2), use `cmake -DBoost_ADDITIONAL_VERSIONS=1.62.0 -DBOOST_ROOT=/path/to/boost/1.62.0 ` - Windows or MacOS is not supported yet, feel free to add a pull-request - cufft float16 transforms overflow at >=1048576 elements - - -## Results (FFTW) - -fftw/haswell contains results for `FFTW_MEASURE`, `FFTW_ESTIMATE` and `FFTW_WISDOM_ONLY`. The planning time limit is set to `FFTW_NO_TIMELIMIT` (can be set with cmake option `GEARSHIFFT_FFTW_TIMELIMIT`). -fftw was compiled with: -``` ---enable-static=yes --enable-shared=yes --with-gnu-ld --enable-silent-rules --with-pic --enable-openmp --enable-sse2 -``` -fftw/haswell was run on: -``` -2x Intel(R) Xeon(R) CPU E5-2680 v3 (12 cores) @ 2.50GHz, MultiThreading disabled, 128 GB SSD local disk, 64 GB RAM -``` -The wisdom files were generated with: -```bash -../fftw-3.3.5TF/fftw-3.3.5/tools/fftwf-wisdom -v -c -n -T 24 -o wisdomf # single precision -../fftw-3.3.5T/fftw-3.3.5/tools/fftw-wisdom -v -c -n -T 24 -o wisdom # double precision -``` -It is recommended to compile double-precision (fftw-...) and single-precision (fftwf-...) to separate directories to avoid linker issues. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 754bf01..c22bc86 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,7 +6,10 @@ cmake_minimum_required(VERSION 3.1) function(add_exec Tlib) set(PROJECT_EXEC gearshifft_${Tlib}) - add_executable(${PROJECT_EXEC} ${SOURCES}) + + add_executable(${PROJECT_EXEC} benchmark.cpp options.cpp) + + target_include_directories(${PROJECT_EXEC} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../inc") target_compile_definitions(${PROJECT_EXEC} PUBLIC -DGEARSHIFFT_NUMBER_WARM_RUNS=${GEARSHIFFT_NUMBER_WARM_RUNS} -DGEARSHIFFT_NUMBER_WARMUPS=${GEARSHIFFT_NUMBER_WARMUPS} @@ -28,7 +31,7 @@ function(add_exec Tlib) set(LIBS ${FFTW_LIBRARIES}) endif() - target_link_libraries(${PROJECT_EXEC} ${Boost_LIBRARIES} ${LIBS}) + target_link_libraries(${PROJECT_EXEC} ${Boost_LIBRARIES} ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) add_test(NAME ${PROJECT_EXEC} COMMAND ${PROJECT_EXEC} -e 64) install(TARGETS ${PROJECT_EXEC} DESTINATION bin) @@ -38,9 +41,6 @@ endfunction() # #------------------------------------------------------------------------------ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../inc) -set(SOURCES benchmark.cpp options.cpp) - # FFT lib clients foreach(FFTLIB ${FFTLIBS}) add_exec(${FFTLIB})