diff --git a/CMakeLists.txt b/CMakeLists.txt
index 699fa0cd6..e21cc35d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,19 +86,20 @@ include(FindOrFetchBoost)
 # C++ coroutines
 find_package(CXXStdCoroutine MODULE REQUIRED COMPONENTS Final Experimental)
 
-
 ##########################
-#### CUDA: must come before PaRSEC
+#### CUDA -- CUDA language must be added before PaRSEC detects CUDA
 ##########################
-include(CheckLanguage)
-check_language(CUDA)
-if(CMAKE_CUDA_COMPILER)
+option(TTG_DISABLE_CUDA "True iff support for CUDA files is disabled, even if it is possible" OFF)
+if( NOT TTG_DISABLE_CUDA )
+  include(CheckLanguage)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
-endif(CMAKE_CUDA_COMPILER)
-set(TTG_HAVE_CUDA ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if TTG supports compiling .cu files")
-
-
-
+  endif(CMAKE_CUDA_COMPILER)
+  set(TTG_HAVE_CUDA ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if TTG supports compiling .cu files")
+else( NOT TTG_DISABLE_CUDA )
+  set(TTG_HAVE_CUDA OFF CACHE BOOL "True if TTG supports compiling .cu files")
+endif( NOT TTG_DISABLE_CUDA )
 
 ##########################
 #### prerequisite runtimes
@@ -114,7 +115,6 @@ if (TARGET MADworld)
     message(STATUS "MADNESS_FOUND=1")
 endif(TARGET MADworld)
 
-
 ##########################
 #### Examples
 ##########################
diff --git a/examples/potrf/testing_dpoinv.cc b/examples/potrf/testing_dpoinv.cc
index 550e6435d..38244f59f 100644
--- a/examples/potrf/testing_dpoinv.cc
+++ b/examples/potrf/testing_dpoinv.cc
@@ -6,6 +6,7 @@
 #endif // TTG_USE_PARSEC
 
 #include
+#include
 
 #include "plgsy.h"
 #include "pmw.h"
diff --git a/ttg/ttg/device.h b/ttg/ttg/device.h
new file mode 100644
index 000000000..d1338bd22
--- /dev/null
+++ b/ttg/ttg/device.h
@@ -0,0 +1,20 @@
+#ifndef TTG_DEVICE_H
+#define TTG_DEVICE_H
+
+#include <cstddef>
+
+#include "ttg/fwd.h"
+#include "ttg/execution.h"
+
+namespace ttg {
+  namespace device {
+    /// Memory allocator bound to one device of the backend selected through TTG_IMPL_NS.
+    using DeviceAllocator = TTG_IMPL_NS::device::DeviceAllocator;
+
+    /// @return the number of devices reported by the selected backend.
+    /// Declared inline because this header-defined function may be included by several translation units.
+    inline std::size_t nb_devices() { return TTG_IMPL_NS::device::nb_devices(); }
+  }  // namespace device
+}  // namespace ttg
+
+#endif /* TTG_DEVICE_H */
diff --git a/ttg/ttg/madness/fwd.h b/ttg/ttg/madness/fwd.h
index abcb771c5..469a251d1 100644
--- a/ttg/ttg/madness/fwd.h
+++ b/ttg/ttg/madness/fwd.h
@@ -45,6 +45,11 @@ namespace ttg_madness {
   template
   inline void ttg_broadcast(ttg::World world, T &data, int source_rank);
 
+  namespace device {
+    class DeviceAllocator;
+    inline std::size_t nb_devices();
+  }
+
 }  // namespace ttg_madness
 
 #endif  // TTG_MADNESS_FWD_H
diff --git a/ttg/ttg/madness/ttg.h b/ttg/ttg/madness/ttg.h
index 30ea2cad1..6cd484d4a 100644
--- a/ttg/ttg/madness/ttg.h
+++ b/ttg/ttg/madness/ttg.h
@@ -1277,6 +1277,29 @@ namespace ttg_madness {
 
 #include "ttg/make_tt.h"
 
+  namespace device {
+    /// Host-only allocator: this backend accepts a single device, identifier 0.
+    class DeviceAllocator {
+     public:
+      /// @param did device identifier; only 0 is accepted
+      /// @throws std::out_of_range if \p did is not 0
+      DeviceAllocator(int did) {
+        if (did != 0) {
+          throw std::out_of_range("TTG MADNESS Backend: current implementation only supports CPU devices");
+        }
+      }
+      /// @return \p size bytes obtained from ::malloc
+      void *allocate(std::size_t size) { return ::malloc(size); }
+      /// Releases memory previously returned by allocate().
+      void free(void *ptr) { ::free(ptr); }
+      /// @return always ::ttg::ExecutionSpace::Host
+      ::ttg::ExecutionSpace executionSpace() const { return ::ttg::ExecutionSpace::Host; }
+    };
+
+    /// @return the number of devices of this backend, always 1
+    inline std::size_t nb_devices() { return 1; }
+  }  // namespace device
+
 }  // namespace ttg_madness
 
 #include "ttg/madness/watch.h"
diff --git a/ttg/ttg/parsec/fwd.h b/ttg/ttg/parsec/fwd.h
index 400338ccd..c3542af5e 100644
--- a/ttg/ttg/parsec/fwd.h
+++ b/ttg/ttg/parsec/fwd.h
@@ -69,6 +69,11 @@ namespace ttg_parsec {
   template
   static void ttg_broadcast(ttg::World world, T &data, int source_rank);
 
+  namespace device {
+    class DeviceAllocator;
+    inline std::size_t nb_devices();
+  }
+
 #if 0
   template
   inline std::pair>...>> get_ptr(Args&&... args);
@@ -79,7 +84,6 @@ namespace ttg_parsec {
   template
   inline ptr make_ptr(Args&&... args);
 
-
 }  // namespace ttg_parsec
 
 #endif  // TTG_PARSEC_FWD_H
diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h
index b044c0b53..9a7dac744 100644
--- a/ttg/ttg/parsec/ttg.h
+++ b/ttg/ttg/parsec/ttg.h
@@ -73,6 +73,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -3482,6 +3483,75 @@ namespace ttg_parsec {
 
 #include "ttg/make_tt.h"
 
+  namespace device {
+
+    /// Allocates memory on one of the PaRSEC devices of type CPU or CUDA.
+    /// Members are defined in-class (implicitly inline) because this is a header.
+    class DeviceAllocator {
+     private:
+      int ttg_did, parsec_did;
+      struct ::zone_malloc_s *zone;  // stays null for CPU devices
+      ::ttg::ExecutionSpace exec_space;
+
+     public:
+      /// @param did index of the device among the CPU and CUDA devices, in PaRSEC enumeration order
+      /// @throws std::out_of_range if there is no such device
+      DeviceAllocator(int did)
+          : ttg_did(-1), parsec_did(-1), zone(nullptr), exec_space(::ttg::ExecutionSpace::Invalid) {
+        int remaining = did;  // CPU/CUDA devices still to skip before the requested one
+        for (int i = 0; i < parsec_nb_devices; i++) {
+          parsec_device_module_t *m = parsec_mca_device_get(i);
+          if (m->type != PARSEC_DEV_CPU && m->type != PARSEC_DEV_CUDA) continue;
+          if (remaining != 0) {
+            remaining--;
+            continue;
+          }
+          parsec_did = i;
+          ttg_did = did;  // the requested identifier, not the exhausted countdown
+          if (m->type == PARSEC_DEV_CUDA) {
+            parsec_device_gpu_module_t *gm = reinterpret_cast<parsec_device_gpu_module_t *>(m);
+            zone = gm->memory;
+            exec_space = ::ttg::ExecutionSpace::CUDA;
+          } else {
+            exec_space = ::ttg::ExecutionSpace::Host;
+          }
+          return;
+        }
+        throw std::out_of_range("Device identifier is out of range");
+      }
+
+      /// @return \p size bytes from the device zone, or from ::malloc when there is no zone
+      void *allocate(std::size_t size) {
+        if (nullptr == zone) return ::malloc(size);
+        return zone_malloc(zone, size);
+      }
+
+      /// Releases memory previously returned by allocate().
+      void free(void *ptr) {
+        if (nullptr == zone) {
+          ::free(ptr);  // qualified: a bare free(ptr) would call this member again
+          return;
+        }
+        zone_free(zone, ptr);
+      }
+
+      /// @return the execution space selected by the constructor
+      ::ttg::ExecutionSpace executionSpace() const { return exec_space; }
+    };
+
+    /// @return the number of PaRSEC devices of type CPU or CUDA
+    inline std::size_t nb_devices() {
+      std::size_t nb = 0;
+      for (int i = 0; i < parsec_nb_devices; i++) {
+        parsec_device_module_t *m = parsec_mca_device_get(i);
+        if (m->type == PARSEC_DEV_CPU || m->type == PARSEC_DEV_CUDA) {
+          nb++;
+        }
+      }
+      return nb;
+    }
+  }  // namespace ttg_parsec::device
+
 }  // namespace ttg_parsec
 
 /**