diff --git a/CMakeLists.txt b/CMakeLists.txt
index 699fa0cd6..aadd97e0d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,19 +86,20 @@ include(FindOrFetchBoost)
 # C++ coroutines
 find_package(CXXStdCoroutine MODULE REQUIRED COMPONENTS Final Experimental)
 
-
 ##########################
-#### CUDA: must come before PaRSEC
+#### CUDA
 ##########################
-include(CheckLanguage)
-check_language(CUDA)
-if(CMAKE_CUDA_COMPILER)
+option(TTG_DISABLE_CUDA "True iff support for CUDA files is disabled, even if it is possible" OFF)
+if( NOT TTG_DISABLE_CUDA )
+  include(CheckLanguage)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
-endif(CMAKE_CUDA_COMPILER)
-set(TTG_HAVE_CUDA ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if TTG supports compiling .cu files")
-
-
-
+  endif(CMAKE_CUDA_COMPILER)
+  set(TTG_HAVE_CUDA ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if TTG supports compiling .cu files" FORCE)
+else( NOT TTG_DISABLE_CUDA )
+  set(TTG_HAVE_CUDA OFF CACHE BOOL "True if TTG supports compiling .cu files" FORCE)
+endif( NOT TTG_DISABLE_CUDA )
 
 ##########################
 #### prerequisite runtimes
@@ -114,7 +115,6 @@ if (TARGET MADworld)
   message(STATUS "MADNESS_FOUND=1")
 endif(TARGET MADworld)
 
-
 ##########################
 #### Examples
 ##########################
diff --git a/examples/potrf/testing_dpoinv.cc b/examples/potrf/testing_dpoinv.cc
index 550e6435d..38244f59f 100644
--- a/examples/potrf/testing_dpoinv.cc
+++ b/examples/potrf/testing_dpoinv.cc
@@ -6,6 +6,7 @@
 #endif // TTG_USE_PARSEC
 
 #include
+#include
 
 #include "plgsy.h"
 #include "pmw.h"
diff --git a/ttg/ttg/device.h b/ttg/ttg/device.h
index 769a147bf..aa953acd3 100644
--- a/ttg/ttg/device.h
+++ b/ttg/ttg/device.h
@@ -5,10 +5,24 @@
 
 namespace ttg {
   namespace device {
-    std::size_t get_nb() { return TTG_IMPL_NS::device::get_nb(); }
-    void *memory_allocate(int did, std::size_t size) { return TTG_IMPL_NS::device::memory_allocate(did, size); }
-    void memory_free(int did, void *p) { TTG_IMPL_NS::device::memory_free(did, p); }
-    ttg::ExecutionSpace execution_space(int did) { return TTG_IMPL_NS::device::execution_space(did); }
+    /// Abstract interface to the memory of one device; each backend supplies the
+    /// concrete allocator.
+    class DeviceAllocator {
+     public:
+      // No constructor is declared here: a constructor cannot be virtual.
+      virtual ~DeviceAllocator() = default;
+      /// Allocates \p size bytes of device memory.
+      virtual void *allocate(std::size_t size) const = 0;
+      /// Releases memory obtained from allocate().
+      virtual void free(void *ptr) const = 0;
+      /// Execution space associated with the device.
+      virtual ttg::ExecutionSpace executionSpace() const = 0;
+    };
+
+    /// Number of devices reported by the selected backend.
+    std::size_t nb_devices() { return TTG_IMPL_NS::device::nb_devices(); }
+    /// Allocator of device \p did, as provided by the selected backend.
+    const DeviceAllocator &allocator(int did) { return TTG_IMPL_NS::device::allocator(did); }
   }
 }
 
diff --git a/ttg/ttg/madness/fwd.h b/ttg/ttg/madness/fwd.h
index e5fd5273e..1a9b991fb 100644
--- a/ttg/ttg/madness/fwd.h
+++ b/ttg/ttg/madness/fwd.h
@@ -46,10 +46,18 @@ namespace ttg_madness {
   inline void ttg_broadcast(ttg::World world, T &data, int source_rank);
 
   namespace device {
-    std::size_t get_nb();
-    void *memory_allocate(int did, std::size_t size);
-    void memory_free(int did, void *ptr);
-    ttg::ExecutionSpace execution_space(int did);
+    /// MADNESS backend implementation of ttg::device::DeviceAllocator.
+    class DeviceAllocator : public ttg::device::DeviceAllocator {
+     public:
+      explicit DeviceAllocator(int did);
+      ~DeviceAllocator() override;
+      void *allocate(std::size_t size) const override;
+      void free(void *ptr) const override;
+      ttg::ExecutionSpace executionSpace() const override;
+    };
+
+    std::size_t nb_devices();
+    const DeviceAllocator &allocator(int did);
   }
 
 }  // namespace ttg_madness
diff --git a/ttg/ttg/parsec/fwd.h b/ttg/ttg/parsec/fwd.h
index e1d8a36ad..34ebc4482 100644
--- a/ttg/ttg/parsec/fwd.h
+++ b/ttg/ttg/parsec/fwd.h
@@ -69,6 +69,15 @@ namespace ttg_parsec {
   template
   static void ttg_broadcast(ttg::World world, T &data, int source_rank);
 
+  namespace device {
+    // Forward declaration only: the class is defined in ttg/parsec/ttg.h, and a
+    // second definition here would not compile.
+    class DeviceAllocator;
+
+    std::size_t nb_devices();
+    const DeviceAllocator &allocator(int did);
+  }
+
 #if 0
   template
   inline std::pair>...>> get_ptr(Args&&... args);
@@ -79,13 +88,6 @@ namespace ttg_parsec {
   template
   inline ptr make_ptr(Args&&... args);
 
-  namespace device {
-    std::size_t nb();
-    void *allocate(int did, std::size_t size);
-    void free(int did, void *ptr);
-    ttg::ExecutionSpace space(int did);
-  }
-
 }  // namespace ttg_parsec
 
 #endif // TTG_PARSEC_FWD_H
diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h
index 250074cbf..29f2c3c0d 100644
--- a/ttg/ttg/parsec/ttg.h
+++ b/ttg/ttg/parsec/ttg.h
@@ -3486,74 +3486,93 @@ namespace ttg_parsec {
   namespace device {
     namespace detail {
       static std::size_t nb_ttg_devices = 0;
-      static int *parsec_devid;
-      static int *ttg_devid;
     }
 
-    std::size_t get_nb() {
-      if(0 != detail::nb_ttg_devices) return detail::nb_ttg_devices;
-      detail::ttg_devid = new int[parsec_nb_devices];
-      for(int i = 0; i < parsec_nb_devices; i++) {
-        parsec_device_module_t *m = parsec_mca_device_get(i);
-        if(m->type == PARSEC_DEV_CPU || m->type == PARSEC_DEV_CUDA) {
-          detail::ttg_devid[i] = detail::nb_ttg_devices;
-          detail::nb_ttg_devices++;
-          continue;
-        }
-        detail::ttg_devid[i] = -1;
-      }
-      detail::parsec_devid = new int[detail::nb_ttg_devices];
-      for(int i = 0; i < parsec_nb_devices; i++) {
-        if( detail::ttg_devid[i] != -1) {
-          detail::parsec_devid[ detail::ttg_devid[i] ] = i;
-        }
-      }
-      return detail::nb_ttg_devices;
-    }
-
-    void *memory_allocate(int did, std::size_t size) {
-      if(did >= get_nb()) {
-        throw std::out_of_range("TTG PaRSEC - device identifier out of range");
-      }
-      if(0 == did)
-        return ::malloc(size);
-      parsec_device_module_t *m = parsec_mca_device_get(detail::parsec_devid[did]);
-      assert(m->type == PARSEC_DEV_CUDA);
-      parsec_device_gpu_module_t *gm = reinterpret_cast(m);
-      if(nullptr == gm->memory)
-        return nullptr;
-      return zone_malloc(gm->memory, size);
-    }
-
-    void memory_free(int did, void *ptr) {
-      if(did >= get_nb()) {
-        throw std::out_of_range("TTG PaRSEC - device identifier out of range");
-      }
-      if(0 == did) {
-        ::free(ptr);
-        return;
-      }
-      parsec_device_module_t *m = parsec_mca_device_get(detail::parsec_devid[did]);
-      assert(m->type == PARSEC_DEV_CUDA);
-      parsec_device_gpu_module_t *gm = reinterpret_cast(m);
-      assert(nullptr != gm->memory);
-      zone_free(gm->memory, ptr);
-    }
-
-    ttg::ExecutionSpace execution_space(int did) {
-      if(did >= get_nb()) {
-        throw std::out_of_range("TTG PaRSEC - device identifier out of range");
-      }
-      if(0 == did) {
-        return ttg::ExecutionSpace::Host;
-      }
-#ifndef _NDEBUG
-      parsec_device_module_t *m = parsec_mca_device_get(detail::parsec_devid[did]);
-      assert(m->type == PARSEC_DEV_CUDA);
-#endif
-      return ttg::ExecutionSpace::CUDA;
-    }
-  }
+    /// Allocator for the memory of one TTG-visible PaRSEC device.
+    ///
+    /// TTG device ids number only the PaRSEC devices of type CPU or CUDA, in the
+    /// order PaRSEC enumerates them. Host devices use ::malloc()/::free(); CUDA
+    /// devices use the zone allocator of the PaRSEC GPU module.
+    class DeviceAllocator : public ttg::device::DeviceAllocator {
+     private:
+      int ttg_did, parsec_did;
+      struct zone_malloc_s *zone;
+      ttg::ExecutionSpace exec_space;
+
+     public:
+      /// \param did TTG device identifier.
+      /// \throws std::out_of_range if \p did does not name a CPU or CUDA device.
+      explicit DeviceAllocator(int did)
+          : ttg_did(-1), parsec_did(-1), zone(nullptr), exec_space(ttg::ExecutionSpace::Invalid) {
+        int candidate = 0;  // TTG id of the next CPU/CUDA device found
+        for(int i = 0; i < parsec_nb_devices; i++) {
+          parsec_device_module_t *m = parsec_mca_device_get(i);
+          if(m->type != PARSEC_DEV_CPU && m->type != PARSEC_DEV_CUDA) continue;
+          if(candidate++ != did) continue;
+          parsec_did = i;
+          ttg_did = did;
+          if(m->type == PARSEC_DEV_CUDA) {
+            parsec_device_gpu_module_t *gm = reinterpret_cast<parsec_device_gpu_module_t *>(m);
+            zone = gm->memory;
+            exec_space = ttg::ExecutionSpace::CUDA;
+          } else {
+            exec_space = ttg::ExecutionSpace::Host;
+          }
+          return;
+        }
+        throw std::out_of_range("Device identifier is out of range");
+      }
+
+      /// Allocates \p size bytes; returns nullptr for a CUDA device without a zone.
+      void *allocate(std::size_t size) const override {
+        if(exec_space == ttg::ExecutionSpace::Host) return ::malloc(size);
+        if(nullptr == zone) return nullptr;
+        return zone_malloc(zone, size);
+      }
+
+      /// Releases \p ptr, which must have been returned by allocate().
+      void free(void *ptr) const override {
+        if(exec_space == ttg::ExecutionSpace::Host) {
+          // Must be qualified: a plain free(ptr) would call this member again.
+          ::free(ptr);
+          return;
+        }
+        assert(nullptr != zone);
+        zone_free(zone, ptr);
+      }
+
+      ttg::ExecutionSpace executionSpace() const override {
+        return exec_space;
+      }
+    };
+
+    namespace detail {
+      // Declared after the class so that the element type is complete.
+      static std::vector<DeviceAllocator> device_allocators;
+    }
+
+    /// Number of TTG-visible devices; fills the allocator table on first use.
+    std::size_t nb_devices() {
+      if( detail::nb_ttg_devices > 0 ) return detail::nb_ttg_devices;
+      for(int i = 0; i < parsec_nb_devices; i++) {
+        parsec_device_module_t *m = parsec_mca_device_get(i);
+        if(m->type == PARSEC_DEV_CPU || m->type == PARSEC_DEV_CUDA) {
+          detail::device_allocators.emplace_back( static_cast<int>(detail::nb_ttg_devices) );
+          detail::nb_ttg_devices++;
+        }
+      }
+      return detail::nb_ttg_devices;
+    }
+
+    /// \throws std::out_of_range if \p did is negative or not below nb_devices().
+    const DeviceAllocator &allocator(int did) {
+      if( did < 0 || static_cast<std::size_t>(did) >= nb_devices() ) {
+        throw std::out_of_range("Device identifier is out of range");
+      }
+      return detail::device_allocators.at(did);
+    }
+
+  } // namespace ttg_parsec::device
 
 } // namespace ttg_parsec
 