From 8626c96c1cc029696ae164aa2f10d3f3ac71afff Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sat, 26 Oct 2024 15:54:25 +0800
Subject: [PATCH] [WIP] Use memory pool for pinned allocation.

---
 src/common/device_vector.cu  | 12 ++++++++++++
 src/common/device_vector.cuh | 57 ++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/src/common/device_vector.cu b/src/common/device_vector.cu
index b7f300df61e2..fc4ae4680241 100644
--- a/src/common/device_vector.cu
+++ b/src/common/device_vector.cu
@@ -29,6 +29,18 @@ void ThrowOOMError(std::string const &err, std::size_t bytes) {
   return std::accumulate(it, it + this->handles_.size(), static_cast<std::size_t>(0));
 }
 
+void GrowOnlyPinnedMemPool::Grow(std::size_t n_bytes) {
+  // Grow-only: nothing to do when the current buffer is already large enough.
+  if (n_bytes <= this->cur_n_bytes) {
+    return;
+  }
+  // Content need not be preserved across growth; release before re-allocating.
+  if (storage != nullptr) {
+    safe_cuda(cudaFreeAsync(storage, dh::DefaultStream()));
+  }
+  safe_cuda(cudaMallocFromPoolAsync(&storage, n_bytes, pool.Handle(), dh::DefaultStream()));
+  this->cur_n_bytes = n_bytes;
+}
+
 void GrowOnlyVirtualMemVec::Reserve(std::size_t new_size) {
   auto va_capacity = this->Capacity();
   if (new_size < va_capacity) {
diff --git a/src/common/device_vector.cuh b/src/common/device_vector.cuh
index 352ebf371d26..e9fa0a2b62cf 100644
--- a/src/common/device_vector.cuh
+++ b/src/common/device_vector.cuh
@@ -134,6 +134,63 @@ struct GrowOnlyPinnedMemoryImpl {
   }
 };
 
+/**
+ * @brief RAII wrapper around a CUDA memory pool for NUMA-pinned host memory.
+ */
+class HostMemPool {
+  cudaMemPool_t pool_;
+
+ public:
+  HostMemPool() {
+    cudaMemPoolProps props;
+    std::memset(&props, '\0', sizeof(props));
+    props.allocType = cudaMemAllocationTypePinned;
+    props.location.type = cudaMemLocationTypeHostNuma;
+    // Allocate on the NUMA node closest to the current device.
+    std::int32_t numa_id = -1;
+    safe_cuda(cudaDeviceGetAttribute(&numa_id, cudaDevAttrNumaId, cub::CurrentDevice()));
+    CHECK_GE(numa_id, 0);
+    props.location.id = numa_id;
+
+    safe_cuda(cudaMemPoolCreate(&pool_, &props));
+  }
+  HostMemPool(HostMemPool const &) = delete;
+  HostMemPool &operator=(HostMemPool const &) = delete;
+
+  auto Handle() { return pool_; }
+  ~HostMemPool() { safe_cuda(cudaMemPoolDestroy(pool_)); }
+};
+
+/**
+ * @brief Grow-only pinned host memory backed by a CUDA memory pool.
+ */
+struct GrowOnlyPinnedMemPool {
+  HostMemPool pool;
+  void *storage{nullptr};
+  std::size_t cur_n_bytes{0};
+
+  // Ensure the buffer holds at least n_bytes; existing content is not preserved.
+  void Grow(std::size_t n_bytes);
+
+  ~GrowOnlyPinnedMemPool() {
+    // Return the buffer to the pool before the pool member is destroyed.
+    // NOTE(review): the free is asynchronous; a stream sync may be needed
+    // before cudaMemPoolDestroy runs -- confirm.
+    if (storage != nullptr) {
+      safe_cuda(cudaFreeAsync(storage, dh::DefaultStream()));
+    }
+  }
+
+  /**
+   * @brief Get a span of `size` elements, growing the buffer if needed.
+   */
+  template <typename T>
+  xgboost::common::Span<T> GetSpan(std::size_t size) {
+    auto n_bytes = size * sizeof(T);
+    this->Grow(n_bytes);
+    return xgboost::common::Span<T>(static_cast<T *>(storage), size);
+  }
+};
+
 /**
  * @brief Use low-level virtual memory functions from CUDA driver API for grow-only memory
  * allocation.