Skip to content

Commit

Permalink
[WIP] Use memory pool for pinned allocation.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Oct 26, 2024
1 parent 8fe5186 commit 8626c96
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/common/device_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ void ThrowOOMError(std::string const &err, std::size_t bytes) {
return std::accumulate(it, it + this->handles_.size(), static_cast<std::size_t>(0));
}

/**
 * Grow the pinned host buffer to at least `n_bytes`.
 *
 * Grow-only semantics: existing content is NOT preserved (free-then-allocate
 * scratch buffer). Both the free and the allocation are stream-ordered on the
 * default stream; the caller must synchronize that stream before touching the
 * memory from the host if ordering is not otherwise guaranteed.
 */
void GrowOnlyPinnedMemPool::Grow(std::size_t n_bytes) {
  // Already large enough; nothing to do. (The original condition was
  // inverted: it returned exactly when growth was required and reallocated
  // otherwise.)
  if (n_bytes <= this->cur_n_bytes) {
    return;
  }
  // First call starts with no allocation; only free a live pointer.
  if (this->storage != nullptr) {
    safe_cuda(cudaFreeAsync(this->storage, dh::DefaultStream()));
  }
  safe_cuda(cudaMallocFromPoolAsync(&this->storage, n_bytes, pool.Handle(), dh::DefaultStream()));
  // Record the new capacity so subsequent requests that fit can early-exit.
  this->cur_n_bytes = n_bytes;
}

void GrowOnlyVirtualMemVec::Reserve(std::size_t new_size) {
auto va_capacity = this->Capacity();
if (new_size < va_capacity) {
Expand Down
39 changes: 39 additions & 0 deletions src/common/device_vector.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,45 @@ struct GrowOnlyPinnedMemoryImpl {
}
};

/**
 * @brief RAII wrapper over a CUDA memory pool that hands out pinned host
 *        memory bound to the NUMA node of the current device.
 *
 * NOTE(review): `cudaMemLocationTypeHostNuma` requires a recent CUDA toolkit
 * (12.2+) — confirm against the project's minimum supported CUDA version.
 */
class HostMemPool {
  cudaMemPool_t pool_;

 public:
  HostMemPool() {
    cudaMemPoolProps props;
    std::memset(&props, '\0', sizeof(props));
    props.allocType = cudaMemAllocationTypePinned;
    props.location.type = cudaMemLocationTypeHostNuma;
    // Bind the pool to the NUMA node closest to the current device for
    // locality of the pinned allocations.
    std::int32_t numa_id = -1;
    safe_cuda(cudaDeviceGetAttribute(&numa_id, cudaDevAttrNumaId, cub::CurrentDevice()));
    // The attribute can be negative on platforms without host NUMA support;
    // fail loudly rather than create a pool with an invalid location id.
    CHECK_GE(numa_id, 0);
    props.location.id = numa_id;

    safe_cuda(cudaMemPoolCreate(&pool_, &props));
  }
  // The pool owns a CUDA resource; copying would lead to a double destroy.
  HostMemPool(HostMemPool const &) = delete;
  HostMemPool &operator=(HostMemPool const &) = delete;

  auto Handle() const { return pool_; }
  ~HostMemPool() { safe_cuda(cudaMemPoolDestroy(pool_)); }
};

/**
* @brief Pinned host memory using CUDA memory pool.
*/
/**
 * @brief Pinned host memory using CUDA memory pool.
 *
 * Grow-only scratch buffer: growing does not preserve previous content.
 */
struct GrowOnlyPinnedMemPool {
  HostMemPool pool;            // Declared first so it outlives `storage` during destruction.
  void *storage{nullptr};      // Current allocation from `pool`, or nullptr before first Grow.
  std::size_t cur_n_bytes{0};  // Capacity of `storage` in bytes.

  GrowOnlyPinnedMemPool() = default;
  // `storage` is owned exclusively; a copy would double-free it.
  GrowOnlyPinnedMemPool(GrowOnlyPinnedMemPool const &) = delete;
  GrowOnlyPinnedMemPool &operator=(GrowOnlyPinnedMemPool const &) = delete;
  ~GrowOnlyPinnedMemPool() {
    // Release the allocation before `pool` itself is destroyed (members are
    // destructed in reverse declaration order). The original leaked this.
    if (storage != nullptr) {
      safe_cuda(cudaFreeAsync(storage, dh::DefaultStream()));
    }
  }

  // Ensure at least `n_bytes` of capacity; implemented in device_vector.cu.
  void Grow(std::size_t n_bytes);

  /**
   * @brief Get a span over `size` elements of T, growing the buffer if needed.
   *
   * The allocation is stream-ordered on the default stream; synchronize that
   * stream before host access if ordering is not otherwise guaranteed.
   */
  template <typename T>
  xgboost::common::Span<T> GetSpan(std::size_t size) {
    auto n_bytes = size * sizeof(T);
    this->Grow(n_bytes);
    return xgboost::common::Span<T>(static_cast<T *>(storage), size);
  }
};

/**
* @brief Use low-level virtual memory functions from CUDA driver API for grow-only memory
* allocation.
Expand Down

0 comments on commit 8626c96

Please sign in to comment.