Skip to content

Commit

Permalink
[WIP] Use memory pool for pinned allocation.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Oct 26, 2024
1 parent 8fe5186 commit 8626c96
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/common/device_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ void ThrowOOMError(std::string const &err, std::size_t bytes) {
return std::accumulate(it, it + this->handles_.size(), static_cast<std::size_t>(0));
}

/**
 * Grow the pinned host buffer to at least `n_bytes`.
 *
 * Grow-only semantics: existing content is NOT preserved (free-then-allocate
 * scratch buffer). Both the free and the allocation are stream-ordered on the
 * default stream; the caller must synchronize that stream before touching the
 * memory from the host if ordering is not otherwise guaranteed.
 */
void GrowOnlyPinnedMemPool::Grow(std::size_t n_bytes) {
  // Already large enough; nothing to do. (The original condition was
  // inverted: it returned exactly when growth was required and reallocated
  // otherwise.)
  if (n_bytes <= this->cur_n_bytes) {
    return;
  }
  // First call starts with no allocation; only free a live pointer.
  if (this->storage != nullptr) {
    safe_cuda(cudaFreeAsync(this->storage, dh::DefaultStream()));
  }
  safe_cuda(cudaMallocFromPoolAsync(&this->storage, n_bytes, pool.Handle(), dh::DefaultStream()));
  // Record the new capacity so subsequent requests that fit can early-exit.
  this->cur_n_bytes = n_bytes;
}

void GrowOnlyVirtualMemVec::Reserve(std::size_t new_size) {
auto va_capacity = this->Capacity();
if (new_size < va_capacity) {
Expand Down
39 changes: 39 additions & 0 deletions src/common/device_vector.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,45 @@ struct GrowOnlyPinnedMemoryImpl {
}
};

/**
 * @brief RAII wrapper over a CUDA memory pool that hands out pinned host
 *        memory bound to the NUMA node of the current device.
 *
 * NOTE(review): `cudaMemLocationTypeHostNuma` requires a recent CUDA toolkit
 * (12.2+) — confirm against the project's minimum supported CUDA version.
 */
class HostMemPool {
  cudaMemPool_t pool_;

 public:
  HostMemPool() {
    cudaMemPoolProps props;
    std::memset(&props, '\0', sizeof(props));
    props.allocType = cudaMemAllocationTypePinned;
    props.location.type = cudaMemLocationTypeHostNuma;
    // Bind the pool to the NUMA node closest to the current device for
    // locality of the pinned allocations.
    std::int32_t numa_id = -1;
    safe_cuda(cudaDeviceGetAttribute(&numa_id, cudaDevAttrNumaId, cub::CurrentDevice()));
    // The attribute can be negative on platforms without host NUMA support;
    // fail loudly rather than create a pool with an invalid location id.
    CHECK_GE(numa_id, 0);
    props.location.id = numa_id;

    safe_cuda(cudaMemPoolCreate(&pool_, &props));
  }
  // The pool owns a CUDA resource; copying would lead to a double destroy.
  HostMemPool(HostMemPool const &) = delete;
  HostMemPool &operator=(HostMemPool const &) = delete;

  auto Handle() const { return pool_; }
  ~HostMemPool() { safe_cuda(cudaMemPoolDestroy(pool_)); }
};

/**
* @brief Pinned host memory using CUDA memory pool.
*/
/**
 * @brief Pinned host memory using CUDA memory pool.
 *
 * Grow-only scratch buffer: growing does not preserve previous content.
 */
struct GrowOnlyPinnedMemPool {
  HostMemPool pool;            // Declared first so it outlives `storage` during destruction.
  void *storage{nullptr};      // Current allocation from `pool`, or nullptr before first Grow.
  std::size_t cur_n_bytes{0};  // Capacity of `storage` in bytes.

  GrowOnlyPinnedMemPool() = default;
  // `storage` is owned exclusively; a copy would double-free it.
  GrowOnlyPinnedMemPool(GrowOnlyPinnedMemPool const &) = delete;
  GrowOnlyPinnedMemPool &operator=(GrowOnlyPinnedMemPool const &) = delete;
  ~GrowOnlyPinnedMemPool() {
    // Release the allocation before `pool` itself is destroyed (members are
    // destructed in reverse declaration order). The original leaked this.
    if (storage != nullptr) {
      safe_cuda(cudaFreeAsync(storage, dh::DefaultStream()));
    }
  }

  // Ensure at least `n_bytes` of capacity; implemented in device_vector.cu.
  void Grow(std::size_t n_bytes);

  /**
   * @brief Get a span over `size` elements of T, growing the buffer if needed.
   *
   * The allocation is stream-ordered on the default stream; synchronize that
   * stream before host access if ordering is not otherwise guaranteed.
   */
  template <typename T>
  xgboost::common::Span<T> GetSpan(std::size_t size) {
    auto n_bytes = size * sizeof(T);
    this->Grow(n_bytes);
    return xgboost::common::Span<T>(static_cast<T *>(storage), size);
  }
};

/**
* @brief Use low-level virtual memory functions from CUDA driver API for grow-only memory
* allocation.
Expand Down

0 comments on commit 8626c96

Please sign in to comment.