From 8a6625ff993dea84a24b803e496363caf89858b5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 26 Oct 2024 03:02:51 +0800 Subject: [PATCH] Add support for CUDA virtual device memory. (#10930) --- cmake/Utils.cmake | 3 +- src/common/io.h | 3 ++ src/common/ref_resource_view.cuh | 7 ++++ src/common/resource.cuh | 31 +++++++++++++- src/data/ellpack_page_source.cu | 3 +- tests/cpp/common/test_ref_resource_view.cu | 49 ++++++++++++++++++++++ 6 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 tests/cpp/common/test_ref_resource_view.cu diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 4ad096ba0392..ec47bf6eb62a 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -115,8 +115,7 @@ function(xgboost_set_cuda_flags target) else() # If the downstream user is dynamically linking with libxgboost, it does not # need to link with CCCL and CUDA runtime. - target_link_libraries(${target} - PRIVATE CCCL::CCCL CUDA::cudart_static) + target_link_libraries(${target} PRIVATE CCCL::CCCL CUDA::cudart_static) endif() target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1) target_include_directories( diff --git a/src/common/io.h b/src/common/io.h index 1a540ea7338f..e3eaa4faf89b 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -287,6 +287,7 @@ class ResourceHandler { kCudaMalloc = 2, // CUDA device memory. kCudaMmap = 3, // CUDA with mmap. kCudaHostCache = 4, // CUDA pinned host memory. + kCudaGrowOnly = 5, // CUDA virtual memory allocator. }; private: @@ -313,6 +314,8 @@ class ResourceHandler { return "CudaMmap"; case kCudaHostCache: return "CudaHostCache"; + case kCudaGrowOnly: + return "CudaGrowOnly"; } LOG(FATAL) << "Unreachable."; return {}; diff --git a/src/common/ref_resource_view.cuh b/src/common/ref_resource_view.cuh index 985938e08128..21d49333b579 100644 --- a/src/common/ref_resource_view.cuh +++ b/src/common/ref_resource_view.cuh @@ -22,6 +22,13 @@ template return ref; } +template +[[nodiscard]] RefResourceView MakeCudaGrowOnly(std::size_t n_elements) { + auto resource = std::make_shared(n_elements * sizeof(T)); + auto ref = RefResourceView{resource->DataAs(), n_elements, resource}; + return ref; +} + template [[nodiscard]] RefResourceView MakeFixedVecWithCudaMalloc(Context const* ctx, std::size_t n_elements, T const& init) { diff --git a/src/common/resource.cuh b/src/common/resource.cuh index 4930f8368182..4936cb798a07 100644 --- a/src/common/resource.cuh +++ b/src/common/resource.cuh @@ -6,7 +6,7 @@ #include // for function #include "cuda_pinned_allocator.h" // for SamAllocator -#include "device_vector.cuh" // for DeviceUVector +#include "device_vector.cuh" // for DeviceUVector, GrowOnlyVirtualMemVec #include "io.h" // for ResourceHandler, MMAPFile #include "xgboost/string_view.h" // for StringView @@ -30,6 +30,35 @@ class CudaMallocResource : public ResourceHandler { void Resize(std::size_t n_bytes) { this->storage_.resize(n_bytes); } }; +/** + * @brief Device resource that only grows in size. + */ +class CudaGrowOnlyResource : public ResourceHandler { + static auto MakeNew() { + return std::make_unique(CU_MEM_LOCATION_TYPE_DEVICE); + } + + std::unique_ptr alloc_; + std::size_t n_bytes_{0}; + + public: + explicit CudaGrowOnlyResource(std::size_t n_bytes) + : ResourceHandler{kCudaGrowOnly}, alloc_{MakeNew()} { + this->Resize(n_bytes); + } + void Resize(std::size_t n_bytes) { + this->alloc_->GrowTo(n_bytes); + this->n_bytes_ = n_bytes; + } + void Clear() { + this->alloc_.reset(); + this->alloc_ = MakeNew(); + this->n_bytes_ = 0; + } + [[nodiscard]] void* Data() final { return this->alloc_->data(); } + [[nodiscard]] std::size_t Size() const final { return this->n_bytes_; } +}; + class CudaPinnedResource : public ResourceHandler { std::vector> storage_; diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index a186130b7b58..4901f900a7d5 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -25,7 +25,8 @@ namespace xgboost::data { namespace { [[nodiscard]] bool IsDevicePage(EllpackPageImpl const* page) { switch (page->gidx_buffer.Resource()->Type()) { - case common::ResourceHandler::kCudaMalloc: { + case common::ResourceHandler::kCudaMalloc: + case common::ResourceHandler::kCudaGrowOnly: { return true; } case common::ResourceHandler::kCudaHostCache: diff --git a/tests/cpp/common/test_ref_resource_view.cu b/tests/cpp/common/test_ref_resource_view.cu new file mode 100644 index 000000000000..ed69d087dc3c --- /dev/null +++ b/tests/cpp/common/test_ref_resource_view.cu @@ -0,0 +1,49 @@ +/** + * Copyright 2024, XGBoost Contributors + */ +#if defined(__linux__) + +#include +#include // for equal +#include // for fill_n +#include // for make_constant_iterator +#include // for sequence + +#include "../../../src/common/ref_resource_view.cuh" +#include "../helpers.h" // for MakeCUDACtx + +namespace xgboost::common { +class TestCudaGrowOnly : public ::testing::TestWithParam { + public: + void TestGrow(std::size_t m, std::size_t n) { + auto ctx = MakeCUDACtx(0); + ctx.CUDACtx()->Stream().Sync(); + + auto ref = MakeCudaGrowOnly(m); + ASSERT_EQ(ref.size_bytes(), m * sizeof(double)); + thrust::sequence(ctx.CUDACtx()->CTP(), ref.begin(), ref.end(), 0.0); + auto res = std::dynamic_pointer_cast(ref.Resource()); + CHECK(res); + res->Resize(n * sizeof(double)); + + auto ref1 = RefResourceView(res->DataAs(), res->Size() / sizeof(double), + ref.Resource()); + ASSERT_EQ(res->Size(), n * sizeof(double)); + ASSERT_EQ(ref1.size(), n); + thrust::sequence(ctx.CUDACtx()->CTP(), ref1.begin(), ref1.end(), static_cast(0.0)); + std::vector h_vec(ref1.size()); + dh::safe_cuda(cudaMemcpyAsync(h_vec.data(), ref1.data(), ref1.size_bytes(), cudaMemcpyDefault)); + for (std::size_t i = 0; i < h_vec.size(); ++i) { + ASSERT_EQ(h_vec[i], i); + } + } + + void Run(std::size_t n) { this->TestGrow(1024, n); } +}; + +TEST_P(TestCudaGrowOnly, Resize) { this->Run(this->GetParam()); } + +INSTANTIATE_TEST_SUITE_P(RefResourceView, TestCudaGrowOnly, ::testing::Values(1 << 20, 1 << 21)); +} // namespace xgboost::common + +#endif // defined(__linux__)