Skip to content

Commit

Permalink
Cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Oct 24, 2024
1 parent 2d28241 commit fae309c
Showing 1 changed file with 21 additions and 18 deletions.
39 changes: 21 additions & 18 deletions src/common/device_vector.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

#include <cuda.h> // for CUmemGenericAllocationHandle

#include <atomic> // for atomic, memory_order
#include <cstddef> // for size_t
#include <cstdint> // for int64_t
#include <cub/util_allocator.cuh> // for CachingDeviceAllocator
#include <cub/util_device.cuh> // for CurrentDevice
#include <memory> // for unique_ptr
Expand All @@ -39,9 +41,10 @@

namespace dh {
namespace detail {
// std::atomic::fetch_max in c++26
template <typename T>
std::remove_cv_t<T> AtomicFetchMax(std::atomic<T> &atom, T val, // NOLINT
std::memory_order order = std::memory_order_seq_cst) {
T AtomicFetchMax(std::atomic<T> &atom, T val, // NOLINT
std::memory_order order = std::memory_order_seq_cst) {
auto expected = atom.load();
auto desired = expected > val ? expected : val;

Expand All @@ -56,7 +59,7 @@ std::remove_cv_t<T> AtomicFetchMax(std::atomic<T> &atom, T val, // NOLINT
class MemoryLogger {
// Information for a single device
struct DeviceStats {
// Use signed int to allow out-of-order allocation/deallocation.
// Use signed int to allow temporary under-flow.
std::atomic<std::int64_t> currently_allocated_bytes{0};
std::atomic<std::int64_t> peak_allocated_bytes{0};
void RegisterAllocation(std::int64_t n) {
Expand All @@ -71,20 +74,20 @@ class MemoryLogger {
/**
* @brief Register the allocation for logging.
*/
void RegisterAllocation(void *, size_t n) {
void RegisterAllocation(std::size_t n) {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
stats_.RegisterAllocation(n);
stats_.RegisterAllocation(static_cast<std::int64_t>(n));
}
/**
* @brief Register the deallocation for logging.
*/
void RegisterDeallocation(void *, size_t n) {
void RegisterDeallocation(std::size_t n) {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
stats_.RegisterDeallocation(n);
stats_.RegisterDeallocation(static_cast<std::int64_t>(n));
}
std::int64_t PeakMemory() const { return stats_.peak_allocated_bytes; }
std::int64_t CurrentlyAllocatedBytes() const { return stats_.currently_allocated_bytes; }
Expand All @@ -93,7 +96,7 @@ class MemoryLogger {
stats_.peak_allocated_bytes = 0;
}

void Log() {
void Log() const {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
Expand Down Expand Up @@ -288,12 +291,11 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
} catch (const std::exception &e) {
detail::ThrowOOMError(e.what(), n * sizeof(T));
}
// We can't place a lock here as template allocator is transient.
GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
GlobalMemoryLogger().RegisterAllocation(n * sizeof(T));
return ptr;
}
void deallocate(pointer ptr, size_t n) { // NOLINT
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
GlobalMemoryLogger().RegisterDeallocation(n * sizeof(T));
SuperT::deallocate(ptr, n);
}
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
Expand Down Expand Up @@ -342,14 +344,13 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
detail::ThrowOOMError(e.what(), n * sizeof(T));
}
}
// We can't place a lock here as template allocator is transient.
GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
GlobalMemoryLogger().RegisterAllocation(n * sizeof(T));
return thrust_ptr;
}
void deallocate(pointer ptr, size_t n) { // NOLINT
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
GlobalMemoryLogger().RegisterDeallocation(n * sizeof(T));
if (use_cub_allocator_) {
GetGlobalCachingAllocator().DeviceFree(ptr.get());
GetGlobalCachingAllocator().DeviceFree(thrust::raw_pointer_cast(ptr));
} else {
SuperT::deallocate(ptr, n);
}
Expand Down Expand Up @@ -377,7 +378,9 @@ using XGBDeviceAllocator = detail::XGBDefaultDeviceAllocatorImpl<T>;
template <typename T>
using XGBCachingDeviceAllocator = detail::XGBCachingDeviceAllocatorImpl<T>;

/** @brief Specialisation of thrust device vector using custom allocator. */
/** @brief Specialisation of thrust device vector using custom allocator. In addition, it catches
* OOM errors.
*/
template <typename T>
using device_vector = thrust::device_vector<T, XGBDeviceAllocator<T>>; // NOLINT
template <typename T>
Expand Down Expand Up @@ -408,7 +411,7 @@ class LoggingResource : public rmm::mr::device_memory_resource {
void *do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { // NOLINT
try {
auto const ptr = mr_->allocate(bytes, stream);
GlobalMemoryLogger().RegisterAllocation(ptr, bytes);
GlobalMemoryLogger().RegisterAllocation(bytes);
return ptr;
} catch (rmm::bad_alloc const &e) {
detail::ThrowOOMError(e.what(), bytes);
Expand All @@ -419,7 +422,7 @@ class LoggingResource : public rmm::mr::device_memory_resource {
void do_deallocate(void *ptr, std::size_t bytes, // NOLINT
rmm::cuda_stream_view stream) override {
mr_->deallocate(ptr, bytes, stream);
GlobalMemoryLogger().RegisterDeallocation(ptr, bytes);
GlobalMemoryLogger().RegisterDeallocation(bytes);
}

[[nodiscard]] bool do_is_equal( // NOLINT
Expand Down

0 comments on commit fae309c

Please sign in to comment.