// Patch summary (explanatory preamble placed ahead of the first `diff --git` header).
//
// Files touched:
//   * src/common/device_vector.cuh
//   * tests/cpp/common/test_device_vector.cu
//
// src/common/device_vector.cuh
//   - Removes the deferred `guard` over `lock_` that was only locked when
//     xgboost::ConsoleLogger::ShouldLog(LV::kDebug) returned true.
//   - The removal is visible in LoggingResource::do_deallocate and in the code path
//     that wraps `mr_->allocate(bytes, stream)` in a try block.
//   - After the patch, do_deallocate calls `mr_->deallocate(...)` followed by
//     `GlobalMemoryLogger().RegisterDeallocation(ptr, bytes)` with no lock taken here.
//   - NOTE(review): presumably GlobalMemoryLogger now does its own synchronization
//     (the new test exercises detail::AtomicFetchMax) -- confirm against the full file.
//
// tests/cpp/common/test_device_vector.cu
//   - Adds one include (annotated "for thread") and a new test, TEST(AtomitFetch, Max).
//   - The test starts std::thread::hardware_concurrency() threads; thread `t` calls
//     detail::AtomicFetchMax(n, t + i) for i in [0, add) with add = 64, joins every
//     thread, then asserts n == n_threads - 1 + add - 1.
//   - NOTE(review): the suite name "AtomitFetch" looks like a typo for "AtomicFetch".
//   - NOTE(review): std::thread::hardware_concurrency() is allowed to return 0; then no
//     thread runs, n stays 0, and the expected value is nonzero, so ASSERT_EQ would fail.
//     Consider clamping the thread count to at least 1.
//
// NOTE(review): in this copy of the patch, text inside angle brackets appears to be
// missing (empty #include targets, `std::vector threads`, `std::atomic n{0}`,
// `static_cast(t + i)`), and the first hunk runs straight into a later one. Recover the
// original patch before attempting to apply it.
diff --git a/src/common/device_vector.cuh b/src/common/device_vector.cuh index 6a996d7e8c69..8412e79b91d2 100644 --- a/src/common/device_vector.cuh +++ b/src/common/device_vector.cuh @@ -389,7 +389,6 @@ using caching_device_vector = thrust::device_vector guard{lock_, std::defer_lock}; - if (xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) { - guard.lock(); - } try { auto const ptr = mr_->allocate(bytes, stream); GlobalMemoryLogger().RegisterAllocation(ptr, bytes); @@ -423,10 +418,6 @@ class LoggingResource : public rmm::mr::device_memory_resource { void do_deallocate(void *ptr, std::size_t bytes, // NOLINT rmm::cuda_stream_view stream) override { - std::unique_lock guard{lock_, std::defer_lock}; - if (xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) { - guard.lock(); - } mr_->deallocate(ptr, bytes, stream); GlobalMemoryLogger().RegisterDeallocation(ptr, bytes); } diff --git a/tests/cpp/common/test_device_vector.cu b/tests/cpp/common/test_device_vector.cu index 9dff9c691c15..e749a7015309 100644 --- a/tests/cpp/common/test_device_vector.cu +++ b/tests/cpp/common/test_device_vector.cu @@ -2,6 +2,7 @@ * Copyright 2024, XGBoost Contributors */ #include +#include // for thread #include // for iota #include // for sequence @@ -115,4 +116,22 @@ TEST(TestVirtualMem, Version) { ASSERT_FALSE(pinned.IsVm()); } } + +TEST(AtomitFetch, Max) { + auto n_threads = std::thread::hardware_concurrency(); + std::vector threads; + std::atomic n{0}; + decltype(n)::value_type add = 64; + for (decltype(n_threads) t = 0; t < n_threads; ++t) { + threads.emplace_back([=, &n] { + for (std::size_t i = 0; i < add; ++i) { + detail::AtomicFetchMax(n, static_cast(t + i)); + } + }); + } + for (auto& t : threads) { + t.join(); + } + ASSERT_EQ(n, n_threads - 1 + add - 1); // 0-based indexing +} } // namespace dh