rapidsai · rapids-bot · Jul 19, 2024 · Jul 11, 2024 · Jul 11, 2024 · Jul 11, 2024
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/prefetch.hpp>
+#include <rmm/resource_ref.hpp>
+
+#include <cstddef>
+#include <mutex>
+#include <shared_mutex>
+#include <stack>
+
+namespace rmm::mr {
+/**
+ * @addtogroup device_resource_adaptors
+ * @{
+ * @file
+ */
+/**
+ * @brief Resource adaptor that prefetches every allocation it hands out.
+ *
+ * Each allocation is obtained from the upstream resource and then passed to
+ * `rmm::prefetch` with the current CUDA device and the allocation stream.
+ * Deallocation is forwarded to the upstream resource unchanged.
+ *
+ * @tparam Upstream Type of the upstream resource used for
+ * allocation/deallocation.
+ */
+template <typename Upstream>
+class prefetch_resource_adaptor final : public device_memory_resource {
+ public:
+  /**
+   * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy
+   * allocation requests.
+   *
+   * The adaptor keeps only the raw pointer and never deletes it.
+   *
+   * @throws rmm::logic_error if `upstream == nullptr`
+   *
+   * @param upstream The resource used for allocating/deallocating device memory
+   */
+  prefetch_resource_adaptor(Upstream* upstream) : upstream_{upstream}
+  {
+    RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
+  }
+
+  prefetch_resource_adaptor()                                            = delete;
+  ~prefetch_resource_adaptor() override                                  = default;
+  prefetch_resource_adaptor(prefetch_resource_adaptor const&)            = delete;
+  prefetch_resource_adaptor& operator=(prefetch_resource_adaptor const&) = delete;
+  prefetch_resource_adaptor(prefetch_resource_adaptor&&) noexcept =
+    default;  ///< @default_move_constructor
+  prefetch_resource_adaptor& operator=(prefetch_resource_adaptor&&) noexcept =
+    default;  ///< @default_move_assignment{prefetch_resource_adaptor}
+
+  /**
+   * @briefreturn{rmm::device_async_resource_ref to the upstream resource}
+   */
+  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
+  {
+    return upstream_;
+  }
+
+  /**
+   * @briefreturn{Upstream* to the upstream memory resource}
+   */
+  [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; }
+
+ private:
+  /**
+   * @brief Allocates memory of size at least `bytes` using the upstream
+   * resource and prefetches it to the current CUDA device on `stream`.
+   *
+   * The pointer is returned exactly as provided by the upstream resource, so its
+   * alignment is whatever the upstream resource guarantees.
+   *
+   * If prefetching throws, the allocation is returned to the upstream resource
+   * before the exception is rethrown, so a failed prefetch does not leak memory.
+   *
+   * @throws rmm::bad_alloc if the requested allocation could not be fulfilled
+   * by the upstream resource.
+   *
+   * @param bytes The size, in bytes, of the allocation
+   * @param stream Stream on which to perform the allocation and the prefetch
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
+  {
+    void* ptr = upstream_->allocate(bytes, stream);
+    try {
+      rmm::prefetch(ptr, bytes, rmm::get_current_cuda_device(), stream);
+    } catch (...) {
+      // The caller never sees `ptr` on failure, so hand it back to upstream here.
+      upstream_->deallocate(ptr, bytes, stream);
+      throw;
+    }
+    return ptr;
+  }
+
+  /**
+   * @brief Free allocation of size `bytes` pointed to by `ptr` by forwarding to
+   * the upstream resource.
+   *
+   * @param ptr Pointer to be deallocated
+   * @param bytes Size of the allocation
+   * @param stream Stream on which to perform the deallocation
+   */
+  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
+  {
+    upstream_->deallocate(ptr, bytes, stream);
+  }
+
+  /**
+   * @brief Compare this resource to another.
+   *
+   * Returns true when `other` is this very object. When `other` is not a
+   * `prefetch_resource_adaptor<Upstream>`, the comparison is delegated to
+   * `upstream_->is_equal(other)`. Otherwise the upstream resources of the two
+   * adaptors are compared.
+   *
+   * @param other The other resource to compare to
+   * @return true If the two resources are equivalent
+   * @return false If the two resources are not equal
+   */
+  bool do_is_equal(device_memory_resource const& other) const noexcept override
+  {
+    if (this == &other) { return true; }
+    auto const* cast = dynamic_cast<prefetch_resource_adaptor<Upstream> const*>(&other);
+    if (cast == nullptr) { return upstream_->is_equal(other); }
+    return get_upstream_resource() == cast->get_upstream_resource();
+  }
+
+  Upstream* upstream_;  // the upstream resource used for satisfying allocation requests
+};
+
+/**
+ * @brief Factory helper that wraps `upstream` in a `prefetch_resource_adaptor`,
+ * deducing the adaptor's template argument from the pointer type.
+ *
+ * @tparam Upstream Type of the upstream `device_memory_resource`.
+ * @param upstream Pointer to the upstream resource
+ * @return The new prefetch resource adaptor
+ */
+template <typename Upstream>
+prefetch_resource_adaptor<Upstream> make_prefetch_adaptor(Upstream* upstream)
+{
+  using adaptor_type = prefetch_resource_adaptor<Upstream>;
+  return adaptor_type{upstream};
+}
+
+/** @} */  // end of group
+}  // namespace rmm::mr
@@ -91,4 +91,7 @@ cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor):
 cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor):
     cdef object _callback
 
+cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor):
+    # No attributes are declared here beyond those inherited from
+    # UpstreamResourceAdaptor.
+    pass
+
 cpdef DeviceMemoryResource get_current_device_resource()
@@ -219,6 +219,11 @@ cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \
             void* callback_arg
         ) except +
 
+# Declaration of the C++ class template rmm::mr::prefetch_resource_adaptor
+# from rmm/mr/device/prefetch_resource_adaptor.hpp.
+cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource):
+        # `except +`: C++ exceptions thrown by the constructor are converted
+        # into Python exceptions.
+        prefetch_resource_adaptor(Upstream* upstream_mr) except +
+
 
 cdef class DeviceMemoryResource:
 
@@ -987,6 +992,32 @@ cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor):
         """
         pass
 
+cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        # Build the C++ prefetch_resource_adaptor around the upstream's
+        # underlying resource pointer and store it in c_obj.
+        self.c_obj.reset(
+            new prefetch_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr()
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        """
+        Memory resource that prefetches all allocations.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        """
+        pass
+
 
 # Global per-device memory resources; dict of int:DeviceMemoryResource
 cdef _per_device_mrs = defaultdict(CudaMemoryResource)

@@ -23,6 +23,7 @@
     LoggingResourceAdaptor,
     ManagedMemoryResource,
     PoolMemoryResource,
+    PrefetchResourceAdaptor,
     StatisticsResourceAdaptor,
     TrackingResourceAdaptor,
     UpstreamResourceAdaptor,
@@ -52,6 +53,7 @@
     "LoggingResourceAdaptor",
     "ManagedMemoryResource",
     "PoolMemoryResource",
+    "PrefetchResourceAdaptor",
     "StatisticsResourceAdaptor",
     "TrackingResourceAdaptor",
     "FailureCallbackResourceAdaptor",

@@ -733,6 +733,30 @@ def callback(nbytes: int) -> bool:
     assert retried[0]
 
 
+@pytest.mark.parametrize("managed", [True, False])
+def test_prefetch_resource_adaptor(managed):
+    if managed:
+        upstream_mr = rmm.mr.ManagedMemoryResource()
+    else:
+        upstream_mr = rmm.mr.CudaMemoryResource()
+    mr = rmm.mr.PrefetchResourceAdaptor(upstream_mr)
+    rmm.mr.set_current_device_resource(mr)
+
+    # This allocation should be prefetched
+    db = rmm.DeviceBuffer.to_device(np.zeros(256, dtype="u1"))
+
+    err, device = cudart.cudaGetDevice()
+    assert err == cudart.cudaError_t.cudaSuccess
+
+    if managed:
+        assert_prefetched(db, device)
+    db.prefetch()  # just test that it doesn't throw
+    if managed:
+        err, device = cudart.cudaGetDevice()
+        assert err == cudart.cudaError_t.cudaSuccess
+        assert_prefetched(db, device)
+
+
 def test_failure_callback_resource_adaptor_error():
     def callback(nbytes: int) -> bool:
         raise RuntimeError("MyError")

@@ -148,6 +148,9 @@ ConfigureTest(TRACKING_TEST mr/device/tracking_mr_tests.cpp GPUS 1 PERCENT 100)
 # out-of-memory callback adaptor tests
 ConfigureTest(FAILURE_CALLBACK_TEST mr/device/failure_callback_mr_tests.cpp)
 
+# prefetch adaptor tests
+ConfigureTest(PREFETCH_ADAPTOR_TEST mr/device/prefetch_resource_adaptor_tests.cpp)
+
 # aligned adaptor tests
 ConfigureTest(ALIGNED_TEST mr/device/aligned_mr_tests.cpp)
 

@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../byte_literals.hpp"
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/detail/error.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/prefetch_resource_adaptor.hpp>
+
+#include <gtest/gtest.h>
+
+#include <cstddef>
+#include <random>
+
+using prefetch_adaptor = rmm::mr::prefetch_resource_adaptor<rmm::mr::device_memory_resource>;
+
+/**
+ * @brief Typed test fixture for `prefetch_resource_adaptor`.
+ *
+ * Holds a stream, an allocation size and an instance of the memory resource under
+ * test.
+ *
+ * @tparam MemoryResourceType Memory resource type that is default-constructed as `mr`.
+ */
+template <typename MemoryResourceType>
+struct PrefetchAdaptorTest : public ::testing::Test {
+  rmm::cuda_stream stream{};
+  std::size_t size{};
+  MemoryResourceType mr{};
+
+  PrefetchAdaptorTest()
+  {
+    // The engine is default-constructed (no explicit seed is supplied).
+    std::default_random_engine generator;
+
+    auto constexpr range_min{1000};
+    auto constexpr range_max{100000};
+    std::uniform_int_distribution<std::size_t> distribution(range_min, range_max);
+    size = distribution(generator);
+  }
+
+  /**
+   * @brief Expect that the memory range was last prefetched to `device`.
+   *
+   * The check is only compiled for `rmm::mr::managed_memory_resource`; for any
+   * other resource type this function does nothing.
+   *
+   * @param ptr Start of the memory range to query
+   * @param bytes Size of the memory range in bytes
+   * @param device Device the range is expected to have been last prefetched to
+   */
+  void expect_prefetched(void const* ptr, std::size_t bytes, rmm::cuda_device_id device)
+  {
+    if constexpr (std::is_same_v<MemoryResourceType, rmm::mr::managed_memory_resource>) {
+      int prefetch_location{0};
+      // Pass the size of the output variable instead of a hard-coded byte count.
+      RMM_CUDA_TRY(
+        cudaMemRangeGetAttribute(&prefetch_location,
+                                 sizeof(prefetch_location),
+                                 cudaMemRangeAttribute::cudaMemRangeAttributeLastPrefetchLocation,
+                                 ptr,
+                                 bytes));
+      EXPECT_EQ(prefetch_location, device.value());
+    }
+  }
+};
+
+using resources = ::testing::Types<rmm::mr::cuda_memory_resource, rmm::mr::managed_memory_resource>;
+
+TYPED_TEST_CASE(PrefetchAdaptorTest, resources);
+
+// For non-managed memory the following tests only check compilation and that no
+// exceptions are thrown by prefetching. For managed memory they additionally check
+// the last prefetch location of the allocated range.
+
+TYPED_TEST(PrefetchAdaptorTest, PointerAndSize)
+{
+  // Route the allocation through a prefetch adaptor wrapping the fixture's resource.
+  prefetch_adaptor adaptor{&this->mr};
+  rmm::device_buffer buffer(this->size, this->stream, &adaptor);
+  auto const device = rmm::get_current_cuda_device();
+
+  // the allocation itself should already have prefetched the range
+  this->expect_prefetched(buffer.data(), buffer.size(), device);
+
+  // an explicit prefetch of the same range must not error
+  rmm::prefetch(buffer.data(), buffer.size(), device, this->stream);
+
+  // and the range should still be reported as prefetched to the current device
+  this->expect_prefetched(buffer.data(), buffer.size(), device);
+}
+
+TYPED_TEST(PrefetchAdaptorTest, NotPrefetchedWithoutAdaptor)
+{
+  // Allocate directly from the fixture's resource, bypassing the prefetch adaptor;
+  // the expected last prefetch location is then cudaInvalidDeviceId.
+  rmm::device_buffer buffer(this->size, this->stream, &this->mr);
+  auto const no_device = rmm::cuda_device_id(cudaInvalidDeviceId);
+  this->expect_prefetched(buffer.data(), buffer.size(), no_device);
+}
+
+TEST(PrefetchAdaptorTestNullUpstream, ThrowOnNullUpstream)
+{
+  // Constructing the adaptor with a null upstream pointer must be rejected.
+  auto const make_with_null_upstream = [] { prefetch_adaptor adaptor{nullptr}; };
+  EXPECT_THROW(make_with_null_upstream(), rmm::logic_error);
+}