Add async view memory resource bindings to Python. #1864

Merged: 5 commits, Mar 18, 2025
Changes from 3 commits
3 changes: 2 additions & 1 deletion include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -72,7 +72,8 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }

-cuda_async_view_memory_resource() = default;
+cuda_async_view_memory_resource() = default;
+~cuda_async_view_memory_resource() = default;
Contributor Author:
Fixed a missing rule-of-6 destructor declaration here, which raised a warning in my IDE.

cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
default; ///< @default_copy_constructor
cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
12 changes: 11 additions & 1 deletion python/rmm/rmm/librmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,6 +23,8 @@ from libcpp.optional cimport optional
from libcpp.pair cimport pair
from libcpp.string cimport string

from cuda.bindings.cyruntime cimport cudaMemPool_t

from rmm.librmm.cuda_stream_view cimport cuda_stream_view
from rmm.librmm.memory_resource cimport device_memory_resource

@@ -108,6 +110,14 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
optional[size_t] release_threshold,
optional[allocation_handle_type] export_handle_type) except +

cdef extern from "rmm/mr/device/cuda_async_view_memory_resource.hpp" \
namespace "rmm::mr" nogil:

cdef cppclass cuda_async_view_memory_resource(device_memory_resource):
cuda_async_view_memory_resource(
cudaMemPool_t valid_pool_handle) except +
cudaMemPool_t pool_handle() const

# TODO: when we adopt Cython 3.0 use enum class
cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
namespace \
4 changes: 3 additions & 1 deletion python/rmm/rmm/mr.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
BinningMemoryResource,
CallbackMemoryResource,
CudaAsyncMemoryResource,
CudaAsyncViewMemoryResource,
CudaMemoryResource,
DeviceMemoryResource,
FailureCallbackResourceAdaptor,
@@ -50,6 +51,7 @@
"BinningMemoryResource",
"CallbackMemoryResource",
"CudaAsyncMemoryResource",
"CudaAsyncViewMemoryResource",
"CudaMemoryResource",
"DeviceMemoryResource",
"FailureCallbackResourceAdaptor",
5 changes: 4 additions & 1 deletion python/rmm/rmm/pylibrmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -44,6 +44,9 @@ cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
    pass

cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
    pass

cdef class PoolMemoryResource(UpstreamResourceAdaptor):
    pass

38 changes: 36 additions & 2 deletions python/rmm/rmm/pylibrmm/memory_resource.pyx
@@ -28,7 +28,8 @@ from libcpp.memory cimport make_unique, unique_ptr
from libcpp.optional cimport optional
from libcpp.pair cimport pair

-from cuda.bindings.runtime import cudaError_t
+from cuda.bindings cimport cyruntime
+from cuda.bindings import runtime

from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice

@@ -54,6 +55,7 @@ from rmm.librmm.memory_resource cimport (
binning_memory_resource,
callback_memory_resource,
cuda_async_memory_resource,
cuda_async_view_memory_resource,
cuda_memory_resource,
deallocate_callback_t,
device_memory_resource,
@@ -203,6 +205,38 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
)


cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
    """
    Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for
    allocation/deallocation with an existing CUDA memory pool.

    This resource uses an existing CUDA memory pool handle (such as the default pool)
    instead of creating a new one. This is useful for integrating with existing GPU
    applications that already use a CUDA memory pool, or customizing the flags
    used by the memory pool.

    Parameters
    ----------
    valid_pool_handle : cudaMemPool_t
Contributor Author:
I don't like the name valid_pool_handle but I copied it from C++. Should we consider renaming it?

Contributor:
Sure, though please mention somewhere in this docstring that the user is responsible for keeping the mempool alive

Contributor Author:
Done in b7e899d. The new sentence says,

> The memory pool passed in must not be destroyed during the lifetime of this memory resource.

        Handle to a CUDA memory pool which will be used to serve allocation
        requests.
    """
    def __cinit__(
        self,
        valid_pool_handle
    ):
        cdef cyruntime.cudaMemPool_t c_memory_pool_handle = \
            <cyruntime.cudaMemPool_t>valid_pool_handle
        self.c_obj.reset(
            new cuda_async_view_memory_resource(c_memory_pool_handle)
        )

    def pool_handle(self):
        cdef cuda_async_view_memory_resource* c_mr = \
            <cuda_async_view_memory_resource*>self.c_obj.get()
        return <uintptr_t>c_mr.pool_handle()


cdef class ManagedMemoryResource(DeviceMemoryResource):
def __cinit__(self):
self.c_obj.reset(
@@ -991,7 +1025,7 @@ cpdef void _initialize(
    try:
        original_device = getDevice()
    except CUDARuntimeError as e:
-        if e.status == cudaError_t.cudaErrorNoDevice:
+        if e.status == runtime.cudaError_t.cudaErrorNoDevice:
            warnings.warn(e.msg)
        else:
            raise e
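For orientation, here is a minimal usage sketch (not part of the PR's diff) showing how the new binding fits together, assuming the `cuda.bindings.runtime` module and the `rmm._cuda.gpu.getDevice()` helper that the tests below rely on:

```python
# Hedged usage sketch: view the device's default CUDA memory pool through RMM.
from cuda.bindings import runtime

import rmm
import rmm.mr

device = rmm._cuda.gpu.getDevice()
err, pool = runtime.cudaDeviceGetDefaultMemPool(device)
assert err == runtime.cudaError_t.cudaSuccess

# The view resource does not own the pool; the pool must stay alive for the
# lifetime of the resource (see the review discussion above).
mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
rmm.mr.set_current_device_resource(mr)

# Allocations are now served from the viewed pool.
buf = rmm.DeviceBuffer(size=1024)
```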
33 changes: 33 additions & 0 deletions python/rmm/rmm/tests/test_rmm.py
@@ -1078,3 +1078,36 @@ def test_available_device_memory():
    assert initial_memory[1] == final_memory[1]
    assert initial_memory[0] > 0
    assert final_memory[0] > 0


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_default_pool(dtype, nelem, alloc):
    # Get the default memory pool handle
    current_device = rmm._cuda.gpu.getDevice()
    err, pool = runtime.cudaDeviceGetDefaultMemPool(current_device)
Contributor:
Does this pool need to be released? I don't know how the default mempool behaves

Contributor Author (@bdice), Mar 18, 2025:
I don't think so. It cannot be destroyed, per the docs. Attempting to destroy it returns an error code. 😄

> Note: A device's default memory pool cannot be destroyed.

https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS_1g709113128c1c52c3bf170022dc7723dd

    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_custom_pool(dtype, nelem, alloc):
    # Create a memory pool handle
    props = runtime.cudaMemPoolProps()
    props.allocType = runtime.cudaMemAllocationType.cudaMemAllocationTypePinned
    props.location.id = rmm._cuda.gpu.getDevice()
    props.location.type = runtime.cudaMemLocationType.cudaMemLocationTypeDevice
    err, pool = runtime.cudaMemPoolCreate(props)
Contributor:
This test should also destroy the pool

Contributor Author:
Good idea! Tested in b7e899d. It also makes sure that the memory resource raises a MemoryError when asked to allocate after the pool is destroyed.

    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)
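The pool teardown that the reviewer requested lands in b7e899d and is therefore not visible in this three-commit view. A rough sketch of what that cleanup could look like, continuing from the end of the test above and assuming `runtime.cudaMemPoolDestroy` plus the MemoryError behavior the author describes:

```python
# Hedged sketch (approximates the follow-up commit b7e899d, not shown here):
# destroy the explicitly created pool, then check the view resource can no
# longer serve allocations. Assumes pytest, runtime, rmm, and the `pool`
# variable from the test body above.
err, = runtime.cudaMemPoolDestroy(pool)
assert err == runtime.cudaError_t.cudaSuccess

# With the viewed pool gone, allocation through the resource should fail.
with pytest.raises(MemoryError):
    rmm.DeviceBuffer(size=1024)

# Restore a default resource so later tests are unaffected.
rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())
```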