diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
index 92aea2072..0ef159c9f 100644
--- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -47,13 +47,13 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
    *
    * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync`
    *
-   * @param valid_pool_handle Handle to a CUDA memory pool which will be used to
+   * @param pool_handle Handle to a CUDA memory pool which will be used to
    * serve allocation requests.
    */
-  cuda_async_view_memory_resource(cudaMemPool_t valid_pool_handle)
-    : cuda_pool_handle_{[valid_pool_handle]() {
-        RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
-        return valid_pool_handle;
+  cuda_async_view_memory_resource(cudaMemPool_t pool_handle)
+    : cuda_pool_handle_{[pool_handle]() {
+        RMM_EXPECTS(nullptr != pool_handle, "Unexpected null pool handle.");
+        return pool_handle;
       }()}
   {
     // Check if cudaMallocAsync Memory pool supported
@@ -72,7 +72,8 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
    */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
 
-  cuda_async_view_memory_resource() = default;
+  cuda_async_view_memory_resource() = default;
+  ~cuda_async_view_memory_resource() = default;
   cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
     default;  ///< @default_copy_constructor
   cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd
index 9e7b70c4f..3ded7a9be 100644
--- a/python/rmm/rmm/librmm/memory_resource.pxd
+++ b/python/rmm/rmm/librmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,6 +23,8 @@ from libcpp.optional cimport optional
 from libcpp.pair cimport pair
 from libcpp.string cimport string
 
+from cuda.bindings.cyruntime cimport cudaMemPool_t
+
 from rmm.librmm.cuda_stream_view cimport cuda_stream_view
 from rmm.librmm.memory_resource cimport device_memory_resource
 
@@ -108,6 +110,14 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
             optional[size_t] release_threshold,
             optional[allocation_handle_type] export_handle_type) except +
 
+cdef extern from "rmm/mr/device/cuda_async_view_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+
+    cdef cppclass cuda_async_view_memory_resource(device_memory_resource):
+        cuda_async_view_memory_resource(
+            cudaMemPool_t pool_handle) except +
+        cudaMemPool_t pool_handle() const
+
 # TODO: when we adopt Cython 3.0 use enum class
 cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
     namespace \
diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py
index 673ffde82..eada9b21b 100644
--- a/python/rmm/rmm/mr.py
+++ b/python/rmm/rmm/mr.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
     BinningMemoryResource,
     CallbackMemoryResource,
     CudaAsyncMemoryResource,
+    CudaAsyncViewMemoryResource,
     CudaMemoryResource,
     DeviceMemoryResource,
     FailureCallbackResourceAdaptor,
@@ -50,6 +51,7 @@
     "BinningMemoryResource",
     "CallbackMemoryResource",
     "CudaAsyncMemoryResource",
+    "CudaAsyncViewMemoryResource",
     "CudaMemoryResource",
     "DeviceMemoryResource",
     "FailureCallbackResourceAdaptor",
diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd
index d1e5610db..8ccd07a02 100644
--- a/python/rmm/rmm/pylibrmm/memory_resource.pxd
+++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -44,6 +44,9 @@ cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
     pass
 
 cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
     pass
 
+cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
+    pass
+
 cdef class PoolMemoryResource(UpstreamResourceAdaptor):
     pass
diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx
index 0189a58b5..253352e7c 100644
--- a/python/rmm/rmm/pylibrmm/memory_resource.pyx
+++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx
@@ -28,7 +28,8 @@ from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.optional cimport optional
 from libcpp.pair cimport pair
 
-from cuda.bindings.runtime import cudaError_t
+from cuda.bindings cimport cyruntime
+from cuda.bindings import driver, runtime
 
 from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice
 
@@ -54,6 +55,7 @@ from rmm.librmm.memory_resource cimport (
     binning_memory_resource,
     callback_memory_resource,
     cuda_async_memory_resource,
+    cuda_async_view_memory_resource,
     cuda_memory_resource,
     deallocate_callback_t,
     device_memory_resource,
@@ -203,6 +205,46 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
         )
 
 
+cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
+    """
+    Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for
+    allocation/deallocation with an existing CUDA memory pool.
+
+    This resource uses an existing CUDA memory pool handle (such as the
+    default pool) instead of creating a new one. This is useful for
+    integrating with existing GPU applications that already use a CUDA
+    memory pool, or for customizing the flags used by the memory pool.
+
+    The memory pool passed in must not be destroyed during the lifetime
+    of this memory resource.
+
+    Parameters
+    ----------
+    pool_handle : cudaMemPool_t or CUmemoryPool
+        Handle to a CUDA memory pool which will be used to serve allocation
+        requests.
+ """ + def __cinit__( + self, + pool_handle + ): + # Convert the pool_handle to a cyruntime.cudaMemPool_t + if not isinstance(pool_handle, (runtime.cudaMemPool_t, driver.CUmemoryPool)): + raise ValueError("pool_handle must be a cudaMemPool_t or CUmemoryPool") + + cdef cyruntime.cudaMemPool_t c_pool_handle + c_pool_handle = <cyruntime.cudaMemPool_t><uintptr_t>int(pool_handle) + + self.c_obj.reset( + new cuda_async_view_memory_resource(c_pool_handle) + ) + + def pool_handle(self): + cdef cuda_async_view_memory_resource* c_mr = \ + <cuda_async_view_memory_resource*>self.c_obj.get() + return <uintptr_t>c_mr.pool_handle() + + cdef class ManagedMemoryResource(DeviceMemoryResource): def __cinit__(self): self.c_obj.reset( @@ -991,7 +1033,7 @@ cpdef void _initialize( try: original_device = getDevice() except CUDARuntimeError as e: - if e.status == cudaError_t.cudaErrorNoDevice: + if e.status == runtime.cudaError_t.cudaErrorNoDevice: warnings.warn(e.msg) else: raise e diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index ee02d5d0e..2fc917863 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -1078,3 +1078,42 @@ def test_available_device_memory(): assert initial_memory[1] == final_memory[1] assert initial_memory[0] > 0 assert final_memory[0] > 0 + + +@pytest.mark.parametrize("dtype", _dtypes) +@pytest.mark.parametrize("nelem", _nelems) +@pytest.mark.parametrize("alloc", _allocs) +def test_cuda_async_view_memory_resource_default_pool(dtype, nelem, alloc): + # Get the default memory pool handle + current_device = rmm._cuda.gpu.getDevice() + err, pool = runtime.cudaDeviceGetDefaultMemPool(current_device) + assert err == runtime.cudaError_t.cudaSuccess + + mr = rmm.mr.CudaAsyncViewMemoryResource(pool) + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) + array_tester(dtype, nelem, alloc) + + +@pytest.mark.parametrize("dtype", _dtypes) +@pytest.mark.parametrize("nelem", _nelems) +@pytest.mark.parametrize("alloc", _allocs) +def test_cuda_async_view_memory_resource_custom_pool(dtype, nelem, alloc): + # Create a memory pool handle + props = runtime.cudaMemPoolProps() + props.allocType = runtime.cudaMemAllocationType.cudaMemAllocationTypePinned + props.location.id = rmm._cuda.gpu.getDevice() + props.location.type = runtime.cudaMemLocationType.cudaMemLocationTypeDevice + err, pool = runtime.cudaMemPoolCreate(props) + assert err == runtime.cudaError_t.cudaSuccess + + mr = rmm.mr.CudaAsyncViewMemoryResource(pool) + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) + array_tester(dtype, nelem, alloc) + + # After the pool is destroyed, new allocations should raise + (err,) = runtime.cudaMemPoolDestroy(pool) + assert err == runtime.cudaError_t.cudaSuccess + with pytest.raises(MemoryError): + array_tester(dtype, nelem, alloc)