Add async view memory resource bindings to Python. #1864

Merged: 5 commits, Mar 18, 2025
Changes from 3 commits
3 changes: 2 additions & 1 deletion include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -72,7 +72,8 @@ class cuda_async_view_memory_resource final : public device_memory_resource {
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }

-cuda_async_view_memory_resource() = default;
+cuda_async_view_memory_resource() = default;
+~cuda_async_view_memory_resource() = default;
Contributor Author:
Fixed a missing rule-of-6 destructor declaration here, which raised a warning in my IDE.

cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
default; ///< @default_copy_constructor
cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
12 changes: 11 additions & 1 deletion python/rmm/rmm/librmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,6 +23,8 @@ from libcpp.optional cimport optional
from libcpp.pair cimport pair
from libcpp.string cimport string

from cuda.bindings.cyruntime cimport cudaMemPool_t

from rmm.librmm.cuda_stream_view cimport cuda_stream_view
from rmm.librmm.memory_resource cimport device_memory_resource

@@ -108,6 +110,14 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
optional[size_t] release_threshold,
optional[allocation_handle_type] export_handle_type) except +

cdef extern from "rmm/mr/device/cuda_async_view_memory_resource.hpp" \
namespace "rmm::mr" nogil:

cdef cppclass cuda_async_view_memory_resource(device_memory_resource):
cuda_async_view_memory_resource(
cudaMemPool_t valid_pool_handle) except +
cudaMemPool_t pool_handle() const

# TODO: when we adopt Cython 3.0 use enum class
cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
namespace \
4 changes: 3 additions & 1 deletion python/rmm/rmm/mr.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
BinningMemoryResource,
CallbackMemoryResource,
CudaAsyncMemoryResource,
CudaAsyncViewMemoryResource,
CudaMemoryResource,
DeviceMemoryResource,
FailureCallbackResourceAdaptor,
@@ -50,6 +51,7 @@
"BinningMemoryResource",
"CallbackMemoryResource",
"CudaAsyncMemoryResource",
"CudaAsyncViewMemoryResource",
"CudaMemoryResource",
"DeviceMemoryResource",
"FailureCallbackResourceAdaptor",
5 changes: 4 additions & 1 deletion python/rmm/rmm/pylibrmm/memory_resource.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -44,6 +44,9 @@ cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
    pass

cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
    pass

cdef class PoolMemoryResource(UpstreamResourceAdaptor):
    pass

38 changes: 36 additions & 2 deletions python/rmm/rmm/pylibrmm/memory_resource.pyx
@@ -28,7 +28,8 @@ from libcpp.memory cimport make_unique, unique_ptr
from libcpp.optional cimport optional
from libcpp.pair cimport pair

-from cuda.bindings.runtime import cudaError_t
+from cuda.bindings cimport cyruntime
+from cuda.bindings import runtime

from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice

@@ -54,6 +55,7 @@ from rmm.librmm.memory_resource cimport (
binning_memory_resource,
callback_memory_resource,
cuda_async_memory_resource,
cuda_async_view_memory_resource,
cuda_memory_resource,
deallocate_callback_t,
device_memory_resource,
@@ -203,6 +205,38 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
)


cdef class CudaAsyncViewMemoryResource(DeviceMemoryResource):
    """
    Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for
    allocation/deallocation with an existing CUDA memory pool.

    This resource uses an existing CUDA memory pool handle (such as the default pool)
    instead of creating a new one. This is useful for integrating with existing GPU
    applications that already use a CUDA memory pool, or customizing the flags
    used by the memory pool.

    Parameters
    ----------
    valid_pool_handle : cudaMemPool_t
Contributor Author:
I don't like the name valid_pool_handle but I copied it from C++. Should we consider renaming it?

Contributor:
Sure, though please mention somewhere in this docstring that the user is responsible for keeping the mempool alive

Contributor Author:
Done in b7e899d. The new sentence says,

> The memory pool passed in must not be destroyed during the lifetime of this memory resource.

        Handle to a CUDA memory pool which will be used to serve allocation
        requests.
    """
    def __cinit__(
        self,
        valid_pool_handle
    ):
        cdef cyruntime.cudaMemPool_t c_memory_pool_handle = \
            <cyruntime.cudaMemPool_t>valid_pool_handle
        self.c_obj.reset(
            new cuda_async_view_memory_resource(c_memory_pool_handle)
        )

    def pool_handle(self):
        cdef cuda_async_view_memory_resource* c_mr = \
            <cuda_async_view_memory_resource*>self.c_obj.get()
        return <uintptr_t>c_mr.pool_handle()


cdef class ManagedMemoryResource(DeviceMemoryResource):
def __cinit__(self):
self.c_obj.reset(
@@ -991,7 +1025,7 @@ cpdef void _initialize(
    try:
        original_device = getDevice()
    except CUDARuntimeError as e:
-        if e.status == cudaError_t.cudaErrorNoDevice:
+        if e.status == runtime.cudaError_t.cudaErrorNoDevice:
            warnings.warn(e.msg)
        else:
            raise e
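For orientation, here is a minimal usage sketch (not part of the PR's diff) showing how the new binding fits together, assuming the `cuda.bindings.runtime` module and the `rmm._cuda.gpu.getDevice()` helper that the tests below rely on:

```python
# Hedged usage sketch: view the device's default CUDA memory pool through RMM.
from cuda.bindings import runtime

import rmm
import rmm.mr

device = rmm._cuda.gpu.getDevice()
err, pool = runtime.cudaDeviceGetDefaultMemPool(device)
assert err == runtime.cudaError_t.cudaSuccess

# The view resource does not own the pool; the pool must stay alive for the
# lifetime of the resource (see the review discussion above).
mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
rmm.mr.set_current_device_resource(mr)

# Allocations are now served from the viewed pool.
buf = rmm.DeviceBuffer(size=1024)
```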
33 changes: 33 additions & 0 deletions python/rmm/rmm/tests/test_rmm.py
@@ -1078,3 +1078,36 @@ def test_available_device_memory():
    assert initial_memory[1] == final_memory[1]
    assert initial_memory[0] > 0
    assert final_memory[0] > 0


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_default_pool(dtype, nelem, alloc):
    # Get the default memory pool handle
    current_device = rmm._cuda.gpu.getDevice()
    err, pool = runtime.cudaDeviceGetDefaultMemPool(current_device)
Contributor:
Does this pool need to be released? I don't know how the default mempool behaves

Contributor Author (@bdice), Mar 18, 2025:
I don't think so. It cannot be destroyed, per the docs. Attempting to destroy it returns an error code. 😄

> Note: A device's default memory pool cannot be destroyed.

https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS_1g709113128c1c52c3bf170022dc7723dd

    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_view_memory_resource_custom_pool(dtype, nelem, alloc):
    # Create a memory pool handle
    props = runtime.cudaMemPoolProps()
    props.allocType = runtime.cudaMemAllocationType.cudaMemAllocationTypePinned
    props.location.id = rmm._cuda.gpu.getDevice()
    props.location.type = runtime.cudaMemLocationType.cudaMemLocationTypeDevice
    err, pool = runtime.cudaMemPoolCreate(props)
Contributor:
This test should also destroy the pool

Contributor Author:
Good idea! Tested in b7e899d. It also makes sure that the memory resource raises a MemoryError when asked to allocate after the pool is destroyed.

    assert err == runtime.cudaError_t.cudaSuccess

    mr = rmm.mr.CudaAsyncViewMemoryResource(pool)
    rmm.mr.set_current_device_resource(mr)
    assert rmm.mr.get_current_device_resource_type() is type(mr)
    array_tester(dtype, nelem, alloc)
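The pool teardown that the reviewer requested lands in b7e899d and is therefore not visible in this three-commit view. A rough sketch of what that cleanup could look like, continuing from the end of the test above and assuming `runtime.cudaMemPoolDestroy` plus the MemoryError behavior the author describes:

```python
# Hedged sketch (approximates the follow-up commit b7e899d, not shown here):
# destroy the explicitly created pool, then check the view resource can no
# longer serve allocations. Assumes pytest, runtime, rmm, and the `pool`
# variable from the test body above.
err, = runtime.cudaMemPoolDestroy(pool)
assert err == runtime.cudaError_t.cudaSuccess

# With the viewed pool gone, allocation through the resource should fail.
with pytest.raises(MemoryError):
    rmm.DeviceBuffer(size=1024)

# Restore a default resource so later tests are unaffected.
rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())
```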