Merge branch 'fea-remove-deprecated-funcs' of github.com:harrism/rmm into fea-remove-deprecated-funcs
harrism committed Apr 23, 2024
2 parents d81f464 + eeb1c4b commit a4d6c96
Showing 9 changed files with 45 additions and 43 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -114,7 +114,8 @@ endif()
# * add tests --------------------------------------------------------------------------------------

if(BUILD_TESTS AND CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
-  include(cmake/thirdparty/get_gtest.cmake)
+  include(${rapids-cmake-dir}/cpm/gtest.cmake)
+  rapids_cpm_gtest(BUILD_STATIC)
  include(CTest) # calls enable_testing()

  add_subdirectory(tests)
26 changes: 16 additions & 10 deletions README.md
@@ -375,14 +375,16 @@ this code is correct:

#### Use of `rmm::device_vector` with multiple devices

-> [!CAUTION] In contrast to the uninitialized `rmm:device_uvector`, `rmm::device_vector` **DOES
-> NOT** store the active device during construction, and therefore cannot arrange for it to be
-> active when the destructor runs. It is therefore the responsibility of the user to ensure the
-> currently active device is correct.
+`rmm::device_vector` uses an `rmm::mr::thrust_allocator` to enable `thrust::device_vector` to
+allocate and deallocate memory using RMM. As such, the usual rules for usage of the backing memory
+resource apply: the active device must match the active device at resource construction time. To
+facilitate use in an RAII setting, `rmm::mr::thrust_allocator` records the active device at
+construction time and ensures that device is active whenever it allocates or deallocates memory.
+Usage of `rmm::device_vector` with multiple devices is therefore the same as `rmm::device_buffer`.
+One must _create_ `device_vector`s with the correct device active, but it is safe to destroy them
+with a different active device.

-`rmm::device_vector` is therefore slightly less ergonomic to use in a multiple device setting since
-the caller must arrange that active devices on allocation and deallocation match. Recapitulating the
-previous example using `rmm::device_vector`:
+For example, recapitulating the previous example using `rmm::device_vector`:

```c++
{
@@ -391,12 +393,16 @@ previous example using `rmm::device_vector`:
  rmm::device_vector<int> vec(16, rmm::mr::thrust_allocator<int>(rmm::cuda_stream_default, &mr));
  RMM_CUDA_TRY(cudaSetDevice(1));
  ...
-  // ERROR: ~vec runs with device 1 active, but needs device 0 to be active
+  // No need to switch back to device 0 before ~vec runs
}
```

-A correct example adds a call to `cudaSetDevice(0)` on the line of the error comment before the dtor
-for `~vec` runs.
+> [!NOTE]
+> Although allocation and deallocation in the `thrust_allocator` run with the correct active device,
+> modification of `rmm::device_vector` might necessitate a kernel launch, and this must run with the
+> correct device active. For example, `.resize()` might both allocate _and_ launch a kernel to
+> initialize new elements: the user must arrange for this kernel launch to occur with the correct
+> device for the memory resource active.
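
The pattern the note describes, as a minimal sketch (not part of this change; it assumes two devices and the same `mr` setup as the example above): re-activate the resource's device before any call that may launch a kernel.

```c++
{
  RMM_CUDA_TRY(cudaSetDevice(0));
  rmm::mr::cuda_memory_resource mr;
  rmm::device_vector<int> vec(16, rmm::mr::thrust_allocator<int>(rmm::cuda_stream_default, &mr));
  RMM_CUDA_TRY(cudaSetDevice(1));
  // ... work with device 1 active ...
  // resize() may allocate _and_ launch a kernel to initialize the new elements,
  // so make the resource's device active first. (The allocation alone would be
  // safe, since the allocator switches devices internally.)
  RMM_CUDA_TRY(cudaSetDevice(0));
  vec.resize(32);
}  // ~vec is safe with any device active: the allocator switches back itself
```
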
## `cuda_stream_view` and `cuda_stream`

22 changes: 0 additions & 22 deletions cmake/thirdparty/get_gtest.cmake

This file was deleted.

3 changes: 0 additions & 3 deletions conda/recipes/librmm/conda_build_config.yaml
@@ -16,9 +16,6 @@ cmake_version:
fmt_version:
  - ">=10.1.1,<11"

-gtest_version:
-  - ">=1.13.0"
-
spdlog_version:
  - ">=1.12.0,<1.13"

4 changes: 0 additions & 4 deletions conda/recipes/librmm/meta.yaml
@@ -33,8 +33,6 @@ requirements:
    # added as a run requirement via the packages' run_exports.
    - fmt {{ fmt_version }}
    - spdlog {{ spdlog_version }}
-    - gtest {{ gtest_version }}
-    - gmock {{ gtest_version }}

build:
script_env:
@@ -150,8 +148,6 @@ outputs:
    {% endif %}
    - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
    - {{ pin_subpackage('librmm', exact=True) }}
-    - gtest {{ gtest_version }}
-    - gmock {{ gtest_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
1 change: 0 additions & 1 deletion include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -25,7 +25,6 @@

#include <cuda/std/type_traits>
#include <cuda_runtime_api.h>
-#include <thrust/optional.h>

#include <cstddef>
#include <limits>
1 change: 0 additions & 1 deletion include/rmm/mr/device/cuda_async_view_memory_resource.hpp
@@ -23,7 +23,6 @@
#include <rmm/mr/device/device_memory_resource.hpp>

#include <cuda_runtime_api.h>
-#include <thrust/optional.h>

#include <cstddef>
#include <limits>
9 changes: 8 additions & 1 deletion include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -16,6 +16,7 @@

#pragma once

+#include <rmm/cuda_device.hpp>
#include <rmm/detail/thrust_namespace.h>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/resource_ref.hpp>
@@ -39,6 +40,9 @@ namespace rmm::mr {
* allocate objects of a specific type `T`, but can be freely rebound to other
* types.
*
+ * The allocator records the current cuda device and may only be used with a backing
+ * `device_async_resource_ref` valid for the same device.
+ *
* @tparam T The type of the objects that will be allocated by this allocator
*/
template <typename T>
@@ -92,7 +96,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
*/
  template <typename U>
  thrust_allocator(thrust_allocator<U> const& other)
-    : _mr(other.resource()), _stream{other.stream()}
+    : _mr(other.resource()), _stream{other.stream()}, _device{other._device}
  {
  }

@@ -104,6 +108,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
*/
  pointer allocate(size_type num)
  {
+    cuda_set_device_raii dev{_device};
    return thrust::device_pointer_cast(
      static_cast<T*>(_mr.allocate_async(num * sizeof(T), _stream)));
  }
@@ -117,6 +122,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
*/
  void deallocate(pointer ptr, size_type num)
  {
+    cuda_set_device_raii dev{_device};
    return _mr.deallocate_async(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream);
  }

@@ -143,6 +149,7 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
 private:
  cuda_stream_view _stream{};
  rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()};
+  cuda_device_id _device{get_current_cuda_device()};
};
/** @} */ // end of group
} // namespace rmm::mr
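
The `cuda_set_device_raii` used in `allocate` and `deallocate` above comes from `<rmm/cuda_device.hpp>`: a scope guard that activates a device and restores the previously active one on scope exit. A simplified sketch of its behavior (name and details simplified; see the header for the real implementation):

```c++
#include <rmm/cuda_device.hpp>   // rmm::cuda_device_id, rmm::get_current_cuda_device
#include <rmm/detail/error.hpp>  // RMM_ASSERT_CUDA_SUCCESS

#include <cuda_runtime_api.h>

// Simplified sketch of the scope guard: activate `dev_id` now, restore the
// previously active device when the guard goes out of scope.
struct set_device_guard {
  explicit set_device_guard(rmm::cuda_device_id dev_id)
    : old_device_{rmm::get_current_cuda_device()},
      needs_reset_{dev_id.value() != old_device_.value()}
  {
    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
  }
  ~set_device_guard()
  {
    if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
  }
  set_device_guard(set_device_guard const&)            = delete;
  set_device_guard& operator=(set_device_guard const&) = delete;

 private:
  rmm::cuda_device_id old_device_;
  bool needs_reset_;
};
```

Because `thrust_allocator` captures `_device` at construction and wraps every `allocate`/`deallocate` in this guard, the backing resource always sees its own device while the caller's active device is left undisturbed — which is what makes destroying an `rmm::device_vector` with a different device active safe.
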
19 changes: 19 additions & 0 deletions tests/mr/device/thrust_allocator_tests.cu
@@ -16,7 +16,9 @@

#include "mr_ref_test.hpp"

+#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
+#include <rmm/detail/error.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
@@ -36,19 +38,36 @@ struct allocator_test : public mr_ref_test {};

TEST_P(allocator_test, first)
{
+  rmm::mr::set_current_device_resource(this->mr.get());
  auto const num_ints{100};
  rmm::device_vector<int> ints(num_ints, 1);
  EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end()));
}

TEST_P(allocator_test, defaults)
{
+  rmm::mr::set_current_device_resource(this->mr.get());
  rmm::mr::thrust_allocator<int> allocator(rmm::cuda_stream_default);
  EXPECT_EQ(allocator.stream(), rmm::cuda_stream_default);
  EXPECT_EQ(allocator.get_upstream_resource(),
            rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()});
}

+TEST_P(allocator_test, multi_device)
+{
+  if (rmm::get_num_cuda_devices() < 2) { GTEST_SKIP() << "Needs at least two devices"; }
+  cuda_set_device_raii with_device{rmm::get_current_cuda_device()};
+  rmm::cuda_stream stream{};
+  // make allocator on device-0
+  rmm::mr::thrust_allocator<int> allocator(stream.view(), this->ref);
+  auto const size{100};
+  EXPECT_NO_THROW([&]() {
+    auto vec = rmm::device_vector<int>(size, allocator);
+    // Destruct with device-1 active
+    RMM_CUDA_TRY(cudaSetDevice(1));
+  }());
+}

INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests,
allocator_test,
::testing::Values(mr_factory{"CUDA", &make_cuda},
