|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
| 9 | +#include <gtest/gtest.h> |
| 10 | + |
| 11 | +#include <cuda_runtime.h> |
| 12 | + |
| 13 | +#include <cstdint> |
| 14 | +#include <vector> |
| 15 | + |
| 16 | +#include <executorch/backends/cuda/runtime/cuda_allocator.h> |
| 17 | +#include <executorch/extension/cuda/caller_stream.h> |
| 18 | +#include <executorch/runtime/core/error.h> |
| 19 | +#include <executorch/runtime/platform/platform.h> |
| 20 | + |
| 21 | +using executorch::backends::cuda::CudaAllocator; |
| 22 | +using executorch::runtime::Error; |
| 23 | + |
| 24 | +namespace { |
| 25 | +bool cuda_device_available() { |
| 26 | + int device_count = 0; |
| 27 | + const cudaError_t err = cudaGetDeviceCount(&device_count); |
| 28 | + return err == cudaSuccess && device_count > 0; |
| 29 | +} |
| 30 | +} // namespace |
| 31 | + |
| 32 | +class CudaAllocatorTest : public testing::Test { |
| 33 | + protected: |
| 34 | + void SetUp() override { |
| 35 | + if (!cuda_device_available()) { |
| 36 | + GTEST_SKIP() << "CUDA device unavailable"; |
| 37 | + } |
| 38 | + et_pal_init(); |
| 39 | + } |
| 40 | +}; |
| 41 | + |
| 42 | +TEST_F(CudaAllocatorTest, CopyHostToDevice) { |
| 43 | + CudaAllocator& a = CudaAllocator::instance(); |
| 44 | + constexpr size_t N = 1024; |
| 45 | + auto res = a.allocate(N, 0); |
| 46 | + ASSERT_TRUE(res.ok()); |
| 47 | + void* dptr = res.get(); |
| 48 | + |
| 49 | + std::vector<uint8_t> h_src(N, 42); |
| 50 | + EXPECT_EQ(a.copy_host_to_device(dptr, h_src.data(), N, 0), Error::Ok); |
| 51 | + |
| 52 | + a.deallocate(dptr, 0); |
| 53 | +} |
| 54 | + |
| 55 | +TEST_F(CudaAllocatorTest, CopyDeviceToHost) { |
| 56 | + CudaAllocator& a = CudaAllocator::instance(); |
| 57 | + constexpr size_t N = 1024; |
| 58 | + auto res = a.allocate(N, 0); |
| 59 | + ASSERT_TRUE(res.ok()); |
| 60 | + void* dptr = res.get(); |
| 61 | + |
| 62 | + std::vector<uint8_t> h_src(N, 42), h_dst(N, 0); |
| 63 | + ASSERT_EQ(a.copy_host_to_device(dptr, h_src.data(), N, 0), Error::Ok); |
| 64 | + EXPECT_EQ(a.copy_device_to_host(h_dst.data(), dptr, N, 0), Error::Ok); |
| 65 | + EXPECT_EQ(h_src, h_dst); |
| 66 | + |
| 67 | + a.deallocate(dptr, 0); |
| 68 | +} |
| 69 | + |
| 70 | +TEST_F(CudaAllocatorTest, CopyHostToDeviceWithCallerStream) { |
| 71 | + int device = 0; |
| 72 | + ASSERT_EQ(cudaGetDevice(&device), cudaSuccess); |
| 73 | + ASSERT_EQ(device, 0) << "test assumes single GPU device 0"; |
| 74 | + // TODO: validate caller stream device matches index once CallerStreamGuard |
| 75 | + // exposes device. For now assert single-GPU case. |
| 76 | + cudaStream_t s; |
| 77 | + ASSERT_EQ(cudaStreamCreate(&s), cudaSuccess); |
| 78 | + { |
| 79 | + executorch::extension::cuda::CallerStreamGuard g(s); |
| 80 | + |
| 81 | + CudaAllocator& a = CudaAllocator::instance(); |
| 82 | + auto res = a.allocate(256, 0); |
| 83 | + ASSERT_TRUE(res.ok()); |
| 84 | + void* d = res.get(); |
| 85 | + std::vector<uint8_t> h(256, 7); |
| 86 | + // should take async branch internally, still return Ok |
| 87 | + EXPECT_EQ(a.copy_host_to_device(d, h.data(), 256, 0), Error::Ok); |
| 88 | + ASSERT_EQ(cudaStreamSynchronize(s), cudaSuccess); |
| 89 | + a.deallocate(d, 0); |
| 90 | + } |
| 91 | + ASSERT_EQ(cudaStreamDestroy(s), cudaSuccess); |
| 92 | +} |
| 93 | + |
| 94 | +TEST_F(CudaAllocatorTest, CopyDeviceToHostWithCallerStream) { |
| 95 | + int device = 0; |
| 96 | + ASSERT_EQ(cudaGetDevice(&device), cudaSuccess); |
| 97 | + ASSERT_EQ(device, 0) << "test assumes single GPU device 0"; |
| 98 | + // TODO: validate caller stream device matches index once CallerStreamGuard |
| 99 | + // exposes device. For now assert single-GPU case. |
| 100 | + cudaStream_t s; |
| 101 | + ASSERT_EQ(cudaStreamCreate(&s), cudaSuccess); |
| 102 | + { |
| 103 | + executorch::extension::cuda::CallerStreamGuard g(s); |
| 104 | + |
| 105 | + CudaAllocator& a = CudaAllocator::instance(); |
| 106 | + auto res = a.allocate(256, 0); |
| 107 | + ASSERT_TRUE(res.ok()); |
| 108 | + void* d = res.get(); |
| 109 | + std::vector<uint8_t> h_src(256, 5), h_dst(256, 0); |
| 110 | + ASSERT_EQ(a.copy_host_to_device(d, h_src.data(), 256, 0), Error::Ok); |
| 111 | + EXPECT_EQ(a.copy_device_to_host(h_dst.data(), d, 256, 0), Error::Ok); |
| 112 | + EXPECT_EQ(h_src, h_dst); |
| 113 | + |
| 114 | + a.deallocate(d, 0); |
| 115 | + } |
| 116 | + ASSERT_EQ(cudaStreamDestroy(s), cudaSuccess); |
| 117 | +} |
| 118 | + |
| 119 | +TEST_F(CudaAllocatorTest, CopyHostToDeviceNullDstReturnsInvalidArgument) { |
| 120 | + CudaAllocator& a = CudaAllocator::instance(); |
| 121 | + // null dst should fail gracefully not CHECK abort |
| 122 | + std::vector<uint8_t> h(8, 1); |
| 123 | + Error e = a.copy_host_to_device(nullptr, h.data(), 8, 0); |
| 124 | + EXPECT_EQ(e, Error::InvalidArgument) |
| 125 | + << "expected InvalidArgument for null dst, got " |
| 126 | + << static_cast<uint32_t>(e); |
| 127 | +} |
| 128 | + |
| 129 | +TEST_F(CudaAllocatorTest, CopyHostToDeviceNullSrcReturnsInvalidArgument) { |
| 130 | + CudaAllocator& a = CudaAllocator::instance(); |
| 131 | + void* dummy_dst = reinterpret_cast<void*>(0x1); |
| 132 | + Error e = a.copy_host_to_device(dummy_dst, nullptr, 8, 0); |
| 133 | + EXPECT_EQ(e, Error::InvalidArgument) |
| 134 | + << "expected InvalidArgument for null src, got " |
| 135 | + << static_cast<uint32_t>(e); |
| 136 | +} |
| 137 | + |
| 138 | +TEST_F(CudaAllocatorTest, CopyDeviceToHostNullDstReturnsInvalidArgument) { |
| 139 | + CudaAllocator& a = CudaAllocator::instance(); |
| 140 | + void* dummy_src = reinterpret_cast<void*>(0x1); |
| 141 | + Error e = a.copy_device_to_host(nullptr, dummy_src, 8, 0); |
| 142 | + EXPECT_EQ(e, Error::InvalidArgument) |
| 143 | + << "expected InvalidArgument for null dst, got " |
| 144 | + << static_cast<uint32_t>(e); |
| 145 | +} |
| 146 | + |
| 147 | +TEST_F(CudaAllocatorTest, CopyDeviceToHostNullSrcReturnsInvalidArgument) { |
| 148 | + CudaAllocator& a = CudaAllocator::instance(); |
| 149 | + std::vector<uint8_t> h(8, 1); |
| 150 | + // null src should fail gracefully not CHECK abort |
| 151 | + Error e = a.copy_device_to_host(h.data(), nullptr, 8, 0); |
| 152 | + EXPECT_EQ(e, Error::InvalidArgument) |
| 153 | + << "expected InvalidArgument for null src, got " |
| 154 | + << static_cast<uint32_t>(e); |
| 155 | +} |
0 commit comments