diff --git a/projects/hip-tests/catch/unit/event/CMakeLists.txt b/projects/hip-tests/catch/unit/event/CMakeLists.txt index b4d5f2c3b64..5183ff61dac 100644 --- a/projects/hip-tests/catch/unit/event/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/event/CMakeLists.txt @@ -10,6 +10,7 @@ set(TEST_SRC hipEventCreateWithFlags.cc hipEventSynchronize.cc Unit_hipEventMGpuMThreads.cc + hipEventRecord_spt.cc ) # The test used wait mechanism and doesnt play well with all arch of nvidia diff --git a/projects/hip-tests/catch/unit/event/hipEventRecord_spt.cc b/projects/hip-tests/catch/unit/event/hipEventRecord_spt.cc new file mode 100644 index 00000000000..41e26343eaa --- /dev/null +++ b/projects/hip-tests/catch/unit/event/hipEventRecord_spt.cc @@ -0,0 +1,151 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +/** + * @addtogroup hipEventRecord_spt hipEventRecord_spt + * @{ + * @ingroup EventTest + * `hipEventRecord_spt(hipEvent_t event, hipStream_t stream = NULL)` - + * Record an event in the specified stream. + */ +/** + * Test Description + * ------------------------ + * - Creates regular events and events with flags. + * - Enqueues them to the streams and checks if events + * can be successfully used for synchronization. + * Test source + * ------------------------ + * - unit/event/hipEventRecord_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipEventRecord_spt_BasicTst") { + constexpr size_t N = 1024; + constexpr int iterations = 1; + constexpr int blocks = 1024; + constexpr size_t Nbytes = N * sizeof(float); + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + enum TestType { + WithFlags_Default = hipEventDefault, + WithFlags_Blocking = hipEventBlockingSync, + WithFlags_DisableTiming = hipEventDisableTiming, +#if HT_AMD + WithFlags_ReleaseToDevice = hipEventReleaseToDevice, + WithFlags_ReleaseToSystem = hipEventReleaseToSystem, +#endif + WithoutFlags + }; +#if HT_AMD + auto flags = GENERATE(WithFlags_Default, WithFlags_Blocking, WithFlags_DisableTiming, + WithFlags_ReleaseToDevice, WithFlags_ReleaseToSystem, WithoutFlags); +#endif +#if HT_NVIDIA + auto flags = + GENERATE(WithFlags_Default, WithFlags_Blocking, WithFlags_DisableTiming, WithoutFlags); +#endif + hipEvent_t start{}, stop{}; + if (flags == WithoutFlags) { + HIP_CHECK(hipEventCreate(&start)); + HIP_CHECK(hipEventCreate(&stop)); + } else { + HIP_CHECK(hipEventCreateWithFlags(&start, flags)); + HIP_CHECK(hipEventCreateWithFlags(&stop, flags)); + } + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + // Warmup + HipTest::launchKernel(HipTest::vectorADD, blocks, 1, 0, 0, + static_cast(A_d), static_cast(B_d), C_d, + N); + HIP_CHECK(hipDeviceSynchronize()); + for (int i = 0; i < iterations; i++) { + //--- START TIMED REGION + long long hostStart = HipTest::get_time(); + // Record the start event + HIP_CHECK(hipEventRecord_spt(start, NULL)); + HipTest::launchKernel(HipTest::vectorADD, blocks, 1, 0, 0, + static_cast(A_d), static_cast(B_d), + C_d, N); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipEventRecord_spt(stop, NULL)); + HIP_CHECK(hipEventSynchronize(stop)); + long long hostStop = HipTest::get_time(); + //--- STOP TIMED REGION + float hostMs = HipTest::elapsed_time(hostStart, hostStop); + INFO("host_time (chrono) = " << hostMs); + // Make sure timer is timing something... + if (flags != WithFlags_DisableTiming) { + float eventMs = 1.0f; + HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop)); + INFO("kernel_time (hipEventElapsedTime) = " << eventMs); + REQUIRE(eventMs > 0.0f); + } + } + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipEventDestroy(start)); + HIP_CHECK(hipEventDestroy(stop)); + HipTest::checkVectorADD(A_h, B_h, C_h, N, true); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + TestContext::get().cleanContext(); +} +/** + * Test Description + * ------------------------ + * - Validates handling of invalid arguments: + * -# When event is `nullptr` + * - Expected output: return `hipErrorInvalidResourceHandle` + * -# When event is created on one device but recorded on the other one + * - Expected output: return `hipErrorInvalidHandle` + * Test source + * ------------------------ + * - unit/event/hipEventRecord_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipEventRecord_spt_Negative") { + SECTION("Nullptr event") { + HIP_CHECK_ERROR(hipEventRecord_spt(nullptr, nullptr), hipErrorInvalidResourceHandle); + } + SECTION("Different devices") { + int devCount = 0; + HIP_CHECK(hipGetDeviceCount(&devCount)); + if (devCount > 1) { + // create event on dev=0 + HIP_CHECK(hipSetDevice(0)); + hipEvent_t start; + HIP_CHECK(hipEventCreate(&start)); + // start on device 0 but null stream on device 1 + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK_ERROR(hipEventRecord_spt(start, nullptr), hipErrorInvalidHandle); + HIP_CHECK(hipEventDestroy(start)); + } + } +} +/** + * End doxygen group EventTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt index 5e30302b885..bf082093438 100644 --- a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt @@ -12,6 +12,7 @@ if(HIP_PLATFORM MATCHES "amd") hipExtLaunchMultiKernelMultiDevice.cc launch_api.cc hipGetProcAddressLaunchCbExecCtrlApis.cc + hipLaunchCooperativeKernel_spt.cc ) else() # These functions are currently unimplemented on AMD diff --git a/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel_spt.cc b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel_spt.cc new file mode 100644 index 00000000000..5915e7e6b98 --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel_spt.cc @@ -0,0 +1,201 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#include "execution_control_common.hh" +/** + * @addtogroup hipLaunchCooperativeKernel_spt hipLaunchCooperativeKernel_spt + * @{ + * @ingroup ExecutionTest + * `hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream);` - + * launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute. + */ +/** + * Test Description + * ------------------------ + * - Basic test to check the functionality of hipLaunchCooperativeKernel_spt. + * Test source + * ------------------------ + * - catch\unit\executionControl\hipLaunchCooperativeKernel_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchCooperativeKernel_spt_Positive_Basic") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + SECTION("Cooperative kernel with no arguments") { + HIP_CHECK(hipLaunchCooperativeKernel_spt(reinterpret_cast(coop_kernel), dim3{2, 2, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + SECTION("Kernel with arguments using kernelParams") { + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel_42), dim3{1, 1, 1}, + dim3{1, 1, 1}, kernel_args, 0, nullptr)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} +/** + * Test Description + * ------------------------ + * - Basic test to check the functionality of hipLaunchCooperativeKernel_spt + * with positive parameters. + * Test source + * ------------------------ + * - catch\unit\executionControl\hipLaunchCooperativeKernel_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchCooperativeKernel_spt_Positive_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); + HIP_CHECK(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{x, 1, 1}, nullptr, 0, nullptr)); + } + SECTION("blockDim.y == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); + HIP_CHECK(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{y, 1, 1}, nullptr, 0, nullptr)); + } + SECTION("blockDim.z == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); + HIP_CHECK(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{z, 1, 1}, nullptr, 0, nullptr)); + } +} +/** + * Test Description + * ------------------------ + * - Basic test to check the functionality of hipLaunchCooperativeKernel_spt + * with negative parameters. + * Test source + * ------------------------ + * - catch\unit\executionControl\hipLaunchCooperativeKernel_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchCooperativeKernel_spt_Negative_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + SECTION("f == nullptr") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(static_cast(nullptr), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidDeviceFunction); + } + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{0, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 0, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 0}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{0, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 0, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 0}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{x, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, y, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, z}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); + const unsigned int dim = std::ceil(std::cbrt(max)); + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{dim, dim, dim}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + SECTION( + "gridDim.x * gridDim.y * gridDim.z > maxActiveBlocksPerMultiprocessor * " + "multiProcessorCount") { + int max_blocks; + HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, + reinterpret_cast(kernel), 1, 0)); + const unsigned int multiproc_count = + GetDeviceAttribute(hipDeviceAttributeMultiprocessorCount, 0); + const unsigned int dim = std::ceil(std::cbrt(max_blocks * multiproc_count)); + HIP_CHECK_ERROR( + hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{dim, dim, dim}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorCooperativeLaunchTooLarge); + } + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel_spt(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, max, nullptr), + hipErrorCooperativeLaunchTooLarge); + } +} +/** + * End doxygen group ExecutionTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/CMakeLists.txt b/projects/hip-tests/catch/unit/graph/CMakeLists.txt index d2ddfc93f16..e578096ac81 100644 --- a/projects/hip-tests/catch/unit/graph/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/graph/CMakeLists.txt @@ -93,6 +93,12 @@ if(HIP_PLATFORM MATCHES "amd") hipStreamCaptureExtModuleLaunchKernel.cc hipStreamBeginCaptureToGraph.cc hipGetProcAddressGraphApis.cc + hipStreamGetCaptureInfo_spt.cc + hipStreamGetCaptureInfo_v2_spt.cc + hipStreamIsCapturing_spt.cc + hipStreamBeginCapture_spt.cc + hipStreamEndCapture_spt.cc + hipGraphLaunch_spt.cc # Below files are disbled in NVIDIA as PSDB builds are failing due to lower CUDA version. hipGraphExecNodeSetParams.cc hipGraphNodeSetParams.cc diff --git a/projects/hip-tests/catch/unit/graph/hipGraphLaunch_spt.cc b/projects/hip-tests/catch/unit/graph/hipGraphLaunch_spt.cc new file mode 100644 index 00000000000..18dfd978eed --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphLaunch_spt.cc @@ -0,0 +1,127 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +/** + * @addtogroup hipGraphLaunch_spt hipGraphLaunch_spt + * @{ + * @ingroup GraphTest + * `hipGraphLaunch_spt(hipGraphExec_t graphExec, hipStream_t stream)` - + * Launches an executable graph in a stream + */ +static void HostFunctionSetToZero_spt(void* arg) { + int* test_number = reinterpret_cast(arg); + (*test_number) = 0; +} +static void HostFunctionAddOne_spt(void* arg) { + int* test_number = reinterpret_cast(arg); + (*test_number) += 1; +} +static void CreateTestExecutableGraph_spt(hipGraphExec_t* graph_exec, int* number) { + hipGraph_t graph; + hipGraphNode_t node_error; + hipGraphNode_t node_set_zero; + hipHostNodeParams params_set_to_zero = {HostFunctionSetToZero_spt, number}; + hipGraphNode_t node_add_one; + hipHostNodeParams params_set_add_one = {HostFunctionAddOne_spt, number}; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddHostNode(&node_set_zero, graph, nullptr, 0, ¶ms_set_to_zero)); + HIP_CHECK(hipGraphAddHostNode(&node_add_one, graph, &node_set_zero, 1, ¶ms_set_add_one)); + HIP_CHECK(hipGraphInstantiate(graph_exec, graph, &node_error, nullptr, 0)); + HIP_CHECK(hipGraphDestroy(graph)); +} +static void HipGraphLaunch_spt_Positive_Simple(hipStream_t stream) { + int number = 5; + hipGraphExec_t graph_exec; + CreateTestExecutableGraph_spt(&graph_exec, &number); + HIP_CHECK(hipGraphLaunch_spt(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(number == 1); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); +} +/** + * Test Description + * ------------------------ + * - Basic positive test for hipGraphLaunch_spt + * - stream as a created stream + * - with stream as hipStreamPerThread + * Test source + * ------------------------ + * - unit/graph/hipGraphLaunch_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipGraphLaunch_spt_Positive") { + SECTION("stream as a created stream") { + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HipGraphLaunch_spt_Positive_Simple(stream); + HIP_CHECK(hipStreamDestroy(stream)); + } + SECTION("with stream as hipStreamPerThread") { + HipGraphLaunch_spt_Positive_Simple(hipStreamPerThread); + } +} +/** + * Test Description + * ------------------------ + * - Negative parameter test for hipGraphLaunch_spt + * - graphExec is nullptr and stream is a created stream + * - graphExec is nullptr and stream is hipStreamPerThread + * - graphExec is an empty object + * - graphExec is destroyed before calling hipGraphLaunch_spt + * Test source + * ------------------------ + * - unit/graph/hipGraphLaunch_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipGraphLaunch_spt_Negative_Parameters") { + SECTION("graphExec is nullptr and stream is a created stream") { + hipStream_t stream; + hipError_t ret; + HIP_CHECK(hipStreamCreate(&stream)); + ret = hipGraphLaunch_spt(nullptr, stream); + HIP_CHECK(hipStreamDestroy(stream)); + REQUIRE(ret == hipErrorInvalidValue); + } + SECTION("graphExec is nullptr and stream is hipStreamPerThread") { + HIP_CHECK_ERROR(hipGraphLaunch_spt(nullptr, hipStreamPerThread), hipErrorInvalidValue); + } + SECTION("graphExec is an empty object") { + hipGraphExec_t graph_exec{}; + HIP_CHECK_ERROR(hipGraphLaunch_spt(graph_exec, hipStreamPerThread), hipErrorInvalidValue); + } + SECTION("graphExec is destroyed") { + int number = 5; + hipGraphExec_t graph_exec; + CreateTestExecutableGraph_spt(&graph_exec, &number); + HIP_CHECK(hipGraphLaunch_spt(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + REQUIRE(number == 1); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK_ERROR(hipGraphLaunch_spt(graph_exec, hipStreamPerThread), hipErrorInvalidValue); + } +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture_spt.cc b/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture_spt.cc new file mode 100644 index 00000000000..8eb06aba262 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture_spt.cc @@ -0,0 +1,1348 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include "stream_capture_common.hh" // NOLINT +/** + * @addtogroup hipStreamBeginCapture_spt hipStreamBeginCapture_spt + * @{ + * @ingroup GraphTest + * `hipStreamBeginCapture_spt(hipStream_t stream, hipStreamCaptureMode mode)` - + * begins graph capture on a stream + */ +static int gCbackIter = 0; +static __global__ void dummyKernel() { return; } +static __global__ void incrementKernel(int* data) { + atomicAdd(data, 1); + return; +} +static __global__ void myadd(int* A_d, int* B_d) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + A_d[myId] = A_d[myId] + B_d[myId]; +} +static __global__ void mymul(int* devMem, int value) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + devMem[myId] = devMem[myId] * value; +} +static void hostNodeCallback(void* data) { + REQUIRE(data == nullptr); + gCbackIter++; +} +template +void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(T); + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + // Host and Device allocation + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); + // Capture stream sequence + HIP_CHECK(hipStreamBeginCapture_spt(stream, mode)); + graphFunc(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream); + captureSequenceCompute(A_d.ptr(), B_h.ptr(), B_d.ptr(), N, stream); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + // Validate end capture is successful + REQUIRE(graph != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + std::fill_n(A_h.host_ptr(), N, static_cast(i)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Basic Functional Test for capturing created/hipStreamPerThread stream + * and replaying sequence. Test exercises the API on all available modes: + * - Linear sequence capture - each graph node has only one dependency + * - Branched sequence capture - some graph nodes have more than one + * dependency + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_Functional") { + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + EventsGuard events_guard(3); + StreamsGuard streams_guard(2); + SECTION("Linear graph capture") { + captureStreamAndLaunchGraph( + [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { + return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); + }, + captureMode, stream); + } + SECTION("Branched graph capture") { + captureStreamAndLaunchGraph( + [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, + hipStream_t stream) { + captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(), + events_guard.event_list()); + }, + captureMode, stream); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * - Begin capture on legacy/null stream + * - Begin capture on the already captured stream + * - Begin capture with invalid mode + * - Begin capture on uninitialized stream + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Negative_Parameters") { + const auto stream_type = GENERATE(Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + SECTION("Stream capture on legacy/null stream returns error code.") { + HIP_CHECK_ERROR(hipStreamBeginCapture_spt(nullptr, hipStreamCaptureModeGlobal), hipSuccess); + } + SECTION("Capturing hipStream status with same stream again") { + HIP_CHECK(hipStreamBeginCapture_spt(stream, hipStreamCaptureModeGlobal)); + HIP_CHECK_ERROR(hipStreamBeginCapture_spt(stream, hipStreamCaptureModeGlobal), + hipErrorIllegalState); + } + SECTION("Creating hipStream with invalid mode") { + HIP_CHECK_ERROR(hipStreamBeginCapture_spt(stream, hipStreamCaptureMode(-1)), + hipErrorInvalidValue); + } +} +/** + * Test Description + * ------------------------ + * - Basic Test to verify basic API functionality with + * created/hipStreamPerThread stream for available modes + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_Basic") { + hipGraph_t graph{nullptr}; + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t s = stream_guard.stream(); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + HIP_CHECK(hipStreamBeginCapture_spt(s, captureMode)); + HIP_CHECK(hipStreamEndCapture(s, &graph)); + HIP_CHECK(hipGraphDestroy(graph)); +} +static void interStrmEventSyncCapture_spt(const hipStream_t& stream1, const hipStream_t& stream2) { + hipGraph_t graph1{nullptr}, graph2{nullptr}; + hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; + EventsGuard events_guard(1); + hipEvent_t event = events_guard[0]; + HIP_CHECK(hipEventCreate(&event)); + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(event, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, event, 0)); + dummyKernel<<<1, 1, 0, stream1>>>(); + HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); + HIP_CHECK(hipStreamBeginCapture_spt(stream2, hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, stream2>>>(); + dummyKernel<<<1, 1, 0, stream2>>>(); + HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); + size_t numNodes1 = 0, numNodes2 = 0; + HIP_CHECK(hipGraphGetNodes(graph1, nullptr, &numNodes1)); + HIP_CHECK(hipGraphGetNodes(graph2, nullptr, &numNodes2)); + REQUIRE(numNodes1 == 1); + REQUIRE(numNodes2 == 2); + HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); + REQUIRE(graphExec1 != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); + REQUIRE(graphExec2 != nullptr); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + // Execute the Graphs + HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); + HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamSynchronize(stream2)); + } + // Free + HIP_CHECK(hipGraphExecDestroy(graphExec2)); + HIP_CHECK(hipGraphExecDestroy(graphExec1)); + HIP_CHECK(hipGraphDestroy(graph2)); + HIP_CHECK(hipGraphDestroy(graph1)); +} +/** + * Test Description + * ------------------------ + * - Test to verify inter stream event synchronization- Waiting on an event + recorded on a captured stream. Initiate capture on stream1, record an event on + stream1, wait for the event on stream2, end the stream1 capture and initiate + stream capture on stream2 + * - Streams are created with hipStreamDefault/hipStreamNonBlocking flag + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_InterStrmEventSync_Flags") { + const auto stream_flags1 = GENERATE(hipStreamDefault, hipStreamNonBlocking); + const auto stream_flags2 = GENERATE(hipStreamDefault, hipStreamNonBlocking); + StreamGuard stream_guard1(Streams::withFlags, stream_flags1); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::withFlags, stream_flags2); + hipStream_t stream2 = stream_guard2.stream(); + interStrmEventSyncCapture_spt(stream1, stream2); +} +/** + * Test Description + * ------------------------ + * - Test to verify inter stream event synchronization- Waiting on an event + * recorded on a captured stream. Initiate capture on stream1, record an event + * on stream1, wait for the event on stream2, end the stream1 capture and + * initiate stream capture on stream2 + * - Stream1 is created with minimal priority, stream 2 is created with + * maximal priority + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_InterStrmEventSync_Priority") { // NOLINT + int minPriority = 0, maxPriority = 0; + HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); + hipStream_t stream2 = stream_guard2.stream(); + interStrmEventSyncCapture_spt(stream1, stream2); +} +static void colligatedStrmCapture_spt(const hipStream_t& stream1, const hipStream_t& stream2) { + hipGraph_t graph1{nullptr}, graph2{nullptr}; + hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; + EventsGuard events_guard(1); + hipEvent_t event = events_guard[0]; + HIP_CHECK(hipEventCreate(&event)); + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(event, stream1)); + HIP_CHECK(hipStreamBeginCapture_spt(stream2, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamWaitEvent(stream1, event, 0)); + dummyKernel<<<1, 1, 0, stream1>>>(); + HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); + dummyKernel<<<1, 1, 0, stream2>>>(); + HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); + // Validate end capture is successful + REQUIRE(graph2 != nullptr); + REQUIRE(graph1 != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); + REQUIRE(graphExec1 != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); + REQUIRE(graphExec2 != nullptr); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + // Execute the Graphs + HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); + HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamSynchronize(stream2)); + } + // Free + HIP_CHECK(hipGraphExecDestroy(graphExec2)); + HIP_CHECK(hipGraphExecDestroy(graphExec1)); + HIP_CHECK(hipGraphDestroy(graph2)); + HIP_CHECK(hipGraphDestroy(graph1)); +} +/** + * Test Description + * ------------------------ + * - Test to verify colligated streams capture. Capture operation sequences + * queued in 2 streams by overlapping the 2 captures. Initiate capture on + * stream1, record an event on stream1, initiate capture on stream 2, end both + * stream captures + * - Streams are created with hipStreamDefault/hipStreamNonBlocking flag + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_ColligatedStrmCapture_Flags") { // NOLINT + const auto stream_flags1 = GENERATE(hipStreamDefault, hipStreamNonBlocking); + const auto stream_flags2 = GENERATE(hipStreamDefault, hipStreamNonBlocking); + StreamGuard stream_guard1(Streams::withFlags, stream_flags1); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::withFlags, stream_flags2); + hipStream_t stream2 = stream_guard2.stream(); + colligatedStrmCapture_spt(stream1, stream2); +} +/** + * Test Description + * ------------------------ + * - Test to verify colligated streams capture. Capture operation sequences + * queued in 2 streams by overlapping the 2 captures. Initiate capture on + * stream1, record an event on stream1, initiate capture on stream 2, end both + * stream captures + * - Stream1 is created with minimal priority, stream 2 is created with + * maximal priority + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_ColligatedStrmCapture_Prio") { // NOLINT + int minPriority = 0, maxPriority = 0; + HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); + hipStream_t stream2 = stream_guard2.stream(); + colligatedStrmCapture_spt(stream1, stream2); +} +static void colligatedStrmCaptureFunc_spt(const hipStream_t& stream1, const hipStream_t& stream2) { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(int); + hipGraph_t graph1{nullptr}, graph2{nullptr}; + hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; + // Host and device allocation + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard C_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard C_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard D_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); + // Capture 2 streams + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamBeginCapture_spt(stream2, hipStreamCaptureModeGlobal)); + captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); + captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); + captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); + captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); + HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); + HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); + // Validate end capture is successful + REQUIRE(graph2 != nullptr); + REQUIRE(graph1 != nullptr); + // Create Executable Graphs + HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); + REQUIRE(graphExec1 != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); + REQUIRE(graphExec2 != nullptr); + // Execute the Graphs + for (size_t iter = 0; iter < kLaunchIters; iter++) { + std::fill_n(A_h.host_ptr(), N, iter); + std::fill_n(C_h.host_ptr(), N, iter); + HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); + HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamSynchronize(stream2)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(iter * iter), N); + ArrayFindIfNot(D_h.host_ptr(), static_cast(iter * iter), N); + } + // Free + HIP_CHECK(hipGraphExecDestroy(graphExec2)); + HIP_CHECK(hipGraphExecDestroy(graphExec1)); + HIP_CHECK(hipGraphDestroy(graph2)); + HIP_CHECK(hipGraphDestroy(graph1)); +} +/** + * Test Description + * ------------------------ + * - Create 2 streams. Start capturing both stream1 and stream2 at the same + * time. On stream1 queue memcpy, kernel and memcpy operations and on stream2 + * queue memcpy, kernel and memcpy operations. Execute both the captured graphs + * and validate the results + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_ColligatedStrmCaptureFunc") { // NOLINT + StreamGuard stream_guard1(Streams::created); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::created); + hipStream_t stream2 = stream_guard2.stream(); + colligatedStrmCaptureFunc_spt(stream1, stream2); +} +static void threadStrmCaptureFunc_spt(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d, + hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) { + // Capture stream + HIP_CHECK(hipStreamBeginCapture_spt(stream, mode)); + captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); + captureSequenceCompute(A_d, B_h, B_d, N, stream); + HIP_CHECK(hipStreamEndCapture(stream, graph)); +} +static void multithreadedTest_spt(hipStreamCaptureMode mode) { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(int); + hipGraph_t graph1{nullptr}, graph2{nullptr}; + hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; + StreamGuard stream_guard1(Streams::created); + hipStream_t stream1 = stream_guard1.stream(); + StreamGuard stream_guard2(Streams::created); + hipStream_t stream2 = stream_guard2.stream(); + // Host and device allocation + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard C_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard D_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard C_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); + // Launch 2 threads to capture the 2 streams into graphs + std::thread t1(threadStrmCaptureFunc_spt, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), + B_d.ptr(), &graph1, N, mode); + std::thread t2(threadStrmCaptureFunc_spt, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), + D_d.ptr(), &graph2, N, mode); + t1.join(); + t2.join(); + // Create Executable Graphs + HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); + REQUIRE(graphExec1 != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); + REQUIRE(graphExec2 != nullptr); + // Execute the Graphs + for (size_t iter = 0; iter < kLaunchIters; iter++) { + std::fill_n(A_h.host_ptr(), N, iter); + std::fill_n(C_h.host_ptr(), N, iter); + HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); + HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamSynchronize(stream2)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(iter * iter), N); + ArrayFindIfNot(D_h.host_ptr(), static_cast(iter * iter), N); + } + // Free + HIP_CHECK(hipGraphExecDestroy(graphExec2)); + HIP_CHECK(hipGraphExecDestroy(graphExec1)); + HIP_CHECK(hipGraphDestroy(graph2)); + HIP_CHECK(hipGraphDestroy(graph1)); +} +/** + * Test Description + * ------------------------ + * - Capture 2 streams in parallel using threads. Execute the graphs in + * sequence in main thread and validate the results for all available capture + * modes + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_Multithreaded") { + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + multithreadedTest_spt(captureMode); +} +/** + * Test Description + * ------------------------ + * - Test to verify inter stream event synchronization- Waiting on an event + * recorded on a captured stream. + * - Initiate capture on stream1, record an event on stream1, wait for + * the event on stream2, end the stream1 capture and initiate stream capture on + * stream2. Repeat the same sequence between stream2 and stream3 + * - Initiate capture on stream1, record an event on stream1, wait for + * the event on stream2 and stream3, end the stream1 capture and initiate stream + * capture on stream2 and stream3 + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_Multiplestrms") { + StreamsGuard streams(3); + hipGraph_t graphs[3]; + size_t numNodes1 = 0, numNodes2 = 0, numNodes3 = 0; + SECTION("Capture Multiple stream with interdependent events") { + EventsGuard events(2); + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + dummyKernel<<<1, 1, 0, streams[0]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); + HIP_CHECK(hipStreamBeginCapture_spt(streams[1], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[1], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); + dummyKernel<<<1, 1, 0, streams[1]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); + HIP_CHECK(hipStreamBeginCapture_spt(streams[2], hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, streams[2]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); + HIP_CHECK(hipGraphGetNodes(graphs[0], nullptr, &numNodes1)); + HIP_CHECK(hipGraphGetNodes(graphs[1], nullptr, &numNodes2)); + HIP_CHECK(hipGraphGetNodes(graphs[2], nullptr, &numNodes3)); + REQUIRE(numNodes1 == 1); + REQUIRE(numNodes2 == 1); + REQUIRE(numNodes3 == 1); + } + SECTION("Capture Multiple stream with single event for Stream per Thread") { + EventsGuard events(1); + hipEvent_t event = events[0]; + HIP_CHECK(hipEventCreate(&event)); + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(event, streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], event, 0)); + HIP_CHECK(hipStreamWaitEvent(streams[2], event, 0)); + dummyKernel<<<1, 1, 0, streams[0]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); + HIP_CHECK(hipStreamBeginCapture_spt(streams[1], hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, streams[1]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); + HIP_CHECK(hipStreamBeginCapture_spt(streams[2], hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, streams[2]>>>(); + HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); + HIP_CHECK(hipGraphGetNodes(graphs[0], nullptr, &numNodes1)); + HIP_CHECK(hipGraphGetNodes(graphs[1], nullptr, &numNodes2)); + HIP_CHECK(hipGraphGetNodes(graphs[2], nullptr, &numNodes3)); + REQUIRE(numNodes1 == 1); + REQUIRE(numNodes2 == 1); + REQUIRE(numNodes3 == 1); + } + for (int i = 0; i < 3; i++) { + HIP_CHECK(hipGraphDestroy(graphs[i])); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify queue operations (increment kernels) in 3 streams. Start + * capturing the streams after some operations have been queued. This scenario + * validates that only operations queued after hipStreamBeginCapture_spt are + * captured in the graph + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_CapturingFromWithinStrms") { + constexpr int INCREMENT_KERNEL_FINALEXP_VAL = 7; + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + StreamsGuard streams(3); + EventsGuard events(3); + // Create a device memory of size int and initialize it to 0 + LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); + int* hostMem = hostMem_g.host_ptr(); + int* devMem = devMem_g.ptr(); + HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + // Start Capturing + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[0], 0)); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); + HIP_CHECK(hipEventRecord(events[1], streams[1])); + HIP_CHECK(hipEventRecord(events[2], streams[2])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0])); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture + // Reset device memory + HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + // Create Executable Graphs + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); + HIP_CHECK(hipStreamSynchronize(streams[0])); + REQUIRE((*hostMem) == INCREMENT_KERNEL_FINALEXP_VAL); + HIP_CHECK(hipGraphExecDestroy(graphExec)) + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Detecting invalid capture. Create 2 streams s1 and s2. Start capturing + * s1. Create event dependency between s1 and s2 using event record and event + * wait. Try capturing s2. hipStreamBeginCapture_spt must return error + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Negative_DetectingInvalidCapture") { + StreamsGuard streams(2); + EventsGuard events(1); + hipEvent_t event = events[0]; + hipGraph_t graph; + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(event, streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], event, 0)); + dummyKernel<<<1, 1, 0, streams[0]>>>(); + // Since stream[1] is already in capture mode due to event wait + // hipStreamBeginCapture_spt on stream[1] is expected to return error. + HIP_CHECK_ERROR(hipStreamBeginCapture_spt(streams[1], hipStreamCaptureModeGlobal), + hipErrorIllegalState); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify wtream reuse. Capture multiple graphs from the same + * stream. Validate graphs are captured correctly + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_CapturingMultGraphsFrom1Strm") { // NOLINT + hipGraph_t graphs[3]; + StreamGuard stream_guard(Streams::created); + hipStream_t stream1 = stream_guard.stream(); + // Create a device memory of size int and initialize it to 0 + LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); + int* hostMem = hostMem_g.host_ptr(); + int* devMem = devMem_g.ptr(); + HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + for (int i = 0; i < 3; i++) { + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1)); + HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i])); + } + // Instantiate and execute all graphs + for (int i = 0; i < 3; i++) { + hipGraphExec_t graphExec{nullptr}; + HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); + HIP_CHECK(hipGraphInstantiate(&graphExec, graphs[i], nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, stream1)); + HIP_CHECK(hipStreamSynchronize(stream1)); + REQUIRE((*hostMem) == (i + 1)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graphs[i])); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify synchronization during stream capture returns an error: + * - Synchronize stream during capture + * - Synchronize device during capture + * - Synchronize event during capture + * - Query stream during capture + * - Query for an event during capture + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Negative_CheckingSyncDuringCapture") { // NOLINT + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + EventsGuard events_guard(1); + hipEvent_t e = events_guard[0]; + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + HIP_CHECK(hipStreamBeginCapture_spt(stream, captureMode)); + SECTION("Synchronize stream during capture") { + HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported); + } + SECTION("Query stream during capture") { + HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported); + } + SECTION("Synchronize device during capture") { + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported); + } + SECTION("Synchronize event during capture") { + HIP_CHECK(hipEventRecord(e, stream)); + HIP_CHECK_ERROR(hipEventSynchronize(e), hipErrorCapturedEvent); + } + SECTION("Query for an event during capture") { + HIP_CHECK(hipEventRecord(e, stream)); + HIP_CHECK_ERROR(hipEventQuery(e), hipErrorCapturedEvent); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify unsafe API calls during stream capture with initiated + * with hipStreamCaptureModeGlobal and hipStreamCaptureModeThreadLocal return an + * error: + * - hipMalloc during capture + * - hipMemcpy during capture + * - hipMemset during capture + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_UnsafeCallsDuringCapture") { + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + LinearAllocGuard hostMem(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem(LinearAllocs::hipMalloc, sizeof(int)); + int* devMem2; + const hipStreamCaptureMode captureMode = + GENERATE(hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal); + HIP_CHECK(hipStreamBeginCapture_spt(stream, captureMode)); + SECTION("hipMalloc during capture") { + HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported); + } + SECTION("hipMemcpy during capture") { + HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice), + hipErrorStreamCaptureImplicit); + } + SECTION("hipMemset during capture") { + HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify end stream capture when the stream capture is still in + * progress: + * -# Abruptly end stream capture when stream capture is in progress in + * forked stream. hipStreamEndCapture must return an error + * -# Abruptly end stream capture when operations in forked stream are + * still waiting to be captured. hipStreamEndCapture must return an error + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Negative_EndingCapwhenCapInProg") { + hipGraph_t graph{nullptr}; + StreamsGuard streams_guard(2); + hipStream_t stream1 = streams_guard[0]; + hipStream_t stream2 = streams_guard[1]; + SECTION("Abruptly end strm capture when in progress in forked strm") { + EventsGuard events_guard(1); + hipEvent_t e = events_guard[0]; + HIP_CHECK(hipEventCreate(&e)); + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, stream1>>>(); + HIP_CHECK(hipEventRecord(e, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); + dummyKernel<<<1, 1, 0, stream2>>>(); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); + } + SECTION("End strm capture when forked strm still has operations") { + EventsGuard events_guard(2); + hipEvent_t e1 = events_guard[0]; + hipEvent_t e2 = events_guard[1]; + HIP_CHECK(hipStreamBeginCapture_spt(stream1, hipStreamCaptureModeGlobal)); + dummyKernel<<<1, 1, 0, stream1>>>(); + HIP_CHECK(hipEventRecord(e1, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, e1, 0)); + dummyKernel<<<1, 1, 0, stream2>>>(); + HIP_CHECK(hipEventRecord(e2, stream2)); + HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0)); + dummyKernel<<<1, 1, 0, stream2>>>(); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); + } +} +/** + * Test Description + * ------------------------ + * - Testing independent stream capture using multiple GPUs. Capture a stream + * in each device context and execute the captured graph in the context GPU + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_MultiGPU") { + int devcount = 0; + HIP_CHECK(hipGetDeviceCount(&devcount)); + // If only single GPU is detected then return + if (devcount < 2) { + SUCCEED("skipping the testcases as numDevices < 2"); + return; + } + hipStream_t* stream = reinterpret_cast(malloc(devcount * sizeof(hipStream_t))); + REQUIRE(stream != nullptr); + hipGraph_t* graph = reinterpret_cast(malloc(devcount * sizeof(hipGraph_t))); + REQUIRE(graph != nullptr); + int **devMem{nullptr}, **hostMem{nullptr}; + hostMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); + REQUIRE(hostMem != nullptr); + devMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); + REQUIRE(devMem != nullptr); + hipGraphExec_t* graphExec = + reinterpret_cast(malloc(devcount * sizeof(hipGraphExec_t))); + // Capture stream in each device + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipStreamCreate(&stream[dev])); + hostMem[dev] = reinterpret_cast(malloc(sizeof(int))); + HIP_CHECK(hipMalloc(&devMem[dev], sizeof(int))); + HIP_CHECK(hipStreamBeginCapture_spt(stream[dev], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemsetAsync(devMem[dev], 0, sizeof(int), stream[dev])); + for (int i = 0; i < (dev + 1); i++) { + incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]); + } + HIP_CHECK( + hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev])); + HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev])); + } + // Launch the captured graphs in the respective device + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev])); + } + // Validate output + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipStreamSynchronize(stream[dev])); + REQUIRE((*hostMem[dev]) == (dev + 1)); + } + // Destroy all device resources + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipGraphExecDestroy(graphExec[dev])); + HIP_CHECK(hipStreamDestroy(stream[dev])); + } + free(graphExec); + free(hostMem); + free(devMem); + free(stream); + free(graph); +} +/** + * Test Description + * ------------------------ + * - Test Nested Stream Capture Functionality: Create 3 streams. Capture s1, + * record event e1 on s1, wait for event e1 on s2 and queue operations in s1. + * Record event e2 on s2 and wait for it on s3. Queue operations on both s2 and + * s3. Record event e4 on s3 and wait for it in s1. Record event e3 on s2 and + * wait for it in s1. End stream capture on s1. Execute the graph and verify the + * result. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_nestedStreamCapture") { + constexpr int INCREMENT_KERNEL_FINALEXP_VAL = 7; + hipGraph_t graph{nullptr}; + StreamsGuard streams(3); + EventsGuard events(4); + // Create a device memory of size int and initialize it to 0 + LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + // Start Capturing stream1 + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + HIP_CHECK(hipEventRecord(events[1], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); + HIP_CHECK(hipEventRecord(events[2], streams[1])); + HIP_CHECK(hipEventRecord(events[3], streams[2])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); + HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault, + streams[0])); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture + // Reset device memory + HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + // Create Executable Graphs + hipGraphExec_t graphExec{nullptr}; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); + HIP_CHECK(hipStreamSynchronize(streams[0])); + REQUIRE((*hostMem_g.host_ptr()) == INCREMENT_KERNEL_FINALEXP_VAL); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test Nested Stream Capture Functionality: Create 3 streams. Capture s1, + * record event e1 on s1, wait for event e1 on s2 and queue operations in s1. + * Record event e2 on s2 and wait for it on s3. Queue operations on both s2 and + * s3. Record event e4 on s3 and wait for it in s1. Record event e3 on s2 and + * wait for it in s1. End stream capture on s1. Queue operations on both s2 and + * s3, and capture their graphs. Execute the graphs and verify the result. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_streamReuse") { + constexpr int increment_kernel_vals[3] = {7, 3, 5}; + hipGraph_t graphs[3]; + StreamsGuard streams(3); + EventsGuard events(4); + LinearAllocGuard hostMem_g1 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g2 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g3 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g1 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g2 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g3 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()}; + std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()}; + // Create a device memory of size int and initialize it to 0 + for (int i = 0; i < 3; i++) { + memset(hostMem[i], 0, sizeof(int)); + HIP_CHECK(hipMemset(devMem[i], 0, sizeof(int))); + } + HIP_CHECK(hipDeviceSynchronize()); + // Start Capturing stream1 + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + HIP_CHECK(hipEventRecord(events[1], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[0]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); + HIP_CHECK(hipEventRecord(events[2], streams[1])); + HIP_CHECK(hipEventRecord(events[3], streams[2])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); + HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0])); + HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture + // Start capturing graph2 from stream 2 + HIP_CHECK(hipStreamBeginCapture_spt(streams[1], hipStreamCaptureModeGlobal)); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); + incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); + HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1])); + HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture + // Start capturing graph3 from stream 3 + HIP_CHECK(hipStreamBeginCapture_spt(streams[2], hipStreamCaptureModeGlobal)); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); + incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); + HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2])); + HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture + // Reset device memory + HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int))); + HIP_CHECK(hipMemset(devMem[1], 0, sizeof(int))); + HIP_CHECK(hipMemset(devMem[2], 0, sizeof(int))); + HIP_CHECK(hipDeviceSynchronize()); + // Create Executable Graphs and verify graphs + for (int i = 0; i < 3; i++) { + hipGraphExec_t graphExec{nullptr}; + HIP_CHECK(hipMemset(devMem[i], 0, sizeof(int))); + HIP_CHECK(hipGraphInstantiate(&graphExec, graphs[i], nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streams[i])); + HIP_CHECK(hipStreamSynchronize(streams[i])); + REQUIRE((*hostMem[i]) == increment_kernel_vals[i]); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graphs[i])); + } +} +/** + * Test Description + * ------------------------ + * - Capture a complex graph containing multiple independent memcpy, kernel + * and host nodes. Launch the graph on random input data and validate the output + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_captureComplexGraph") { + constexpr int GRIDSIZE = 256; + constexpr int BLOCKSIZE = 256; + constexpr int CONST_KER1_VAL = 3; + constexpr int CONST_KER2_VAL = 2; + constexpr int CONST_KER3_VAL = 5; + hipGraph_t graph{nullptr}; + StreamsGuard streams(5); + EventsGuard events(7); + // Allocate Device memory and Host memory + size_t N = GRIDSIZE * BLOCKSIZE; + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ch = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); + // Capture streams into graph + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0)); + HIP_CHECK( + hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4])); + hipHostFn_t fn = hostNodeCallback; + HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr)); + HIP_CHECK(hipEventRecord(events[1], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0)); + int* Ad_2nd_half = Ad.ptr() + N / 2; + int* Ad_1st_half = Ad.ptr(); + mymul<<>>(Ad_2nd_half, CONST_KER2_VAL); + mymul<<>>(Ad_1st_half, CONST_KER1_VAL); + HIP_CHECK(hipEventRecord(events[2], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0)); + mymul<<>>(Ad_1st_half, CONST_KER3_VAL); + HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr)); + HIP_CHECK(hipEventRecord(events[6], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0)); + HIP_CHECK(hipEventRecord(events[5], streams[4])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[5], 0)); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipEventRecord(events[3], streams[2])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); + HIP_CHECK(hipEventRecord(events[4], streams[3])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0)); + HIP_CHECK( + hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture + // Execute and test the graph + hipGraphExec_t graphExec{nullptr}; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + // Verify graph + for (size_t iter = 0; iter < kLaunchIters; iter++) { + std::fill_n(Ah.host_ptr(), N, iter); + std::fill_n(Bh.host_ptr(), N, iter); + HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); + HIP_CHECK(hipStreamSynchronize(streams[0])); + for (size_t i = 0; i < N; i++) { + if (i > (N / 2 - 1)) { + REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL)); + } else { + REQUIRE(Ch.host_ptr()[i] == + (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); + } + } + } + REQUIRE(gCbackIter == (2 * kLaunchIters)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify capturing empty streams (parent + forked streams) and + * validate the captured graph has no nodes + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_Positive_captureEmptyStreams") { + hipGraph_t graph{nullptr}; + // Stream and event create + StreamsGuard streams(3); + EventsGuard events(3); + // Capture streams into graph + HIP_CHECK(hipStreamBeginCapture_spt(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[0], 0)); + HIP_CHECK(hipEventRecord(events[1], streams[1])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); + HIP_CHECK(hipEventRecord(events[2], streams[2])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture + size_t numNodes = 0; + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + REQUIRE(numNodes == 0); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify hipStreamSynchronize on a stream works when + * stream capture on another stream is ongoing. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_StreamSync_OngoingCapture") { + hipStreamCaptureMode flag = hipStreamCaptureModeRelaxed; + constexpr int GRIDSIZE = 1; + constexpr int BLOCKSIZE = 512; + constexpr int VALUE1 = 7, VALUE2 = 11; + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + // Allocate device memory + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + // Fill input data + std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); + std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); + // Stream create + StreamsGuard stream0(1); + // Capture streams into graph + SECTION("Stream Creation Before Capture") { + StreamsGuard stream1(1); + HIP_CHECK(hipStreamBeginCapture_spt(stream0[0], flag)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipStreamSynchronize(stream1[0])); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture + } + SECTION("Synchronizing multiple streams during Capture") { + StreamsGuard stream1(1), stream2(1); + HIP_CHECK(hipStreamBeginCapture_spt(stream0[0], flag)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream2[0])); + HIP_CHECK(hipStreamSynchronize(stream1[0])); + HIP_CHECK(hipStreamSynchronize(stream2[0])); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture + } + SECTION("Stream Creation After Capture") { + HIP_CHECK(hipStreamBeginCapture_spt(stream0[0], flag)); + StreamsGuard stream1(1); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipStreamSynchronize(stream1[0])); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture + } + SECTION("Stream Synchronize Before Capture") { + StreamsGuard stream1(1); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipStreamSynchronize(stream1[0])); + HIP_CHECK(hipStreamBeginCapture_spt(stream0[0], flag)); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture + } + SECTION("Stream Synchronize After Capture") { + HIP_CHECK(hipStreamBeginCapture_spt(stream0[0], flag)); + myadd<<>>(Ad.ptr(), Bd.ptr()); + HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture + StreamsGuard stream1(1); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipStreamSynchronize(stream1[0])); + } + // Execute and test the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, stream0[0])); + HIP_CHECK(hipStreamSynchronize(stream0[0])); + // Check output + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); + for (int idx = 0; idx < BLOCKSIZE; idx++) { + REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} +static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) { + StreamsGuard stream(1); + HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); + *error = hipStreamSynchronize(stream[0]); +} +// Local function executed as thread +static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, + int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) { + StreamsGuard stream(1); + // Capture streams into graph + HIP_CHECK(hipStreamBeginCapture_spt(stream[0], flag)); + std::thread t1(strmSyncThread, Ah, Ad, Bh, Bd, BLOCKSIZE, error); + t1.join(); + myadd<<>>(Ad, Bd); + if (flag == hipStreamCaptureModeGlobal) { + HIP_CHECK_ERROR(hipStreamEndCapture(stream[0], graph), + hipErrorStreamCaptureInvalidated); // End Capture + } else { + HIP_CHECK(hipStreamEndCapture(stream[0], graph)); // End Capture + } +} +/** + * Test Description + * ------------------------ + * - Test to verify hipStreamSynchronize on a stream behavior when + * stream capture on another stream is ongoing in another thread. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamBeginCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamBeginCapture_spt_StreamSync_OngoingCapture_MThread") { // NOLINT + constexpr int GRIDSIZE = 1; + constexpr int BLOCKSIZE = 512; + constexpr int VALUE1 = 7, VALUE2 = 11; + hipGraph_t graph{nullptr}; + // Allocate device memory + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + // Fill input data + std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); + std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); + // Stream create + hipError_t error = hipSuccess; + SECTION("Capture Flag = hipStreamCaptureModeGlobal Single Threaded") { + StreamsGuard stream(2); + // Capture streams into graph + HIP_CHECK(hipStreamBeginCapture_spt(stream[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + error = hipStreamSynchronize(stream[1]); + REQUIRE(error == hipErrorStreamCaptureUnsupported); + } + SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") { + StreamsGuard stream(2); + // Capture streams into graph + HIP_CHECK(hipStreamBeginCapture_spt(stream[0], hipStreamCaptureModeThreadLocal)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + error = hipStreamSynchronize(stream[1]); + REQUIRE(error == hipErrorStreamCaptureUnsupported); + } + SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") { + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeGlobal, &error); + REQUIRE(error == hipErrorStreamCaptureUnsupported); + } + SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") { + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeThreadLocal, &error); + REQUIRE(error == hipSuccess); + } + SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") { + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeRelaxed, &error); + REQUIRE(error == hipSuccess); + } + if (graph != nullptr) { + hipGraphExec_t graphExec{nullptr}; + StreamsGuard stream(1); + // Execute and test the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, stream[0])); + HIP_CHECK(hipStreamSynchronize(stream[0])); + // Check output + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); + for (int idx = 0; idx < BLOCKSIZE; idx++) { + REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + } +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/hipStreamEndCapture_spt.cc b/projects/hip-tests/catch/unit/graph/hipStreamEndCapture_spt.cc new file mode 100644 index 00000000000..f58cd2cc75e --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipStreamEndCapture_spt.cc @@ -0,0 +1,170 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include "stream_capture_common.hh" +/** + * @addtogroup hipStreamEndCapture_spt hipStreamEndCapture_spt + * @{ + * @ingroup GraphTest + * `hipError_t hipStreamEndCapture_spt(hipStream_t stream, hipGraph_t* pGraph)` - + * Ends capture on a stream, returning the captured graph. + */ +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# End capture on legacy/null stream + * -# End capture when graph is nullptr + * -# End capture on stream where capture has not yet started + * -# Destroy stream and try to end capture + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamEndCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamEndCapture_spt_Negative_Parameters") { + hipGraph_t graph{nullptr}; + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + SECTION("Pass stream as nullptr") { + HIP_CHECK_ERROR(hipStreamEndCapture_spt(nullptr, &graph), hipErrorIllegalState); + } + SECTION("Pass graph as nullptr") { + HIP_CHECK_ERROR(hipStreamEndCapture_spt(stream, nullptr), hipErrorIllegalState); + } + SECTION("End capture on stream where capture has not yet started") { + HIP_CHECK_ERROR(hipStreamEndCapture_spt(stream, &graph), hipErrorIllegalState); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify no error occurs when graph is destroyed before capture + * - ends + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamEndCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamEndCapture_spt_Positive_GraphDestroy") { + hipGraph_t graph{nullptr}; + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamEndCapture_spt(stream, &graph)); +} +static void thread_func_neg(hipStream_t stream, hipGraph_t graph) { + HIP_ASSERT(hipErrorStreamCaptureWrongThread == hipStreamEndCapture_spt(stream, &graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify that when capture is initiated on a thread with mode + * other than hipStreamCaptureModeRelaxed and try to end capture from different + * thread, it is expected to return hipErrorStreamCaptureWrongThread + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamEndCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamEndCapture_spt_Negative_Thread") { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + hipGraph_t graph{nullptr}; + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + std::thread t(thread_func_neg, stream, graph); + t.join(); +#if HT_AMD + HIP_CHECK(hipStreamEndCapture_spt(stream, &graph)); +#endif + HIP_CHECK(hipGraphDestroy(graph)); +} +static void thread_func_pos(hipStream_t stream, hipGraph_t* graph) { + HIP_CHECK(hipStreamEndCapture_spt(stream, graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify that when capture is initiated on a thread with + * hipStreamCaptureModeRelaxed mode, end capture in a different thread is + * successful + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamEndCapture_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamEndCapture_spt_Positive_Thread") { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = hipStreamCaptureModeRelaxed; + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + std::thread t(thread_func_pos, stream, &graph); + t.join(); + // Validate end capture is successful + REQUIRE(graph != nullptr); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + std::fill_n(A_h.host_ptr(), N, static_cast(i)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i), N); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_spt.cc b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_spt.cc new file mode 100644 index 00000000000..37b72628c2f --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_spt.cc @@ -0,0 +1,156 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include "stream_capture_common.hh" +/** + * @addtogroup hipStreamGetCaptureInfo_spt hipStreamGetCaptureInfo_spt + * @{ + * @ingroup GraphTest + * `hipStreamGetCaptureInfo_spt(hipStream_t stream, hipStreamCaptureStatus + * *pCaptureStatus, unsigned long long *pId)` - + * Get capture status of a stream + */ +void checkStreamCaptureInfo_spt(hipStreamCaptureMode mode, hipStream_t stream) { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone}; + unsigned long long capSequenceID = 0; // NOLINT + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + HIP_CHECK(hipStreamBeginCapture(stream, mode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + // Capture status is active and sequence id is valid + HIP_CHECK(hipStreamGetCaptureInfo_spt(stream, &captureStatus, &capSequenceID)); + REQUIRE(captureStatus == hipStreamCaptureStatusActive); + REQUIRE(capSequenceID > 0); + // End capture and verify graph is returned + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + // verify capture status is inactive and sequence id is not updated + capSequenceID = 0; + HIP_CHECK(hipStreamGetCaptureInfo_spt(stream, &captureStatus, &capSequenceID)); + REQUIRE(captureStatus == hipStreamCaptureStatusNone); + REQUIRE(capSequenceID == 0); + // Verify api still returns capture status when capture ID is nullptr + HIP_CHECK(hipStreamGetCaptureInfo_spt(stream, &captureStatus, nullptr)); + REQUIRE(captureStatus == hipStreamCaptureStatusNone); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + std::fill_n(A_h.host_ptr(), N, static_cast(i)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i), N); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)) + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify that hipStreamCaptureStatusActive is returned during + * stream capture. When capture is ended, status is changed to + * hipStreamCaptureStatusNone and error is not reported when some arguments + * are not passed + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_spt_Positive_Functional") { + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + checkStreamCaptureInfo_spt(captureMode, stream); +} +/** + * Test Description + * ------------------------ + * - Test starts stream capture on multiple streams and verifies + * uniqueness of identifiers returned. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_spt_Positive_UniqueID") { + constexpr int numStreams = 100; + hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone}; + std::vector idlist; + unsigned long long capSequenceID{}; // NOLINT + hipGraph_t graph{nullptr}; + StreamsGuard streams(numStreams); + for (int i = 0; i < numStreams; i++) { + HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamGetCaptureInfo_spt(streams[i], &captureStatus, &capSequenceID)); + REQUIRE(captureStatus == hipStreamCaptureStatusActive); + REQUIRE(capSequenceID > 0); + idlist.push_back(capSequenceID); + } + for (int i = 0; i < numStreams; i++) { + for (int j = i + 1; j < numStreams; j++) { + if (idlist[i] == idlist[j]) { + REQUIRE(false); + } + } + } + for (int i = 0; i < numStreams; i++) { + HIP_CHECK(hipStreamEndCapture(streams[i], &graph)); + HIP_CHECK(hipGraphDestroy(graph)); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * - Capture status is nullptr + * - Capture status checked on legacy/null stream + * - Stream is uninitialized + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_spt_Negative_Parameters") { + unsigned long long capSequenceID; // NOLINT + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + SECTION("Capture Status location as nullptr") { + HIP_CHECK_ERROR(hipStreamGetCaptureInfo_spt(stream, nullptr, &capSequenceID), + hipErrorInvalidValue); + } +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2_spt.cc b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2_spt.cc new file mode 100644 index 00000000000..718cd56126c --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2_spt.cc @@ -0,0 +1,224 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include "stream_capture_common.hh" + +/** + * @addtogroup hipStreamGetCaptureInfo_v2_spt hipStreamGetCaptureInfo_v2_spt + * @{ + * @ingroup GraphTest + * `hipStreamGetCaptureInfo_v2_spt(hipStream_t stream, hipStreamCaptureStatus + * *captureStatus_out, unsigned long long *id_out __dparm(0), hipGraph_t + * *graph_out __dparm(0), const hipGraphNode_t **dependencies_out __dparm(0), + * size_t *numDependencies_out __dparm(0)))` - + * Get stream's capture state + */ + +void checkStreamCaptureInfo_v2_spt(hipStreamCaptureMode mode, hipStream_t stream) { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + hipGraph_t graph{nullptr}, capInfoGraph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + const hipGraphNode_t* nodelist{}; + size_t numDepsCreated = 0; + hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone}; + hipGraphNodeType type(hipGraphNodeTypeEmpty); + unsigned long long capSequenceID = 0; // NOLINT + size_t numDependencies; + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); + EventsGuard events_guard(3); + StreamsGuard streams_guard(2); + SECTION("Linear sequence graph") { + HIP_CHECK(hipStreamBeginCapture(stream, mode)); + captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream); + HIP_CHECK(hipStreamGetCaptureInfo_v2_spt(stream, &captureStatus, &capSequenceID, &capInfoGraph, + &nodelist, &numDependencies)); + numDepsCreated = 1; + HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type)); + if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) { + INFO("Type0 returned as " << type); + REQUIRE(false); + } + } + SECTION("Branched sequence graph") { + HIP_CHECK(hipStreamBeginCapture(stream, mode)); + captureSequenceBranched(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream, + streams_guard.stream_list(), events_guard.event_list()); + HIP_CHECK(hipStreamGetCaptureInfo_v2_spt(stream, &captureStatus, &capSequenceID, &capInfoGraph, + &nodelist, &numDependencies)); + numDepsCreated = 2; + HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type)); + if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) { + INFO("Type0 returned as " << type); + REQUIRE(false); + } + HIP_CHECK(hipGraphNodeGetType(nodelist[1], &type)); + if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) { + INFO("Type1 returned as " << type); + REQUIRE(false); + } + } + // verify capture status is active, sequence id is valid, graph is returned, + REQUIRE(captureStatus == hipStreamCaptureStatusActive); + REQUIRE(capSequenceID > 0); + REQUIRE(capInfoGraph != nullptr); + REQUIRE(numDependencies == numDepsCreated); + captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream); + // End capture and verify graph is returned + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + // verify capture status is inactive and other params are not updated + capSequenceID = 0; + capInfoGraph = nullptr; + numDependencies = 0; + nodelist = nullptr; + HIP_CHECK(hipStreamGetCaptureInfo_v2_spt(stream, &captureStatus, &capSequenceID, &capInfoGraph, + &nodelist, &numDependencies)); + REQUIRE(captureStatus == hipStreamCaptureStatusNone); + REQUIRE(capSequenceID == 0); + REQUIRE(capInfoGraph == nullptr); + REQUIRE(nodelist == nullptr); + REQUIRE(numDependencies == 0); + + // Verify api still returns capture status when optional args are not passed + HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus)); + REQUIRE(captureStatus == hipStreamCaptureStatusNone); + + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + std::fill_n(A_h.host_ptr(), N, static_cast(i)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)) + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify that hipStreamCaptureStatusActive is returned during + * stream capture, correct number of created dependencies is returned and + * sequence ID is valid. When capture is ended, status is changed to + * hipStreamCaptureStatusNone and error is not reported when some arguments are + * not passed. + * - Sequence graph is linear, number of created dependencies is 1, node + * type is correct + * - Sequence graph is branched, number of created dependencies is 2, + * node types are correct + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_v2_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_spt_Positive_Functional") { + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + checkStreamCaptureInfo_v2_spt(captureMode, stream); +} +/** + * Test Description + * ------------------------ + * - Test to verify stream capture on multiple streams and verifies + * uniqueness of identifiers returned from capture Info V2: + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_v2_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_spt_Positive_UniqueID") { + constexpr int numStreams = 100; + hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone}; + std::vector idlist; + unsigned long long capSequenceID{}; // NOLINT + hipGraph_t graph{nullptr}; + + StreamsGuard streams(numStreams); + + for (int i = 0; i < numStreams; i++) { + HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamGetCaptureInfo_v2_spt(streams[i], &captureStatus, &capSequenceID, nullptr, + nullptr, nullptr)); + REQUIRE(captureStatus == hipStreamCaptureStatusActive); + REQUIRE(capSequenceID > 0); + idlist.push_back(capSequenceID); + } + for (int i = 0; i < numStreams; i++) { + for (int j = i + 1; j < numStreams; j++) { + if (idlist[i] == idlist[j]) { + REQUIRE(false); + } + } + } + for (int i = 0; i < numStreams; i++) { + HIP_CHECK(hipStreamEndCapture(streams[i], &graph)); + HIP_CHECK(hipGraphDestroy(graph)); + } +} +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * - Capture status is nullptr + * - Capture status checked on legacy/null stream + * - Capture status when stream is uninitialized + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamGetCaptureInfo_v2_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_spt_Negative_Parameters") { + hipGraph_t capInfoGraph{}; +#if HT_NVIDIA + hipStreamCaptureStatus captureStatus; +#endif + unsigned long long capSequenceID; // NOLINT + size_t numDependencies; + const hipGraphNode_t* nodelist{}; + + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + SECTION("Capture Status location as nullptr") { + HIP_CHECK_ERROR(hipStreamGetCaptureInfo_v2_spt(stream, nullptr, &capSequenceID, &capInfoGraph, + &nodelist, &numDependencies), + hipErrorInvalidValue); + } +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing_spt.cc b/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing_spt.cc new file mode 100644 index 00000000000..359e6c01bda --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing_spt.cc @@ -0,0 +1,154 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include "stream_capture_common.hh" +/** + * @addtogroup hipStreamIsCapturing_spt hipStreamIsCapturing_spt + * @{ + * @ingroup GraphTest + * `hipError_t hipStreamIsCapturing_spt(hipStream_t stream, + * hipStreamCaptureStatus* pCaptureStatus)`- + * Get stream's capture state. + */ +/** + * Test Description + * ------------------------ + * - Initiate stream capture per thread with different modes on custom + * - stream. Check that capture status is correct in different + * - capturing phases + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamIsCapturing_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamIsCapturing_spt_BasicFntl") { + const auto stream_type = Streams::created; + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + constexpr size_t N = 1000000; + hipStreamCaptureStatus cStatus; + size_t Nbytes = N * sizeof(float); + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + // Status is none before capture begins + HIP_CHECK(hipStreamIsCapturing_spt(stream, &cStatus)); + REQUIRE(hipStreamCaptureStatusNone == cStatus); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + // Status is active during stream capture + HIP_CHECK(hipStreamIsCapturing_spt(stream, &cStatus)); + REQUIRE(hipStreamCaptureStatusActive == cStatus); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + // Status is none after capture ends + HIP_CHECK(hipStreamIsCapturing_spt(stream, &cStatus)); + REQUIRE(hipStreamCaptureStatusNone == cStatus); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + // Replay the recorded sequence multiple times + for (size_t i = 0; i < kLaunchIters; i++) { + std::fill_n(A_h.host_ptr(), N, static_cast(i)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i), N); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)) + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Capture status is nullptr + * -# Capture status is checked on null stream + * -# Stream is uninitialized + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamIsCapturing_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamIsCapturing_spt_Negative_Parameters") { + const auto stream_type = GENERATE(Streams::perThread, Streams::created); + StreamGuard stream_guard(stream_type); + hipStream_t stream = stream_guard.stream(); + SECTION("Check capture status with null pCaptureStatus.") { + HIP_CHECK_ERROR(hipStreamIsCapturing_spt(stream, nullptr), hipErrorInvalidValue); + } + SECTION("Check capture status when checked on null stream") { + hipStreamCaptureStatus cStatus; + hipGraph_t graph{nullptr}; + HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + HIP_CHECK_ERROR(hipStreamIsCapturing_spt(nullptr, &cStatus), hipSuccess); + if (stream_type == Streams::perThread) { + REQUIRE(cStatus == hipStreamCaptureStatusActive); + } else { + REQUIRE(cStatus == hipStreamCaptureStatusNone); + } + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + HIP_CHECK(hipGraphDestroy(graph)); + } +} +static void thread_func(hipStream_t stream) { + hipStreamCaptureStatus cStatus; + HIP_CHECK(hipStreamIsCapturing_spt(stream, &cStatus)); + REQUIRE(hipStreamCaptureStatusActive == cStatus); +} +/** + * Test Description + * ------------------------ + * - Initiate stream capture with different modes on custom + * - stream/hipStreamPerThread. Check that capture status is + * - correct when status is checked in a separate thread. + * Test source + * ------------------------ + * - catch\unit\graph\hipStreamIsCapturing_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamIsCapturing_spt_Positive_Thread") { + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + hipGraph_t graph{nullptr}; + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal; + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); + captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream); + std::thread t(thread_func, stream); + t.join(); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/** + * End doxygen group GraphTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/stream/CMakeLists.txt b/projects/hip-tests/catch/unit/stream/CMakeLists.txt index 0795d86b50d..0cd4580d300 100644 --- a/projects/hip-tests/catch/unit/stream/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/stream/CMakeLists.txt @@ -28,7 +28,10 @@ if(HIP_PLATFORM MATCHES "amd") hipStreamGetPriority_spt.cc hipStreamQuery_spt.cc hipStreamSynchronize_spt.cc - hipStreamBatchMemOp.cc) + hipStreamBatchMemOp.cc + hipStreamWaitEvent_spt.cc + hipStreamAddCallback_spt.cc + hipLaunchHostFunc_spt.cc) else() set(TEST_SRC ${TEST_SRC} diff --git a/projects/hip-tests/catch/unit/stream/hipLaunchHostFunc_spt.cc b/projects/hip-tests/catch/unit/stream/hipLaunchHostFunc_spt.cc new file mode 100644 index 00000000000..0c00a034833 --- /dev/null +++ b/projects/hip-tests/catch/unit/stream/hipLaunchHostFunc_spt.cc @@ -0,0 +1,589 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#define GRIDSIZE 512 +#define BLOCKSIZE 256 +#define NUM_OF_STREAM 3 +#define THREADS_PER_BLOCK 512 +#define GRAPH_LAUNCH_ITERATIONS 1000 +/** + * @addtogroup hipLaunchHostFunc_spt hipLaunchHostFunc_spt + * @{ + * @ingroup StreamTest + * `hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userData);` - + * Enqueues a host function call in a stream per thread + */ +static __global__ void reduce(float* d_in, double* d_out) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + int tid = threadIdx.x; + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + d_in[myId] += d_in[myId + s]; + } + __syncthreads(); + } + if (tid == 0) { + d_out[blockIdx.x] = d_in[myId]; + } +} +static __global__ void reduceFinal(double* d_in, double* d_out) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + int tid = threadIdx.x; + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + d_in[myId] += d_in[myId + s]; + } + __syncthreads(); + } + if (tid == 0) { + *d_out = d_in[myId]; + } +} +static void init_input(float* a, size_t size) { + unsigned int seed = time(nullptr); + for (size_t i = 0; i < size; i++) { + a[i] = (HipTest::RAND_R(&seed) & 0xFF) / static_cast(RAND_MAX); + } +} +static bool gPassed = true; +static void* gusrptr; +static void* ptr0xff = reinterpret_cast(0xffffffff); +static size_t NSize = GRIDSIZE * BLOCKSIZE; +static size_t Nbytes = NSize * sizeof(float); +typedef struct userDataStruct { + float* A_h; + float* C_h; + float* A_d; + float* C_d; + bool isPassed; + bool isOpCompleted; +} usrDataS; +// Common callback function. +static void Fn_validateSq(void* userData) { + REQUIRE(userData != nullptr); + usrDataS* ptrUsrData = reinterpret_cast(userData); + for (size_t i = 0; i < NSize; i++) { + if (ptrUsrData->C_h[i] != (ptrUsrData->A_h[i] * ptrUsrData->A_h[i])) { + ptrUsrData->isPassed = false; + return; + } + } + ptrUsrData->isPassed = true; +} +static void Fn_ChkUserdataPtr(void* userData) { + gPassed = true; + if (gusrptr != userData) { + gPassed = false; + } +} +/** + * Test Description + * ------------------------ + * - Basic test to validates passing userData to host function. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_basic") { + hipStream_t mystream; + HIP_CHECK(hipStreamCreate(&mystream)); + gusrptr = ptr0xff; + gPassed = true; + HIP_CHECK(hipLaunchHostFunc_spt(mystream, Fn_ChkUserdataPtr, gusrptr)); + HIP_CHECK(hipStreamSynchronize(mystream)); + HIP_CHECK(hipStreamDestroy(mystream)); + REQUIRE(gPassed); +} +/** + * Test Description + * ------------------------ + * - Basic test to validate Negative cases of hipLaunchHostFunc_spt. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_Negative") { + hipStream_t mystream; + HIP_CHECK(hipStreamCreate(&mystream)); + SECTION("Pass nullptr as function") { + REQUIRE(hipLaunchHostFunc_spt(mystream, nullptr, 0) == hipErrorInvalidValue); + } + HIP_CHECK(hipStreamDestroy(mystream)); +} +// Local Function +static void launchOperationOnStrm(usrDataS* usrDataptr, hipStream_t stream) { + usrDataptr->isPassed = false; + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr->A_d)), Nbytes, stream)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr->C_d)), Nbytes, stream)); + HIP_CHECK( + hipMemcpyAsync(usrDataptr->A_d, usrDataptr->A_h, Nbytes, hipMemcpyHostToDevice, stream)); + hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE), dim3(BLOCKSIZE), 0, stream, + usrDataptr->A_d, usrDataptr->C_d, NSize); + HIP_CHECK( + hipMemcpyAsync(usrDataptr->C_h, usrDataptr->C_d, Nbytes, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipLaunchHostFunc_spt(stream, Fn_validateSq, reinterpret_cast(usrDataptr))); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr->A_d), stream)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr->C_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(usrDataptr->isPassed); +} +/** + * Test Description + * ------------------------ + * - scenario that validates the host launch function on 3 different streams, + * - created stream, default/null stream and hipStreamPerThread. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_streams") { + hipStream_t stream[NUM_OF_STREAM]; + HIP_CHECK(hipStreamCreate(&stream[0])); + stream[1] = 0; // Null stream + stream[2] = hipStreamPerThread; + usrDataS* usrDataptr = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr != nullptr); + usrDataptr->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->A_h != nullptr); + usrDataptr->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->C_h != nullptr); + for (size_t i = 0; i < NSize; i++) { + usrDataptr->A_h[i] = 21.0f; + } + for (int idx = 0; idx < NUM_OF_STREAM; idx++) { + launchOperationOnStrm(usrDataptr, stream[idx]); + } + HIP_CHECK(hipStreamDestroy(stream[0])); + free(usrDataptr->A_h); + free(usrDataptr->C_h); + free(usrDataptr); +} +static void Fn_validateMul_stream(void* userData) { + REQUIRE(userData != nullptr); + usrDataS* ptrUsrData = reinterpret_cast(userData); + for (size_t i = 0; i < NSize; i++) { + if (ptrUsrData->C_h[i] != (ptrUsrData->A_h[i] * ptrUsrData->A_h[i])) { + ptrUsrData->isPassed = false; + return; + } + } + ptrUsrData->isPassed = true; +} +/** + * Test Description + * ------------------------ + * - Test case to validate hipLaunchHostFunc_spt for multi stream scenario + * - Create 2 different streams and call hipLaunchHostFunc_spt, stream synchronize. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_multistreams") { + hipStream_t mystream1, mystream2; + HIP_CHECK(hipStreamCreateWithFlags(&mystream1, hipStreamNonBlocking)); + HIP_CHECK(hipStreamCreateWithFlags(&mystream2, hipStreamNonBlocking)); + usrDataS* usrDataptr1 = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr1 != nullptr); + usrDataS* usrDataptr2 = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr2 != nullptr); + usrDataptr1->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr1->A_h != nullptr); + usrDataptr1->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr1->C_h != nullptr); + // input data + for (size_t i = 0; i < NSize; i++) { + usrDataptr1->A_h[i] = 11.0f; + } + usrDataptr1->isPassed = false; + usrDataptr2->isPassed = false; + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr1->A_d)), Nbytes, mystream1)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr1->C_d)), Nbytes, mystream1)); + HIP_CHECK( + hipMemcpyAsync(usrDataptr1->A_d, usrDataptr1->A_h, Nbytes, hipMemcpyHostToDevice, mystream1)); + const unsigned blocks = GRIDSIZE; + const unsigned threadsPerBlock = BLOCKSIZE; + hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream1, + usrDataptr1->A_d, usrDataptr1->C_d, NSize); + HIP_CHECK( + hipMemcpyAsync(usrDataptr1->C_h, usrDataptr1->C_d, Nbytes, hipMemcpyDeviceToHost, mystream1)); + HIP_CHECK(hipLaunchHostFunc_spt(mystream1, Fn_validateMul_stream, + reinterpret_cast(usrDataptr1))); + // launch kernel function for mystream2 + usrDataptr2->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr2->A_h != nullptr); + usrDataptr2->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr2->C_h != nullptr); + // input data + for (size_t i = 0; i < NSize; i++) { + usrDataptr2->A_h[i] = 9.0f; + } + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr2->A_d)), Nbytes, mystream2)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr2->C_d)), Nbytes, mystream2)); + HIP_CHECK( + hipMemcpyAsync(usrDataptr2->A_d, usrDataptr2->A_h, Nbytes, hipMemcpyHostToDevice, mystream2)); + hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream2, + usrDataptr2->A_d, usrDataptr2->C_d, NSize); + HIP_CHECK( + hipMemcpyAsync(usrDataptr2->C_h, usrDataptr2->C_d, Nbytes, hipMemcpyDeviceToHost, mystream2)); + HIP_CHECK(hipLaunchHostFunc_spt(mystream2, Fn_validateMul_stream, + reinterpret_cast(usrDataptr2))); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr1->A_d), mystream1)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr1->C_d), mystream1)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr2->A_d), mystream2)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr2->C_d), mystream2)); + HIP_CHECK(hipStreamSynchronize(mystream1)); + HIP_CHECK(hipStreamSynchronize(mystream2)); + HIP_CHECK(hipStreamDestroy(mystream1)); + HIP_CHECK(hipStreamDestroy(mystream2)); + REQUIRE(usrDataptr1->isPassed); + REQUIRE(usrDataptr2->isPassed); + free(usrDataptr1->A_h); + free(usrDataptr1->C_h); + free(usrDataptr2->A_h); + free(usrDataptr2->C_h); + free(usrDataptr2); + free(usrDataptr1); +} +static void Fn_Completion_state(void* userData) { + REQUIRE(userData != nullptr); + usrDataS* ptrUsrData = reinterpret_cast(userData); + ptrUsrData->isOpCompleted = true; +} +/** + * Test Description + * ------------------------ + * - Test case to validate hipLaunchHostFunc_spt for the kernel, + * - validate hipLaunchHostFunc_spt after kernel launch. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_KernelHost") { + hipStream_t stream1, stream2, stream3; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&stream3)); + usrDataS* usrDataptr = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr != nullptr); + usrDataptr->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->A_h != nullptr); + usrDataptr->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->C_h != nullptr); + // input data + for (size_t i = 0; i < NSize; i++) { + usrDataptr->A_h[i] = 7.0f; + } + usrDataptr->isOpCompleted = false; + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr->A_d)), Nbytes, stream1)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&(usrDataptr->C_d)), Nbytes, stream1)); + HIP_CHECK( + hipMemcpyAsync(usrDataptr->A_d, usrDataptr->A_h, Nbytes, hipMemcpyHostToDevice, stream1)); + HIP_CHECK( + hipLaunchHostFunc_spt(stream1, Fn_Completion_state, reinterpret_cast(usrDataptr))); + while (!usrDataptr->isOpCompleted) { + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } // Sleep for 100 ms + usrDataptr->isOpCompleted = false; + const unsigned blocks = GRIDSIZE; + const unsigned threadsPerBlock = BLOCKSIZE; + hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, stream2, + usrDataptr->A_d, usrDataptr->C_d, NSize); + HIP_CHECK( + hipLaunchHostFunc_spt(stream2, Fn_Completion_state, reinterpret_cast(usrDataptr))); + while (!usrDataptr->isOpCompleted) { + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } // Sleep for 100 ms + usrDataptr->isOpCompleted = false; + HIP_CHECK( + hipMemcpyAsync(usrDataptr->C_h, usrDataptr->C_d, Nbytes, hipMemcpyDeviceToHost, stream3)); + HIP_CHECK( + hipLaunchHostFunc_spt(stream2, Fn_Completion_state, reinterpret_cast(usrDataptr))); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr->A_d), stream3)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(usrDataptr->C_d), stream3)); + while (!usrDataptr->isOpCompleted) { + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } // Sleep for 100 ms + for (size_t i = 0; i < NSize; i++) { + if (usrDataptr->C_h[i] != (usrDataptr->A_h[i] * usrDataptr->A_h[i])) { + REQUIRE(false); + } + } + HIP_CHECK(hipStreamSynchronize(stream3)); + HIP_CHECK(hipStreamDestroy(stream3)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipStreamDestroy(stream1)); + free(usrDataptr->A_h); + free(usrDataptr->C_h); + free(usrDataptr); +} +/** + * Test Description + * ------------------------ + * - Scenario that validates the host launch function on multi device + * - environment. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_multidevice") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + if (num_devices < 2) { + SUCCEED("Skipping the testcases as numDevices < 2"); + return; + } + usrDataS* usrDataptr = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr != nullptr); + usrDataptr->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->A_h != nullptr); + usrDataptr->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->C_h != nullptr); + for (size_t i = 0; i < NSize; i++) { + usrDataptr->A_h[i] = 21.0f; + } + for (int dev = 0; dev < num_devices; dev++) { + HIP_CHECK(hipSetDevice(dev)); + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + launchOperationOnStrm(usrDataptr, stream); + HIP_CHECK(hipStreamDestroy(stream)); + } + free(usrDataptr->A_h); + free(usrDataptr->C_h); + free(usrDataptr); +} +/** + * Test Description + * ------------------------ + * - Scenario that validates the host launch function on created + * - stream with same priority. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_Samepriority") { + int priority = 0; + unsigned int flags = 0; + usrDataS* usrDataptr = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr != nullptr); + usrDataptr->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->A_h != nullptr); + usrDataptr->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->C_h != nullptr); + for (size_t i = 0; i < NSize; i++) { + usrDataptr->A_h[i] = 21.0f; + } + for (int idx = 0; idx < NUM_OF_STREAM; idx++) { + hipStream_t stream[NUM_OF_STREAM]; + HIP_CHECK(hipStreamCreateWithPriority(&stream[idx], flags, priority)); + launchOperationOnStrm(usrDataptr, stream[idx]); + HIP_CHECK(hipStreamDestroy(stream[idx])); + } + free(usrDataptr->A_h); + free(usrDataptr->C_h); + free(usrDataptr); +} +/** + * Test Description + * ------------------------ + * - Scenario that validates the host launch function on + * - created stream with different priority. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_Diffpriority") { + int priority; + int priority_low{}; + int priority_high{}; + unsigned int flags = 0; + HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high)); + int numOfPriorities = priority_low - priority_high; + const float arr_size = numOfPriorities + 1; + hipStream_t* stream = reinterpret_cast(malloc(arr_size * sizeof(hipStream_t))); + stream[0] = 0; + int count = 1; + // Create a stream for each of the priority levels + for (priority = priority_high; priority < priority_low; priority++) { + HIP_CHECK(hipStreamCreateWithPriority(&stream[count++], flags, priority)); + } + usrDataS* usrDataptr = reinterpret_cast(malloc(sizeof(usrDataS))); + REQUIRE(usrDataptr != nullptr); + usrDataptr->A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->A_h != nullptr); + usrDataptr->C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(usrDataptr->C_h != nullptr); + for (size_t i = 0; i < NSize; i++) { + usrDataptr->A_h[i] = 11.0f; + } + for (int idx = 0; idx < arr_size; idx++) { + launchOperationOnStrm(usrDataptr, stream[idx]); + } + count = 1; + for (priority = priority_high; priority < priority_low; priority++) { + HIP_CHECK(hipStreamDestroy(stream[count++])); + } + free(usrDataptr->A_h); + free(usrDataptr->C_h); + free(usrDataptr); +} +typedef struct callBackData { + const char* fn_name; + double* data; +} callBackData_t; +double gpu_result = 0.0; +void myHostNodeCallback_spt(void* data) { + static int iter = 0; + iter++; + // Check status of GPU after stream operations are done + callBackData_t* tmp = reinterpret_cast(data); + // checkCudaErrors(tmp->status); + double* result = reinterpret_cast(tmp->data); + const char* function = reinterpret_cast(tmp->fn_name); + if (iter == GRAPH_LAUNCH_ITERATIONS) + printf("[%s] Host callback final reduced sum = %lf\n", function, *result); + gpu_result = *result; + *result = 0.0; // reset the result +} +/** + * Test Description + * ------------------------ + * - Create a graph by using hipGraphsUsingStreamCapture and call host function. + * Test source + * ------------------------ + * - /unit/stream/hipLaunchHostFunc_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipLaunchHostFunc_spt_Graph") { + size_t size = 1 << 12; + size_t maxBlocks = 512; + float *inputVec_d = NULL, *inputVec_h = NULL; + double *outputVec_d = NULL, *result_d; + inputVec_h = reinterpret_cast(malloc(sizeof(float) * size)); + HIP_CHECK(hipMalloc(&inputVec_d, sizeof(float) * size)); + HIP_CHECK(hipMalloc(&outputVec_d, sizeof(double) * maxBlocks)); + HIP_CHECK(hipMalloc(&result_d, sizeof(double))); + init_input(inputVec_h, size); + hipStream_t stream1, stream2, stream3, streamForGraph; + hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; + hipGraph_t graph; + double result_h = 0.0; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&stream3)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + auto start = std::chrono::high_resolution_clock::now(); + HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(forkStreamEvent, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); + HIP_CHECK(hipStreamWaitEvent(stream3, forkStreamEvent, 0)); + HIP_CHECK( + hipMemcpyAsync(inputVec_d, inputVec_h, sizeof(float) * size, hipMemcpyDefault, stream1)); + HIP_CHECK(hipMemsetAsync(outputVec_d, 0, sizeof(double) * maxBlocks, stream2)); + HIP_CHECK(hipEventRecord(memsetEvent1, stream2)); + HIP_CHECK(hipMemsetAsync(result_d, 0, sizeof(double), stream3)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream3)); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); + hipLaunchKernelGGL(reduce, dim3(size / THREADS_PER_BLOCK, 1, 1), dim3(THREADS_PER_BLOCK, 1, 1), 0, + stream1, inputVec_d, outputVec_d); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); + hipLaunchKernelGGL(reduceFinal, dim3(1, 1, 1), dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, + outputVec_d, result_d); + HIP_CHECK(hipMemcpyAsync(&result_h, result_d, sizeof(double), hipMemcpyDefault, stream1)); + callBackData_t hostFnData; + hostFnData.data = &result_h; + hostFnData.fn_name = "hipGraphsUsingStreamCapture"; + hipHostFn_t fn = myHostNodeCallback_spt; + HIP_CHECK(hipLaunchHostFunc_spt(stream1, fn, &hostFnData)); + HIP_CHECK(hipStreamEndCapture(stream1, &graph)); + hipGraphNode_t* nodes = NULL; + size_t numNodes = 0; + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); + printf( + "\nNum of nodes in the graph created using stream" + "capture API = %zu\n", + numNodes); + HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numNodes)); + printf("root nodes in the graph created using stream capture API = %zu\n", numNodes); + hipGraphExec_t graphExec; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + auto start1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + } + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + auto stop = std::chrono::high_resolution_clock::now(); + auto WithInit = std::chrono::duration(stop - start); + auto WithoutInit = std::chrono::duration(stop - start1); + std::cout << "Time taken for hipGraphsUsingStreamCapture with Init: " + << std::chrono::duration_cast(WithInit).count() + << " milliseconds without Init:" + << std::chrono::duration_cast(WithoutInit).count() + << " milliseconds " << std::endl; + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipStreamDestroy(stream3)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + double result_h_cpu = 0.0; + for (size_t i = 0; i < size; i++) { + result_h_cpu += inputVec_h[i]; + } + REQUIRE(result_h_cpu == gpu_result); + HIP_CHECK(hipFree(inputVec_d)); + HIP_CHECK(hipFree(outputVec_d)); + HIP_CHECK(hipFree(result_d)); +} +/** + * End doxygen group StreamTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/stream/hipStreamAddCallback_spt.cc b/projects/hip-tests/catch/unit/stream/hipStreamAddCallback_spt.cc new file mode 100644 index 00000000000..8082e127345 --- /dev/null +++ b/projects/hip-tests/catch/unit/stream/hipStreamAddCallback_spt.cc @@ -0,0 +1,243 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#define UNUSED(expr) \ + do { \ + (void)(expr); \ + } while (0) +#ifdef __HIP_PLATFORM_AMD__ +#define HIPRT_CB +#endif +/** + * @addtogroup hipStreamAddCallback_spt hipStreamAddCallback_spt + * @{ + * @ingroup StreamTest + * `hipError_t hipStreamAddCallback_spt(hipStream_t stream, + * hipStreamCallback_t callback, + * void* userData, + * unsigned int flags)` - + * Adds a callback to be called on the host after all currently enqueued + * items in the stream have completed. + */ +namespace hipStreamAddCallbackTest_spt { +size_t NSize = 4 * 1024 * 1024; +float *A_h, *C_h; +bool gcbDone = false; +bool gPassed = true; +void* ptr0xff = reinterpret_cast(0xffffffff); +void* gusrptr; +hipStream_t gstream; +void HIPRT_CB Callback(hipStream_t stream, hipError_t status, void* userData) { + UNUSED(stream); + HIP_CHECK(status); + REQUIRE(userData == NULL); + gPassed = true; + for (size_t i = 0; i < NSize; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + UNSCOPED_INFO("Data mismatch :" << i); + gPassed = false; + break; + } + } + gcbDone = true; +} +bool testStreamCallbackFunctionality(bool isDefault) { + float *A_d, *C_d; + size_t Nbytes = NSize * sizeof(float); + gcbDone = false; + A_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A_h != nullptr); + C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(C_h != nullptr); + // Fill with Phi + i + for (size_t i = 0; i < NSize; i++) { + A_h[i] = 1.618f + i; + } + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + if (isDefault) { + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, 0)); + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, + C_d, NSize); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, 0)); + HIP_CHECK(hipStreamAddCallback_spt(0, Callback, nullptr, 0)); + while (!gcbDone) + // Sleep for 100 ms + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } else { + hipStream_t mystream; + HIP_CHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, + A_d, C_d, NSize); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + HIP_CHECK(hipStreamAddCallback_spt(mystream, Callback, nullptr, 0)); + while (!gcbDone) + // Sleep for 100 ms + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + HIP_CHECK(hipStreamDestroy(mystream)); + } + HIP_CHECK(hipFree(reinterpret_cast(C_d))); + HIP_CHECK(hipFree(reinterpret_cast(A_d))); + free(C_h); + free(A_h); + return gPassed; +} +void Callback_ChkUsrdataPtr(hipStream_t stream, hipError_t status, void* userData) { + REQUIRE(stream == gstream); + HIP_CHECK(status); + gPassed = true; + if (gusrptr != userData) { + gPassed = false; + } + gcbDone = true; +} +void Callback_ChkStreamValue(hipStream_t stream, hipError_t status, void* userData) { + REQUIRE(userData == nullptr); + HIP_CHECK(status); + gPassed = true; + if (stream != gstream) { + gPassed = false; + } + gcbDone = true; +} +} // namespace hipStreamAddCallbackTest_spt +using hipStreamAddCallbackTest_spt::Callback; +using hipStreamAddCallbackTest_spt::Callback_ChkStreamValue; +using hipStreamAddCallbackTest_spt::Callback_ChkUsrdataPtr; +using hipStreamAddCallbackTest_spt::gcbDone; +using hipStreamAddCallbackTest_spt::gPassed; +using hipStreamAddCallbackTest_spt::gstream; +using hipStreamAddCallbackTest_spt::gusrptr; +using hipStreamAddCallbackTest_spt::ptr0xff; +using hipStreamAddCallbackTest_spt::testStreamCallbackFunctionality; +/** + * Test Description + * ------------------------ + * - Validates if userData pointer is passed properly to callback. + * - Validates if stream value is passed properly to callback. + * Test source + * ------------------------ + * - /unit/stream/hipStreamAddCallback_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamAddCallback_spt_ParamTst_Positive") { + hipStream_t mystream; + HIP_CHECK(hipStreamCreate(&mystream)); + SECTION("userData pointer value validation") { + gstream = mystream; + gusrptr = ptr0xff; + gPassed = true; + gcbDone = false; + HIP_CHECK(hipStreamAddCallback_spt(mystream, Callback_ChkUsrdataPtr, gusrptr, 0)); + while (!gcbDone) { + // Sleep for 100 ms + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } + REQUIRE(gPassed); + } + SECTION("stream value validation") { + gstream = mystream; + gPassed = true; + gcbDone = false; + HIP_CHECK(hipStreamAddCallback_spt(mystream, Callback_ChkStreamValue, nullptr, 0)); + while (!gcbDone) { + // Sleep for 100 ms + std::this_thread::sleep_for(std::chrono::microseconds(100000)); + } + REQUIRE(gPassed); + } + HIP_CHECK(hipStreamDestroy(mystream)); +} +/** + * Test Description + * ------------------------ + * - Basic test to validate Negative cases of hipStreamAddCallback_spt. + * Test source + * ------------------------ + * - /unit/stream/hipStreamAddCallback_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamAddCallback_spt_ParamTst_Negative") { + hipStream_t mystream; + HIP_CHECK(hipStreamCreate(&mystream)); + SECTION("callback is nullptr for non-default stream") { + REQUIRE_FALSE(hipSuccess == hipStreamAddCallback_spt(mystream, nullptr, nullptr, 0)); + } + SECTION("callback is nullptr for default stream") { + REQUIRE_FALSE(hipSuccess == hipStreamAddCallback_spt(0, nullptr, nullptr, 0)); + } + SECTION("flag is nonzero for non-default stream") { + REQUIRE_FALSE(hipSuccess == hipStreamAddCallback_spt(mystream, Callback, nullptr, 10)); + } + SECTION("flag is nonzero for default stream") { + REQUIRE_FALSE(hipSuccess == hipStreamAddCallback_spt(0, Callback, nullptr, 10)); + } + HIP_CHECK(hipStreamDestroy(mystream)); +} +/** + * Test Description + * ------------------------ + * - Validates hipStreamAddCallback_spt functionality with default stream. + * Test source + * ------------------------ + * - /unit/stream/hipStreamAddCallback_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamAddCallback_spt_WithDefaultStream") { + bool TestPassed = true; + TestPassed = testStreamCallbackFunctionality(true); + REQUIRE(TestPassed); +} +/** + * Test Description + * ------------------------ + * - Validates hipStreamAddCallback_spt functionality with defined stream. + * Test source + * ------------------------ + * - /unit/stream/hipStreamAddCallback_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamAddCallback_spt_WithCreatedStream") { + bool TestPassed = true; + TestPassed = testStreamCallbackFunctionality(false); + REQUIRE(TestPassed); +} +/** + * End doxygen group StreamTest. + * @} + */ diff --git a/projects/hip-tests/catch/unit/stream/hipStreamWaitEvent_spt.cc b/projects/hip-tests/catch/unit/stream/hipStreamWaitEvent_spt.cc new file mode 100644 index 00000000000..44e07a28f39 --- /dev/null +++ b/projects/hip-tests/catch/unit/stream/hipStreamWaitEvent_spt.cc @@ -0,0 +1,140 @@ +/*Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +/** + * @addtogroup hipStreamWaitEvent_spt hipStreamWaitEvent_spt + * @{ + * @ingroup StreamTest + * `hipError_t hipStreamWaitEvent_spt(hipStream_t stream, + * hipEvent_t event, + * unsigned int flags __dparm(0))` - + * Make the specified compute stream wait for an event + */ +/** + * Test Description + * ------------------------ + * - Test unsuccessful hipStreamWaitEvent_spt when either event or flags are invalid + * Test source + * ------------------------ + * - /unit/stream/hipStreamWaitEvent_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamWaitEvent_spt_Negative") { + enum class StreamTestType { NullStream = 0, StreamPerThread, CreatedStream }; + auto streamType = GENERATE(StreamTestType::NullStream, StreamTestType::StreamPerThread, + StreamTestType::CreatedStream); + hipStream_t stream{nullptr}; + hipEvent_t event{nullptr}; + if (streamType == StreamTestType::StreamPerThread) { + stream = hipStreamPerThread; + } else if (streamType == StreamTestType::CreatedStream) { + HIP_CHECK(hipStreamCreate(&stream)); + } + HIP_CHECK(hipEventCreate(&event)); + REQUIRE((stream != nullptr) != (streamType == StreamTestType::NullStream)); + REQUIRE(event != nullptr); + SECTION("Invalid Event") { + INFO("Running against Invalid Event"); + HIP_CHECK_ERROR(hipStreamWaitEvent_spt(stream, nullptr, 0), hipErrorInvalidResourceHandle); + } + SECTION("Invalid Flags") { + INFO("Running against Invalid Flags"); + constexpr unsigned flag = ~0u; + REQUIRE(flag != 0); + HIP_CHECK_ERROR(hipStreamWaitEvent_spt(stream, event, flag), hipErrorInvalidValue); + } + HIP_CHECK(hipEventDestroy(event)); + if (streamType == StreamTestType::CreatedStream) { + HIP_CHECK(hipStreamDestroy(stream)); + } +} +/** + * Test Description + * ------------------------ + * - Test simple waiting for an event with hipStreamWaitEvent_spt api + * Test source + * ------------------------ + * - /unit/stream/hipStreamWaitEvent_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamWaitEvent_spt_Default") { + hipStream_t stream{nullptr}; + hipEvent_t waitEvent{nullptr}; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipEventCreate(&waitEvent)); + REQUIRE(stream != nullptr); + REQUIRE(waitEvent != nullptr); + LaunchDelayKernel(std::chrono::milliseconds(2000), stream); + HIP_CHECK(hipEventRecord(waitEvent, stream)); + // Make sure stream is waiting for data to be set + HIP_CHECK_ERROR(hipEventQuery(waitEvent), hipErrorNotReady); + HIP_CHECK(hipStreamWaitEvent_spt(stream, waitEvent, 0)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipEventDestroy(waitEvent)); +} +/** + * Test Description + * ------------------------ + * - Test waiting for an event on a different stream with hipStreamWaitEvent_spt api + * Test source + * ------------------------ + * - /unit/stream/hipStreamWaitEvent_spt.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipStreamWaitEvent_spt_DifferentStreams") { + hipStream_t blockedStreamA{nullptr}, streamBlockedOnStreamA{nullptr}, unblockingStream{nullptr}; + hipEvent_t waitEvent{nullptr}; + HIP_CHECK(hipStreamCreate(&blockedStreamA)); + HIP_CHECK(hipStreamCreate(&streamBlockedOnStreamA)); + HIP_CHECK(hipStreamCreate(&unblockingStream)); + HIP_CHECK(hipEventCreate(&waitEvent)); + REQUIRE(blockedStreamA != nullptr); + REQUIRE(streamBlockedOnStreamA != nullptr); + REQUIRE(waitEvent != nullptr); + LaunchDelayKernel(std::chrono::milliseconds(3000), blockedStreamA); + HIP_CHECK(hipEventRecord(waitEvent, blockedStreamA)); + // Make sure stream is waiting for data to be set + HIP_CHECK_ERROR(hipEventQuery(waitEvent), hipErrorNotReady); + HIP_CHECK(hipStreamWaitEvent_spt(streamBlockedOnStreamA, waitEvent, 0)); + LaunchDelayKernel(std::chrono::milliseconds(2000), streamBlockedOnStreamA); + HIP_CHECK(hipStreamSynchronize(unblockingStream)); + HIP_CHECK(hipStreamSynchronize(blockedStreamA)); + // Make sure streamBlockedOnStreamA waited for event on blockedStreamA + HIP_CHECK_ERROR(hipStreamQuery(streamBlockedOnStreamA), hipErrorNotReady); + HIP_CHECK(hipStreamSynchronize(streamBlockedOnStreamA)); + // Check that both streams have finished + HIP_CHECK(hipStreamQuery(blockedStreamA)); + HIP_CHECK(hipStreamQuery(streamBlockedOnStreamA)); + HIP_CHECK(hipStreamDestroy(blockedStreamA)); + HIP_CHECK(hipStreamDestroy(streamBlockedOnStreamA)); + HIP_CHECK(hipStreamDestroy(unblockingStream)); + HIP_CHECK(hipEventDestroy(waitEvent)); +} +/** + * End doxygen group StreamTest. + * @} + */