diff --git a/catch/include/hip_test_checkers.hh b/catch/include/hip_test_checkers.hh index bd1fa6261..19c5040f9 100644 --- a/catch/include/hip_test_checkers.hh +++ b/catch/include/hip_test_checkers.hh @@ -350,49 +350,4 @@ bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHo return freeArraysForHost(A_h, B_h, C_h, usePinnedHost); } - -template -static bool assemblyFile_Verification(std::string assemfilename, std::string inst) { - std::string filePath = "./catch/unit/deviceLib/"; - bool result = false; - std::string filename; - filename = filePath + assemfilename; - std::ifstream file(filename.c_str(), std::ios::out); - if (file) { - std::string line; - int line_pos = 0, start_pos = 0; - int last_pos = 0; - int start_match = 0; - while (getline(file, line)) { - line_pos++; - if ((std::is_same::value)) { - if (!start_pos && std::regex_search(line, std::regex("Begin function (.*)AtomicCheck"))) { - start_pos = line_pos; - } - if (!last_pos && std::regex_search(line, std::regex(".Lfunc_end0-(.*)AtomicCheck"))) { - last_pos = line_pos; - break; - } - } else { - if ((start_match != 2) && - std::regex_search(line, std::regex("Begin function (.*)AtomicCheck"))) { - start_match++; - if (start_match == 2) start_pos = line_pos; - } - if (!last_pos && std::regex_search(line, std::regex("func_end1-(.*)AtomicCheck"))) { - last_pos = line_pos; - break; - } - } - if (start_pos) { - result = std::regex_search(line, std::regex(inst)); - if (result) break; - } - } - } else { - result = true; - SUCCEED("Assembly file does not exist"); - } - return result; -} } // namespace HipTest diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc deleted file mode 100644 index 2c6effaa8..000000000 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -atomicAdd on fineGrain memory with -mno-unsafe-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - - -#define INC_VAL 10 -#define INITIAL_VAL 5 - -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - -/*atomicAdd API for the fine grained memory variable - with -mno-unsafe-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithnounsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_Coherent_withnoUnsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_cmpswap"); - REQUIRE(testResult == true); - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc deleted file mode 100644 index e068b94cc..000000000 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc +++ /dev/null @@ -1,91 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -atomicAdd on fineGrain memory without any unsafeatomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - -/*atomicAdd API for the fine grained memory variable - without any flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithoutflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_Coherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_cmpswap"); - REQUIRE(result[0] == INITIAL_VAL); - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(testResult == true); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc deleted file mode 100644 index b9fffb5de..000000000 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -atomicAdd on fineGrain memory with -munsafe-fp-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - - -/*atomicAdd API for the fine grained memory variable - with -m-unsafe-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would return 0 and the 0/P is 5 - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_CoherentwithUnsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_Coherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_Coherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(result[0] == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc deleted file mode 100644 index 00ded36a2..000000000 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -atomicAdd on CoarseGrain memory with -mno-unsafe-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - -/*atomicAdd API for the coarse grained memory variable - with -mno-unsafe-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithnounsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, - dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_NonCoherent_withnounsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_cmpswap"); - REQUIRE(testResult == true); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc deleted file mode 100644 index 6e88a26af..000000000 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -atomicAdd on CoarseGrain memory without any unsafeatomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 - -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - -/*atomicAdd API for the coarse grained memory variable - without any flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithoutflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, - dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_NonCoherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_cmpswap"); - REQUIRE(testResult == true); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc deleted file mode 100644 index 5cfb70c81..000000000 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -atomicAdd on CoarseGrain memory with -munsafe-fp-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = atomicAdd(Ad, inc_val); -} - -/*atomicAdd API for the fine grained memory variable - with -m-unsafe-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would work and the 0/P is 15 - Generate the assembly file and check whether - global_atomic_add_float/double instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithUnsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_NonCoherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "AtomicAdd_NonCoherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/BuiltIns_fadd.cc b/catch/unit/deviceLib/BuiltIns_fadd.cc index 5303afb51..20fae0f16 100644 --- a/catch/unit/deviceLib/BuiltIns_fadd.cc +++ b/catch/unit/deviceLib/BuiltIns_fadd.cc @@ -26,193 +26,22 @@ This testfile verifies __builtin_amdgcn_global_atomic_fadd_f64 API scenarios 4. AtomicAdd on Non-Coherent Memory with RTC */ -#include -#include -#include +#include +#include +#include #include #define INC_VAL 10 #define INITIAL_VAL 5 -__global__ void AtomicAdd_GlobalMem(double* addr, double* result) { - double inc_val = 10; - *result = unsafeAtomicAdd(addr, inc_val); -} + static constexpr auto AtomicAddGlobalMem{ -R"( + R"( extern "C" __global__ void AtomicAdd_GlobalMem(double* addr, double* result) { double inc_val = 10; *result = unsafeAtomicAdd(addr, inc_val); } )"}; -/* -This test verifies the built in atomic add API on Coherent Memory -Input: A_h with INITIAL_VAL -Output: A_h will not get updated with Coherent Memory - A_h will be INITIAL_VAL - ret value would be 0, B_h would be 0 -*/ -TEST_CASE("Unit_BuiltInAtomicAdd_CoherentGlobalMem") { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does support HostPinned Memory"); - } else { - double *A_h, *result_h, *result; - double *A_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(double), - hipHostMallocCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result_h), - sizeof(double), hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result), - result_h, 0)); - std::cout << "test" << std::endl; - hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1), - 0, 0, A_d, - result); - HIP_CHECK(hipGetLastError()); - std::cout << "test 1" << std::endl; - HIP_CHECK(hipDeviceSynchronize()); - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(*result_h == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942 Hence" - "skipping the testcase for this GPU " << device); - } -} - -/* -This test verifies the built in atomic add API on Non-Coherent Memory -Input: A_h with INITIAL_VAL -Output: A_h will not get updated with Coherent Memory - A_h will be INITIAL_VAL+INC_VAL - B_h would be initial value of A_h, B_h would be INITIAL_VAL -*/ -TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMem") { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - double *A_h, *result, *B_h; - double *A_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(double), - hipHostMallocNonCoherent)); - B_h = reinterpret_cast(malloc(sizeof(double))); - HIP_CHECK(hipMalloc(reinterpret_cast(&result), sizeof(double))); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1), - 0, 0, static_cast(A_d), - static_cast(result)); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(*B_h == INITIAL_VAL); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipFree(result)); - free(B_h); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942" - "Hence skipping the testcase for GPU-0"); - } -} -/* -This test verifies the built in atomic add API on Coherent Memory with RTC -Input: A_h with INITIAL_VAL -Output: A_h will not get updated with Coherent Memory - A_h will be INITIAL_VAL - ret value would be 0, B_h would be 0 -*/ -TEST_CASE("Unit_BuiltInAtomicAdd_CoherentGlobalMemWithRtc") { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - hiprtcProgram prog; - hiprtcCreateProgram(&prog, // prog - AtomicAddGlobalMem, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - std::string sarg = std::string("--gpu-architecture=") + prop.gcnArchName; - const char* options[] = {sarg.c_str()}; - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, options)}; - - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - INFO(log); - } - - REQUIRE(compileResult == HIPRTC_SUCCESS); - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - - std::vector code(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - - hipModule_t module; - hipFunction_t fmaxkernel; - HIP_CHECK(hipModuleLoadData(&module, code.data())); - HIP_CHECK(hipModuleGetFunction(&fmaxkernel, module, - "AtomicAdd_GlobalMem")); - double *A_h, *result, *B_h; - double *A_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(double), - hipHostMallocCoherent)); - B_h = reinterpret_cast(malloc(sizeof(double))); - HIP_CHECK(hipMalloc(reinterpret_cast(&result), sizeof(double))); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - struct { - double* p; - double* res; - } args_f{A_d, result}; - auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK(hipModuleLaunchKernel(fmaxkernel, 1, 1, 1, 1, 1, 1, 0, - nullptr, nullptr, config_d)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(*B_h == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipFree(result)); - free(B_h); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} /* This test verifies the built in atomic add API on Non-Coherent Memory @@ -232,9 +61,9 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMemWithRtc") { SUCCEED("Does support HostPinned Memory"); } else { hiprtcProgram prog; - hiprtcCreateProgram(&prog, // prog - AtomicAddGlobalMem, // buffer - "kernel.cu", // name + hiprtcCreateProgram(&prog, // prog + AtomicAddGlobalMem, // buffer + "kernel.cu", // name 0, nullptr, nullptr); std::string sarg = std::string("--gpu-architecture=") + prop.gcnArchName; const char* options[] = {sarg.c_str()}; @@ -259,25 +88,23 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMemWithRtc") { hipModule_t module; hipFunction_t fmaxkernel; HIP_CHECK(hipModuleLoadData(&module, code.data())); - HIP_CHECK(hipModuleGetFunction(&fmaxkernel, module, - "AtomicAdd_GlobalMem")); + HIP_CHECK(hipModuleGetFunction(&fmaxkernel, module, "AtomicAdd_GlobalMem")); double *A_h, *result, *B_h; - double *A_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(double), - hipHostMallocNonCoherent)); + double* A_d; + HIP_CHECK( + hipHostMalloc(reinterpret_cast(&A_h), sizeof(double), hipHostMallocNonCoherent)); B_h = reinterpret_cast(malloc(sizeof(double))); HIP_CHECK(hipMalloc(reinterpret_cast(&result), sizeof(double))); A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); struct { double* p; double* res; } args_f{A_d, result}; auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, + void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(fmaxkernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, config_d)); HIP_CHECK(hipDeviceSynchronize()); @@ -289,7 +116,9 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMemWithRtc") { free(B_h); } } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); + SUCCEED( + "Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" + "skipping the testcase for this GPU " + << device); } } diff --git a/catch/unit/deviceLib/CMakeLists.txt b/catch/unit/deviceLib/CMakeLists.txt index 5317d42ca..2f2b28ec0 100644 --- a/catch/unit/deviceLib/CMakeLists.txt +++ b/catch/unit/deviceLib/CMakeLists.txt @@ -87,22 +87,12 @@ set(AMD_TEST_SRC bfloat16.cc ) set(AMD_ARCH_SPEC_TEST_SRC - AtomicAdd_Coherent_withunsafeflag.cc - AtomicAdd_Coherent_withoutflag.cc - AtomicAdd_Coherent_withnoUnsafeflag.cc - AtomicAdd_NonCoherent_withoutflag.cc - AtomicAdd_NonCoherent_withnoUnsafeflag.cc - AtomicAdd_NonCoherent_withunsafeflag.cc + atomicAdd.cc BuiltIns_fmax.cc BuiltIns_fmin.cc BuiltIns_fadd.cc unsafeAtomicAdd_RTC.cc - unsafeAtomicAdd_Coherent_withunsafeflag.cc - unsafeAtomicAdd_Coherent_withoutflag.cc - unsafeAtomicAdd_Coherent_withnounsafeflag.cc - unsafeAtomicAdd_NonCoherent_withoutflag.cc - unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc - unsafeAtomicAdd_NonCoherent_withunsafeflag.cc + unsafeatomicAdd_withFlag.cc ) # Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906" @@ -148,40 +138,29 @@ function(CheckAcceptedArchs OFFLOAD_ARCH_STR_LOCAL) endfunction() # CheckAcceptedArchs if(HIP_PLATFORM MATCHES "amd") - if (DEFINED OFFLOAD_ARCH_STR) - CheckAcceptedArchs(${OFFLOAD_ARCH_STR}) - elseif(DEFINED $ENV{HCC_AMDGPU_TARGET}) - CheckAcceptedArchs($ENV{HCC_AMDGPU_TARGET}) - else() - set(ARCH_CHECK -1) - endif() - set(TEST_SRC ${TEST_SRC} ${AMD_TEST_SRC}) - set_source_files_properties(floatTM.cc PROPERTIES COMPILE_FLAGS -std=c++17) -if(${ARCH_CHECK} GREATER_EQUAL 0) - set(TEST_SRC ${TEST_SRC} ${AMD_ARCH_SPEC_TEST_SRC}) - set_source_files_properties(AtomicAdd_Coherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") - set_source_files_properties(AtomicAdd_NonCoherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") - set_source_files_properties(AtomicAdd_Coherent_withnoUnsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") - set_source_files_properties(AtomicAdd_NonCoherent_withnoUnsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") - set_source_files_properties(unsafeAtomicAdd_Coherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") - set_source_files_properties(unsafeAtomicAdd_NonCoherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") - set_source_files_properties(unsafeAtomicAdd_Coherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") - set_source_files_properties(unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") + if (DEFINED OFFLOAD_ARCH_STR) + CheckAcceptedArchs(${OFFLOAD_ARCH_STR}) + elseif(DEFINED $ENV{HCC_AMDGPU_TARGET}) + CheckAcceptedArchs($ENV{HCC_AMDGPU_TARGET}) + else() + set(ARCH_CHECK -1) + endif() + set(TEST_SRC ${TEST_SRC} ${AMD_TEST_SRC}) + set_source_files_properties(floatTM.cc PROPERTIES COMPILE_FLAGS -std=c++17) + if(${ARCH_CHECK} GREATER_EQUAL 0) + set(TEST_SRC ${TEST_SRC} ${AMD_ARCH_SPEC_TEST_SRC}) + set_source_files_properties(unsafeatomicAdd_withFlag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") set_source_files_properties(hipMathFunctions.cc PROPERTIES COMPILE_FLAGS "-Xclang -fallow-half-arguments-and-returns") - file(GLOB AtomicAdd_files *AtomicAdd_*_*.cc) - set_property(SOURCE ${AtomicAdd_files} PROPERTY COMPILE_FLAGS --save-temps) - file(GLOB unsafeAtomicAdd_files *unsafeAtomicAdd_*_*.cc) - set_property(SOURCE ${unsafeAtomicAdd_files} PROPERTY COMPILE_FLAGS --save-temps) -endif() - hip_add_exe_to_target(NAME UnitDeviceTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS hiprtc) + endif() + hip_add_exe_to_target(NAME UnitDeviceTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) elseif(HIP_PLATFORM MATCHES "nvidia") - hip_add_exe_to_target(NAME UnitDeviceTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - COMPILE_OPTIONS --Wno-deprecated-declarations) + hip_add_exe_to_target(NAME UnitDeviceTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS --Wno-deprecated-declarations) endif() add_dependencies(build_tests kerDevAllocMultCO.code kerDevWriteMultCO.code kerDevFreeMultCO.code kerDevAllocSingleKer.code) diff --git a/catch/unit/deviceLib/atomicAdd.cc b/catch/unit/deviceLib/atomicAdd.cc new file mode 100644 index 000000000..ecc1f0338 --- /dev/null +++ b/catch/unit/deviceLib/atomicAdd.cc @@ -0,0 +1,66 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +template +static __global__ void atomicAdd_Kernel(Type* ptr, Type* old_res, Type inc_val) { + *old_res = atomicAdd(ptr, inc_val); +} + +TEMPLATE_TEST_CASE("Unit_AtomicAdd_Sanity", "", float, double) { + hipDeviceProp_t prop; + int device = 0; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + std::string gfxName(prop.gcnArchName); + if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { + if (prop.canMapHostMemory != 1) { + SUCCEED("Does not support HostPinned Memory"); + } else { + constexpr TestType init_val = 5; + TestType *h_ptr{nullptr}, *h_result{nullptr}; + TestType *d_ptr{nullptr}, *d_result{nullptr}; + HIP_CHECK( + hipHostMalloc(reinterpret_cast(&h_ptr), sizeof(TestType), hipHostMallocCoherent)); + *h_ptr = init_val; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&h_result), sizeof(TestType), + hipHostMallocCoherent)); + *h_result = init_val; + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&d_ptr), h_ptr, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&d_result), h_result, 0)); + constexpr TestType inc_val = 10; + + hipLaunchKernelGGL(atomicAdd_Kernel, dim3(1), dim3(1), 0, 0, d_ptr, d_result, + inc_val); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(*h_ptr == (init_val + inc_val)); + REQUIRE(*h_result == init_val); + HIP_CHECK(hipHostFree(h_ptr)); + HIP_CHECK(hipHostFree(h_result)); + } + } +} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc deleted file mode 100644 index 91b2fc433..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -unsafeatomicAdd on fineGrain memory with -mno-unsafe-fp-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - - -/*unsafeatomicAdd API for the fine grained memory variable - with -mno-unsafe-fp-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: unsafeatomicAdd API would return 0 and the 0/P is 5 - Generate the assembly file and check whether - atomic add instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithnoUnsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withnounsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withnounsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(result[0] == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc deleted file mode 100644 index 9999c3934..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -unsafeatomicAdd on fineGrain memory without atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - - -/*unsafeatomicAdd API for the fine grained memory variable - without atomics flag -Input: Ad{5}, INC_VAL{10} -Output: unsafeatomicAdd API would return 0 and the 0/P is 5 - Generate the assembly file and check whether - atomic add instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Coherentwithoutflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(result[0] == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc deleted file mode 100644 index cbe614995..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc +++ /dev/null @@ -1,102 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on FineGrainMemory -1. The following test scenario verifies -unsafeatomicAdd on fineGrain memory with -munsafe-fp-atomics flag -This testcase works only on gfx90a. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 - -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - - -/*unsafeatomicAdd API for the fine grained memory variable - with -m-unsafe-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: atomicAdd API would return 0 and the 0/P is 5 - Generate the assembly file and check whether - atomic add instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithUnsafeflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_Coherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(result[0] == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc deleted file mode 100644 index c12b30d9f..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -AtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -unsafeAtomicAdd on CoarseGrain memory with -mno-unsafe-fp-atomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - -/*unsafeAtomicAdd API for the coarse grained memory variable - with -mno-unsafe-fp-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: unsafeAtomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_add instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentnounsafeatomicsflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, - dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withnounsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withnounsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc deleted file mode 100644 index 21e071493..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -unsafeAtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -unsafeAtomicAdd on CoarseGrain memory without any unsafeatomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - -/*unsafeAtomicAdd API for the coarse grained memory variable - without any flag -Input: Ad{5}, INC_VAL{10} -Output: unsafeAtomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_cmpswap instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithoutflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, - dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withoutflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc deleted file mode 100644 index f8d6cf0b5..000000000 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* -unsafeAtomicAdd on CoarseGrainMemory -1. The following test scenario verifies -unsafeAtomicAdd on CoarseGrain memory with unsafeatomics flag -This testcase works only on gfx90a, gfx940, gfx941, gfx942. -*/ - -#include -#include -#include - -#define INC_VAL 10 -#define INITIAL_VAL 5 -template -static __global__ void AtomicCheck(T* Ad, T* result) { - T inc_val = 10; - *result = unsafeAtomicAdd(Ad, inc_val); -} - -/*unsafeAtomicAdd API for the coarse grained memory variable - with -munsafe-fp-atomics flag -Input: Ad{5}, INC_VAL{10} -Output: unsafeAtomicAdd API would work and the 0/P is INITIAL_VAL + INC_VAL - Generate the assembly file and check whether - global_atomic_add instruction is generated - or not */ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithunsafeatomicsflag", "", - float, double) { - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - if (prop.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h{nullptr}, *result{nullptr}; - TestType *A_d{nullptr}, *result_d{nullptr}; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocNonCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocNonCoherent)); - result[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - hipLaunchKernelGGL(AtomicCheck, - dim3(1), dim3(1), - 0, 0, A_d, - result_d); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipDeviceSynchronize()); - bool testResult; - REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); - REQUIRE(result[0] == INITIAL_VAL); - if ((std::is_same::value)) { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f32"); - REQUIRE(testResult == true); - } else { - testResult = HipTest::assemblyFile_Verification( - "unsafeAtomicAdd_NonCoherent_withunsafeflag-hip-amdgcn(.*)\\.s", - "global_atomic_add_f64"); - REQUIRE(testResult == true); - } - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_RTC.cc b/catch/unit/deviceLib/unsafeAtomicAdd_RTC.cc index 6a96dbe32..794e090fd 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_RTC.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_RTC.cc @@ -1,22 +1,22 @@ /* - Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ /* @@ -29,15 +29,15 @@ unsafeAtomicAdd Scenarios with hipRTC: 6. CoarseGrainMemory without -munsafe-fp-atomics flag */ -#include -#include -#include +#include +#include +#include #include #define INCREMENT_VAL 10 #define INITIAL_VAL 5 static constexpr auto fkernel{ -R"( + R"( extern "C" __global__ void AtomicCheck(float* Ad, float *result) { *result = unsafeAtomicAdd(Ad, 10); @@ -45,306 +45,36 @@ __global__ void AtomicCheck(float* Ad, float *result) { )"}; static constexpr auto dkernel{ -R"( + R"( extern "C" __global__ void AtomicCheck(double* Ad, double *result) { *result = unsafeAtomicAdd(Ad, 10); } )"}; -/* - Test unsafeAtomicAdd API for the fine grained memory variable - where kernel is compiled using hipRTC and with - compilation flag -mno-unsafe-fp-atomics. - Input: Ad{5}, INCREMENT_VAL{10} - Output: unsafeAtomicAdd API will not work and returns 0 so - the initial value will be intact. expected O/P is 5 -*/ -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentRTCnounsafeatomicflag", "", - float, double) { - int device = 0; - hipDeviceProp_t props; - HIP_CHECK(hipGetDeviceProperties(&props, device)); - std::string gfxName(props.gcnArchName); - - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - hiprtcProgram prog; - if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } else { - hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } - std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; - const char* options[] = {sarg.c_str(), "-mno-unsafe-fp-atomics"}; - hiprtcResult compileResult{hiprtcCompileProgram(prog, 2, options)}; - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - INFO(log); - } - - REQUIRE(compileResult == HIPRTC_SUCCESS); - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - - std::vector code(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - - hipModule_t module; - hipFunction_t f_kernel; - HIP_CHECK(hipModuleLoadData(&module, code.data())); - HIP_CHECK(hipModuleGetFunction(&f_kernel, module, "AtomicCheck")); - if (props.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h, *result; - TestType *A_d, *result_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - struct { - TestType* p; - TestType* result; - } args_f{A_d, result_d}; - auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK(hipModuleLaunchKernel(f_kernel, 1, 1, 1, 1, 1, 1, 0, - nullptr, nullptr, config_d)); - HIP_CHECK(hipDeviceSynchronize()); - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(*result == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - HIP_CHECK(hipModuleUnload(module)); - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} - - -/* - Test unsafeAtomicAdd API for the fine grained memory variable - where kernel is compiled using hipRTC and with - compilation flag -munsafe-fp-atomics. - Input: Ad{5}, INCREMENT_VAL{10} - Output: unsafeAtomicAdd API will not work and r`eturns 0 so - the initial value will be intact. expected O/P is 5 -*/ -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentRTCunsafeatomicflag", "", - float, double) { - int device = 0; - hipDeviceProp_t props; - HIP_CHECK(hipGetDeviceProperties(&props, device)); - std::string gfxName(props.gcnArchName); - - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - hiprtcProgram prog; - if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } else { - hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } - std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; - const char* options[] = {sarg.c_str(), "-munsafe-fp-atomics"}; - hiprtcResult compileResult{hiprtcCompileProgram(prog, 2, options)}; - - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - INFO(log); - } - - REQUIRE(compileResult == HIPRTC_SUCCESS); - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - - std::vector code(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - - hipModule_t module; - hipFunction_t f_kernel; - HIP_CHECK(hipModuleLoadData(&module, code.data())); - HIP_CHECK(hipModuleGetFunction(&f_kernel, module, "AtomicCheck")); - - if (props.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h, *result; - TestType *A_d, *result_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), - hipHostMallocCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - struct { - TestType* p; - TestType* result; - } args_f{A_d, result_d}; - auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK(hipModuleLaunchKernel(f_kernel, 1, 1, 1, 1, 1, 1, 0, - nullptr, nullptr, config_d)); - HIP_CHECK(hipDeviceSynchronize()); - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(*result == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - HIP_CHECK(hipModuleUnload(module)); - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} - -/* Test unsafeAtomicAdd API for the fine grained memory variable - where kernel is compiled using hipRTC and without compilation flag - Input: Ad{5}, INCREMENT_VAL{10} - Output: unsafeAtomicAdd API will not work and returns 0 so - the initial value will be intact. expected O/P is 5*/ - -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentRTCwithoutflag", "", - float, double) { - int device = 0; - hipDeviceProp_t props; - HIP_CHECK(hipGetDeviceProperties(&props, device)); - std::string gfxName(props.gcnArchName); - - if(CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - hiprtcProgram prog; - if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } else { - hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); - } - std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; - const char* options[] = {sarg.c_str()}; - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, options)}; - - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - INFO(log); - } - - REQUIRE(compileResult == HIPRTC_SUCCESS); - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - - std::vector code(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - - hipModule_t module; - hipFunction_t f_kernel; - HIP_CHECK(hipModuleLoadData(&module, code.data())); - HIP_CHECK(hipModuleGetFunction(&f_kernel, module, "AtomicCheck")); - - if (props.canMapHostMemory != 1) { - SUCCEED("Does not support HostPinned Memory"); - } else { - TestType *A_h, *result; - TestType *A_d, *result_d; - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(float), - hipHostMallocCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), sizeof(float), - hipHostMallocCoherent)); - A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); - struct { - TestType* p; - TestType* result; - } args_f{A_d, result_d}; - auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &size, HIP_LAUNCH_PARAM_END}; - HIP_CHECK(hipModuleLaunchKernel(f_kernel, 1, 1, 1, 1, 1, - 1, 0, nullptr, nullptr, config_d)); - HIP_CHECK(hipDeviceSynchronize()); - REQUIRE(A_h[0] == INITIAL_VAL); - REQUIRE(*result == 0); - HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(result)); - } - HIP_CHECK(hipModuleUnload(module)); - } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); - } -} - /* Test unsafeAtomicAdd API for the coarse grained memory variable where kernel is compiled using hipRTC and with compilation flag -mno-unsafe-fp-atomics Input: Ad{5}, INCREMENT_VAL{10} Output: Expected O/P is 15 */ -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCnounsafeatomicflag", "", - float, double) { +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCnounsafeatomicflag", "", float, double) { int device = 0; hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, device)); std::string gfxName(props.gcnArchName); if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - hiprtcProgram prog; + hiprtcProgram prog; if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + hiprtcCreateProgram(&prog, // prog + fkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } else { hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + dkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; const char* options[] = {sarg.c_str(), "-mno-unsafe-fp-atomics"}; @@ -377,18 +107,16 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCnounsafeatomicflag", "", TestType *A_d, *result_d; HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), hipHostMallocNonCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType))); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), sizeof(TestType))); A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), result, 0)); struct { TestType* p; TestType* result; } args_f{A_d, result_d}; auto size = sizeof(args_f); + void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; @@ -402,8 +130,10 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCnounsafeatomicflag", "", } HIP_CHECK(hipModuleUnload(module)); } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); + SUCCEED( + "Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" + "skipping the testcase for this GPU " + << device); } } @@ -413,25 +143,24 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCnounsafeatomicflag", "", Input: Ad{5}, INCREMENT_VAL{10} Output: Expected O/P is 15 */ -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCunsafeatomicflag", "", - float, double) { +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCunsafeatomicflag", "", float, double) { int device = 0; hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, device)); std::string gfxName(props.gcnArchName); - if(CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { - hiprtcProgram prog; + if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { + hiprtcProgram prog; if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + hiprtcCreateProgram(&prog, // prog + fkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } else { - hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + hiprtcCreateProgram(&prog, // prog + dkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; const char* options[] = {sarg.c_str(), "-munsafe-fp-atomics"}; @@ -465,18 +194,16 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCunsafeatomicflag", "", TestType *A_d, *result_d; HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), hipHostMallocNonCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType))); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), sizeof(TestType))); A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), result, 0)); struct { TestType* p; TestType* result; } args_f{A_d, result_d}; auto size = sizeof(args_f); + void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; @@ -490,8 +217,10 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCunsafeatomicflag", "", } HIP_CHECK(hipModuleUnload(module)); } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); + SUCCEED( + "Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" + "skipping the testcase for this GPU " + << device); } } @@ -501,8 +230,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTCunsafeatomicflag", "", Input: Ad{5}, INCREMENT_VAL{10} Output: O/P is 15 */ -TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTC", "", - float, double) { +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTC", "", float, double) { int device = 0; hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, device)); @@ -511,15 +239,15 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTC", "", if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { hiprtcProgram prog; if (std::is_same::value) { - hiprtcCreateProgram(&prog, // prog - fkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + hiprtcCreateProgram(&prog, // prog + fkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } else { - hiprtcCreateProgram(&prog, // prog - dkernel, // buffer - "kernel.cu", // name - 0, nullptr, nullptr); + hiprtcCreateProgram(&prog, // prog + dkernel, // buffer + "kernel.cu", // name + 0, nullptr, nullptr); } std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; @@ -553,21 +281,18 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTC", "", TestType *A_d, *result_d; HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), sizeof(TestType), hipHostMallocNonCoherent)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), - sizeof(TestType))); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&result), sizeof(TestType))); A_h[0] = INITIAL_VAL; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), - A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), - result, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&result_d), result, 0)); struct { TestType* p; TestType* result; } args_f{A_d, result_d}; auto size = sizeof(args_f); - void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, - HIP_LAUNCH_PARAM_BUFFER_SIZE, + void* config_d[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args_f, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(f_kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, config_d)); HIP_CHECK(hipDeviceSynchronize()); @@ -578,7 +303,9 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentRTC", "", } HIP_CHECK(hipModuleUnload(module)); } else { - SUCCEED("Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" - "skipping the testcase for this GPU " << device); + SUCCEED( + "Memory model feature is only supported for gfx90a, gfx940, gfx941, gfx942, Hence" + "skipping the testcase for this GPU " + << device); } } diff --git a/catch/unit/deviceLib/unsafeatomicAdd_withFlag.cc b/catch/unit/deviceLib/unsafeatomicAdd_withFlag.cc new file mode 100644 index 000000000..e39709f04 --- /dev/null +++ b/catch/unit/deviceLib/unsafeatomicAdd_withFlag.cc @@ -0,0 +1,66 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +template +static __global__ void unsafeAtomicAdd_Kernel(Type* ptr, Type* old_res, Type inc_val) { + *old_res = unsafeAtomicAdd(ptr, inc_val); +} + + +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Sanity", "", float, double) { + hipDeviceProp_t prop; + int device = 0; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + std::string gfxName(prop.gcnArchName); + if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, gfxName)) { + if (prop.canMapHostMemory != 1) { + SUCCEED("Does not support HostPinned Memory"); + } else { + constexpr TestType init_val = 5; + TestType *h_ptr{nullptr}, *h_result{nullptr}; + TestType *d_ptr{nullptr}, *d_result{nullptr}; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&h_ptr), sizeof(TestType), + hipHostMallocNonCoherent)); + *h_ptr = init_val; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&h_result), sizeof(TestType), + hipHostMallocNonCoherent)); + *h_result = init_val; + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&d_ptr), h_ptr, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&d_result), h_result, 0)); + constexpr TestType inc_val = 10; + hipLaunchKernelGGL(unsafeAtomicAdd_Kernel, dim3(1), dim3(1), 0, 0, d_ptr, d_result, + inc_val); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(*h_ptr == (init_val + inc_val)); + REQUIRE(*h_result == init_val); + HIP_CHECK(hipHostFree(h_ptr)); + HIP_CHECK(hipHostFree(h_result)); + } + } +}