diff --git a/catch/unit/hip_specific/CMakeLists.txt b/catch/unit/hip_specific/CMakeLists.txt index 27de54f20..1ad4161b0 100644 --- a/catch/unit/hip_specific/CMakeLists.txt +++ b/catch/unit/hip_specific/CMakeLists.txt @@ -22,6 +22,7 @@ set(TEST_SRC hip_hc_8pk.cc + hip_check_VGPRs.cpp ) hip_add_exe_to_target(NAME HipSpecificTest TEST_SRC ${TEST_SRC} diff --git a/catch/unit/hip_specific/hip_check_VGPRs.cpp b/catch/unit/hip_specific/hip_check_VGPRs.cpp new file mode 100644 index 000000000..add00b7ab --- /dev/null +++ b/catch/unit/hip_specific/hip_check_VGPRs.cpp @@ -0,0 +1,102 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +#define NUM_THREADS 128 // Per block +#define NUM_BLOCKS 1 + +typedef int v1024i __attribute__((ext_vector_type(1024))); + +__launch_bounds__(NUM_THREADS, NUM_BLOCKS) +__global__ void test1024(v1024i *in, v1024i *out) { + out[threadIdx.x] = in[threadIdx.x]; +} + +void test_vgprs_value() { + size_t n = NUM_BLOCKS * NUM_THREADS; + size_t bufferSize = n * sizeof(v1024i); + + v1024i *dX, *dY; + HIP_CHECK(hipMalloc(&dX, bufferSize)); + HIP_CHECK(hipMalloc(&dY, bufferSize)); + + std::unique_ptr hX{new v1024i[n]}; + std::unique_ptr hY{new v1024i[n]}; + const int sizeofv1024i = sizeof(v1024i)/sizeof(int); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < sizeofv1024i; j++) { + hX[i][j] = (i + 1) * (j + 1); + hY[i][j] = 0; + } + } + HIP_CHECK(hipMemcpy(dX, hX.get(), bufferSize, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(dY, hY.get(), bufferSize, hipMemcpyHostToDevice)); + hipLaunchKernelGGL(test1024, NUM_BLOCKS, NUM_THREADS, 0, 0, dX, dY); + HIP_CHECK(hipMemcpy(hY.get(), dY, bufferSize, hipMemcpyDeviceToHost)); + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < sizeofv1024i; j++) { + if (hY[i][j] != (i + 1) * (j + 1)) { + std::cout << "Failed on " + << i << ", " << j << ":" << hX[i][j] << " with: " << (i + 1) * (j + 1) << std::endl; + REQUIRE(false); + } + } + } + HIP_CHECK(hipFree(dX)); + HIP_CHECK(hipFree(dY)); + REQUIRE(true); +} + +TEST_CASE("Unit_Device__hip_check_VGPRs") { + hipDeviceProp_t props; + hipFuncAttributes attr; + int maxAvailableVgprsPerThread = 0; + constexpr int device = 0; + HIP_CHECK(hipSetDevice(device)); + HIP_CHECK(hipGetDeviceProperties(&props, device)); + HIP_CHECK(hipDeviceGetAttribute(&maxAvailableVgprsPerThread, + hipDeviceAttributeMaxAvailableVgprsPerThread, device)); + if (maxAvailableVgprsPerThread > 1024) { + // The test should work on all current devices as of writing. + HipTest::HIP_SKIP_TEST("maxAvailableVgprsPerThread > 1024 isn't supported in this test!"); + } + HIP_CHECK(hipFuncGetAttributes(&attr, reinterpret_cast(test1024))); + std::cout << "Info: running on device #" << device << " " << props.name << ": arch = " + << props.gcnArchName << ", major = " << props.major << ", minor = " << props.minor + << ", warpSize = " << props.warpSize << ", numRegs of test1024() = " + << attr.numRegs << " DWORDs, MaxAvailableVgprsPerThread = " << maxAvailableVgprsPerThread + << " DWORDs\n"; + const int usedVGPRs_ = attr.numRegs; // Used VGPRs in DWORDS. + const int extraOffset = 20; // Empirical offset due to extra VGPRs consumed. + // Verify VGPRs usage + if (maxAvailableVgprsPerThread < usedVGPRs_ || + usedVGPRs_ < (maxAvailableVgprsPerThread - extraOffset)) { + REQUIRE(false); + } + // Verify VGPRs data + test_vgprs_value(); +} diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 6704a68d9..f49bde46b 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -134,6 +134,11 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << endl; cout << setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << endl; cout << setw(w1) << "gcnArchName: " << props.gcnArchName << endl; + int maxAvailableVgprsPerThread = 0; + checkHipErrors(hipDeviceGetAttribute(&maxAvailableVgprsPerThread, + hipDeviceAttributeMaxAvailableVgprsPerThread, deviceId)); + cout << setw(w1) << "maxAvailableVgprsPerThread: " + << maxAvailableVgprsPerThread << " DWORDs" << endl; #endif int deviceCnt; checkHipErrors(hipGetDeviceCount(&deviceCnt));