From 157081bb5e4a6a99b33ccc1dfb2e7af743778222 Mon Sep 17 00:00:00 2001 From: Bincy Mani Date: Mon, 22 Jun 2026 13:49:25 -0400 Subject: [PATCH 1/2] [rocrtst] AILIROCR-95 expanding the Memory_Alignment_Test The current rocrtstFunc.Memory_Alignment_Test only verifies whether the HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT queries return a value that is a power-of-two. Expanding the current Test; so that it also does memory Allocations for each memory pool,and makes sure that the pointer returned is valid. AILIROCR-95 --- .../suites/functional/memory_alignment.cc | 170 +++++++++++------- .../suites/functional/memory_alignment.h | 5 +- 2 files changed, 104 insertions(+), 71 deletions(-) diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc index 432d3e37ae4..a6ad7d8b89c 100755 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc @@ -64,28 +64,84 @@ static const uint32_t kNumThreads = 4096; typedef struct control_block { hsa_amd_memory_pool_t* pool; + MemoryAlignmentTest* test_obj; } cb_t; -// Callback function which will call upon when need -// to allocate memory from the pool in the thread. -static void CallbackVerifyPoolAlignmendFunc(void *data) { +// Function to test memory allocations and verify pointer alignment +// Performs all pool checks and allocations +void MemoryAlignmentTest::TestPoolAllocationsAlignment(hsa_amd_memory_pool_t pool) { hsa_status_t err; - cb_t *cb = reinterpret_cast(data); - rocrtst::pool_info_t info; - memset(&info, 0, sizeof(rocrtst::pool_info_t)); - err = rocrtst::AcquirePoolInfo(*(cb->pool), &info); + // Get pool information + rocrtst::pool_info_t pool_i; + err = rocrtst::AcquirePoolInfo(pool, &pool_i); ASSERT_EQ(err, HSA_STATUS_SUCCESS); - if (info.alloc_allowed) { - // Get the allocated alignment size - size_t alignment_size = info.alloc_alignment; - EXPECT_TRUE(alignment_size); - // Verifies the alignment attribute is a power of 2 - if (info.size != 0) { - EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1))))); + // Only proceed if allocation is allowed + if (!pool_i.alloc_allowed) { + return; + } + + // Get the allocated alignment size + size_t alignment_size = pool_i.alloc_alignment; + EXPECT_TRUE(alignment_size); + + // Verifies the alignment attribute is a power of 2 + if (pool_i.size != 0) { + EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1))))); + } + + // Test with multiple allocation sizes + const size_t test_sizes[] = {64, 256, 1024, 4096, 65536}; + const int num_test_sizes = sizeof(test_sizes) / sizeof(test_sizes[0]); + + for (int i = 0; i < num_test_sizes; ++i) { + void* ptr = nullptr; + size_t alloc_size = test_sizes[i]; + + // Only attempt allocation if the pool has enough space + if (pool_i.size > 0 && alloc_size > pool_i.size) { + continue; + } + + err = hsa_amd_memory_pool_allocate(pool, alloc_size, 0, &ptr); + if (err == HSA_STATUS_SUCCESS) { + uintptr_t addr = reinterpret_cast(ptr); + + if (verbosity() > 1) { + std::cout << " Allocated " << alloc_size << " bytes at 0x" + << std::hex << addr << std::dec; + } + + // Verify the returned pointer is aligned + EXPECT_EQ(addr % alignment_size, 0) + << "Allocation of size " << alloc_size + << " returned pointer 0x" << std::hex << addr << std::dec + << " which is not aligned to " << alignment_size; + + if (verbosity() > 1) { + if (addr % alignment_size == 0) { + std::cout << " - correctly aligned" << std::endl; + } else { + std::cout << " - MISALIGNED!" << std::endl; + } + } + + // Free the allocated memory + err = hsa_amd_memory_pool_free(ptr); + EXPECT_EQ(err, HSA_STATUS_SUCCESS); } } +} + +// Callback function which will call upon when need +// to allocate memory from the pool in the thread. +static void CallbackVerifyPoolAlignmendFunc(void *data) { + cb_t *cb = reinterpret_cast(data); + + // Call the member function through the test object pointer + // The member function does all pool checks and allocations internally + cb->test_obj->TestPoolAllocationsAlignment(*(cb->pool)); return; } @@ -99,8 +155,9 @@ MemoryAlignmentTest::MemoryAlignmentTest(void) : set_title("RocR Memory Alignment Test"); set_description(" This test verifies that each memory pool of the agent that" " has HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED alloc memory, It is " - " aligned as specified by the HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT" - " and has the alignment attribute is a power of 2."); + " aligned as specified by the HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT," + " the alignment attribute is a power of 2, and that actual memory allocations" + " return pointers aligned to the specified alignment value."); } MemoryAlignmentTest::~MemoryAlignmentTest(void) { @@ -194,56 +251,6 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { -void MemoryAlignmentTest::MemoryPoolAlignment(hsa_agent_t agent, - hsa_amd_memory_pool_t pool) { - hsa_status_t err; - - rocrtst::pool_info_t pool_i; - err = rocrtst::AcquirePoolInfo(pool, &pool_i); - ASSERT_EQ(HSA_STATUS_SUCCESS, err); - - if (verbosity() > 0) { - PrintAgentNameAndType(agent); - } - - if (pool_i.alloc_allowed) { - // Get the allocated alignment size - size_t alignment_size = pool_i.alloc_alignment; - EXPECT_TRUE(alignment_size); - // Verifies the alignment attribute is a power of 2 - if (pool_i.size != 0) { - EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1))))); - } - - // verifies that alignment attribute is a power of 2 in different threads - rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads); - // The control blocks are used to pass data to the threads - uint32_t kk; - cb_t cb[kNumThreads]; - for (kk = 0; kk < kNumThreads; kk++) { - cb[kk].pool = &pool; - rocrtst::TestGroupAdd(tg_concurrent, &CallbackVerifyPoolAlignmendFunc, &cb[kk], 1); - } - - // Create threads for each test - rocrtst::TestGroupThreadCreate(tg_concurrent); - - // Start to run tests - rocrtst::TestGroupStart(tg_concurrent); - - // Wait all tests finish - rocrtst::TestGroupWait(tg_concurrent); - - // Exit all tests - rocrtst::TestGroupExit(tg_concurrent); - - // Destroy thread group and cleanup resources - rocrtst::TestGroupDestroy(tg_concurrent); - } - return; -} - - void MemoryAlignmentTest::MemoryPoolAlignment(void) { hsa_status_t err; std::vector> agent_pools; @@ -260,8 +267,37 @@ void MemoryAlignmentTest::MemoryPoolAlignment(void) { for (auto p : a->pools) { if (verbosity() > 0) { std::cout << " Pool " << pool_idx++ << ":" << std::endl; + PrintAgentNameAndType(a->agent); } - MemoryPoolAlignment(a->agent, p); + + // Test actual memory allocations in the main thread + TestPoolAllocationsAlignment(p); + + // Verify alignment in different threads (concurrent stress test) + rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads); + // The control blocks are used to pass data to the threads + uint32_t kk; + cb_t cb[kNumThreads]; + for (kk = 0; kk < kNumThreads; kk++) { + cb[kk].pool = &p; + cb[kk].test_obj = this; + rocrtst::TestGroupAdd(tg_concurrent, &CallbackVerifyPoolAlignmendFunc, &cb[kk], 1); + } + + // Create threads for each test + rocrtst::TestGroupThreadCreate(tg_concurrent); + + // Start to run tests + rocrtst::TestGroupStart(tg_concurrent); + + // Wait all tests finish + rocrtst::TestGroupWait(tg_concurrent); + + // Exit all tests + rocrtst::TestGroupExit(tg_concurrent); + + // Destroy thread group and cleanup resources + rocrtst::TestGroupDestroy(tg_concurrent); } } diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.h b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.h index d5dad6d2b07..1ea2ba492be 100755 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.h +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.h @@ -75,10 +75,7 @@ class MemoryAlignmentTest : public TestBase { void MemoryPoolAlignment(void); - - private: - void MemoryPoolAlignment(hsa_agent_t agent, - hsa_amd_memory_pool_t pool); + void TestPoolAllocationsAlignment(hsa_amd_memory_pool_t pool); }; #endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_ From 041a409b84667ad1a46cfff7ab3d8e467659cd0e Mon Sep 17 00:00:00 2001 From: Bincy Mani Date: Wed, 24 Jun 2026 10:36:52 -0400 Subject: [PATCH 2/2] [RoCR]ROCM-21691 Update InterruptSignal hsa_signal_cas_* implementation Currently the implementations in InterruptSignal (e.g. InterruptSignal::CasAcqRel) call SetEvent() unconditionally resulting in spurious wakes for any waiting agent in cases where the signal does not have the expected value we check to see if the observed value is the same as the expected value and only then SetEvent() is invoked. ROCM_-21691 --- .../core/runtime/interrupt_signal.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp index f35b98edc01..ce9350a907a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp @@ -340,7 +340,9 @@ hsa_signal_value_t InterruptSignal::CasRelaxed(hsa_signal_value_t expected, hsa_signal_value_t ret = hsa_signal_value_t( atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), std::memory_order_relaxed)); - SetEvent(); + // Only wake waiters if CAS succeeded (signal value actually changed) + if (ret == expected) + SetEvent(); return ret; } @@ -349,7 +351,9 @@ hsa_signal_value_t InterruptSignal::CasAcquire(hsa_signal_value_t expected, hsa_signal_value_t ret = hsa_signal_value_t( atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), std::memory_order_acquire)); - SetEvent(); + // Only wake waiters if CAS succeeded (signal value actually changed) + if (ret == expected) + SetEvent(); return ret; } @@ -358,7 +362,9 @@ hsa_signal_value_t InterruptSignal::CasRelease(hsa_signal_value_t expected, hsa_signal_value_t ret = hsa_signal_value_t( atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), std::memory_order_release)); - SetEvent(); + // Only wake waiters if CAS succeeded (signal value actually changed) + if (ret == expected) + SetEvent(); return ret; } @@ -367,7 +373,9 @@ hsa_signal_value_t InterruptSignal::CasAcqRel(hsa_signal_value_t expected, hsa_signal_value_t ret = hsa_signal_value_t( atomic::Cas(&signal_.value, int64_t(value), int64_t(expected), std::memory_order_acq_rel)); - SetEvent(); + // Only wake waiters if CAS succeeded (signal value actually changed) + if (ret == expected) + SetEvent(); return ret; } /// @brief Notify driver of signal value change if necessary.