Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,8 @@ class AMDGPULowerModuleLDS {
? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
: EmptySet;

const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size();

for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
// Each iteration of this loop assigns exactly one global variable to
// exactly one of the implementation strategies.
Expand Down Expand Up @@ -647,7 +649,8 @@ class AMDGPULowerModuleLDS {
ModuleScopeVariables.insert(GV);
} else if (K.second.size() == 1) {
KernelAccessVariables.insert(GV);
} else if (set_is_subset(K.second, HybridModuleRootKernels)) {
} else if (K.second.size() == HybridModuleRootKernelsSize &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the same as ?

Suggested change
} else if (K.second.size() == HybridModuleRootKernelsSize &&
} else if (K.second == HybridModuleRootKernels) {

Copy link
Collaborator Author

@JonChesterfield JonChesterfield Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the semantic intent certainly, it's looking for equality between the sets. I'll check what DenseSet::operator== does

Yep, it's defined in the header as the code you'd expect, will drop the size variable and use operator== here. Thanks!

set_is_subset(K.second, HybridModuleRootKernels)) {
ModuleScopeVariables.insert(GV);
} else {
TableLookupVariables.insert(GV);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate - the precommit test has strategy module written at the top and shouldn't do, but it doesn't seem worth rerunning CI to fractionally simplify this diff


; Regression test for issue 160181
; One variable is chosen to be assigned at zero. Here, that's @both
Expand All @@ -22,12 +22,20 @@
;.
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.amdgcn.kernel.kern_one.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_one.lds.t poison, align 4, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.kernel.kern_two.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_two.lds.t poison, align 4, !absolute_symbol [[META1]]
; CHECK: @llvm.amdgcn.kernel.kern_block_direct_allocation.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_block_direct_allocation.lds.t poison, align 4, !absolute_symbol [[META1]]

;.
define void @func_one() {
; CHECK-LABEL: define {{[^@]+}}@func_one() {
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1:![0-9]+]]
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18:![0-9]+]]
; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2:![0-9]+]]
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[ONE1]], align 4
; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11:![0-9]+]]
; CHECK-NEXT: ret void
;
%val0 = load i32, ptr addrspace(3) @both
Expand All @@ -38,9 +46,10 @@ define void @func_one() {

define amdgpu_kernel void @kern_one() {
; CHECK-LABEL: define {{[^@]+}}@kern_one
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META16:![0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META24:![0-9]+]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_one.lds) ]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META17:![0-9]+]]
; CHECK-NEXT: call void @func_one()
; CHECK-NEXT: ret void
;
Expand All @@ -51,9 +60,13 @@ entry:

define void @func_two() {
; CHECK-LABEL: define {{[^@]+}}@func_two() {
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25:![0-9]+]]
; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: [[TWO1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TWO1]], align 4
; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
; CHECK-NEXT: ret void
;
%val0 = load i32, ptr addrspace(3) @both
Expand All @@ -64,9 +77,10 @@ define void @func_two() {

define amdgpu_kernel void @kern_two() {
; CHECK-LABEL: define {{[^@]+}}@kern_two
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-SAME: () #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META18:![0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META26:![0-9]+]], !noalias [[META27:![0-9]+]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_two.lds) ]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
; CHECK-NEXT: call void @func_two()
; CHECK-NEXT: ret void
;
Expand All @@ -82,11 +96,18 @@ entry:
; remains the best candidate for address zero allocation.
define void @func_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() {
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18]]
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[ONE1]], align 4
; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
; CHECK-NEXT: [[TWO2:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TWO2]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
; CHECK-NEXT: ret void
;
%val1 = load i32, ptr addrspace(3) @one
Expand All @@ -99,7 +120,8 @@ define void @func_block_direct_allocation() {

define amdgpu_kernel void @kern_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@kern_block_direct_allocation
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-SAME: () #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META21:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_block_direct_allocation.lds) ], !alias.scope [[META22:![0-9]+]], !noalias [[META25:![0-9]+]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @func_block_direct_allocation()
; CHECK-NEXT: call void @func_one()
Expand All @@ -112,35 +134,8 @@ define amdgpu_kernel void @kern_block_direct_allocation() {
ret void
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="16" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did all this metadata go? Is this just an update_test_checks bug?

Copy link
Collaborator Author

@JonChesterfield JonChesterfield Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metadata is still there, it just doesn't mean very much so I deleted it from the test

; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = distinct !{[[META3]]}
; CHECK: [[META4]] = distinct !{[[META4]], [[META3]]}
; CHECK: [[META5]] = distinct !{[[META5]], [[META3]]}
; CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META7]]}
; CHECK: [[META9]] = distinct !{[[META9]], [[META7]]}
; CHECK: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]]}
; CHECK: [[META11]] = distinct !{[[META11]]}
; CHECK: [[META12]] = distinct !{[[META12]], [[META11]]}
; CHECK: [[META13]] = distinct !{[[META13]], [[META11]]}
; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]}
; CHECK: [[META15]] = distinct !{[[META15]]}
; CHECK: [[META16]] = distinct !{[[META16]], [[META15]]}
; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]}
; CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META2]], [[META5]], [[META20:![0-9]+]], [[META6]], [[META9]], [[META21:![0-9]+]], [[META10]], [[META13]], [[META22:![0-9]+]], [[META14]], [[META17]]}
; CHECK: [[META19]] = distinct !{[[META19]], [[META3]]}
; CHECK: [[META20]] = distinct !{[[META20]], [[META7]]}
; CHECK: [[META21]] = distinct !{[[META21]], [[META11]]}
; CHECK: [[META22]] = distinct !{[[META22]], [[META15]]}
; CHECK: [[META23]] = !{[[META19]], [[META4]], [[META5]], [[META20]], [[META8]], [[META9]], [[META21]], [[META12]], [[META13]], [[META22]], [[META16]], [[META17]]}
; CHECK: [[META24]] = !{[[META10]], [[META12]], [[META13]], [[META14]], [[META16]], [[META17]]}
; CHECK: [[META25]] = !{[[META19]], [[META2]], [[META4]], [[META20]], [[META6]], [[META8]], [[META21]], [[META10]], [[META12]], [[META22]], [[META14]], [[META16]]}
; CHECK: [[META26]] = !{[[META22]]}
; CHECK: [[META27]] = !{[[META14]], [[META16]], [[META17]]}
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="12" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-lds-size"="16" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.