Skip to content

Commit db5321b

Browse files
committed
[CodeGen] Add a command-line option to force global merge
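I've found that in certain performance-sensitive scenarios, particularly at -O2, this change can significantly improve the efficiency of loading global variables. It adds a hidden `-force-enable-global-merge` option that makes the GlobalMerge pass consider uses in every function, instead of skipping non-minsize functions when it is running in size-only mode.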
1 parent 6b4fef0 commit db5321b

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ EnableGlobalMerge("enable-global-merge", cl::Hidden,
111111
cl::desc("Enable the global merge pass"),
112112
cl::init(true));
113113

114+
static cl::opt<bool>ForceEnableGlobalMerge(
115+
"force-enable-global-merge", cl::Hidden,
116+
cl::desc(
117+
"Force enable the global merge, regardless of the optimization level"),
118+
cl::init(false));
119+
114120
static cl::opt<unsigned>
115121
GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
116122
cl::desc("Set maximum offset for global merge pass"),
@@ -374,7 +380,9 @@ bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
374380
Function *ParentFn = I->getParent()->getParent();
375381

376382
// If we're only optimizing for size, ignore non-minsize functions.
377-
if (Opt.SizeOnly && !ParentFn->hasMinSize())
383+
// The -force-enable-global-merge option overrides this restriction.
384+
if (!ForceEnableGlobalMerge
385+
&& (Opt.SizeOnly && !ParentFn->hasMinSize()))
378386
continue;
379387

380388
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; RUN: llc -mtriple=arm-eabi -force-enable-global-merge %s -o - | FileCheck %s
2+
3+
@g_value1 = dso_local local_unnamed_addr global i32 0, align 4
4+
@g_value2 = dso_local local_unnamed_addr global i32 0, align 4
5+
@g_value3 = dso_local local_unnamed_addr global i32 0, align 4
6+
@g_value4 = dso_local local_unnamed_addr global i32 0, align 4
7+
8+
define dso_local i32 @foo1() local_unnamed_addr {
9+
entry:
10+
%0 = load i32, ptr @g_value1, align 4
11+
%1 = load i32, ptr @g_value2, align 4
12+
%2 = load i32, ptr @g_value3, align 4
13+
%3 = load i32, ptr @g_value4, align 4
14+
%call = tail call i32 @foo(i32 %0, i32 %1, i32 %2, i32 %3)
15+
ret i32 %call
16+
}
17+
18+
declare i32 @foo(i32, i32, i32, i32)
19+
20+
; CHECK: ldr [[BASE:r[0-9]+]], .LCPI0_0
21+
; CHECK: ldm [[BASE]], {[[R0:r[0-9]+]], [[R1:r[0-9]+]], [[R2:r[0-9]+]], [[R3:r[0-9]+]]}
22+
; CHECK: .LCPI0_0:
23+
; CHECK-NEXT: .long .L_MergedGlobals

0 commit comments

Comments
 (0)