From 1ee11e49c2ec3c741298f038f8705db138f5d45c Mon Sep 17 00:00:00 2001 From: Austin Date: Sun, 16 Nov 2025 03:49:16 +0800 Subject: [PATCH] [CodeGen] add a command-line option to force global merge I've found that in certain performance scenarios, particularly at -O2, this PR can significantly enhance the efficiency of loading global variables. --- llvm/lib/CodeGen/GlobalMerge.cpp | 10 ++++++++- llvm/test/CodeGen/ARM/force-global-merge.ll | 23 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/force-global-merge.ll diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index e58d7e344c28b..dc806aaf734fa 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -111,6 +111,12 @@ EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true)); +static cl::opt ForceEnableGlobalMerge( + "force-enable-global-merge", cl::Hidden, + cl::desc( + "Force enable the global merge, regardless of the optimization level"), + cl::init(false)); + static cl::opt GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), @@ -374,7 +380,9 @@ bool GlobalMergeImpl::doMerge(SmallVectorImpl &Globals, Function *ParentFn = I->getParent()->getParent(); // If we're only optimizing for size, ignore non-minsize functions. 
- if (Opt.SizeOnly && !ParentFn->hasMinSize()) + // And add a config to force global merge + if (!ForceEnableGlobalMerge && + (Opt.SizeOnly && !ParentFn->hasMinSize())) continue; size_t UGSIdx = GlobalUsesByFunction[ParentFn]; diff --git a/llvm/test/CodeGen/ARM/force-global-merge.ll b/llvm/test/CodeGen/ARM/force-global-merge.ll new file mode 100644 index 0000000000000..a7b791dc0a634 --- /dev/null +++ b/llvm/test/CodeGen/ARM/force-global-merge.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=arm-eabi -force-enable-global-merge %s -o - | FileCheck %s + +@g_value1 = dso_local local_unnamed_addr global i32 0, align 4 +@g_value2 = dso_local local_unnamed_addr global i32 0, align 4 +@g_value3 = dso_local local_unnamed_addr global i32 0, align 4 +@g_value4 = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local i32 @foo1() local_unnamed_addr { +entry: + %0 = load i32, ptr @g_value1, align 4 + %1 = load i32, ptr @g_value2, align 4 + %2 = load i32, ptr @g_value3, align 4 + %3 = load i32, ptr @g_value4, align 4 + %call = tail call i32 @foo(i32 %0, i32 %1, i32 %2, i32 %3) + ret i32 %call +} + +declare i32 @foo(i32, i32, i32, i32) + +; CHECK: ldr [[BASE:r[0-9]+]], .LCPI0_0 +; CHECK: ldm [[BASE]], {[[R0:r[0-9]+]], [[R1:r[0-9]+]], [[R2:r[0-9]+]], [[R3:r[0-9]+]]} +; CHECK: .LCPI0_0: +; CHECK-NEXT: .long .L_MergedGlobals