Skip to content

Commit aab5e41

Browse files
committed
[AMDGPU] Add a target option to disable aggressive FMA fusion
1 parent 0898348 commit aab5e41

File tree

5 files changed

+23
-2
lines changed

5 files changed

+23
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,13 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
12381238
// Subtarget Features (options and debugging)
12391239
//===------------------------------------------------------------===//
12401240

1241+
// Subtarget option "disable-aggressive-fma-fusion". Following the
// SubtargetFeature<name, field, value, description> layout, enabling it sets
// the DisableAggressiveFMAFusion subtarget field to "true".
// NOTE(review): the help text reads as if every fmul + fadd/fsub -> fma fold
// is turned off, while the feature name only says "aggressive" - confirm the
// wording matches the intended scope.
def FeatureDisableAggressiveFMAFusion : SubtargetFeature<
1242+
"disable-aggressive-fma-fusion",
1243+
"DisableAggressiveFMAFusion",
1244+
"true",
1245+
"Do not fold fmul and fadd/fsub into fma."
1246+
>;
1247+
12411248
// Ugly hack to accommodate assembling modules with mixed
12421249
// wavesizes. Ideally we would have a mapping symbol in assembly which
12431250
// would keep track of which sections of code should be treated as

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ class AMDGPUSubtarget {
7676
bool EnablePromoteAlloca = false;
7777
bool HasTrigReducedRange = false;
7878
bool FastFMAF32 = false;
79+
bool DisableAggressiveFMAFusion = false;
7980
unsigned EUsPerCU = 4;
8081
unsigned MaxWavesPerEU = 10;
8182
unsigned LocalMemorySize = 0;
@@ -303,6 +304,10 @@ class AMDGPUSubtarget {
303304
return FastFMAF32;
304305
}
305306

307+
/// Returns the DisableAggressiveFMAFusion flag of this subtarget. The member
/// is initialized to false in the class, so this reports false unless the
/// flag has been set elsewhere.
bool hasDisableAggressiveFMAFusion() const {
308+
return DisableAggressiveFMAFusion;
309+
}
310+
306311
bool isPromoteAllocaEnabled() const {
307312
return EnablePromoteAlloca;
308313
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
288288
AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
289289
AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
290290
AMDGPU::FeatureUnalignedAccessMode,
291+
AMDGPU::FeatureDisableAggressiveFMAFusion,
291292

292293
AMDGPU::FeatureAutoWaitcntBeforeBarrier,
293294

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
8585
FullFS += "-wavefrontsize64,";
8686
}
8787

88+
// GFX9 enables fast-fmaf by default; the entry must end with ',' because FS is appended right after.
89+
if (GPU.contains_insensitive("gfx9") && !FS.contains_insensitive("fast-fmaf"))
90+
FullFS += "+fast-fmaf,";
91+
8892
FullFS += FS;
8993

9094
ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6502,10 +6502,14 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
65026502
// When fma is quarter rate, for f64 where add / sub are at best half rate,
65036503
// most of these combines appear to be cycle neutral but save on instruction
65046504
// count / code size.
6505-
return true;
6505+
return Subtarget->hasFastFMAF32() &&
6506+
!Subtarget->hasDisableAggressiveFMAFusion();
65066507
}
65076508

6508-
bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
6509+
// LLT overload. Reports aggressive FMA fusion as enabled only when the
// subtarget's hasFastFMAF32() is true and hasDisableAggressiveFMAFusion() is
// false.
// NOTE(review): Ty is not consulted, so the f32 FMA speed flag also decides
// the answer for non-f32 types - confirm this is intended.
bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
6510+
return Subtarget->hasFastFMAF32() &&
6511+
!Subtarget->hasDisableAggressiveFMAFusion();
6512+
}
65096513

65106514
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
65116515
EVT VT) const {

0 commit comments

Comments
 (0)