Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,13 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Opt-out subtarget feature, spelled "disable-aggressive-fma-fusion". The
// record ties that feature string to the subtarget's
// DisableAggressiveFMAFusion field with the value "true"; the last string is
// the human-readable description of the feature.
def FeatureDisableAggressiveFMAFusion : SubtargetFeature<
"disable-aggressive-fma-fusion",
"DisableAggressiveFMAFusion",
"true",
"Do not fold fmul and fadd/fsub into fma."
>;

// Ugly hack to accommodate assembling modules with mixed
// wavesizes. Ideally we would have a mapping symbol in assembly which
// would keep track of which sections of code should be treated as
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class AMDGPUSubtarget {
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
bool FastFMAF32 = false;
bool DisableAggressiveFMAFusion = false;
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
Expand Down Expand Up @@ -303,6 +304,10 @@ class AMDGPUSubtarget {
return FastFMAF32;
}

/// Returns the value of the DisableAggressiveFMAFusion flag (false by
/// default). Callers that decide whether to fuse fmul with fadd/fsub into
/// fma negate this value.
bool hasDisableAggressiveFMAFusion() const {
return DisableAggressiveFMAFusion;
}

/// Returns the value of the EnablePromoteAlloca flag (false by default).
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
AMDGPU::FeatureUnalignedAccessMode,
AMDGPU::FeatureDisableAggressiveFMAFusion,

AMDGPU::FeatureAutoWaitcntBeforeBarrier,

Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6502,10 +6502,12 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// When fma is quarter rate, for f64 where add / sub are at best half rate,
// most of these combines appear to be cycle neutral but save on instruction
// count / code size.
return true;
return !Subtarget->hasDisableAggressiveFMAFusion();
}

bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
/// LLT overload: reports aggressive FMA fusion as enabled unless the
/// subtarget's DisableAggressiveFMAFusion flag is set. The type argument is
/// not consulted.
bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
return !Subtarget->hasDisableAggressiveFMAFusion();
}

EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
Expand Down