Skip to content

Commit 152e115

Browse files
weiyu-chen authored and sys_zuul committed
Avoid FP64 emulation related code if kernel doesn't use FP64 at all.
Change-Id: Ia68f9ff0d027d4f5b70099f3a41efea92a018f2a
1 parent 158df5f commit 152e115

File tree

4 files changed

+25
-1
lines changed

4 files changed

+25
-1
lines changed

IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ CheckInstrTypes::CheckInstrTypes(IGC::SInstrTypes* instrList) : FunctionPass(ID)
9797
instrList->numInsts = 0;
9898
instrList->sampleCmpToDiscardOptimizationPossible = false;
9999
instrList->sampleCmpToDiscardOptimizationSlot = 0;
100+
instrList->hasFP64Inst = false;
100101
}
101102

102103
void CheckInstrTypes::SetLoopFlags(Function& F)
@@ -126,6 +127,22 @@ bool CheckInstrTypes::runOnFunction(Function& F)
126127
// check if module has debug info
127128
g_InstrTypes->hasDebugInfo = F.getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr;
128129

130+
if (F.getType()->isDoubleTy())
131+
{
132+
g_InstrTypes->hasFP64Inst = true;
133+
}
134+
else
135+
{
136+
for (auto&& AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
137+
{
138+
if ((*AI).getType()->isDoubleTy())
139+
{
140+
g_InstrTypes->hasFP64Inst = true;
141+
break;
142+
}
143+
}
144+
}
145+
129146
visit(F);
130147
SetLoopFlags(F);
131148
return false;
@@ -148,6 +165,11 @@ void CheckInstrTypes::visitInstruction(llvm::Instruction& I)
148165
{
149166
g_InstrTypes->hasGenericAddressSpacePointers = true;
150167
}
168+
169+
if (I.getType()->isDoubleTy())
170+
{
171+
g_InstrTypes->hasFP64Inst = true;
172+
}
151173
}
152174

153175
void CheckInstrTypes::visitCallInst(CallInst& C)

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,7 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
402402

403403
bool needDPEmu = (IGC_IS_FLAG_ENABLED(ForceDPEmulation) ||
404404
(ctx.m_DriverInfo.NeedFP64(ctx.platform.getPlatformInfo().eProductFamily) && ctx.platform.hasNoFP64Inst()));
405+
needDPEmu &= ctx.m_instrTypes.hasFP64Inst;
405406
uint32_t theEmuKind = (needDPEmu ? EmuKind::EMU_DP : 0);
406407
theEmuKind |= (ctx.m_DriverInfo.NeedI64BitDivRem() ? EmuKind::EMU_I64DIVREM : 0);
407408
theEmuKind |=

IGC/Compiler/CodeGenPublic.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,7 @@ namespace IGC
268268
unsigned int numPsInputs;
269269
bool sampleCmpToDiscardOptimizationPossible;
270270
unsigned int sampleCmpToDiscardOptimizationSlot;
271+
bool hasFP64Inst;
271272
};
272273

273274
struct SSimplePushInfo

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryUsageAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ bool PrivateMemoryUsageAnalysis::runOnFunction(Function& F)
140140
{
141141
CodeGenContext* pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
142142
// This is the condition that double emulation is used.
143-
if ((IGC_IS_FLAG_ENABLED(ForceDPEmulation) ||
143+
if (pCtx->m_instrTypes.hasFP64Inst && (IGC_IS_FLAG_ENABLED(ForceDPEmulation) ||
144144
(pCtx->m_DriverInfo.NeedFP64(pCtx->platform.getPlatformInfo().eProductFamily) && pCtx->platform.hasNoFP64Inst())))
145145
{
146146
m_hasPrivateMem = true;

0 commit comments

Comments
 (0)