@@ -62,6 +62,7 @@ def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
6262// AMDGPU DAG Nodes
6363//
6464
65+ // Masked control flow nodes. Each node is chained (SDNPHasChain) and is
// typed by its own profile: AMDGPUIfOp, AMDGPUElseOp, or AMDGPULoopOp.
6566def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
6667def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
6768def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
@@ -114,6 +115,7 @@ def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
114115 [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue]
115116>;
116117
118+ // Pointer to the start of the shader's constant data.
117119def AMDGPUconstdata_ptr : SDNode<
118120 "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
119121 SDTCisVT<0, iPTR>]>
@@ -122,18 +124,21 @@ def AMDGPUconstdata_ptr : SDNode<
122124// The argument to this node is a dword address.
123125def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
124126
127+ // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
128+ // Denormals handled on some parts.
// Both nodes are unary FP operations (SDTFPUnaryOp) and declare no node
// properties.
125129def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
126130def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
131+
127132// out = a - floor(a), i.e. the fractional part of a.
128133def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
129134
130135// out = 1.0 / a (reciprocal).
131136def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
132137
133- // v_log_f32, which is log2
138+ // v_log_f32, which is log2, no denormal handling for f32.
134139def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
135140
136- // v_exp_f32, which is exp2
141+ // v_exp_f32, which is exp2, no denormal handling for f32.
137142def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;
138143
139144// out = 1.0 / sqrt(a)
@@ -146,11 +151,16 @@ def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
146151// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
147152def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
148153
154+ // Convert two f32 values into a single register holding two packed f16
155+ // with round to zero.
149156def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
// The remaining pack nodes reuse the pack type profiles: the PKNORM nodes take
// AMDGPUFPPackOp, the PK_I16_I32 / PK_U16_U32 nodes take AMDGPUIntPackOp.
// NOTE(review): presumably these pack two f32 (resp. i32/u32) inputs into two
// 16-bit halves, as the opcode names suggest -- confirm against the ISA docs.
150157def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
151158def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
152159def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
153160def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
161+
162+ // Same as the standard node, except the high bits of the resulting integer
163+ // are known 0.
// NOTE(review): "the standard node" is presumably ISD::FP_TO_FP16 -- confirm.
154164def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
155165
156166
@@ -225,14 +235,18 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
225235 SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
226236]>;
227237
238+ // SETCC variant that produces a full mask result: the compare yields one
239+ // result bit per item in the wavefront.
228240def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
229241
242+ // FP ops with input and output chain. Both nodes also accept optional input
// glue (SDNPOptInGlue) and produce output glue (SDNPOutGlue).
230243def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
231244 SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
232245
233246def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
234247 SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
235248
249+ // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
236250def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
237251 SDTIntToFPOp, []>;
238252def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
@@ -264,6 +278,8 @@ def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
264278// Denominator, src2 = Numerator).
265279def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
266280
281+ // For emitting ISD::FMAD when f32 denormals are enabled, because mac/mad is
282+ // then treated as an illegal operation.
// Ternary FP node (SDTFPTernaryOp) with no node properties.
267283def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
268284
269285def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
@@ -290,14 +306,23 @@ def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
290306 [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
291307 SDNPMemOperand]>;
292308
309+ // Extract range of bits with zero extension to 32-bits.
293310def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
311+
312+ // Extract range of bits with sign extension to 32-bits.
294313def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
314+
315+ // (src0 & src1) | (~src0 & src2)
// i.e. each set bit of src0 takes the corresponding bit from src1, and each
// clear bit of src0 takes it from src2.
295316def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
317+
318+ // Insert a range of bits into a 32-bit word.
// NOTE(review): this node takes only two operands (SDTIntBinOp); it looks like
// a bitfield-mask builder rather than an insert -- confirm against the ISA
// description of BFM and reword if so.
296319def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
297320
321+ // ctlz with -1 if input is zero.
// NOTE(review): this wording reads as the unsigned (FFBH_U32) behavior; confirm
// whether it also describes FFBH_I32 below.
298322def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
299323def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
300324
325+ // cttz with -1 if input is zero.
301326def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
302327
303328// Signed and unsigned 24-bit multiply. The highest 8-bits are ignored
@@ -394,16 +419,24 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
394419//===----------------------------------------------------------------------===//
395420// Call/Return DAG Nodes
396421//===----------------------------------------------------------------------===//
422+
423+ // A uniform kernel return that terminates the wavefront.
// Chained, with optional input glue.
397424def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
398425 [SDNPHasChain, SDNPOptInGlue]>;
426+
427+ // s_endpgm, but we may want to insert it in the middle of the block.
// Chained only (no glue), unlike AMDGPUendpgm.
399428def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
400429 [SDNPHasChain]>;
430+
431+ // "s_trap 2" equivalent on hardware that does not support it.
401432def AMDGPUsimulated_trap : SDNode<"AMDGPUISD::SIMULATED_TRAP", SDTNone,
402433 [SDNPHasChain]>;
403434
435+ // Return to a shader part's epilog code.
// Variadic (SDNPVariadic): the node takes a variable number of operands.
404436def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
405437 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
406438
439+ // Return with values from a non-entry function.
// Variadic (SDNPVariadic): the node takes a variable number of operands.
407440def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
408441 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
409442>;
0 commit comments