Skip to content

Commit 70827fb

Browse files
committed
Move comments
1 parent 70c78d9 commit 70827fb

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
6262
// AMDGPU DAG Nodes
6363
//
6464

65+
// Masked control flow nodes.
6566
def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
6667
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
6768
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
@@ -114,6 +115,7 @@ def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
114115
[SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue]
115116
>;
116117

118+
// Pointer to the start of the shader's constant data.
117119
def AMDGPUconstdata_ptr : SDNode<
118120
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
119121
SDTCisVT<0, iPTR>]>
@@ -122,18 +124,21 @@ def AMDGPUconstdata_ptr : SDNode<
122124
// The argument to this node is a dword address.
123125
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
124126

127+
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
128+
// Denormals handled on some parts.
125129
def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
126130
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
131+
127132
// out = a - floor(a)
128133
def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
129134

130135
// out = 1.0 / a
131136
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
132137

133-
// v_log_f32, which is log2
138+
// v_log_f32, which is log2, no denormal handling for f32.
134139
def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
135140

136-
// v_exp_f32, which is exp2
141+
// v_exp_f32, which is exp2, no denormal handling for f32.
137142
def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;
138143

139144
// out = 1.0 / sqrt(a)
@@ -146,11 +151,16 @@ def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
146151
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
147152
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
148153

154+
// Convert two f32 values into a single register holding two packed f16
155+
// with round to zero.
149156
def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
150157
def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
151158
def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
152159
def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
153160
def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
161+
162+
// Same as the standard node, except the high bits of the resulting integer
163+
// are known 0.
154164
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
155165

156166

@@ -225,14 +235,18 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
225235
SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
226236
]>;
227237

238+
// This is SETCC with the full mask result which is used for a compare with a
239+
// result bit per item in the wavefront.
228240
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
229241

242+
// FP ops with input and output chain.
230243
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
231244
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
232245

233246
def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
234247
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
235248

249+
// These cvt_f32_ubyte* nodes need to remain consecutive and in order.
236250
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
237251
SDTIntToFPOp, []>;
238252
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
@@ -264,6 +278,8 @@ def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
264278
// Denominator, src2 = Numerator).
265279
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
266280

281+
// For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
282+
// treated as an illegal operation.
267283
def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
268284

269285
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
@@ -290,14 +306,23 @@ def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
290306
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
291307
SDNPMemOperand]>;
292308

309+
// Extract a range of bits with zero extension to 32 bits.
293310
def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
311+
312+
// Extract a range of bits with sign extension to 32 bits.
294313
def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
314+
315+
// (src0 & src1) | (~src0 & src2)
295316
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
317+
318+
// Insert a range of bits into a 32-bit word.
296319
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
297320

321+
// ctlz, with a result of -1 if the input is zero.
298322
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
299323
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
300324

325+
// cttz, with a result of -1 if the input is zero.
301326
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
302327

303328
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
@@ -394,16 +419,24 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
394419
//===----------------------------------------------------------------------===//
395420
// Call/Return DAG Nodes
396421
//===----------------------------------------------------------------------===//
422+
423+
// A uniform kernel return that terminates the wavefront.
397424
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
398425
[SDNPHasChain, SDNPOptInGlue]>;
426+
427+
// s_endpgm, but we may want to insert it in the middle of the block.
399428
def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
400429
[SDNPHasChain]>;
430+
431+
// "s_trap 2" equivalent on hardware that does not support it.
401432
def AMDGPUsimulated_trap : SDNode<"AMDGPUISD::SIMULATED_TRAP", SDTNone,
402433
[SDNPHasChain]>;
403434

435+
// Return to a shader part's epilog code.
404436
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
405437
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
406438

439+
// Return with values from a non-entry function.
407440
def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
408441
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
409442
>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
5050
// SI DAG Nodes
5151
//===----------------------------------------------------------------------===//
5252

53+
// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
54+
// modifier behavior with dx10_enable.
5355
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
5456

5557
def SDTSBufferLoad : SDTypeProfile<1, 3,

0 commit comments

Comments
 (0)