From 5427caffbd2efb786421aa77ae73d02157d5753e Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Mon, 24 Mar 2025 17:28:09 -0700 Subject: [PATCH 1/5] optimize ConditionalSelect with const zero --- src/coreclr/jit/lowerxarch.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 743218ecede33b..921b3401a8dfe6 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3414,6 +3414,37 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) blendVariableId = NI_EVEX_BlendVariableMask; op1 = maskNode; } + else if (op2->IsVectorZero() || op3->IsVectorZero()) + { + // If either of the value operands is const zero, we can optimize down to AND or AND_NOT. + GenTree* binOp = nullptr; + + if (op3->IsVectorZero()) + { + binOp = comp->gtNewSimdBinOpNode(GT_AND, simdType, op1, op2, simdBaseJitType, simdSize); + BlockRange().Remove(op3); + } + else + { + binOp = comp->gtNewSimdBinOpNode(GT_AND_NOT, simdType, op3, op1, simdBaseJitType, simdSize); + BlockRange().Remove(op2); + } + + BlockRange().InsertAfter(node, binOp); + + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(binOp); + } + else + { + binOp->SetUnusedValue(); + } + + BlockRange().Remove(node); + return LowerNode(binOp); + } else if (simdSize == 32) { // For Vector256 (simdSize == 32), BlendVariable for floats/doubles From 6dae5989359c724cc56f4616a439e8fc1141db08 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 27 Mar 2025 16:29:10 -0700 Subject: [PATCH 2/5] remove superfluous LowerNode calls --- src/coreclr/jit/lowerxarch.cpp | 52 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 10fe7e99d51467..895c931785a14c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1690,7 +1690,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); } - return LowerNode(node); + return node; } } @@ -1929,7 +1929,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->Op(2) = op2; node->ChangeHWIntrinsicId(testIntrinsicId); - return LowerNode(node); + return node; } break; } @@ -2039,7 +2039,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } node->ChangeHWIntrinsicId(NI_Vector128_GetElement); - return LowerNode(node); + return node; } break; } @@ -2706,7 +2706,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm assert(simdSize == 16); LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); } - return LowerNode(node); + return node; } } @@ -3343,7 +3343,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm node->gtType = TYP_VOID; node->ClearUnusedValue(); - return LowerNode(node); + return node; } //---------------------------------------------------------------------------------------------- @@ -3443,7 +3443,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return LowerNode(binOp); + return binOp; } else if (simdSize == 32) { @@ -3472,7 +3472,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) { // result = BlendVariable op3 (right) op2 (left) op1 (mask) node->ResetHWIntrinsicId(blendVariableId, comp, op3, op2, op1); - return LowerNode(node); + return node; } } @@ -3496,7 +3496,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); } - return LowerNode(node); + return node; } // We cannot optimize, so produce unoptimized instructions @@ -3552,7 +3552,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return LowerNode(tmp4); + return tmp4; } //---------------------------------------------------------------------------------------------- @@ -4075,7 +4075,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(node); - return LowerNode(vecCon); + return vecCon; } else if (argCnt == 1) { @@ -4184,7 +4184,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) unreached(); } } - return LowerNode(node); + return node; } // We have the following (where simd is simd16 or simd32): @@ -4209,7 +4209,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp1); node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1); - return LowerNode(node); + return node; } assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -4254,7 +4254,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, tmp3, tmp1); LowerNode(tmp3); - return LowerNode(node); + return node; } assert(intrinsicId == NI_Vector128_Create); @@ -4283,7 +4283,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // return Avx2.BroadcastScalarToVector128(tmp1); node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1); - return LowerNode(node); + return node; } switch (simdBaseType) @@ -4522,7 +4522,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } } - return LowerNode(node); + return node; } if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create) @@ -4588,7 +4588,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(lo); LowerNode(hi); - return LowerNode(node); + return node; } assert(intrinsicId == NI_Vector128_Create); @@ -4985,7 +4985,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } } - return LowerNode(node); + return node; } //---------------------------------------------------------------------------------------------- @@ -5029,7 +5029,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) } node->ResetHWIntrinsicId(intrinsicId, op1); - return LowerNode(node); + return node; } uint32_t count = simdSize / genTypeSize(simdBaseType); @@ -5196,7 +5196,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().Remove(node); assert(newAddr->gtNext == newIndir); - return LowerNode(newAddr); + return newAddr; } if (!op2->OperIsConst()) @@ -5248,7 +5248,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().Remove(op2); BlockRange().Remove(node); - return LowerNode(lclFld); + return lclFld; } } @@ -5409,7 +5409,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) node->SetSimdSize(16); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, op1); - return LowerNode(node); + return node; } else { @@ -6075,7 +6075,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return LowerNode(tmp1); + return tmp1; } case TYP_DOUBLE: @@ -6169,7 +6169,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); } - return LowerNode(node); + return node; } horizontalAdd = NI_SSE3_HorizontalAdd; @@ -6222,7 +6222,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); } - return LowerNode(node); + return node; } horizontalAdd = NI_SSE3_HorizontalAdd; @@ -6663,7 +6663,7 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) BlockRange().Remove(op1); BlockRange().Remove(node); - return LowerNode(newIndir); + return newIndir; } if (op1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -6693,7 +6693,7 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) BlockRange().Remove(op1); BlockRange().Remove(node); - return LowerNode(lclFld); + return lclFld; } } } From a0abeb0fbeb1803b10013715cfead9d0e69a82ef Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 27 Mar 2025 17:06:41 -0700 Subject: [PATCH 3/5] Revert "remove superfluous LowerNode calls" This reverts commit 6dae5989359c724cc56f4616a439e8fc1141db08. --- src/coreclr/jit/lowerxarch.cpp | 52 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 895c931785a14c..10fe7e99d51467 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1690,7 +1690,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); } - return node; + return LowerNode(node); } } @@ -1929,7 +1929,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->Op(2) = op2; node->ChangeHWIntrinsicId(testIntrinsicId); - return node; + return LowerNode(node); } break; } @@ -2039,7 +2039,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } node->ChangeHWIntrinsicId(NI_Vector128_GetElement); - return node; + return LowerNode(node); } break; } @@ -2706,7 +2706,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm assert(simdSize == 16); LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); } - return node; + return LowerNode(node); } } @@ -3343,7 +3343,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm node->gtType = TYP_VOID; node->ClearUnusedValue(); - return node; + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -3443,7 +3443,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return binOp; + return LowerNode(binOp); } else if (simdSize == 32) { @@ -3472,7 +3472,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) { // result = BlendVariable op3 (right) op2 (left) op1 (mask) node->ResetHWIntrinsicId(blendVariableId, comp, op3, op2, op1); - return node; + return LowerNode(node); } } @@ -3496,7 +3496,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId)); node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType); } - return node; + return LowerNode(node); } // We cannot optimize, so produce unoptimized instructions @@ -3552,7 +3552,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return tmp4; + return LowerNode(tmp4); } //---------------------------------------------------------------------------------------------- @@ -4075,7 +4075,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(node); - return vecCon; + return LowerNode(vecCon); } else if (argCnt == 1) { @@ -4184,7 +4184,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) unreached(); } } - return node; + return LowerNode(node); } // We have the following (where simd is simd16 or simd32): @@ -4209,7 +4209,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp1); node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1); - return node; + return LowerNode(node); } assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -4254,7 +4254,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, tmp3, tmp1); LowerNode(tmp3); - return node; + return LowerNode(node); } assert(intrinsicId == NI_Vector128_Create); @@ -4283,7 +4283,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // return Avx2.BroadcastScalarToVector128(tmp1); node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1); - return node; + return LowerNode(node); } switch (simdBaseType) @@ -4522,7 +4522,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } } - return node; + return LowerNode(node); } if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create) @@ -4588,7 +4588,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(lo); LowerNode(hi); - return node; + return LowerNode(node); } assert(intrinsicId == NI_Vector128_Create); @@ -4985,7 +4985,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } } - return node; + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -5029,7 +5029,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) } node->ResetHWIntrinsicId(intrinsicId, op1); - return node; + return LowerNode(node); } uint32_t count = simdSize / genTypeSize(simdBaseType); @@ -5196,7 +5196,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().Remove(node); assert(newAddr->gtNext == newIndir); - return newAddr; + return LowerNode(newAddr); } if (!op2->OperIsConst()) @@ -5248,7 +5248,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().Remove(op2); BlockRange().Remove(node); - return lclFld; + return LowerNode(lclFld); } } @@ -5409,7 +5409,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) node->SetSimdSize(16); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, op1); - return node; + return LowerNode(node); } else { @@ -6075,7 +6075,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return tmp1; + return LowerNode(tmp1); } case TYP_DOUBLE: @@ -6169,7 +6169,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); } - return node; + return LowerNode(node); } horizontalAdd = NI_SSE3_HorizontalAdd; @@ -6222,7 +6222,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); } - return node; + return LowerNode(node); } horizontalAdd = NI_SSE3_HorizontalAdd; @@ -6663,7 +6663,7 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) BlockRange().Remove(op1); BlockRange().Remove(node); - return newIndir; + return LowerNode(newIndir); } if (op1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -6693,7 +6693,7 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) BlockRange().Remove(op1); BlockRange().Remove(node); - return lclFld; + return LowerNode(lclFld); } } } From 17886e7d35ecf40d1090d85263f1028bf63e82e1 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 27 Mar 2025 17:09:13 -0700 Subject: [PATCH 4/5] just remove one LowerNode call --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 10fe7e99d51467..58e54655be8269 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3443,7 +3443,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return LowerNode(binOp); + return binOp; } else if (simdSize == 32) { From 08a5679940fee8316edbcf3a50a43d807269c53a Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 27 Mar 2025 17:53:13 -0700 Subject: [PATCH 5/5] Revert "just remove one LowerNode call" This reverts commit 17886e7d35ecf40d1090d85263f1028bf63e82e1. --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 58e54655be8269..10fe7e99d51467 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3443,7 +3443,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } BlockRange().Remove(node); - return binOp; + return LowerNode(binOp); } else if (simdSize == 32) {