From 3fea88565a83a24bbcdc3513e143bac2634357b0 Mon Sep 17 00:00:00 2001 From: John Stiles Date: Wed, 26 Apr 2023 10:40:07 -0400 Subject: [PATCH] Improve RP code generation for negation. Previously, we emitted `0 - x` for all negation. This was fine, but can never use our immediate ops. Now, we emit negation for floats as `x ^ int(0x80000000)`, and negation for ints as `x * -1`. This allows scalar negation to use a single immediate-op instruction, and sometimes allows multi-slot negation to reuse an existing value on the stack. This never seems to be a downgrade. Change-Id: I427f6e2932370c56cd7076535e082d938a645820 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/681516 Reviewed-by: Brian Osman Commit-Queue: Brian Osman Auto-Submit: John Stiles Commit-Queue: John Stiles --- .../SkSLRasterPipelineCodeGenerator.cpp | 23 ++-- tests/sksl/folding/MatrixNoOpFolding.skrp | 72 +++++------ .../sksl/folding/MatrixScalarNoOpFolding.skrp | 120 +++++++++--------- .../sksl/folding/MatrixVectorNoOpFolding.skrp | 54 ++++---- tests/sksl/folding/Negation.skrp | 24 ++-- .../runtime/RecursiveComparison_Arrays.skrp | 35 +++-- .../runtime/RecursiveComparison_Structs.skrp | 35 +++-- .../runtime/RecursiveComparison_Types.skrp | 37 +++--- .../runtime/RecursiveComparison_Vectors.skrp | 35 +++-- tests/sksl/shared/FunctionPrototype.skrp | 11 +- tests/sksl/shared/MatrixEquality.skrp | 28 ++-- tests/sksl/shared/UnaryPositiveNegative.skrp | 80 ++++++------ 12 files changed, 271 insertions(+), 283 deletions(-) diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp index bb2eee088959..9b4ba2f5e8b3 100644 --- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp +++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp @@ -3367,23 +3367,30 @@ bool Generator::pushPrefixExpression(Operator op, const Expression& expr) { fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount()); return true; - case 
OperatorKind::MINUS: - // Handle negation as a componentwise `0 - expr`. - fBuilder.push_zeros(expr.type().slotCount()); + case OperatorKind::MINUS: { if (!this->pushExpression(expr)) { return unsupported(); } - return this->binaryOp(expr.type(), kSubtractOps); - + if (expr.type().componentType().isFloat()) { + // Handle float negation as an integer `x ^ 0x80000000`. This toggles the sign bit. + fBuilder.push_constant_u(0x80000000, expr.type().slotCount()); + fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount()); + } else { + // Handle integer negation as a componentwise `expr * -1`. + fBuilder.push_constant_i(-1, expr.type().slotCount()); + fBuilder.binary_op(BuilderOp::mul_n_ints, expr.type().slotCount()); + } + return true; + } case OperatorKind::PLUSPLUS: { // Rewrite as `expr += 1`. Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()}; return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, oneLiteral); } case OperatorKind::MINUSMINUS: { - // Rewrite as `expr -= 1`. - Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()}; - return this->pushBinaryExpression(expr, OperatorKind::MINUSEQ, oneLiteral); + // Rewrite as `expr += -1`. 
+ Literal minusOneLiteral{expr.fPosition, -1.0, &expr.type().componentType()}; + return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, minusOneLiteral); } default: break; diff --git a/tests/sksl/folding/MatrixNoOpFolding.skrp b/tests/sksl/folding/MatrixNoOpFolding.skrp index 0c2bcad989aa..4cf03981ddf8 100644 --- a/tests/sksl/folding/MatrixNoOpFolding.skrp +++ b/tests/sksl/folding/MatrixNoOpFolding.skrp @@ -9,9 +9,8 @@ copy_4_uniforms $0..3 = testMatrix2x2 copy_4_slots_unmasked _0_m = $0..3 copy_4_uniforms $0..3 = testMatrix2x2 copy_4_slots_unmasked _0_m = $0..3 -splat_4_constants $0..3 = 0 -copy_4_slots_unmasked $4..7 = _0_m -sub_4_floats $0..3 -= $4..7 +splat_4_constants $4..7 = 0x80000000 (-0.0) +bitwise_xor_4_ints $0..3 ^= $4..7 copy_4_slots_unmasked _0_m = $0..3 splat_2_constants $0..1 = 0 swizzle_4 $0..3 = ($0..3).yxxy @@ -22,9 +21,9 @@ copy_4_slots_unmasked _1_mm = $0..3 store_condition_mask $49 = CondMask store_condition_mask $78 = CondMask copy_4_slots_unmasked $79..82 = _0_m -splat_4_constants $83..86 = 0 -copy_4_uniforms $87..90 = testMatrix2x2 -sub_4_floats $83..86 -= $87..90 +copy_4_uniforms $83..86 = testMatrix2x2 +splat_4_constants $87..90 = 0x80000000 (-0.0) +bitwise_xor_4_ints $83..86 ^= $87..90 cmpeq_4_floats $79..82 = equal($79..82, $83..86) bitwise_and_2_ints $79..80 &= $81..82 bitwise_and_int $79 &= $80 @@ -36,7 +35,7 @@ bitwise_and_int $80 &= $81 bitwise_and_int $79 &= $80 copy_constant $50 = 0 merge_condition_mask CondMask = $78 & $79 -branch_if_no_lanes_active branch_if_no_lanes_active +71 (label 2 at #110) +branch_if_no_lanes_active branch_if_no_lanes_active +68 (label 2 at #106) splat_4_constants m(0..3) = 0 splat_4_constants m(4..7) = 0 splat_4_constants m(8), mm(0..2) = 0 @@ -59,13 +58,10 @@ copy_uniform $59 = testMatrix3x3(8) copy_4_slots_masked m(0..3) = Mask($51..54) copy_4_slots_masked m(4..7) = Mask($55..58) copy_slot_masked m(8) = Mask($59) -splat_4_constants $51..54 = 0 -splat_4_constants $55..58 = 0 -copy_constant $59 = 0 
-copy_4_slots_unmasked $60..63 = m(0..3) -copy_4_slots_unmasked $64..67 = m(4..7) -copy_slot_unmasked $68 = m(8) -sub_n_floats $51..59 -= $60..68 +splat_4_constants $60..63 = 0x80000000 (-0.0) +splat_4_constants $64..67 = 0x80000000 (-0.0) +copy_constant $68 = 0x80000000 (-0.0) +bitwise_xor_n_ints $51..59 ^= $60..68 copy_4_slots_masked m(0..3) = Mask($51..54) copy_4_slots_masked m(4..7) = Mask($55..58) copy_slot_masked m(8) = Mask($59) @@ -82,13 +78,13 @@ copy_slot_masked mm(8) = Mask($59) copy_4_slots_unmasked $51..54 = m(0..3) copy_4_slots_unmasked $55..58 = m(4..7) copy_slot_unmasked $59 = m(8) -splat_4_constants $60..63 = 0 -splat_4_constants $64..67 = 0 -copy_constant $68 = 0 -copy_4_uniforms $69..72 = testMatrix3x3(0..3) -copy_4_uniforms $73..76 = testMatrix3x3(4..7) -copy_uniform $77 = testMatrix3x3(8) -sub_n_floats $60..68 -= $69..77 +copy_4_uniforms $60..63 = testMatrix3x3(0..3) +copy_4_uniforms $64..67 = testMatrix3x3(4..7) +copy_uniform $68 = testMatrix3x3(8) +splat_4_constants $69..72 = 0x80000000 (-0.0) +splat_4_constants $73..76 = 0x80000000 (-0.0) +copy_constant $77 = 0x80000000 (-0.0) +bitwise_xor_n_ints $60..68 ^= $69..77 cmpeq_n_floats $51..59 = equal($51..59, $60..68) bitwise_and_4_ints $52..55 &= $56..59 bitwise_and_2_ints $52..53 &= $54..55 @@ -111,7 +107,7 @@ label label 0x00000002 load_condition_mask CondMask = $78 copy_constant $0 = 0 merge_condition_mask CondMask = $49 & $50 -branch_if_no_lanes_active branch_if_no_lanes_active +96 (label 1 at #210) +branch_if_no_lanes_active branch_if_no_lanes_active +92 (label 1 at #202) copy_4_uniforms testMatrix4x4(0..3) = testInputs copy_4_uniforms testMatrix4x4(4..7) = testInputs copy_4_uniforms testMatrix4x4(8..11) = testInputs @@ -146,15 +142,11 @@ copy_4_slots_masked m₁(0..3) = Mask($1..4) copy_4_slots_masked m₁(4..7) = Mask($5..8) copy_4_slots_masked m₁(8..11) = Mask($9..12) copy_4_slots_masked m₁(12..15) = Mask($13..16) -splat_4_constants $1..4 = 0 -splat_4_constants $5..8 = 0 -splat_4_constants 
$9..12 = 0 -splat_4_constants $13..16 = 0 -copy_4_slots_unmasked $17..20 = m₁(0..3) -copy_4_slots_unmasked $21..24 = m₁(4..7) -copy_4_slots_unmasked $25..28 = m₁(8..11) -copy_4_slots_unmasked $29..32 = m₁(12..15) -sub_n_floats $1..16 -= $17..32 +splat_4_constants $17..20 = 0x80000000 (-0.0) +splat_4_constants $21..24 = 0x80000000 (-0.0) +splat_4_constants $25..28 = 0x80000000 (-0.0) +splat_4_constants $29..32 = 0x80000000 (-0.0) +bitwise_xor_n_ints $1..16 ^= $17..32 copy_4_slots_masked m₁(0..3) = Mask($1..4) copy_4_slots_masked m₁(4..7) = Mask($5..8) copy_4_slots_masked m₁(8..11) = Mask($9..12) @@ -175,15 +167,15 @@ copy_4_slots_unmasked $1..4 = m₁(0..3) copy_4_slots_unmasked $5..8 = m₁(4..7) copy_4_slots_unmasked $9..12 = m₁(8..11) copy_4_slots_unmasked $13..16 = m₁(12..15) -splat_4_constants $17..20 = 0 -splat_4_constants $21..24 = 0 -splat_4_constants $25..28 = 0 -splat_4_constants $29..32 = 0 -copy_4_slots_unmasked $33..36 = testMatrix4x4(0..3) -copy_4_slots_unmasked $37..40 = testMatrix4x4(4..7) -copy_4_slots_unmasked $41..44 = testMatrix4x4(8..11) -copy_4_slots_unmasked $45..48 = testMatrix4x4(12..15) -sub_n_floats $17..32 -= $33..48 +copy_4_slots_unmasked $17..20 = testMatrix4x4(0..3) +copy_4_slots_unmasked $21..24 = testMatrix4x4(4..7) +copy_4_slots_unmasked $25..28 = testMatrix4x4(8..11) +copy_4_slots_unmasked $29..32 = testMatrix4x4(12..15) +splat_4_constants $33..36 = 0x80000000 (-0.0) +splat_4_constants $37..40 = 0x80000000 (-0.0) +splat_4_constants $41..44 = 0x80000000 (-0.0) +splat_4_constants $45..48 = 0x80000000 (-0.0) +bitwise_xor_n_ints $17..32 ^= $33..48 cmpeq_n_floats $1..16 = equal($1..16, $17..32) bitwise_and_4_ints $9..12 &= $13..16 bitwise_and_4_ints $5..8 &= $9..12 diff --git a/tests/sksl/folding/MatrixScalarNoOpFolding.skrp b/tests/sksl/folding/MatrixScalarNoOpFolding.skrp index 8ac47e1cb403..bf7a6fb960d1 100644 --- a/tests/sksl/folding/MatrixScalarNoOpFolding.skrp +++ b/tests/sksl/folding/MatrixScalarNoOpFolding.skrp @@ -49,15 +49,15 @@ 
copy_constant $185 = 0 copy_slot_masked [test_no_op_scalar_X_mat2].result = Mask($185) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) load_condition_mask CondMask = $183 -splat_4_constants $183..186 = 0 -copy_4_slots_unmasked $187..190 = m -sub_4_floats $183..186 -= $187..190 +copy_4_slots_unmasked $183..186 = m +splat_4_constants $187..190 = 0x80000000 (-0.0) +bitwise_xor_4_ints $183..186 ^= $187..190 copy_4_slots_masked m = Mask($183..186) store_condition_mask $183 = CondMask copy_4_slots_unmasked $184..187 = m -splat_4_constants $188..191 = 0 -copy_4_uniforms $192..195 = testMatrix2x2 -sub_4_floats $188..191 -= $192..195 +copy_4_uniforms $188..191 = testMatrix2x2 +splat_4_constants $192..195 = 0x80000000 (-0.0) +bitwise_xor_4_ints $188..191 ^= $192..195 cmpne_4_floats $184..187 = notEqual($184..187, $188..191) bitwise_or_2_ints $184..185 |= $186..187 bitwise_or_int $184 |= $185 @@ -157,13 +157,13 @@ copy_constant $155 = 0 copy_slot_masked [test_no_op_scalar_X_mat3].result = Mask($155) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) load_condition_mask CondMask = $153 -splat_4_constants $153..156 = 0 -splat_4_constants $157..160 = 0 -copy_constant $161 = 0 -copy_4_slots_unmasked $162..165 = m₁(0..3) -copy_4_slots_unmasked $166..169 = m₁(4..7) -copy_slot_unmasked $170 = m₁(8) -sub_n_floats $153..161 -= $162..170 +copy_4_slots_unmasked $153..156 = m₁(0..3) +copy_4_slots_unmasked $157..160 = m₁(4..7) +copy_slot_unmasked $161 = m₁(8) +splat_4_constants $162..165 = 0x80000000 (-0.0) +splat_4_constants $166..169 = 0x80000000 (-0.0) +copy_constant $170 = 0x80000000 (-0.0) +bitwise_xor_n_ints $153..161 ^= $162..170 copy_4_slots_masked m₁(0..3) = Mask($153..156) copy_4_slots_masked m₁(4..7) = Mask($157..160) copy_slot_masked m₁(8) = Mask($161) @@ -171,13 +171,13 @@ store_condition_mask $153 = CondMask copy_4_slots_unmasked $154..157 = m₁(0..3) copy_4_slots_unmasked $158..161 = m₁(4..7) copy_slot_unmasked $162 = m₁(8) -splat_4_constants 
$163..166 = 0 -splat_4_constants $167..170 = 0 -copy_constant $171 = 0 -copy_4_uniforms $172..175 = testMatrix3x3(0..3) -copy_4_uniforms $176..179 = testMatrix3x3(4..7) -copy_uniform $180 = testMatrix3x3(8) -sub_n_floats $163..171 -= $172..180 +copy_4_uniforms $163..166 = testMatrix3x3(0..3) +copy_4_uniforms $167..170 = testMatrix3x3(4..7) +copy_uniform $171 = testMatrix3x3(8) +splat_4_constants $172..175 = 0x80000000 (-0.0) +splat_4_constants $176..179 = 0x80000000 (-0.0) +copy_constant $180 = 0x80000000 (-0.0) +bitwise_xor_n_ints $163..171 ^= $172..180 cmpne_n_floats $154..162 = notEqual($154..162, $163..171) bitwise_or_4_ints $155..158 |= $159..162 bitwise_or_2_ints $155..156 |= $157..158 @@ -311,15 +311,15 @@ copy_constant $103 = 0 copy_slot_masked [test_no_op_scalar_X_mat4].result = Mask($103) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) load_condition_mask CondMask = $101 -splat_4_constants $101..104 = 0 -splat_4_constants $105..108 = 0 -splat_4_constants $109..112 = 0 -splat_4_constants $113..116 = 0 -copy_4_slots_unmasked $117..120 = m₂(0..3) -copy_4_slots_unmasked $121..124 = m₂(4..7) -copy_4_slots_unmasked $125..128 = m₂(8..11) -copy_4_slots_unmasked $129..132 = m₂(12..15) -sub_n_floats $101..116 -= $117..132 +copy_4_slots_unmasked $101..104 = m₂(0..3) +copy_4_slots_unmasked $105..108 = m₂(4..7) +copy_4_slots_unmasked $109..112 = m₂(8..11) +copy_4_slots_unmasked $113..116 = m₂(12..15) +splat_4_constants $117..120 = 0x80000000 (-0.0) +splat_4_constants $121..124 = 0x80000000 (-0.0) +splat_4_constants $125..128 = 0x80000000 (-0.0) +splat_4_constants $129..132 = 0x80000000 (-0.0) +bitwise_xor_n_ints $101..116 ^= $117..132 copy_4_slots_masked m₂(0..3) = Mask($101..104) copy_4_slots_masked m₂(4..7) = Mask($105..108) copy_4_slots_masked m₂(8..11) = Mask($109..112) @@ -329,15 +329,15 @@ copy_4_slots_unmasked $102..105 = m₂(0..3) copy_4_slots_unmasked $106..109 = m₂(4..7) copy_4_slots_unmasked $110..113 = m₂(8..11) copy_4_slots_unmasked 
$114..117 = m₂(12..15) -splat_4_constants $118..121 = 0 -splat_4_constants $122..125 = 0 -splat_4_constants $126..129 = 0 -splat_4_constants $130..133 = 0 -copy_4_slots_unmasked $134..137 = testMatrix4x4(0..3) -copy_4_slots_unmasked $138..141 = testMatrix4x4(4..7) -copy_4_slots_unmasked $142..145 = testMatrix4x4(8..11) -copy_4_slots_unmasked $146..149 = testMatrix4x4(12..15) -sub_n_floats $118..133 -= $134..149 +copy_4_slots_unmasked $118..121 = testMatrix4x4(0..3) +copy_4_slots_unmasked $122..125 = testMatrix4x4(4..7) +copy_4_slots_unmasked $126..129 = testMatrix4x4(8..11) +copy_4_slots_unmasked $130..133 = testMatrix4x4(12..15) +splat_4_constants $134..137 = 0x80000000 (-0.0) +splat_4_constants $138..141 = 0x80000000 (-0.0) +splat_4_constants $142..145 = 0x80000000 (-0.0) +splat_4_constants $146..149 = 0x80000000 (-0.0) +bitwise_xor_n_ints $118..133 ^= $134..149 cmpne_n_floats $102..117 = notEqual($102..117, $118..133) bitwise_or_4_ints $110..113 |= $114..117 bitwise_or_4_ints $106..109 |= $110..113 @@ -465,12 +465,12 @@ sub_4_floats $85..88 -= $89..92 copy_4_slots_masked m₃ = Mask($85..88) store_condition_mask $85 = CondMask copy_4_slots_unmasked $86..89 = m₃ -splat_4_constants $90..93 = 0 -copy_slot_unmasked $94 = scalar -copy_slot_unmasked $95 = scalar -copy_slot_unmasked $96 = scalar -copy_slot_unmasked $97 = scalar -sub_4_floats $90..93 -= $94..97 +copy_slot_unmasked $90 = scalar +copy_slot_unmasked $91 = scalar +copy_slot_unmasked $92 = scalar +copy_slot_unmasked $93 = scalar +splat_4_constants $94..97 = 0x80000000 (-0.0) +bitwise_xor_4_ints $90..93 ^= $94..97 cmpne_4_floats $86..89 = notEqual($86..89, $90..93) bitwise_or_2_ints $86..87 |= $88..89 bitwise_or_int $86 |= $87 @@ -640,13 +640,13 @@ store_condition_mask $54 = CondMask copy_4_slots_unmasked $55..58 = m₄(0..3) copy_4_slots_unmasked $59..62 = m₄(4..7) copy_slot_unmasked $63 = m₄(8) -splat_4_constants $64..67 = 0 -splat_4_constants $68..71 = 0 -copy_constant $72 = 0 -copy_3_slots_unmasked $73..75 = 
scalar3 -copy_3_slots_unmasked $76..78 = scalar3 -copy_3_slots_unmasked $79..81 = scalar3 -sub_n_floats $64..72 -= $73..81 +copy_3_slots_unmasked $64..66 = scalar3 +copy_3_slots_unmasked $67..69 = scalar3 +copy_3_slots_unmasked $70..72 = scalar3 +splat_4_constants $73..76 = 0x80000000 (-0.0) +splat_4_constants $77..80 = 0x80000000 (-0.0) +copy_constant $81 = 0x80000000 (-0.0) +bitwise_xor_n_ints $64..72 ^= $73..81 cmpne_n_floats $55..63 = notEqual($55..63, $64..72) bitwise_or_4_ints $56..59 |= $60..63 bitwise_or_2_ints $56..57 |= $58..59 @@ -856,15 +856,15 @@ copy_4_slots_unmasked $3..6 = m₅(0..3) copy_4_slots_unmasked $7..10 = m₅(4..7) copy_4_slots_unmasked $11..14 = m₅(8..11) copy_4_slots_unmasked $15..18 = m₅(12..15) -splat_4_constants $19..22 = 0 -splat_4_constants $23..26 = 0 -splat_4_constants $27..30 = 0 -splat_4_constants $31..34 = 0 -copy_4_slots_unmasked $35..38 = scalar4 -copy_4_slots_unmasked $39..42 = scalar4 -copy_4_slots_unmasked $43..46 = scalar4 -copy_4_slots_unmasked $47..50 = scalar4 -sub_n_floats $19..34 -= $35..50 +copy_4_slots_unmasked $19..22 = scalar4 +copy_4_slots_unmasked $23..26 = scalar4 +copy_4_slots_unmasked $27..30 = scalar4 +copy_4_slots_unmasked $31..34 = scalar4 +splat_4_constants $35..38 = 0x80000000 (-0.0) +splat_4_constants $39..42 = 0x80000000 (-0.0) +splat_4_constants $43..46 = 0x80000000 (-0.0) +splat_4_constants $47..50 = 0x80000000 (-0.0) +bitwise_xor_n_ints $19..34 ^= $35..50 cmpne_n_floats $3..18 = notEqual($3..18, $19..34) bitwise_or_4_ints $11..14 |= $15..18 bitwise_or_4_ints $7..10 |= $11..14 diff --git a/tests/sksl/folding/MatrixVectorNoOpFolding.skrp b/tests/sksl/folding/MatrixVectorNoOpFolding.skrp index 5a34bac524e0..9379caa45a78 100644 --- a/tests/sksl/folding/MatrixVectorNoOpFolding.skrp +++ b/tests/sksl/folding/MatrixVectorNoOpFolding.skrp @@ -32,19 +32,19 @@ copy_constant $88 = 0 copy_slot_masked [test_no_op_mat2_X_vec2].result = Mask($88) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) 
load_condition_mask CondMask = $86 -splat_2_constants $86..87 = 0 -copy_2_uniforms $88..89 = testInputs(0..1) -sub_2_floats $86..87 -= $88..89 +copy_2_uniforms $86..87 = testInputs(0..1) +splat_2_constants $88..89 = 0x80000000 (-0.0) +bitwise_xor_2_ints $86..87 ^= $88..89 copy_2_slots_masked v = Mask($86..87) -splat_2_constants $86..87 = 0 -copy_2_uniforms $88..89 = testInputs(0..1) -sub_2_floats $86..87 -= $88..89 +copy_2_uniforms $86..87 = testInputs(0..1) +splat_2_constants $88..89 = 0x80000000 (-0.0) +bitwise_xor_2_ints $86..87 ^= $88..89 copy_2_slots_masked v = Mask($86..87) store_condition_mask $86 = CondMask copy_2_slots_unmasked $87..88 = v -splat_2_constants $89..90 = 0 -copy_2_uniforms $91..92 = testInputs(0..1) -sub_2_floats $89..90 -= $91..92 +copy_2_uniforms $89..90 = testInputs(0..1) +splat_2_constants $91..92 = 0x80000000 (-0.0) +bitwise_xor_2_ints $89..90 ^= $91..92 cmpne_2_floats $87..88 = notEqual($87..88, $89..90) bitwise_or_int $87 |= $88 merge_condition_mask CondMask = $86 & $87 @@ -95,19 +95,19 @@ copy_constant $76 = 0 copy_slot_masked [test_no_op_mat3_X_vec3].result = Mask($76) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) load_condition_mask CondMask = $74 -splat_3_constants $74..76 = 0 -copy_3_uniforms $77..79 = testInputs(0..2) -sub_3_floats $74..76 -= $77..79 +copy_3_uniforms $74..76 = testInputs(0..2) +splat_3_constants $77..79 = 0x80000000 (-0.0) +bitwise_xor_3_ints $74..76 ^= $77..79 copy_3_slots_masked v₁ = Mask($74..76) -splat_3_constants $74..76 = 0 -copy_3_uniforms $77..79 = testInputs(0..2) -sub_3_floats $74..76 -= $77..79 +copy_3_uniforms $74..76 = testInputs(0..2) +splat_3_constants $77..79 = 0x80000000 (-0.0) +bitwise_xor_3_ints $74..76 ^= $77..79 copy_3_slots_masked v₁ = Mask($74..76) store_condition_mask $74 = CondMask copy_3_slots_unmasked $75..77 = v₁ -splat_3_constants $78..80 = 0 -copy_3_uniforms $81..83 = testInputs(0..2) -sub_3_floats $78..80 -= $81..83 +copy_3_uniforms $78..80 = testInputs(0..2) 
+splat_3_constants $81..83 = 0x80000000 (-0.0) +bitwise_xor_3_ints $78..80 ^= $81..83 cmpne_3_floats $75..77 = notEqual($75..77, $78..80) bitwise_or_int $76 |= $77 bitwise_or_int $75 |= $76 @@ -163,19 +163,19 @@ copy_constant $60 = 0 copy_slot_masked [test_no_op_mat4_X_vec4].result = Mask($60) mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask) load_condition_mask CondMask = $58 -splat_4_constants $58..61 = 0 -copy_4_uniforms $62..65 = testInputs -sub_4_floats $58..61 -= $62..65 +copy_4_uniforms $58..61 = testInputs +splat_4_constants $62..65 = 0x80000000 (-0.0) +bitwise_xor_4_ints $58..61 ^= $62..65 copy_4_slots_masked v₂ = Mask($58..61) -splat_4_constants $58..61 = 0 -copy_4_uniforms $62..65 = testInputs -sub_4_floats $58..61 -= $62..65 +copy_4_uniforms $58..61 = testInputs +splat_4_constants $62..65 = 0x80000000 (-0.0) +bitwise_xor_4_ints $58..61 ^= $62..65 copy_4_slots_masked v₂ = Mask($58..61) store_condition_mask $58 = CondMask copy_4_slots_unmasked $59..62 = v₂ -splat_4_constants $63..66 = 0 -copy_4_uniforms $67..70 = testInputs -sub_4_floats $63..66 -= $67..70 +copy_4_uniforms $63..66 = testInputs +splat_4_constants $67..70 = 0x80000000 (-0.0) +bitwise_xor_4_ints $63..66 ^= $67..70 cmpne_4_floats $59..62 = notEqual($59..62, $63..66) bitwise_or_2_ints $59..60 |= $61..62 bitwise_or_int $59 |= $60 diff --git a/tests/sksl/folding/Negation.skrp b/tests/sksl/folding/Negation.skrp index af88a915e7de..ef3248fb4924 100644 --- a/tests/sksl/folding/Negation.skrp +++ b/tests/sksl/folding/Negation.skrp @@ -11,18 +11,18 @@ copy_constant one = 0x00000001 (1.401298e-45) copy_constant two = 0x00000002 (2.802597e-45) copy_constant ok = 0xFFFFFFFF copy_slot_unmasked $14 = ok -splat_3_constants $15..17 = 0 -copy_slot_unmasked $18 = one -sub_int $17 -= $18 -copy_slot_unmasked $18 = one -copy_slot_unmasked $19 = one -add_int $18 += $19 -sub_2_ints $15..16 -= $17..18 -splat_2_constants $17..18 = 0 -copy_slot_unmasked $19 = one -add_imm_int $19 += 0xFFFFFFFE 
-copy_constant $20 = 0x00000002 (2.802597e-45) -sub_2_ints $17..18 -= $19..20 +copy_slot_unmasked $15 = one +mul_imm_int $15 *= 0xFFFFFFFF +copy_slot_unmasked $16 = one +copy_slot_unmasked $17 = one +add_int $16 += $17 +splat_2_constants $17..18 = 0xFFFFFFFF +mul_2_ints $15..16 *= $17..18 +copy_slot_unmasked $17 = one +add_imm_int $17 += 0xFFFFFFFE +copy_constant $18 = 0x00000002 (2.802597e-45) +splat_2_constants $19..20 = 0xFFFFFFFF +mul_2_ints $17..18 *= $19..20 cmpeq_2_ints $15..16 = equal($15..16, $17..18) bitwise_and_int $15 &= $16 bitwise_and_int $14 &= $15 diff --git a/tests/sksl/runtime/RecursiveComparison_Arrays.skrp b/tests/sksl/runtime/RecursiveComparison_Arrays.skrp index 496cff56f132..b6c1798fa4c2 100644 --- a/tests/sksl/runtime/RecursiveComparison_Arrays.skrp +++ b/tests/sksl/runtime/RecursiveComparison_Arrays.skrp @@ -12,9 +12,8 @@ copy_uniform $0 = colorGreen(0) copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZP = $0 -copy_constant $0 = 0 -copy_uniform $1 = colorGreen(0) -sub_float $0 -= $1 +copy_uniform $0 = colorGreen(0) +bitwise_xor_imm_int $0 ^= 0x80000000 copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZM = $0 @@ -67,7 +66,7 @@ cmpne_3_floats $67..69 = notEqual($67..69, $70..72) bitwise_or_int $68 |= $69 bitwise_or_int $67 |= $68 merge_condition_mask CondMask = $74 & $75 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 8 at #77) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 8 at #76) copy_4_slots_unmasked $68..71 = _1_a[0], _1_a[1], _1_a[2], _2_b[0] copy_2_slots_unmasked $72..73 = _2_b[1], _2_b[2] cmpeq_3_floats $68..70 = equal($68..70, $71..73) @@ -78,7 +77,7 @@ label label 0x00000008 load_condition_mask CondMask = $74 copy_constant $58 = 0 merge_condition_mask CondMask = $66 & $67 -branch_if_no_lanes_active branch_if_no_lanes_active +48 (label 7 at #129) +branch_if_no_lanes_active branch_if_no_lanes_active +48 (label 7 at #128) copy_slot_unmasked eq = NE 
copy_slot_unmasked f1 = F42 copy_slot_unmasked f2 = ZM @@ -114,7 +113,7 @@ cmpne_3_floats $59..61 = notEqual($59..61, $62..64) bitwise_or_int $60 |= $61 bitwise_or_int $59 |= $60 merge_condition_mask CondMask = $74 & $75 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 10 at #124) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 10 at #123) copy_4_slots_unmasked $60..63 = a[0], a[1], a[2], b[0] copy_2_slots_unmasked $64..65 = b[1], b[2] cmpeq_3_floats $60..62 = equal($60..62, $63..65) @@ -130,7 +129,7 @@ label label 0x00000007 load_condition_mask CondMask = $66 copy_constant $49 = 0 merge_condition_mask CondMask = $57 & $58 -branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 6 at #179) +branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 6 at #178) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -165,7 +164,7 @@ cmpne_3_floats $50..52 = notEqual($50..52, $53..55) bitwise_or_int $51 |= $52 bitwise_or_int $50 |= $51 merge_condition_mask CondMask = $66 & $67 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 12 at #175) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 12 at #174) copy_4_slots_unmasked $51..54 = a[0], a[1], a[2], b[0] copy_2_slots_unmasked $55..56 = b[1], b[2] cmpeq_3_floats $51..53 = equal($51..53, $54..56) @@ -180,7 +179,7 @@ label label 0x00000006 load_condition_mask CondMask = $57 copy_constant $40 = 0 merge_condition_mask CondMask = $48 & $49 -branch_if_no_lanes_active branch_if_no_lanes_active +47 (label 5 at #230) +branch_if_no_lanes_active branch_if_no_lanes_active +47 (label 5 at #229) copy_slot_unmasked eq = EQ copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -215,7 +214,7 @@ cmpne_3_floats $41..43 = notEqual($41..43, $44..46) bitwise_or_int $42 |= $43 bitwise_or_int $41 |= $42 merge_condition_mask CondMask = $57 & $58 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 14 at #225) 
+branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 14 at #224) copy_4_slots_unmasked $42..45 = a[0], a[1], a[2], b[0] copy_2_slots_unmasked $46..47 = b[1], b[2] cmpeq_3_floats $42..44 = equal($42..44, $45..47) @@ -231,7 +230,7 @@ label label 0x00000005 load_condition_mask CondMask = $48 copy_constant $31 = 0 merge_condition_mask CondMask = $39 & $40 -branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 4 at #277) +branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 4 at #276) copy_slot_unmasked eq₁ = NE copy_3_slots_unmasked f1₁, f2₁, f3₁ = F42, F43, F44 copy_uniform $32 = colorGreen(0) @@ -263,7 +262,7 @@ cmpne_3_floats $32..34 = notEqual($32..34, $35..37) bitwise_or_int $33 |= $34 bitwise_or_int $32 |= $33 merge_condition_mask CondMask = $48 & $49 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 16 at #273) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 16 at #272) copy_4_slots_unmasked $33..36 = a[0]₁, a[1]₁, a[2]₁, b[0]₁ copy_2_slots_unmasked $37..38 = b[1]₁, b[2]₁ cmpeq_3_floats $33..35 = equal($33..35, $36..38) @@ -278,7 +277,7 @@ label label 0x00000004 load_condition_mask CondMask = $39 copy_constant $22 = 0 merge_condition_mask CondMask = $30 & $31 -branch_if_no_lanes_active branch_if_no_lanes_active +44 (label 3 at #325) +branch_if_no_lanes_active branch_if_no_lanes_active +44 (label 3 at #324) copy_slot_unmasked eq₁ = EQ copy_3_slots_unmasked f1₁, f2₁, f3₁ = F42, F43, F44 copy_uniform $23 = colorGreen(0) @@ -310,7 +309,7 @@ cmpne_3_floats $23..25 = notEqual($23..25, $26..28) bitwise_or_int $24 |= $25 bitwise_or_int $23 |= $24 merge_condition_mask CondMask = $39 & $40 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 18 at #320) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 18 at #319) copy_4_slots_unmasked $24..27 = a[0]₁, a[1]₁, a[2]₁, b[0]₁ copy_2_slots_unmasked $28..29 = b[1]₁, b[2]₁ cmpeq_3_floats $24..26 = equal($24..26, $27..29) @@ -326,7 +325,7 @@ label 
label 0x00000003 load_condition_mask CondMask = $30 copy_constant $13 = 0 merge_condition_mask CondMask = $21 & $22 -branch_if_no_lanes_active branch_if_no_lanes_active +45 (label 2 at #374) +branch_if_no_lanes_active branch_if_no_lanes_active +45 (label 2 at #373) copy_slot_unmasked eq₁ = NE copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -360,7 +359,7 @@ cmpne_3_floats $14..16 = notEqual($14..16, $17..19) bitwise_or_int $15 |= $16 bitwise_or_int $14 |= $15 merge_condition_mask CondMask = $30 & $31 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 20 at #370) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 20 at #369) copy_4_slots_unmasked $15..18 = a[0]₁, a[1]₁, a[2]₁, b[0]₁ copy_2_slots_unmasked $19..20 = b[1]₁, b[2]₁ cmpeq_3_floats $15..17 = equal($15..17, $18..20) @@ -375,7 +374,7 @@ label label 0x00000002 load_condition_mask CondMask = $21 copy_constant $0 = 0 merge_condition_mask CondMask = $12 & $13 -branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 1 at #424) +branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 1 at #423) copy_slot_unmasked eq₁ = EQ copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -409,7 +408,7 @@ cmpne_3_floats $1..3 = notEqual($1..3, $4..6) bitwise_or_int $2 |= $3 bitwise_or_int $1 |= $2 merge_condition_mask CondMask = $21 & $22 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 22 at #419) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 22 at #418) copy_4_slots_unmasked $2..5 = a[0]₁, a[1]₁, a[2]₁, b[0]₁ copy_2_slots_unmasked $6..7 = b[1]₁, b[2]₁ cmpeq_3_floats $2..4 = equal($2..4, $5..7) diff --git a/tests/sksl/runtime/RecursiveComparison_Structs.skrp b/tests/sksl/runtime/RecursiveComparison_Structs.skrp index 1f3b2d4919ca..bdb43eeb6131 100644 --- a/tests/sksl/runtime/RecursiveComparison_Structs.skrp +++ b/tests/sksl/runtime/RecursiveComparison_Structs.skrp @@ -12,9 +12,8 @@ copy_uniform $0 = colorGreen(0) copy_uniform $1 = 
colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZP = $0 -copy_constant $0 = 0 -copy_uniform $1 = colorGreen(0) -sub_float $0 -= $1 +copy_uniform $0 = colorGreen(0) +bitwise_xor_imm_int $0 ^= 0x80000000 copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZM = $0 @@ -73,7 +72,7 @@ cmpne_float $57 = notEqual($57, $58) bitwise_or_int $56 |= $57 bitwise_or_int $55 |= $56 merge_condition_mask CondMask = $60 & $61 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 8 at #89) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 8 at #88) copy_slot_unmasked $56 = _1_a.f1 copy_slot_unmasked $57 = _2_b.f1 cmpeq_float $56 = equal($56, $57) @@ -90,7 +89,7 @@ label label 0x00000008 load_condition_mask CondMask = $60 copy_constant $48 = 0 merge_condition_mask CondMask = $54 & $55 -branch_if_no_lanes_active branch_if_no_lanes_active +60 (label 7 at #153) +branch_if_no_lanes_active branch_if_no_lanes_active +60 (label 7 at #152) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_slot_unmasked f2 = ZM @@ -132,7 +131,7 @@ cmpne_float $51 = notEqual($51, $52) bitwise_or_int $50 |= $51 bitwise_or_int $49 |= $50 merge_condition_mask CondMask = $60 & $61 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 10 at #148) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 10 at #147) copy_slot_unmasked $50 = a.f1 copy_slot_unmasked $51 = b.f1 cmpeq_float $50 = equal($50, $51) @@ -154,7 +153,7 @@ label label 0x00000007 load_condition_mask CondMask = $54 copy_constant $41 = 0 merge_condition_mask CondMask = $47 & $48 -branch_if_no_lanes_active branch_if_no_lanes_active +58 (label 6 at #215) +branch_if_no_lanes_active branch_if_no_lanes_active +58 (label 6 at #214) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -195,7 +194,7 @@ cmpne_float $44 = notEqual($44, $45) bitwise_or_int $43 |= $44 bitwise_or_int $42 |= $43 merge_condition_mask CondMask = $54 & $55 
-branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 12 at #211) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 12 at #210) copy_slot_unmasked $43 = a.f1 copy_slot_unmasked $44 = b.f1 cmpeq_float $43 = equal($43, $44) @@ -216,7 +215,7 @@ label label 0x00000006 load_condition_mask CondMask = $47 copy_constant $34 = 0 merge_condition_mask CondMask = $40 & $41 -branch_if_no_lanes_active branch_if_no_lanes_active +59 (label 5 at #278) +branch_if_no_lanes_active branch_if_no_lanes_active +59 (label 5 at #277) copy_slot_unmasked eq = EQ copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -257,7 +256,7 @@ cmpne_float $37 = notEqual($37, $38) bitwise_or_int $36 |= $37 bitwise_or_int $35 |= $36 merge_condition_mask CondMask = $47 & $48 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 14 at #273) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 14 at #272) copy_slot_unmasked $36 = a.f1 copy_slot_unmasked $37 = b.f1 cmpeq_float $36 = equal($36, $37) @@ -279,7 +278,7 @@ label label 0x00000005 load_condition_mask CondMask = $40 copy_constant $27 = 0 merge_condition_mask CondMask = $33 & $34 -branch_if_no_lanes_active branch_if_no_lanes_active +55 (label 4 at #337) +branch_if_no_lanes_active branch_if_no_lanes_active +55 (label 4 at #336) copy_slot_unmasked eq₁ = NE copy_3_slots_unmasked f1₁, f2₁, f3₁ = F42, F43, F44 copy_uniform $28 = colorGreen(0) @@ -317,7 +316,7 @@ cmpne_float $30 = notEqual($30, $31) bitwise_or_int $29 |= $30 bitwise_or_int $28 |= $29 merge_condition_mask CondMask = $40 & $41 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 16 at #333) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 16 at #332) copy_slot_unmasked $29 = a.f1₁ copy_slot_unmasked $30 = b.f1₁ cmpeq_float $29 = equal($29, $30) @@ -338,7 +337,7 @@ label label 0x00000004 load_condition_mask CondMask = $33 copy_constant $20 = 0 merge_condition_mask CondMask = $26 & $27 
-branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 3 at #397) +branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 3 at #396) copy_slot_unmasked eq₁ = EQ copy_3_slots_unmasked f1₁, f2₁, f3₁ = F42, F43, F44 copy_uniform $21 = colorGreen(0) @@ -376,7 +375,7 @@ cmpne_float $23 = notEqual($23, $24) bitwise_or_int $22 |= $23 bitwise_or_int $21 |= $22 merge_condition_mask CondMask = $33 & $34 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 18 at #392) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 18 at #391) copy_slot_unmasked $22 = a.f1₁ copy_slot_unmasked $23 = b.f1₁ cmpeq_float $22 = equal($22, $23) @@ -398,7 +397,7 @@ label label 0x00000003 load_condition_mask CondMask = $26 copy_constant $13 = 0 merge_condition_mask CondMask = $19 & $20 -branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 2 at #458) +branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 2 at #457) copy_slot_unmasked eq₁ = NE copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -438,7 +437,7 @@ cmpne_float $16 = notEqual($16, $17) bitwise_or_int $15 |= $16 bitwise_or_int $14 |= $15 merge_condition_mask CondMask = $26 & $27 -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 20 at #454) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 20 at #453) copy_slot_unmasked $15 = a.f1₁ copy_slot_unmasked $16 = b.f1₁ cmpeq_float $15 = equal($15, $16) @@ -459,7 +458,7 @@ label label 0x00000002 load_condition_mask CondMask = $19 copy_constant $0 = 0 merge_condition_mask CondMask = $12 & $13 -branch_if_no_lanes_active branch_if_no_lanes_active +59 (label 1 at #521) +branch_if_no_lanes_active branch_if_no_lanes_active +59 (label 1 at #520) copy_slot_unmasked eq₁ = EQ copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -499,8 +498,8 @@ cmpne_float $3 = notEqual($3, $4) bitwise_or_int $2 |= $3 bitwise_or_int $1 |= $2 merge_condition_mask CondMask = $19 & $20 +branch_if_no_lanes_active 
branch_if_no_lanes_active +14 (label 22 at #515) stack_rewind -branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 22 at #516) copy_slot_unmasked $2 = a.f1₁ copy_slot_unmasked $3 = b.f1₁ cmpeq_float $2 = equal($2, $3) diff --git a/tests/sksl/runtime/RecursiveComparison_Types.skrp b/tests/sksl/runtime/RecursiveComparison_Types.skrp index 7a0eacf8addc..854b7514cfae 100644 --- a/tests/sksl/runtime/RecursiveComparison_Types.skrp +++ b/tests/sksl/runtime/RecursiveComparison_Types.skrp @@ -12,9 +12,8 @@ copy_uniform $0 = colorGreen(0) copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZP = $0 -copy_constant $0 = 0 -copy_uniform $1 = colorGreen(0) -sub_float $0 -= $1 +copy_uniform $0 = colorGreen(0) +bitwise_xor_imm_int $0 ^= 0x80000000 copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZM = $0 @@ -103,7 +102,7 @@ bitwise_or_int $69 |= $70 bitwise_or_int $68 |= $69 bitwise_or_int $67 |= $68 merge_condition_mask CondMask = $74 & $75 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 8 at #125) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 8 at #124) copy_slot_unmasked $68 = _1_a[0].f1 copy_slot_unmasked $69 = _2_b[0].f1 cmpeq_float $68 = equal($68, $69) @@ -126,7 +125,7 @@ label label 0x00000008 load_condition_mask CondMask = $74 copy_constant $58 = 0 merge_condition_mask CondMask = $66 & $67 -branch_if_no_lanes_active branch_if_no_lanes_active +87 (label 7 at #216) +branch_if_no_lanes_active branch_if_no_lanes_active +87 (label 7 at #215) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_slot_unmasked v2 = ZM @@ -189,7 +188,7 @@ bitwise_or_int $61 |= $62 bitwise_or_int $60 |= $61 bitwise_or_int $59 |= $60 merge_condition_mask CondMask = $74 & $75 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 10 at #211) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 10 at #210) copy_slot_unmasked $60 = a[0].f1 copy_slot_unmasked $61 = b[0].f1 cmpeq_float $60 = 
equal($60, $61) @@ -217,7 +216,7 @@ label label 0x00000007 load_condition_mask CondMask = $66 copy_constant $49 = 0 merge_condition_mask CondMask = $57 & $58 -branch_if_no_lanes_active branch_if_no_lanes_active +85 (label 6 at #305) +branch_if_no_lanes_active branch_if_no_lanes_active +85 (label 6 at #304) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_2_slots_unmasked v2, f3 = NAN1, NAN2 @@ -279,7 +278,7 @@ bitwise_or_int $52 |= $53 bitwise_or_int $51 |= $52 bitwise_or_int $50 |= $51 merge_condition_mask CondMask = $66 & $67 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 12 at #301) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 12 at #300) copy_slot_unmasked $51 = a[0].f1 copy_slot_unmasked $52 = b[0].f1 cmpeq_float $51 = equal($51, $52) @@ -306,7 +305,7 @@ label label 0x00000006 load_condition_mask CondMask = $57 copy_constant $40 = 0 merge_condition_mask CondMask = $48 & $49 -branch_if_no_lanes_active branch_if_no_lanes_active +86 (label 5 at #395) +branch_if_no_lanes_active branch_if_no_lanes_active +86 (label 5 at #394) copy_slot_unmasked eq = EQ copy_slot_unmasked f1 = F42 copy_2_slots_unmasked v2, f3 = NAN1, NAN2 @@ -368,7 +367,7 @@ bitwise_or_int $43 |= $44 bitwise_or_int $42 |= $43 bitwise_or_int $41 |= $42 merge_condition_mask CondMask = $57 & $58 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 14 at #390) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 14 at #389) copy_slot_unmasked $42 = a[0].f1 copy_slot_unmasked $43 = b[0].f1 cmpeq_float $42 = equal($42, $43) @@ -396,7 +395,7 @@ label label 0x00000005 load_condition_mask CondMask = $48 copy_constant $31 = 0 merge_condition_mask CondMask = $39 & $40 -branch_if_no_lanes_active branch_if_no_lanes_active +82 (label 4 at #481) +branch_if_no_lanes_active branch_if_no_lanes_active +82 (label 4 at #480) copy_slot_unmasked eq₁ = NE copy_4_slots_unmasked f1₁, v2₁, f3₁, f4₁ = F42, F43, F44, F45 copy_2_slots_unmasked f5₁, f6₁ = F46, 
F47 @@ -455,7 +454,7 @@ bitwise_or_int $34 |= $35 bitwise_or_int $33 |= $34 bitwise_or_int $32 |= $33 merge_condition_mask CondMask = $48 & $49 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 16 at #477) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 16 at #476) copy_slot_unmasked $33 = a[0].f1₁ copy_slot_unmasked $34 = b[0].f1₁ cmpeq_float $33 = equal($33, $34) @@ -482,7 +481,7 @@ label label 0x00000004 load_condition_mask CondMask = $39 copy_constant $22 = 0 merge_condition_mask CondMask = $30 & $31 -branch_if_no_lanes_active branch_if_no_lanes_active +84 (label 3 at #569) +branch_if_no_lanes_active branch_if_no_lanes_active +84 (label 3 at #568) copy_slot_unmasked eq₁ = EQ copy_4_slots_unmasked f1₁, v2₁, f3₁, f4₁ = F42, F43, F44, F45 copy_2_slots_unmasked f5₁, f6₁ = F46, F47 @@ -499,9 +498,9 @@ copy_slot_unmasked $23 = f4₁ copy_slot_masked a[1].f1₁ = Mask($23) copy_2_slots_unmasked $23..24 = f5₁, f6₁ copy_2_slots_masked a[1].v2₁ = Mask($23..24) -stack_rewind splat_4_constants b[0].f1₁, b[0].v2₁, b[1].f1₁ = 0 splat_2_constants b[1].v2₁ = 0 +stack_rewind copy_slot_unmasked $23 = f1₁ copy_slot_unmasked $24 = two mul_float $23 *= $24 @@ -542,7 +541,7 @@ bitwise_or_int $25 |= $26 bitwise_or_int $24 |= $25 bitwise_or_int $23 |= $24 merge_condition_mask CondMask = $39 & $40 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 18 at #564) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 18 at #563) copy_slot_unmasked $24 = a[0].f1₁ copy_slot_unmasked $25 = b[0].f1₁ cmpeq_float $24 = equal($24, $25) @@ -570,7 +569,7 @@ label label 0x00000003 load_condition_mask CondMask = $30 copy_constant $13 = 0 merge_condition_mask CondMask = $21 & $22 -branch_if_no_lanes_active branch_if_no_lanes_active +84 (label 2 at #657) +branch_if_no_lanes_active branch_if_no_lanes_active +84 (label 2 at #656) copy_slot_unmasked eq₁ = NE copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked v2₁ = ZM @@ -631,7 +630,7 @@ bitwise_or_int $16 |= 
$17 bitwise_or_int $15 |= $16 bitwise_or_int $14 |= $15 merge_condition_mask CondMask = $30 & $31 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 20 at #653) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 20 at #652) copy_slot_unmasked $15 = a[0].f1₁ copy_slot_unmasked $16 = b[0].f1₁ cmpeq_float $15 = equal($15, $16) @@ -658,7 +657,7 @@ label label 0x00000002 load_condition_mask CondMask = $21 copy_constant $0 = 0 merge_condition_mask CondMask = $12 & $13 -branch_if_no_lanes_active branch_if_no_lanes_active +85 (label 1 at #746) +branch_if_no_lanes_active branch_if_no_lanes_active +85 (label 1 at #745) copy_slot_unmasked eq₁ = EQ copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked v2₁ = ZM @@ -719,7 +718,7 @@ bitwise_or_int $3 |= $4 bitwise_or_int $2 |= $3 bitwise_or_int $1 |= $2 merge_condition_mask CondMask = $21 & $22 -branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 22 at #741) +branch_if_no_lanes_active branch_if_no_lanes_active +19 (label 22 at #740) copy_slot_unmasked $2 = a[0].f1₁ copy_slot_unmasked $3 = b[0].f1₁ cmpeq_float $2 = equal($2, $3) diff --git a/tests/sksl/runtime/RecursiveComparison_Vectors.skrp b/tests/sksl/runtime/RecursiveComparison_Vectors.skrp index f563780e0e29..4a2706f9953c 100644 --- a/tests/sksl/runtime/RecursiveComparison_Vectors.skrp +++ b/tests/sksl/runtime/RecursiveComparison_Vectors.skrp @@ -12,9 +12,8 @@ copy_uniform $0 = colorGreen(0) copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZP = $0 -copy_constant $0 = 0 -copy_uniform $1 = colorGreen(0) -sub_float $0 -= $1 +copy_uniform $0 = colorGreen(0) +bitwise_xor_imm_int $0 ^= 0x80000000 copy_uniform $1 = colorGreen(2) mul_float $0 *= $1 copy_slot_unmasked ZM = $0 @@ -67,7 +66,7 @@ cmpne_4_floats $79..82 = notEqual($79..82, $83..86) bitwise_or_2_ints $79..80 |= $81..82 bitwise_or_int $79 |= $80 merge_condition_mask CondMask = $88 & $89 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 8 at #77) 
+branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 8 at #76) copy_4_slots_unmasked $80..83 = _1_a copy_4_slots_unmasked $84..87 = _2_b cmpeq_4_floats $80..83 = equal($80..83, $84..87) @@ -78,7 +77,7 @@ label label 0x00000008 load_condition_mask CondMask = $88 copy_constant $68 = 0 merge_condition_mask CondMask = $78 & $79 -branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 7 at #124) +branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 7 at #123) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_slot_unmasked f2 = ZM @@ -109,7 +108,7 @@ cmpne_4_floats $69..72 = notEqual($69..72, $73..76) bitwise_or_2_ints $69..70 |= $71..72 bitwise_or_int $69 |= $70 merge_condition_mask CondMask = $88 & $89 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 10 at #119) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 10 at #118) copy_4_slots_unmasked $70..73 = a copy_4_slots_unmasked $74..77 = b cmpeq_4_floats $70..73 = equal($70..73, $74..77) @@ -125,7 +124,7 @@ label label 0x00000007 load_condition_mask CondMask = $78 copy_constant $57 = 0 merge_condition_mask CondMask = $67 & $68 -branch_if_no_lanes_active branch_if_no_lanes_active +41 (label 6 at #169) +branch_if_no_lanes_active branch_if_no_lanes_active +41 (label 6 at #168) copy_slot_unmasked eq = NE copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -155,7 +154,7 @@ cmpne_4_floats $58..61 = notEqual($58..61, $62..65) bitwise_or_2_ints $58..59 |= $60..61 bitwise_or_int $58 |= $59 merge_condition_mask CondMask = $78 & $79 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 12 at #165) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 12 at #164) copy_4_slots_unmasked $59..62 = a copy_4_slots_unmasked $63..66 = b cmpeq_4_floats $59..62 = equal($59..62, $63..66) @@ -170,7 +169,7 @@ label label 0x00000006 load_condition_mask CondMask = $67 copy_constant $46 = 0 merge_condition_mask CondMask = $56 & $57 
-branch_if_no_lanes_active branch_if_no_lanes_active +42 (label 5 at #215) +branch_if_no_lanes_active branch_if_no_lanes_active +42 (label 5 at #214) copy_slot_unmasked eq = EQ copy_slot_unmasked f1 = F42 copy_2_slots_unmasked f2, f3 = NAN1, NAN2 @@ -200,7 +199,7 @@ cmpne_4_floats $47..50 = notEqual($47..50, $51..54) bitwise_or_2_ints $47..48 |= $49..50 bitwise_or_int $47 |= $48 merge_condition_mask CondMask = $67 & $68 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 14 at #210) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 14 at #209) copy_4_slots_unmasked $48..51 = a copy_4_slots_unmasked $52..55 = b cmpeq_4_floats $48..51 = equal($48..51, $52..55) @@ -216,7 +215,7 @@ label label 0x00000005 load_condition_mask CondMask = $56 copy_constant $35 = 0 merge_condition_mask CondMask = $45 & $46 -branch_if_no_lanes_active branch_if_no_lanes_active +39 (label 4 at #258) +branch_if_no_lanes_active branch_if_no_lanes_active +39 (label 4 at #257) copy_slot_unmasked eq₁ = NE copy_4_slots_unmasked f1₁, f2₁, f3₁, f4₁ = F42, F43, F44, F45 copy_uniform $36 = colorGreen(0) @@ -244,7 +243,7 @@ cmpne_4_floats $36..39 = notEqual($36..39, $40..43) bitwise_or_2_ints $36..37 |= $38..39 bitwise_or_int $36 |= $37 merge_condition_mask CondMask = $56 & $57 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 16 at #254) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 16 at #253) copy_4_slots_unmasked $37..40 = a₁ copy_4_slots_unmasked $41..44 = b₁ cmpeq_4_floats $37..40 = equal($37..40, $41..44) @@ -259,7 +258,7 @@ label label 0x00000004 load_condition_mask CondMask = $45 copy_constant $24 = 0 merge_condition_mask CondMask = $34 & $35 -branch_if_no_lanes_active branch_if_no_lanes_active +40 (label 3 at #302) +branch_if_no_lanes_active branch_if_no_lanes_active +40 (label 3 at #301) copy_slot_unmasked eq₁ = EQ copy_4_slots_unmasked f1₁, f2₁, f3₁, f4₁ = F42, F43, F44, F45 copy_uniform $25 = colorGreen(0) @@ -287,7 +286,7 @@ 
cmpne_4_floats $25..28 = notEqual($25..28, $29..32) bitwise_or_2_ints $25..26 |= $27..28 bitwise_or_int $25 |= $26 merge_condition_mask CondMask = $45 & $46 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 18 at #297) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 18 at #296) copy_4_slots_unmasked $26..29 = a₁ copy_4_slots_unmasked $30..33 = b₁ cmpeq_4_floats $26..29 = equal($26..29, $30..33) @@ -303,7 +302,7 @@ label label 0x00000003 load_condition_mask CondMask = $34 copy_constant $13 = 0 merge_condition_mask CondMask = $23 & $24 -branch_if_no_lanes_active branch_if_no_lanes_active +42 (label 2 at #348) +branch_if_no_lanes_active branch_if_no_lanes_active +42 (label 2 at #347) copy_slot_unmasked eq₁ = NE copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -334,7 +333,7 @@ cmpne_4_floats $14..17 = notEqual($14..17, $18..21) bitwise_or_2_ints $14..15 |= $16..17 bitwise_or_int $14 |= $15 merge_condition_mask CondMask = $34 & $35 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 20 at #344) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 20 at #343) copy_4_slots_unmasked $15..18 = a₁ copy_4_slots_unmasked $19..22 = b₁ cmpeq_4_floats $15..18 = equal($15..18, $19..22) @@ -349,7 +348,7 @@ label label 0x00000002 load_condition_mask CondMask = $23 copy_constant $0 = 0 merge_condition_mask CondMask = $12 & $13 -branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 1 at #395) +branch_if_no_lanes_active branch_if_no_lanes_active +43 (label 1 at #394) copy_slot_unmasked eq₁ = EQ copy_slot_unmasked f1₁ = NAN1 copy_slot_unmasked f2₁ = ZM @@ -380,7 +379,7 @@ cmpne_4_floats $1..4 = notEqual($1..4, $5..8) bitwise_or_2_ints $1..2 |= $3..4 bitwise_or_int $1 |= $2 merge_condition_mask CondMask = $23 & $24 -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 22 at #390) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 22 at #389) copy_4_slots_unmasked $2..5 = a₁ copy_4_slots_unmasked 
$6..9 = b₁ cmpeq_4_floats $2..5 = equal($2..5, $6..9) diff --git a/tests/sksl/shared/FunctionPrototype.skrp b/tests/sksl/shared/FunctionPrototype.skrp index 9c31de9e3b3e..63deedfe7f58 100644 --- a/tests/sksl/shared/FunctionPrototype.skrp +++ b/tests/sksl/shared/FunctionPrototype.skrp @@ -1,11 +1,10 @@ store_src_rg coords = src.rg init_lane_masks CondMask = LoopMask = RetMask = true -splat_4_constants $0..3 = 0 -copy_4_uniforms $4..7 = colorGreen -sub_4_floats $0..3 -= $4..7 +copy_4_uniforms $0..3 = colorGreen +splat_4_constants $4..7 = 0x80000000 (-0.0) +bitwise_xor_4_ints $0..3 ^= $4..7 copy_4_slots_unmasked x = $0..3 -splat_4_constants $0..3 = 0 -copy_4_slots_unmasked $4..7 = x -sub_4_floats $0..3 -= $4..7 +splat_4_constants $4..7 = 0x80000000 (-0.0) +bitwise_xor_4_ints $0..3 ^= $4..7 label label 0 load_src src.rgba = $0..3 diff --git a/tests/sksl/shared/MatrixEquality.skrp b/tests/sksl/shared/MatrixEquality.skrp index 6b8d906dfda2..c35fae37ac4e 100644 --- a/tests/sksl/shared/MatrixEquality.skrp +++ b/tests/sksl/shared/MatrixEquality.skrp @@ -112,9 +112,9 @@ bitwise_and_int $1 &= $2 bitwise_xor_imm_int $1 ^= 0xFFFFFFFF bitwise_and_int $0 &= $1 copy_slot_unmasked _0_ok = $0 -splat_2_constants $1..2 = 0 -copy_slot_unmasked $3 = _2_one -sub_float $2 -= $3 +copy_constant $1 = 0 +copy_slot_unmasked $2 = _2_one +bitwise_xor_imm_int $2 ^= 0x80000000 swizzle_4 $1..4 = ($1..4).yxxy copy_constant $5 = 0 copy_constant $6 = 0xBF800000 (-1.0) @@ -135,12 +135,12 @@ bitwise_and_2_ints $1..2 &= $3..4 bitwise_and_int $1 &= $2 bitwise_and_int $0 &= $1 copy_slot_unmasked _0_ok = $0 -splat_4_constants $1..4 = 0 -splat_2_constants $5..6 = 0 -copy_slot_unmasked $7 = _2_one -sub_float $6 -= $7 -swizzle_4 $5..8 = ($5..8).yxxy -sub_4_floats $1..4 -= $5..8 +copy_constant $1 = 0 +copy_slot_unmasked $2 = _2_one +bitwise_xor_imm_int $2 ^= 0x80000000 +swizzle_4 $1..4 = ($1..4).yxxy +splat_4_constants $5..8 = 0x80000000 (-0.0) +bitwise_xor_4_ints $1..4 ^= $5..8 copy_constant $5 = 0 
copy_constant $6 = 0x3F800000 (1.0) swizzle_4 $5..8 = ($5..8).yxxy @@ -149,11 +149,11 @@ bitwise_and_2_ints $1..2 &= $3..4 bitwise_and_int $1 &= $2 bitwise_and_int $0 &= $1 copy_slot_unmasked _0_ok = $0 -splat_4_constants $1..4 = 0 -copy_constant $5 = 0 -copy_slot_unmasked $6 = _1_zero -swizzle_4 $5..8 = ($5..8).yxxy -sub_4_floats $1..4 -= $5..8 +copy_constant $1 = 0 +copy_slot_unmasked $2 = _1_zero +swizzle_4 $1..4 = ($1..4).yxxy +splat_4_constants $5..8 = 0x80000000 (-0.0) +bitwise_xor_4_ints $1..4 ^= $5..8 copy_constant $5 = 0 copy_constant $6 = 0x80000000 (-0.0) swizzle_4 $5..8 = ($5..8).yxxy diff --git a/tests/sksl/shared/UnaryPositiveNegative.skrp b/tests/sksl/shared/UnaryPositiveNegative.skrp index bf5a6254048a..2a023ffe5f22 100644 --- a/tests/sksl/shared/UnaryPositiveNegative.skrp +++ b/tests/sksl/shared/UnaryPositiveNegative.skrp @@ -1,40 +1,35 @@ store_src_rg coords = src.rg init_lane_masks CondMask = LoopMask = RetMask = true copy_uniform _0_x = colorWhite(0) -copy_constant $0 = 0 -copy_slot_unmasked $1 = _0_x -sub_float $0 -= $1 -copy_slot_unmasked _0_x = $0 +bitwise_xor_imm_int _0_x ^= 0x80000000 store_condition_mask $33 = CondMask store_condition_mask $53 = CondMask store_condition_mask $63 = CondMask store_condition_mask $69 = CondMask store_condition_mask $75 = CondMask -store_condition_mask $79 = CondMask -copy_slot_unmasked $80 = _0_x -cmpeq_imm_float $80 = equal($80, 0xBF800000 (-1.0)) +store_condition_mask $78 = CondMask +copy_slot_unmasked $79 = _0_x +cmpeq_imm_float $79 = equal($79, 0xBF800000 (-1.0)) copy_constant $76 = 0 -merge_condition_mask CondMask = $79 & $80 -branch_if_no_lanes_active branch_if_no_lanes_active +11 (label 6 at #29) +merge_condition_mask CondMask = $78 & $79 +branch_if_no_lanes_active branch_if_no_lanes_active +9 (label 6 at #24) copy_uniform $77 = colorWhite(0) cast_to_int_from_float $77 = FloatToInt($77) copy_slot_unmasked x = $77 -copy_constant $77 = 0 -copy_slot_unmasked $78 = x -sub_int $77 -= $78 +mul_imm_int $77 *= 
0xFFFFFFFF copy_slot_masked x = Mask($77) cmpeq_imm_int $77 = equal($77, 0xFFFFFFFF) label label 0x00000007 copy_slot_masked $76 = Mask($77) label label 0x00000006 -load_condition_mask CondMask = $79 +load_condition_mask CondMask = $78 copy_constant $70 = 0 merge_condition_mask CondMask = $75 & $76 -branch_if_no_lanes_active branch_if_no_lanes_active +11 (label 5 at #44) +branch_if_no_lanes_active branch_if_no_lanes_active +11 (label 5 at #39) copy_2_uniforms x₁ = colorWhite(0..1) -splat_2_constants $71..72 = 0 -copy_2_slots_unmasked $73..74 = x₁ -sub_2_floats $71..72 -= $73..74 +copy_2_slots_unmasked $71..72 = x₁ +splat_2_constants $73..74 = 0x80000000 (-0.0) +bitwise_xor_2_ints $71..72 ^= $73..74 copy_2_slots_masked x₁ = Mask($71..72) splat_2_constants $73..74 = 0xBF800000 (-1.0) cmpeq_2_floats $71..72 = equal($71..72, $73..74) @@ -45,14 +40,13 @@ label label 0x00000005 load_condition_mask CondMask = $75 copy_constant $64 = 0 merge_condition_mask CondMask = $69 & $70 -branch_if_no_lanes_active branch_if_no_lanes_active +14 (label 4 at #62) +branch_if_no_lanes_active branch_if_no_lanes_active +13 (label 4 at #56) copy_uniform $65 = colorWhite(0) cast_to_int_from_float $65 = FloatToInt($65) copy_slot_unmasked $66 = $65 copy_2_slots_unmasked x₂ = $65..66 -splat_2_constants $65..66 = 0 -copy_2_slots_unmasked $67..68 = x₂ -sub_2_ints $65..66 -= $67..68 +splat_2_constants $67..68 = 0xFFFFFFFF +mul_2_ints $65..66 *= $67..68 copy_2_slots_masked x₂ = Mask($65..66) splat_2_constants $67..68 = 0xFFFFFFFF cmpeq_2_ints $65..66 = equal($65..66, $67..68) @@ -63,15 +57,15 @@ label label 0x00000004 load_condition_mask CondMask = $69 copy_constant $54 = 0 merge_condition_mask CondMask = $63 & $64 -branch_if_no_lanes_active branch_if_no_lanes_active +16 (label 3 at #82) +branch_if_no_lanes_active branch_if_no_lanes_active +16 (label 3 at #76) copy_constant negated(0) = 0xBF800000 (-1.0) copy_constant negated(1) = 0xC0000000 (-2.0) copy_constant negated(2) = 0xC0400000 (-3.0) 
copy_constant negated(3) = 0xC0800000 (-4.0) copy_4_uniforms x₃ = testMatrix2x2 -splat_4_constants $55..58 = 0 -copy_4_slots_unmasked $59..62 = x₃ -sub_4_floats $55..58 -= $59..62 +copy_4_slots_unmasked $55..58 = x₃ +splat_4_constants $59..62 = 0x80000000 (-0.0) +bitwise_xor_4_ints $55..58 ^= $59..62 copy_4_slots_masked x₃ = Mask($55..58) copy_4_slots_unmasked $59..62 = negated cmpeq_4_floats $55..58 = equal($55..58, $59..62) @@ -83,7 +77,7 @@ label label 0x00000003 load_condition_mask CondMask = $63 copy_constant $34 = 0 merge_condition_mask CondMask = $53 & $54 -branch_if_no_lanes_active branch_if_no_lanes_active +33 (label 2 at #119) +branch_if_no_lanes_active branch_if_no_lanes_active +33 (label 2 at #113) copy_constant negated₁(0) = 0xBF800000 (-1.0) copy_constant negated₁(1) = 0xC0000000 (-2.0) copy_constant negated₁(2) = 0xC0400000 (-3.0) @@ -96,13 +90,13 @@ copy_constant negated₁(8) = 0xC1100000 (-9.0) copy_4_uniforms x₄(0..3) = testMatrix3x3(0..3) copy_4_uniforms x₄(4..7) = testMatrix3x3(4..7) copy_uniform x₄(8) = testMatrix3x3(8) -splat_4_constants $35..38 = 0 -splat_4_constants $39..42 = 0 -copy_constant $43 = 0 -copy_4_slots_unmasked $44..47 = x₄(0..3) -copy_4_slots_unmasked $48..51 = x₄(4..7) -copy_slot_unmasked $52 = x₄(8) -sub_n_floats $35..43 -= $44..52 +copy_4_slots_unmasked $35..38 = x₄(0..3) +copy_4_slots_unmasked $39..42 = x₄(4..7) +copy_slot_unmasked $43 = x₄(8) +splat_4_constants $44..47 = 0x80000000 (-0.0) +splat_4_constants $48..51 = 0x80000000 (-0.0) +copy_constant $52 = 0x80000000 (-0.0) +bitwise_xor_n_ints $35..43 ^= $44..52 copy_4_slots_masked x₄(0..3) = Mask($35..38) copy_4_slots_masked x₄(4..7) = Mask($39..42) copy_slot_masked x₄(8) = Mask($43) @@ -120,7 +114,7 @@ label label 0x00000002 load_condition_mask CondMask = $53 copy_constant $0 = 0 merge_condition_mask CondMask = $33 & $34 -branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 1 at #169) +branch_if_no_lanes_active branch_if_no_lanes_active +46 (label 1 at #163) 
copy_constant negated₂(0) = 0xBF800000 (-1.0) copy_constant negated₂(1) = 0xC0000000 (-2.0) copy_constant negated₂(2) = 0xC0400000 (-3.0) @@ -141,15 +135,15 @@ copy_4_uniforms x₅(0..3) = testMatrix4x4(0..3) copy_4_uniforms x₅(4..7) = testMatrix4x4(4..7) copy_4_uniforms x₅(8..11) = testMatrix4x4(8..11) copy_4_uniforms x₅(12..15) = testMatrix4x4(12..15) -splat_4_constants $1..4 = 0 -splat_4_constants $5..8 = 0 -splat_4_constants $9..12 = 0 -splat_4_constants $13..16 = 0 -copy_4_slots_unmasked $17..20 = x₅(0..3) -copy_4_slots_unmasked $21..24 = x₅(4..7) -copy_4_slots_unmasked $25..28 = x₅(8..11) -copy_4_slots_unmasked $29..32 = x₅(12..15) -sub_n_floats $1..16 -= $17..32 +copy_4_slots_unmasked $1..4 = x₅(0..3) +copy_4_slots_unmasked $5..8 = x₅(4..7) +copy_4_slots_unmasked $9..12 = x₅(8..11) +copy_4_slots_unmasked $13..16 = x₅(12..15) +splat_4_constants $17..20 = 0x80000000 (-0.0) +splat_4_constants $21..24 = 0x80000000 (-0.0) +splat_4_constants $25..28 = 0x80000000 (-0.0) +splat_4_constants $29..32 = 0x80000000 (-0.0) +bitwise_xor_n_ints $1..16 ^= $17..32 copy_4_slots_masked x₅(0..3) = Mask($1..4) copy_4_slots_masked x₅(4..7) = Mask($5..8) copy_4_slots_masked x₅(8..11) = Mask($9..12)