Skip to content

Commit

Permalink
Improve RP code generation for negation.
Browse files Browse the repository at this point in the history
Previously, we emitted `0 - x` for all negation. This was fine, but
it could never use our immediate ops.

Now, we emit negation for floats as `x ^ int(0x80000000)`, and
negation for ints as `x * -1`. This allows scalar negation to use a
single immediate-op instruction, and sometimes allows multi-slot
negation to reuse an existing value on the stack. This never seems
to be a downgrade.

Change-Id: I427f6e2932370c56cd7076535e082d938a645820
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/681516
Reviewed-by: Brian Osman <[email protected]>
Commit-Queue: Brian Osman <[email protected]>
Auto-Submit: John Stiles <[email protected]>
Commit-Queue: John Stiles <[email protected]>
  • Loading branch information
johnstiles-google authored and SkCQ committed Apr 26, 2023
1 parent b59f0fd commit 3fea885
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 283 deletions.
23 changes: 15 additions & 8 deletions src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3367,23 +3367,30 @@ bool Generator::pushPrefixExpression(Operator op, const Expression& expr) {
fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
return true;

case OperatorKind::MINUS:
// Handle negation as a componentwise `0 - expr`.
fBuilder.push_zeros(expr.type().slotCount());
case OperatorKind::MINUS: {
if (!this->pushExpression(expr)) {
return unsupported();
}
return this->binaryOp(expr.type(), kSubtractOps);

if (expr.type().componentType().isFloat()) {
// Handle float negation as an integer `x ^ 0x80000000`. This toggles the sign bit.
fBuilder.push_constant_u(0x80000000, expr.type().slotCount());
fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, expr.type().slotCount());
} else {
// Handle integer negation as a componentwise `expr * -1`.
fBuilder.push_constant_i(-1, expr.type().slotCount());
fBuilder.binary_op(BuilderOp::mul_n_ints, expr.type().slotCount());
}
return true;
}
case OperatorKind::PLUSPLUS: {
// Rewrite as `expr += 1`.
Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()};
return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, oneLiteral);
}
case OperatorKind::MINUSMINUS: {
// Rewrite as `expr -= 1`.
Literal oneLiteral{Position{}, 1.0, &expr.type().componentType()};
return this->pushBinaryExpression(expr, OperatorKind::MINUSEQ, oneLiteral);
// Rewrite as `expr += -1`.
Literal minusOneLiteral{expr.fPosition, -1.0, &expr.type().componentType()};
return this->pushBinaryExpression(expr, OperatorKind::PLUSEQ, minusOneLiteral);
}
default:
break;
Expand Down
72 changes: 32 additions & 40 deletions tests/sksl/folding/MatrixNoOpFolding.skrp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ copy_4_uniforms $0..3 = testMatrix2x2
copy_4_slots_unmasked _0_m = $0..3
copy_4_uniforms $0..3 = testMatrix2x2
copy_4_slots_unmasked _0_m = $0..3
splat_4_constants $0..3 = 0
copy_4_slots_unmasked $4..7 = _0_m
sub_4_floats $0..3 -= $4..7
splat_4_constants $4..7 = 0x80000000 (-0.0)
bitwise_xor_4_ints $0..3 ^= $4..7
copy_4_slots_unmasked _0_m = $0..3
splat_2_constants $0..1 = 0
swizzle_4 $0..3 = ($0..3).yxxy
Expand All @@ -22,9 +21,9 @@ copy_4_slots_unmasked _1_mm = $0..3
store_condition_mask $49 = CondMask
store_condition_mask $78 = CondMask
copy_4_slots_unmasked $79..82 = _0_m
splat_4_constants $83..86 = 0
copy_4_uniforms $87..90 = testMatrix2x2
sub_4_floats $83..86 -= $87..90
copy_4_uniforms $83..86 = testMatrix2x2
splat_4_constants $87..90 = 0x80000000 (-0.0)
bitwise_xor_4_ints $83..86 ^= $87..90
cmpeq_4_floats $79..82 = equal($79..82, $83..86)
bitwise_and_2_ints $79..80 &= $81..82
bitwise_and_int $79 &= $80
Expand All @@ -36,7 +35,7 @@ bitwise_and_int $80 &= $81
bitwise_and_int $79 &= $80
copy_constant $50 = 0
merge_condition_mask CondMask = $78 & $79
branch_if_no_lanes_active branch_if_no_lanes_active +71 (label 2 at #110)
branch_if_no_lanes_active branch_if_no_lanes_active +68 (label 2 at #106)
splat_4_constants m(0..3) = 0
splat_4_constants m(4..7) = 0
splat_4_constants m(8), mm(0..2) = 0
Expand All @@ -59,13 +58,10 @@ copy_uniform $59 = testMatrix3x3(8)
copy_4_slots_masked m(0..3) = Mask($51..54)
copy_4_slots_masked m(4..7) = Mask($55..58)
copy_slot_masked m(8) = Mask($59)
splat_4_constants $51..54 = 0
splat_4_constants $55..58 = 0
copy_constant $59 = 0
copy_4_slots_unmasked $60..63 = m(0..3)
copy_4_slots_unmasked $64..67 = m(4..7)
copy_slot_unmasked $68 = m(8)
sub_n_floats $51..59 -= $60..68
splat_4_constants $60..63 = 0x80000000 (-0.0)
splat_4_constants $64..67 = 0x80000000 (-0.0)
copy_constant $68 = 0x80000000 (-0.0)
bitwise_xor_n_ints $51..59 ^= $60..68
copy_4_slots_masked m(0..3) = Mask($51..54)
copy_4_slots_masked m(4..7) = Mask($55..58)
copy_slot_masked m(8) = Mask($59)
Expand All @@ -82,13 +78,13 @@ copy_slot_masked mm(8) = Mask($59)
copy_4_slots_unmasked $51..54 = m(0..3)
copy_4_slots_unmasked $55..58 = m(4..7)
copy_slot_unmasked $59 = m(8)
splat_4_constants $60..63 = 0
splat_4_constants $64..67 = 0
copy_constant $68 = 0
copy_4_uniforms $69..72 = testMatrix3x3(0..3)
copy_4_uniforms $73..76 = testMatrix3x3(4..7)
copy_uniform $77 = testMatrix3x3(8)
sub_n_floats $60..68 -= $69..77
copy_4_uniforms $60..63 = testMatrix3x3(0..3)
copy_4_uniforms $64..67 = testMatrix3x3(4..7)
copy_uniform $68 = testMatrix3x3(8)
splat_4_constants $69..72 = 0x80000000 (-0.0)
splat_4_constants $73..76 = 0x80000000 (-0.0)
copy_constant $77 = 0x80000000 (-0.0)
bitwise_xor_n_ints $60..68 ^= $69..77
cmpeq_n_floats $51..59 = equal($51..59, $60..68)
bitwise_and_4_ints $52..55 &= $56..59
bitwise_and_2_ints $52..53 &= $54..55
Expand All @@ -111,7 +107,7 @@ label label 0x00000002
load_condition_mask CondMask = $78
copy_constant $0 = 0
merge_condition_mask CondMask = $49 & $50
branch_if_no_lanes_active branch_if_no_lanes_active +96 (label 1 at #210)
branch_if_no_lanes_active branch_if_no_lanes_active +92 (label 1 at #202)
copy_4_uniforms testMatrix4x4(0..3) = testInputs
copy_4_uniforms testMatrix4x4(4..7) = testInputs
copy_4_uniforms testMatrix4x4(8..11) = testInputs
Expand Down Expand Up @@ -146,15 +142,11 @@ copy_4_slots_masked m₁(0..3) = Mask($1..4)
copy_4_slots_masked m₁(4..7) = Mask($5..8)
copy_4_slots_masked m₁(8..11) = Mask($9..12)
copy_4_slots_masked m₁(12..15) = Mask($13..16)
splat_4_constants $1..4 = 0
splat_4_constants $5..8 = 0
splat_4_constants $9..12 = 0
splat_4_constants $13..16 = 0
copy_4_slots_unmasked $17..20 = m₁(0..3)
copy_4_slots_unmasked $21..24 = m₁(4..7)
copy_4_slots_unmasked $25..28 = m₁(8..11)
copy_4_slots_unmasked $29..32 = m₁(12..15)
sub_n_floats $1..16 -= $17..32
splat_4_constants $17..20 = 0x80000000 (-0.0)
splat_4_constants $21..24 = 0x80000000 (-0.0)
splat_4_constants $25..28 = 0x80000000 (-0.0)
splat_4_constants $29..32 = 0x80000000 (-0.0)
bitwise_xor_n_ints $1..16 ^= $17..32
copy_4_slots_masked m₁(0..3) = Mask($1..4)
copy_4_slots_masked m₁(4..7) = Mask($5..8)
copy_4_slots_masked m₁(8..11) = Mask($9..12)
Expand All @@ -175,15 +167,15 @@ copy_4_slots_unmasked $1..4 = m₁(0..3)
copy_4_slots_unmasked $5..8 = m₁(4..7)
copy_4_slots_unmasked $9..12 = m₁(8..11)
copy_4_slots_unmasked $13..16 = m₁(12..15)
splat_4_constants $17..20 = 0
splat_4_constants $21..24 = 0
splat_4_constants $25..28 = 0
splat_4_constants $29..32 = 0
copy_4_slots_unmasked $33..36 = testMatrix4x4(0..3)
copy_4_slots_unmasked $37..40 = testMatrix4x4(4..7)
copy_4_slots_unmasked $41..44 = testMatrix4x4(8..11)
copy_4_slots_unmasked $45..48 = testMatrix4x4(12..15)
sub_n_floats $17..32 -= $33..48
copy_4_slots_unmasked $17..20 = testMatrix4x4(0..3)
copy_4_slots_unmasked $21..24 = testMatrix4x4(4..7)
copy_4_slots_unmasked $25..28 = testMatrix4x4(8..11)
copy_4_slots_unmasked $29..32 = testMatrix4x4(12..15)
splat_4_constants $33..36 = 0x80000000 (-0.0)
splat_4_constants $37..40 = 0x80000000 (-0.0)
splat_4_constants $41..44 = 0x80000000 (-0.0)
splat_4_constants $45..48 = 0x80000000 (-0.0)
bitwise_xor_n_ints $17..32 ^= $33..48
cmpeq_n_floats $1..16 = equal($1..16, $17..32)
bitwise_and_4_ints $9..12 &= $13..16
bitwise_and_4_ints $5..8 &= $9..12
Expand Down
120 changes: 60 additions & 60 deletions tests/sksl/folding/MatrixScalarNoOpFolding.skrp
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ copy_constant $185 = 0
copy_slot_masked [test_no_op_scalar_X_mat2].result = Mask($185)
mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
load_condition_mask CondMask = $183
splat_4_constants $183..186 = 0
copy_4_slots_unmasked $187..190 = m
sub_4_floats $183..186 -= $187..190
copy_4_slots_unmasked $183..186 = m
splat_4_constants $187..190 = 0x80000000 (-0.0)
bitwise_xor_4_ints $183..186 ^= $187..190
copy_4_slots_masked m = Mask($183..186)
store_condition_mask $183 = CondMask
copy_4_slots_unmasked $184..187 = m
splat_4_constants $188..191 = 0
copy_4_uniforms $192..195 = testMatrix2x2
sub_4_floats $188..191 -= $192..195
copy_4_uniforms $188..191 = testMatrix2x2
splat_4_constants $192..195 = 0x80000000 (-0.0)
bitwise_xor_4_ints $188..191 ^= $192..195
cmpne_4_floats $184..187 = notEqual($184..187, $188..191)
bitwise_or_2_ints $184..185 |= $186..187
bitwise_or_int $184 |= $185
Expand Down Expand Up @@ -157,27 +157,27 @@ copy_constant $155 = 0
copy_slot_masked [test_no_op_scalar_X_mat3].result = Mask($155)
mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
load_condition_mask CondMask = $153
splat_4_constants $153..156 = 0
splat_4_constants $157..160 = 0
copy_constant $161 = 0
copy_4_slots_unmasked $162..165 = m₁(0..3)
copy_4_slots_unmasked $166..169 = m₁(4..7)
copy_slot_unmasked $170 = m₁(8)
sub_n_floats $153..161 -= $162..170
copy_4_slots_unmasked $153..156 = m₁(0..3)
copy_4_slots_unmasked $157..160 = m₁(4..7)
copy_slot_unmasked $161 = m₁(8)
splat_4_constants $162..165 = 0x80000000 (-0.0)
splat_4_constants $166..169 = 0x80000000 (-0.0)
copy_constant $170 = 0x80000000 (-0.0)
bitwise_xor_n_ints $153..161 ^= $162..170
copy_4_slots_masked m₁(0..3) = Mask($153..156)
copy_4_slots_masked m₁(4..7) = Mask($157..160)
copy_slot_masked m₁(8) = Mask($161)
store_condition_mask $153 = CondMask
copy_4_slots_unmasked $154..157 = m₁(0..3)
copy_4_slots_unmasked $158..161 = m₁(4..7)
copy_slot_unmasked $162 = m₁(8)
splat_4_constants $163..166 = 0
splat_4_constants $167..170 = 0
copy_constant $171 = 0
copy_4_uniforms $172..175 = testMatrix3x3(0..3)
copy_4_uniforms $176..179 = testMatrix3x3(4..7)
copy_uniform $180 = testMatrix3x3(8)
sub_n_floats $163..171 -= $172..180
copy_4_uniforms $163..166 = testMatrix3x3(0..3)
copy_4_uniforms $167..170 = testMatrix3x3(4..7)
copy_uniform $171 = testMatrix3x3(8)
splat_4_constants $172..175 = 0x80000000 (-0.0)
splat_4_constants $176..179 = 0x80000000 (-0.0)
copy_constant $180 = 0x80000000 (-0.0)
bitwise_xor_n_ints $163..171 ^= $172..180
cmpne_n_floats $154..162 = notEqual($154..162, $163..171)
bitwise_or_4_ints $155..158 |= $159..162
bitwise_or_2_ints $155..156 |= $157..158
Expand Down Expand Up @@ -311,15 +311,15 @@ copy_constant $103 = 0
copy_slot_masked [test_no_op_scalar_X_mat4].result = Mask($103)
mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
load_condition_mask CondMask = $101
splat_4_constants $101..104 = 0
splat_4_constants $105..108 = 0
splat_4_constants $109..112 = 0
splat_4_constants $113..116 = 0
copy_4_slots_unmasked $117..120 = m₂(0..3)
copy_4_slots_unmasked $121..124 = m₂(4..7)
copy_4_slots_unmasked $125..128 = m₂(8..11)
copy_4_slots_unmasked $129..132 = m₂(12..15)
sub_n_floats $101..116 -= $117..132
copy_4_slots_unmasked $101..104 = m₂(0..3)
copy_4_slots_unmasked $105..108 = m₂(4..7)
copy_4_slots_unmasked $109..112 = m₂(8..11)
copy_4_slots_unmasked $113..116 = m₂(12..15)
splat_4_constants $117..120 = 0x80000000 (-0.0)
splat_4_constants $121..124 = 0x80000000 (-0.0)
splat_4_constants $125..128 = 0x80000000 (-0.0)
splat_4_constants $129..132 = 0x80000000 (-0.0)
bitwise_xor_n_ints $101..116 ^= $117..132
copy_4_slots_masked m₂(0..3) = Mask($101..104)
copy_4_slots_masked m₂(4..7) = Mask($105..108)
copy_4_slots_masked m₂(8..11) = Mask($109..112)
Expand All @@ -329,15 +329,15 @@ copy_4_slots_unmasked $102..105 = m₂(0..3)
copy_4_slots_unmasked $106..109 = m₂(4..7)
copy_4_slots_unmasked $110..113 = m₂(8..11)
copy_4_slots_unmasked $114..117 = m₂(12..15)
splat_4_constants $118..121 = 0
splat_4_constants $122..125 = 0
splat_4_constants $126..129 = 0
splat_4_constants $130..133 = 0
copy_4_slots_unmasked $134..137 = testMatrix4x4(0..3)
copy_4_slots_unmasked $138..141 = testMatrix4x4(4..7)
copy_4_slots_unmasked $142..145 = testMatrix4x4(8..11)
copy_4_slots_unmasked $146..149 = testMatrix4x4(12..15)
sub_n_floats $118..133 -= $134..149
copy_4_slots_unmasked $118..121 = testMatrix4x4(0..3)
copy_4_slots_unmasked $122..125 = testMatrix4x4(4..7)
copy_4_slots_unmasked $126..129 = testMatrix4x4(8..11)
copy_4_slots_unmasked $130..133 = testMatrix4x4(12..15)
splat_4_constants $134..137 = 0x80000000 (-0.0)
splat_4_constants $138..141 = 0x80000000 (-0.0)
splat_4_constants $142..145 = 0x80000000 (-0.0)
splat_4_constants $146..149 = 0x80000000 (-0.0)
bitwise_xor_n_ints $118..133 ^= $134..149
cmpne_n_floats $102..117 = notEqual($102..117, $118..133)
bitwise_or_4_ints $110..113 |= $114..117
bitwise_or_4_ints $106..109 |= $110..113
Expand Down Expand Up @@ -465,12 +465,12 @@ sub_4_floats $85..88 -= $89..92
copy_4_slots_masked m₃ = Mask($85..88)
store_condition_mask $85 = CondMask
copy_4_slots_unmasked $86..89 = m₃
splat_4_constants $90..93 = 0
copy_slot_unmasked $94 = scalar
copy_slot_unmasked $95 = scalar
copy_slot_unmasked $96 = scalar
copy_slot_unmasked $97 = scalar
sub_4_floats $90..93 -= $94..97
copy_slot_unmasked $90 = scalar
copy_slot_unmasked $91 = scalar
copy_slot_unmasked $92 = scalar
copy_slot_unmasked $93 = scalar
splat_4_constants $94..97 = 0x80000000 (-0.0)
bitwise_xor_4_ints $90..93 ^= $94..97
cmpne_4_floats $86..89 = notEqual($86..89, $90..93)
bitwise_or_2_ints $86..87 |= $88..89
bitwise_or_int $86 |= $87
Expand Down Expand Up @@ -640,13 +640,13 @@ store_condition_mask $54 = CondMask
copy_4_slots_unmasked $55..58 = m₄(0..3)
copy_4_slots_unmasked $59..62 = m₄(4..7)
copy_slot_unmasked $63 = m₄(8)
splat_4_constants $64..67 = 0
splat_4_constants $68..71 = 0
copy_constant $72 = 0
copy_3_slots_unmasked $73..75 = scalar3
copy_3_slots_unmasked $76..78 = scalar3
copy_3_slots_unmasked $79..81 = scalar3
sub_n_floats $64..72 -= $73..81
copy_3_slots_unmasked $64..66 = scalar3
copy_3_slots_unmasked $67..69 = scalar3
copy_3_slots_unmasked $70..72 = scalar3
splat_4_constants $73..76 = 0x80000000 (-0.0)
splat_4_constants $77..80 = 0x80000000 (-0.0)
copy_constant $81 = 0x80000000 (-0.0)
bitwise_xor_n_ints $64..72 ^= $73..81
cmpne_n_floats $55..63 = notEqual($55..63, $64..72)
bitwise_or_4_ints $56..59 |= $60..63
bitwise_or_2_ints $56..57 |= $58..59
Expand Down Expand Up @@ -856,15 +856,15 @@ copy_4_slots_unmasked $3..6 = m₅(0..3)
copy_4_slots_unmasked $7..10 = m₅(4..7)
copy_4_slots_unmasked $11..14 = m₅(8..11)
copy_4_slots_unmasked $15..18 = m₅(12..15)
splat_4_constants $19..22 = 0
splat_4_constants $23..26 = 0
splat_4_constants $27..30 = 0
splat_4_constants $31..34 = 0
copy_4_slots_unmasked $35..38 = scalar4
copy_4_slots_unmasked $39..42 = scalar4
copy_4_slots_unmasked $43..46 = scalar4
copy_4_slots_unmasked $47..50 = scalar4
sub_n_floats $19..34 -= $35..50
copy_4_slots_unmasked $19..22 = scalar4
copy_4_slots_unmasked $23..26 = scalar4
copy_4_slots_unmasked $27..30 = scalar4
copy_4_slots_unmasked $31..34 = scalar4
splat_4_constants $35..38 = 0x80000000 (-0.0)
splat_4_constants $39..42 = 0x80000000 (-0.0)
splat_4_constants $43..46 = 0x80000000 (-0.0)
splat_4_constants $47..50 = 0x80000000 (-0.0)
bitwise_xor_n_ints $19..34 ^= $35..50
cmpne_n_floats $3..18 = notEqual($3..18, $19..34)
bitwise_or_4_ints $11..14 |= $15..18
bitwise_or_4_ints $7..10 |= $11..14
Expand Down
Loading

0 comments on commit 3fea885

Please sign in to comment.