Skip to content

Commit

Permalink
Detect and eliminate redundant pop-push with swizzles.
Browse files Browse the repository at this point in the history
The previous implementation would not detect a redundant pop-push
in cases where the follow-up push was split up into multiple parts.
This does not come up much in our test corpus, but it will generate
better code for premul:

  c = (anything);  // pops stack to `c.rgba`
  c.rgb *= c.a;    // pushes `c.rgb`, `c.a` --> can reuse stack

I noticed this while checking the generated code for the
runtime_filtercolor4f benchmark.

Change-Id: I11726065ad799f8525c3b69efafc0b5f632a261f
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/684296
Commit-Queue: John Stiles <[email protected]>
Auto-Submit: John Stiles <[email protected]>
Commit-Queue: Brian Osman <[email protected]>
Reviewed-by: Brian Osman <[email protected]>
  • Loading branch information
johnstiles-google authored and SkCQ committed Apr 27, 2023
1 parent 0102755 commit 05d09f2
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 21 deletions.
45 changes: 27 additions & 18 deletions src/sksl/codegen/SkSLRasterPipelineBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,10 @@ void Builder::discard_stack(int32_t count) {
// If this was a single-slot unmasked pop...
if (fInstructions.size() >= 3 && lastInstruction.fImmA == 1) {
// ... and the previous instruction was an immediate-mode op...
Instruction& immInstruction = fInstructions.end()[-2];
Instruction& immInstruction = fInstructions.fromBack(1);
if (is_immediate_op(immInstruction.fOp)) {
// ... and the instruction prior to that was `push_slots`...
Instruction& pushInstruction = fInstructions.end()[-3];
Instruction& pushInstruction = fInstructions.fromBack(2);
if (pushInstruction.fOp == BuilderOp::push_slots) {
// ... from the same slot...
Slot pushedSlot = pushInstruction.fSlotA + pushInstruction.fImmA - 1;
Expand Down Expand Up @@ -439,29 +439,38 @@ void Builder::push_slots(SlotRange src) {
if (lastInstruction.fOp == BuilderOp::push_slots &&
lastInstruction.fSlotA + lastInstruction.fImmA == src.index) {
lastInstruction.fImmA += src.count;
return;
src.count = 0;
}
}

if (src.count > 0) {
fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count});
}

// If the previous instruction was discarding an equal number of slots...
if (lastInstruction.fOp == BuilderOp::discard_stack && lastInstruction.fImmA == src.count) {
// ... and the instruction before that was copying from the stack to the same slots...
Instruction& prevInstruction = fInstructions.fromBack(1);
if ((prevInstruction.fOp == BuilderOp::copy_stack_to_slots ||
prevInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
prevInstruction.fSlotA == src.index &&
prevInstruction.fImmA == src.count) {
// ... we are emitting `copy stack to X, discard stack, copy X to stack`. This is a
// common pattern when multiple operations in a row affect the same variable. We can
// eliminate the discard and just leave X on the stack.
// Look for a sequence of "copy stack to X, discard stack, copy X to stack". This is a common
// pattern when multiple operations in a row affect the same variable. When we see this, we can
// eliminate both the discard and the push.
if (fInstructions.size() >= 3 && fInstructions.back().fOp == BuilderOp::push_slots) {
int pushIndex = fInstructions.back().fSlotA;
int pushCount = fInstructions.back().fImmA;

const Instruction& discardInst = fInstructions.fromBack(1);
const Instruction& copyToSlotsInst = fInstructions.fromBack(2);

// Look for a `discard_stack` matching our push count.
if (discardInst.fOp == BuilderOp::discard_stack && discardInst.fImmA == pushCount) {
// Look for a `copy_stack_to_slots` matching our push.
if ((copyToSlotsInst.fOp == BuilderOp::copy_stack_to_slots ||
copyToSlotsInst.fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
copyToSlotsInst.fSlotA == pushIndex &&
copyToSlotsInst.fImmA == pushCount) {
// We found a matching sequence. Remove the discard and push.
fInstructions.pop_back();
fInstructions.pop_back();
return;
}
}
}

if (src.count > 0) {
fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count});
}
}

void Builder::push_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
Expand Down
5 changes: 2 additions & 3 deletions tests/sksl/shared/StructsInFunctions.skrp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ copy_2_slots_unmasked $0..1 = s.x, s.y
label label 0
copy_2_slots_unmasked s.x₁, s.y₁ = $0..1
copy_2_slots_unmasked s.x₂, s.y₂ = $0..1
copy_2_slots_unmasked $0..1 = s.x₂, s.y₂
cast_to_float_from_int $1 = IntToFloat($1)
add_float $0 += $1
label label 0x00000001
Expand Down Expand Up @@ -95,9 +94,9 @@ bitwise_and_int $14 &= $15
bitwise_and_int $13 &= $14
copy_constant $0 = 0
merge_condition_mask CondMask = $12 & $13
branch_if_no_lanes_active branch_if_no_lanes_active +17 (label 6 at #115)
branch_if_no_lanes_active branch_if_no_lanes_active +17 (label 6 at #114)
copy_slot_unmasked $1 = s.x₁
branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 7 at #107)
branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 7 at #106)
splat_2_constants s.x, s.y = 0
copy_constant $17 = 0x3F800000 (1.0)
copy_slot_masked s.x = Mask($17)
Expand Down

0 comments on commit 05d09f2

Please sign in to comment.