From 05d09f28250a77573c609a2ea291aec776e5ff2d Mon Sep 17 00:00:00 2001 From: John Stiles Date: Thu, 27 Apr 2023 12:34:58 -0400 Subject: [PATCH] Detect and eliminate redundant pop-push with swizzles. The previous implementation would not detect a redundant pop-push in cases where the follow-up push was split up into multiple parts. This does not come up much in our test corpus, but it will generate better code for premul: c = (anything); // pops stack to `c.rgba` c.rgb *= c.a; // pushes `c.rgb`, `c.a` --> can reuse stack I noticed this while checking the generated code for the runtime_filtercolor4f benchmark. Change-Id: I11726065ad799f8525c3b69efafc0b5f632a261f Reviewed-on: https://skia-review.googlesource.com/c/skia/+/684296 Commit-Queue: John Stiles Auto-Submit: John Stiles Commit-Queue: Brian Osman Reviewed-by: Brian Osman --- .../codegen/SkSLRasterPipelineBuilder.cpp | 45 +++++++++++-------- tests/sksl/shared/StructsInFunctions.skrp | 5 +-- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp b/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp index 1becc63260d2..1d3acbcba51e 100644 --- a/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp +++ b/src/sksl/codegen/SkSLRasterPipelineBuilder.cpp @@ -295,10 +295,10 @@ void Builder::discard_stack(int32_t count) { // If this was a single-slot unmasked pop... if (fInstructions.size() >= 3 && lastInstruction.fImmA == 1) { // ... and the previous instruction was an immediate-mode op... - Instruction& immInstruction = fInstructions.end()[-2]; + Instruction& immInstruction = fInstructions.fromBack(1); if (is_immediate_op(immInstruction.fOp)) { // ... and the instruction prior to that was `push_slots`... - Instruction& pushInstruction = fInstructions.end()[-3]; + Instruction& pushInstruction = fInstructions.fromBack(2); if (pushInstruction.fOp == BuilderOp::push_slots) { // ... from the same slot... Slot pushedSlot = pushInstruction.fSlotA + pushInstruction.fImmA - 1; @@ -439,29 +439,38 @@ void Builder::push_slots(SlotRange src) { if (lastInstruction.fOp == BuilderOp::push_slots && lastInstruction.fSlotA + lastInstruction.fImmA == src.index) { lastInstruction.fImmA += src.count; - return; + src.count = 0; } + } + + if (src.count > 0) { + fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count}); + } - // If the previous instruction was discarding an equal number of slots... - if (lastInstruction.fOp == BuilderOp::discard_stack && lastInstruction.fImmA == src.count) { - // ... and the instruction before that was copying from the stack to the same slots... - Instruction& prevInstruction = fInstructions.fromBack(1); - if ((prevInstruction.fOp == BuilderOp::copy_stack_to_slots || - prevInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked) && - prevInstruction.fSlotA == src.index && - prevInstruction.fImmA == src.count) { - // ... we are emitting `copy stack to X, discard stack, copy X to stack`. This is a - // common pattern when multiple operations in a row affect the same variable. We can - // eliminate the discard and just leave X on the stack. + // Look for a sequence of "copy stack to X, discard stack, copy X to stack". This is a common + // pattern when multiple operations in a row affect the same variable. When we see this, we can + // eliminate both the discard and the push. + if (fInstructions.size() >= 3 && fInstructions.back().fOp == BuilderOp::push_slots) { + int pushIndex = fInstructions.back().fSlotA; + int pushCount = fInstructions.back().fImmA; + + const Instruction& discardInst = fInstructions.fromBack(1); + const Instruction& copyToSlotsInst = fInstructions.fromBack(2); + + // Look for a `discard_stack` matching our push count. + if (discardInst.fOp == BuilderOp::discard_stack && discardInst.fImmA == pushCount) { + // Look for a `copy_stack_to_slots` matching our push. + if ((copyToSlotsInst.fOp == BuilderOp::copy_stack_to_slots || + copyToSlotsInst.fOp == BuilderOp::copy_stack_to_slots_unmasked) && + copyToSlotsInst.fSlotA == pushIndex && + copyToSlotsInst.fImmA == pushCount) { + // We found a matching sequence. Remove the discard and push. + fInstructions.pop_back(); fInstructions.pop_back(); return; } } } - - if (src.count > 0) { - fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count}); - } } void Builder::push_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) { diff --git a/tests/sksl/shared/StructsInFunctions.skrp b/tests/sksl/shared/StructsInFunctions.skrp index cf5cf067f59a..d61cccd08075 100644 --- a/tests/sksl/shared/StructsInFunctions.skrp +++ b/tests/sksl/shared/StructsInFunctions.skrp @@ -9,7 +9,6 @@ copy_2_slots_unmasked $0..1 = s.x, s.y label label 0 copy_2_slots_unmasked s.x₁, s.y₁ = $0..1 copy_2_slots_unmasked s.x₂, s.y₂ = $0..1 -copy_2_slots_unmasked $0..1 = s.x₂, s.y₂ cast_to_float_from_int $1 = IntToFloat($1) add_float $0 += $1 label label 0x00000001 @@ -95,9 +94,9 @@ bitwise_and_int $14 &= $15 bitwise_and_int $13 &= $14 copy_constant $0 = 0 merge_condition_mask CondMask = $12 & $13 -branch_if_no_lanes_active branch_if_no_lanes_active +17 (label 6 at #115) +branch_if_no_lanes_active branch_if_no_lanes_active +17 (label 6 at #114) copy_slot_unmasked $1 = s.x₁ -branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 7 at #107) +branch_if_no_lanes_active branch_if_no_lanes_active +7 (label 7 at #106) splat_2_constants s.x, s.y = 0 copy_constant $17 = 0x3F800000 (1.0) copy_slot_masked s.x = Mask($17)