Skip to content

Commit

Permalink
Create dedicated continue_op to handle loop-continue.
Browse files Browse the repository at this point in the history
Building up `continue` out of existing primitives was quick to
implement, but a dedicated op will be more compact and should be
faster to execute.

Change-Id: I8477f8d1d8a75df1c2c87b29da24ddfca11761b0
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/684101
Auto-Submit: John Stiles <[email protected]>
Reviewed-by: Michael Ludwig <[email protected]>
Commit-Queue: Michael Ludwig <[email protected]>
  • Loading branch information
johnstiles-google authored and SkCQ committed Apr 28, 2023
1 parent 75201cb commit d81b864
Show file tree
Hide file tree
Showing 15 changed files with 154 additions and 160 deletions.
2 changes: 1 addition & 1 deletion src/core/SkRasterPipelineOpList.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
M(init_lane_masks) M(store_device_xy01) M(exchange_src) \
M(load_condition_mask) M(store_condition_mask) M(merge_condition_mask) \
M(load_loop_mask) M(store_loop_mask) M(mask_off_loop_mask) \
M(reenable_loop_mask) M(merge_loop_mask) M(case_op) \
M(reenable_loop_mask) M(merge_loop_mask) M(case_op) M(continue_op) \
M(load_return_mask) M(store_return_mask) M(mask_off_return_mask) \
M(branch_if_all_lanes_active) M(branch_if_any_lanes_active) M(branch_if_no_lanes_active) \
M(branch_if_no_active_lanes_eq) M(jump) \
Expand Down
9 changes: 9 additions & 0 deletions src/opts/SkRasterPipeline_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -3385,6 +3385,15 @@ STAGE_TAIL(merge_loop_mask, I32* ptr) {
update_execution_mask();
}

STAGE_TAIL(continue_op, I32* continueMask) {
    // Record every lane that is executing right now as having reached a `continue`.
    *continueMask = *continueMask | execution_mask();

    // Strip those same lanes out of the loop mask (kept in `g`), mirroring what
    // `mask_off_loop_mask` does, then let update_execution_mask() pick up the change.
    I32 loopMask = sk_bit_cast<I32>(g);
    loopMask = loopMask & ~execution_mask();
    g = sk_bit_cast<F>(loopMask);
    update_execution_mask();
}

STAGE_TAIL(case_op, SkRasterPipeline_CaseOpCtx* ctx) {
// Check each lane to see if the case value matches the expectation.
I32* actualValue = (I32*)ctx->ptr;
Expand Down
14 changes: 12 additions & 2 deletions src/sksl/codegen/SkSLRasterPipelineBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2125,6 +2125,10 @@ void Program::makeStages(TArray<Stage>* pipeline,
pipeline->push_back({ProgramOp::case_op, ctx});
break;
}
case BuilderOp::continue_op:
pipeline->push_back({ProgramOp::continue_op, tempStackMap[inst.fImmA] - (1 * N)});
break;

case BuilderOp::pad_stack:
case BuilderOp::discard_stack:
break;
Expand Down Expand Up @@ -2675,6 +2679,7 @@ void Program::dump(SkWStream* out) const {
case POp::reenable_loop_mask:
case POp::load_return_mask:
case POp::store_return_mask:
case POp::continue_op:
case POp::cast_to_float_from_int: case POp::cast_to_float_from_uint:
case POp::cast_to_int_from_float: case POp::cast_to_uint_from_float:
case POp::abs_float: case POp::abs_int:
Expand Down Expand Up @@ -3421,11 +3426,16 @@ void Program::dump(SkWStream* out) const {
opText = "label " + opArg1;
break;

case POp::case_op: {
case POp::case_op:
opText = "if (" + opArg1 + " == " + opArg3 +
") { LoopMask = true; " + opArg2 + " = false; }";
break;
}

case POp::continue_op:
opText = opArg1 +
" |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)";
break;

default:
break;
}
Expand Down
5 changes: 5 additions & 0 deletions src/sksl/codegen/SkSLRasterPipelineBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,11 @@ class Builder {
fInstructions.push_back({BuilderOp::case_op, {}, value});
}

// Performs a `continue` in a loop by appending a single `BuilderOp::continue_op` instruction to
// `fInstructions`.
//
// `continueMaskStackID` is stored as the instruction's third field. It names the temp stack that
// holds the continue mask for the loop being generated.
// NOTE(review): the consumer appears to be the `BuilderOp::continue_op` case in
// Program::makeStages, which indexes `tempStackMap` with `inst.fImmA` — confirm that the third
// field is `fImmA`.
void continue_op(int continueMaskStackID) {
fInstructions.push_back({BuilderOp::continue_op, {}, continueMaskStackID});
}

void select(int slots) {
// Overlays the top two entries on the stack, making one hybrid entry. The execution mask
// is used to select which lanes are preserved.
Expand Down
18 changes: 6 additions & 12 deletions src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,11 @@ class AutoContinueMask {
}
}

int stackID() {
SkASSERT(fContinueMaskStack.has_value());
return fContinueMaskStack->stackID();
}

private:
std::optional<AutoStack> fContinueMaskStack;
Generator* fGenerator = nullptr;
Expand Down Expand Up @@ -1579,18 +1584,7 @@ bool Generator::writeBreakStatement(const BreakStatement&) {
}

bool Generator::writeContinueStatement(const ContinueStatement&) {
// This could be written as one hand-tuned RasterPipeline op, but for now, we reuse existing ops
// to assemble a continue op.

// Set any currently-executing lanes in the continue-mask to true via `select.`
fCurrentContinueMask->enter();
fBuilder.push_constant_i(~0);
fBuilder.select(/*slots=*/1);

// Disable any currently-executing lanes from the loop mask.
fBuilder.mask_off_loop_mask();
fCurrentContinueMask->exit();

fBuilder.continue_op(fCurrentContinueMask->stackID());
return true;
}

Expand Down
58 changes: 28 additions & 30 deletions tests/sksl/runtime/LoopFloat.skrp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ label label 0x0000000A
cmpeq_imm_float $100 = equal($100, 0x40A00000 (5.0))
copy_constant $90 = 0
merge_condition_mask CondMask = $99 & $100
branch_if_no_lanes_active branch_if_no_lanes_active +73 (label 9 at #181)
branch_if_no_lanes_active branch_if_no_lanes_active +71 (label 9 at #179)
trace_enter TraceEnter(float continue_loop(float five)) when $13 is true
copy_slot_unmasked five₂ = five
trace_var TraceVar(five₂) when $13 is true
Expand All @@ -124,7 +124,7 @@ trace_line TraceLine(18) when $13 is true
copy_constant i₁ = 0
trace_var TraceVar(i₁) when $13 is true
store_loop_mask $93 = LoopMask
jump jump +35 (label 16 at #162)
jump jump +33 (label 16 at #160)
label label 0x00000011
copy_constant $109 = 0
copy_constant $94 = 0
Expand All @@ -142,9 +142,7 @@ copy_slot_unmasked $98 = $13
copy_slot_masked $97 = Mask($98)
trace_scope TraceScope(+1) when $97 is true
trace_line TraceLine(19) when $13 is true
copy_constant $110 = 0xFFFFFFFF
copy_slot_masked $109 = Mask($110)
mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
continue_op $109 |= Mask(0xFFFFFFFF); LoopMask &= ~(CondMask & LoopMask & RetMask)
trace_scope TraceScope(-1) when $97 is true
load_condition_mask CondMask = $95
trace_line TraceLine(20) when $13 is true
Expand All @@ -164,7 +162,7 @@ copy_slot_unmasked $94 = i₁
cmplt_imm_float $94 = lessThan($94, 0x41200000 (10.0))
merge_loop_mask LoopMask &= $94
stack_rewind
branch_if_any_lanes_active branch_if_any_lanes_active -39 (label 17 at #128)
branch_if_any_lanes_active branch_if_any_lanes_active -37 (label 17 at #128)
label label 0x0000000F
load_loop_mask LoopMask = $93
trace_scope TraceScope(-1) when $92 is true
Expand All @@ -182,7 +180,7 @@ label label 0x00000009
load_condition_mask CondMask = $99
copy_constant $80 = 0
merge_condition_mask CondMask = $89 & $90
branch_if_no_lanes_active branch_if_no_lanes_active +74 (label 8 at #259)
branch_if_no_lanes_active branch_if_no_lanes_active +74 (label 8 at #257)
trace_enter TraceEnter(float break_loop(float five)) when $13 is true
copy_slot_unmasked five₃ = five
trace_var TraceVar(five₃) when $13 is true
Expand All @@ -204,7 +202,7 @@ trace_line TraceLine(29) when $13 is true
copy_constant i₂ = 0
trace_var TraceVar(i₂) when $13 is true
store_loop_mask $83 = LoopMask
jump jump +33 (label 20 at #240)
jump jump +33 (label 20 at #238)
label label 0x00000015
copy_constant $84 = 0
copy_slot_unmasked $85 = $13
Expand All @@ -221,7 +219,7 @@ copy_slot_unmasked $88 = $13
copy_slot_masked $87 = Mask($88)
trace_scope TraceScope(+1) when $87 is true
trace_line TraceLine(30) when $13 is true
branch_if_all_lanes_active branch_if_all_lanes_active +22 (label 19 at #246)
branch_if_all_lanes_active branch_if_all_lanes_active +22 (label 19 at #244)
mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
trace_scope TraceScope(-1) when $87 is true
load_condition_mask CondMask = $85
Expand All @@ -242,7 +240,7 @@ copy_slot_unmasked $84 = i₂
cmplt_imm_float $84 = lessThan($84, 0x41200000 (10.0))
merge_loop_mask LoopMask &= $84
stack_rewind
branch_if_any_lanes_active branch_if_any_lanes_active -37 (label 21 at #208)
branch_if_any_lanes_active branch_if_any_lanes_active -37 (label 21 at #206)
label label 0x00000013
load_loop_mask LoopMask = $83
trace_scope TraceScope(-1) when $82 is true
Expand All @@ -260,7 +258,7 @@ label label 0x00000008
load_condition_mask CondMask = $89
copy_constant $73 = 0
merge_condition_mask CondMask = $79 & $80
branch_if_no_lanes_active branch_if_no_lanes_active +51 (label 7 at #314)
branch_if_no_lanes_active branch_if_no_lanes_active +51 (label 7 at #312)
trace_enter TraceEnter(float float_loop()) when $13 is true
copy_constant $74 = 0
copy_slot_unmasked $75 = $13
Expand All @@ -273,7 +271,7 @@ copy_constant $75 = 0
copy_slot_unmasked $76 = $13
copy_slot_masked $75 = Mask($76)
trace_scope TraceScope(+1) when $75 is true
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 23 at #300)
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 23 at #298)
trace_line TraceLine(39) when $13 is true
copy_constant i₃ = 0x3DFBE76D (0.123)
trace_var TraceVar(i₃) when $13 is true
Expand All @@ -296,7 +294,7 @@ trace_var TraceVar(i₃) when $13 is true
copy_slot_unmasked $76 = i₃
cmplt_imm_float $76 = lessThan($76, 0x3F19999A (0.6))
stack_rewind
branch_if_no_active_lanes_eq branch -19 (label 24 at #280) if no lanes of $76 == 0
branch_if_no_active_lanes_eq branch -19 (label 24 at #278) if no lanes of $76 == 0
label label 0x00000017
trace_scope TraceScope(-1) when $75 is true
trace_line TraceLine(42) when $13 is true
Expand All @@ -315,7 +313,7 @@ label label 0x00000007
load_condition_mask CondMask = $79
copy_constant $62 = 0
merge_condition_mask CondMask = $72 & $73
branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 6 at #374)
branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 6 at #372)
trace_enter TraceEnter(bool loop_operator_le()) when $13 is true
copy_constant $63 = 0
copy_slot_unmasked $64 = $13
Expand All @@ -330,7 +328,7 @@ copy_constant $64 = 0
copy_slot_unmasked $65 = $13
copy_slot_masked $64 = Mask($65)
trace_scope TraceScope(+1) when $64 is true
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 26 at #356)
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 26 at #354)
trace_line TraceLine(51) when $13 is true
copy_constant i₄ = 0x3F800000 (1.0)
trace_var TraceVar(i₄) when $13 is true
Expand All @@ -352,7 +350,7 @@ trace_var TraceVar(i₄) when $13 is true
copy_slot_unmasked $65 = i₄
cmple_imm_float $65 = lessThanEqual($65, 0x40400000 (3.0))
stack_rewind
branch_if_no_active_lanes_eq branch -18 (label 27 at #337) if no lanes of $65 == 0
branch_if_no_active_lanes_eq branch -18 (label 27 at #335) if no lanes of $65 == 0
label label 0x0000001A
trace_scope TraceScope(-1) when $64 is true
trace_line TraceLine(54) when $13 is true
Expand All @@ -375,7 +373,7 @@ label label 0x00000006
load_condition_mask CondMask = $72
copy_constant $51 = 0
merge_condition_mask CondMask = $61 & $62
branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 5 at #434)
branch_if_no_lanes_active branch_if_no_lanes_active +56 (label 5 at #432)
trace_enter TraceEnter(bool loop_operator_lt()) when $13 is true
copy_constant $52 = 0
copy_slot_unmasked $53 = $13
Expand All @@ -390,7 +388,7 @@ copy_constant $53 = 0
copy_slot_unmasked $54 = $13
copy_slot_masked $53 = Mask($54)
trace_scope TraceScope(+1) when $53 is true
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 29 at #416)
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 29 at #414)
trace_line TraceLine(63) when $13 is true
copy_constant i₅ = 0x3F800000 (1.0)
trace_var TraceVar(i₅) when $13 is true
Expand All @@ -412,7 +410,7 @@ trace_var TraceVar(i₅) when $13 is true
copy_slot_unmasked $54 = i₅
cmplt_imm_float $54 = lessThan($54, 0x40800000 (4.0))
stack_rewind
branch_if_no_active_lanes_eq branch -18 (label 30 at #397) if no lanes of $54 == 0
branch_if_no_active_lanes_eq branch -18 (label 30 at #395) if no lanes of $54 == 0
label label 0x0000001D
trace_scope TraceScope(-1) when $53 is true
trace_line TraceLine(66) when $13 is true
Expand All @@ -435,7 +433,7 @@ label label 0x00000005
load_condition_mask CondMask = $61
copy_constant $40 = 0
merge_condition_mask CondMask = $50 & $51
branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 4 at #495)
branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 4 at #493)
trace_enter TraceEnter(bool loop_operator_ge()) when $13 is true
copy_constant $41 = 0
copy_slot_unmasked $42 = $13
Expand All @@ -450,7 +448,7 @@ copy_constant $42 = 0
copy_slot_unmasked $43 = $13
copy_slot_masked $42 = Mask($43)
trace_scope TraceScope(+1) when $42 is true
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 32 at #477)
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 32 at #475)
trace_line TraceLine(75) when $13 is true
copy_constant i₆ = 0x40400000 (3.0)
trace_var TraceVar(i₆) when $13 is true
Expand All @@ -473,7 +471,7 @@ copy_constant $43 = 0x3F800000 (1.0)
copy_slot_unmasked $44 = i₆
cmple_float $43 = lessThanEqual($43, $44)
stack_rewind
branch_if_no_active_lanes_eq branch -19 (label 33 at #457) if no lanes of $43 == 0
branch_if_no_active_lanes_eq branch -19 (label 33 at #455) if no lanes of $43 == 0
label label 0x00000020
trace_scope TraceScope(-1) when $42 is true
trace_line TraceLine(78) when $13 is true
Expand All @@ -496,7 +494,7 @@ label label 0x00000004
load_condition_mask CondMask = $50
copy_constant $29 = 0
merge_condition_mask CondMask = $39 & $40
branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 3 at #556)
branch_if_no_lanes_active branch_if_no_lanes_active +57 (label 3 at #554)
trace_enter TraceEnter(bool loop_operator_gt()) when $13 is true
copy_constant $30 = 0
copy_slot_unmasked $31 = $13
Expand All @@ -511,7 +509,7 @@ copy_constant $31 = 0
copy_slot_unmasked $32 = $13
copy_slot_masked $31 = Mask($32)
trace_scope TraceScope(+1) when $31 is true
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 35 at #538)
branch_if_no_lanes_active branch_if_no_lanes_active +24 (label 35 at #536)
trace_line TraceLine(87) when $13 is true
copy_constant i₇ = 0x40400000 (3.0)
trace_var TraceVar(i₇) when $13 is true
Expand All @@ -534,7 +532,7 @@ copy_constant $32 = 0
copy_slot_unmasked $33 = i₇
cmplt_float $32 = lessThan($32, $33)
stack_rewind
branch_if_no_active_lanes_eq branch -19 (label 36 at #518) if no lanes of $32 == 0
branch_if_no_active_lanes_eq branch -19 (label 36 at #516) if no lanes of $32 == 0
label label 0x00000023
trace_scope TraceScope(-1) when $31 is true
trace_line TraceLine(90) when $13 is true
Expand All @@ -557,7 +555,7 @@ label label 0x00000003
load_condition_mask CondMask = $39
copy_constant $18 = 0
merge_condition_mask CondMask = $28 & $29
branch_if_no_lanes_active branch_if_no_lanes_active +45 (label 2 at #605)
branch_if_no_lanes_active branch_if_no_lanes_active +45 (label 2 at #603)
trace_enter TraceEnter(bool loop_operator_eq()) when $13 is true
copy_constant $19 = 0
copy_slot_unmasked $20 = $13
Expand All @@ -571,7 +569,7 @@ copy_constant $20 = 0
copy_slot_unmasked $21 = $13
copy_slot_masked $20 = Mask($21)
trace_scope TraceScope(+1) when $20 is true
branch_if_no_lanes_active branch_if_no_lanes_active +15 (label 38 at #589)
branch_if_no_lanes_active branch_if_no_lanes_active +15 (label 38 at #587)
trace_line TraceLine(109) when $13 is true
copy_constant i₈ = 0x3F800000 (1.0)
trace_var TraceVar(i₈) when $13 is true
Expand Down Expand Up @@ -606,7 +604,7 @@ label label 0x00000002
load_condition_mask CondMask = $28
copy_constant $1 = 0
merge_condition_mask CondMask = $17 & $18
branch_if_no_lanes_active branch_if_no_lanes_active +55 (label 1 at #664)
branch_if_no_lanes_active branch_if_no_lanes_active +55 (label 1 at #662)
trace_enter TraceEnter(bool loop_operator_ne()) when $13 is true
copy_constant $2 = 0
copy_slot_unmasked $3 = $13
Expand All @@ -620,7 +618,7 @@ copy_constant $3 = 0
copy_slot_unmasked $4 = $13
copy_slot_masked $3 = Mask($4)
trace_scope TraceScope(+1) when $3 is true
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 41 at #646)
branch_if_no_lanes_active branch_if_no_lanes_active +23 (label 41 at #644)
trace_line TraceLine(98) when $13 is true
copy_constant i₉ = 0x3F800000 (1.0)
trace_var TraceVar(i₉) when $13 is true
Expand All @@ -642,7 +640,7 @@ trace_var TraceVar(i₉) when $13 is true
copy_slot_unmasked $4 = i₉
cmplt_imm_float $4 = lessThan($4, 0x40800000 (4.0))
stack_rewind
branch_if_no_active_lanes_eq branch -18 (label 42 at #627) if no lanes of $4 == 0
branch_if_no_active_lanes_eq branch -18 (label 42 at #625) if no lanes of $4 == 0
label label 0x00000029
trace_scope TraceScope(-1) when $3 is true
trace_line TraceLine(101) when $13 is true
Expand Down
Loading

0 comments on commit d81b864

Please sign in to comment.