Skip to content

Commit

Permalink
pulley: Implement vector sqmul_round_sat (#9911)
Browse files Browse the repository at this point in the history
* pulley: Implement vector sqmul_round_sat

* parenthesize to bring out op. order
  • Loading branch information
eagr authored Dec 29, 2024
1 parent 54b7250 commit 2d1c0ab
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1789,7 +1789,7 @@ pub(crate) fn define(
r#"
Fixed-point multiplication of numbers in the QN format, where N + 1
is the number bitwidth:
`a := signed_saturate((x * y + 1 << (Q - 1)) >> Q)`
`a := signed_saturate((x * y + (1 << (Q - 1))) >> Q)`
Polymorphic over all integer vector types with 16- or 32-bit numbers.
"#,
Expand Down
8 changes: 6 additions & 2 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,10 @@
(rule (lower (has_type $I64 (smulhi a b)))
(pulley_xmulhi64_s a b))

;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I16X8 (sqmul_round_sat a b))) (pulley_vqmulrsi16x8 a b))

;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (sdiv a b)))
Expand All @@ -335,7 +339,7 @@
(pulley_xrem32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b))

;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I16X8 (avg_round a b))) (pulley_vavground16x8 a b))

Expand Down Expand Up @@ -1377,4 +1381,4 @@

;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
3 changes: 0 additions & 3 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,14 +405,12 @@ impl WastTest {
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
"misc_testsuite/simd/v128-select.wast",
"spec_testsuite/proposals/annotations/simd_lane.wast",
"spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/simd_lane.wast",
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/simd_f32x4_arith.wast",
"spec_testsuite/simd_f32x4_cmp.wast",
Expand All @@ -421,7 +419,6 @@ impl WastTest {
"spec_testsuite/simd_f64x2_cmp.wast",
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
"spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
"spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
"spec_testsuite/simd_i16x8_sat_arith.wast",
"spec_testsuite/simd_i32x4_dot_i16x8.wast",
"spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast",
Expand Down
13 changes: 13 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3835,6 +3835,19 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Lane-wise fixed-point multiply: each pair of i16 lanes is widened to
    // i32, multiplied, biased by `1 << 14` for rounding, shifted right by 15
    // and finally saturated back into the i16 range.
    let lo = i32::from(i16::MIN);
    let hi = i32::from(i16::MAX);
    let mut lanes = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    lanes.iter_mut().zip(rhs).for_each(|(lane, factor)| {
        let product = i32::from(*lane) * i32::from(factor);
        let rounded = (product + (1 << 14)) >> 15;
        // The clamp bounds the value to the i16 range, so the narrowing
        // cast below cannot truncate.
        *lane = rounded.clamp(lo, hi) as i16;
    });
    self.state[operands.dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
self.state[dst].set_u32(u32::from(a));
Expand Down
3 changes: 3 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,9 @@ macro_rules! for_each_extended_op {
/// `dst = src1 * src2`
vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };

/// `dst = signed_saturate((src1 * src2 + (1 << (Q - 1))) >> Q)`
vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };

/// `low32(dst) = zext(src[lane])`
xextractv8x16 = XExtractV8x16 { dst: XReg, src: VReg, lane: u8 };
/// `low32(dst) = zext(src[lane])`
Expand Down

0 comments on commit 2d1c0ab

Please sign in to comment.