Skip to content

Commit

Permalink
pulley: Implement 16x8 arithmetics (#9904)
Browse files Browse the repository at this point in the history
* pulley: Implement 16x8 arith.

* leverage 32-bit arithmetic

* fix
  • Loading branch information
eagr authored Dec 28, 2024
1 parent af5a789 commit 01a43ed
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 1 deletion.
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@
(pulley_xrem32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b))

;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `avg_round` producing `$I16X8` lowers to the `pulley_vavground16x8`
;; constructor; both operands are passed through unchanged.
(rule (lower (has_type $I16X8 (avg_round a b))) (pulley_vavground16x8 a b))

;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8 (ishl a b)))
Expand Down Expand Up @@ -504,24 +508,28 @@
;; Types matched by `fits_in_32`: both operands go through `zext32` and the
;; 32-bit unsigned min constructor is used.
(rule 0 (lower (has_type (fits_in_32 _) (umin a b)))
(pulley_xmin32_u (zext32 a) (zext32 b)))
;; `$I64`: operands are used as-is with the 64-bit unsigned min constructor.
(rule 1 (lower (has_type $I64 (umin a b))) (pulley_xmin64_u a b))
;; `$I16X8`: operands are used as-is with the 16x8 unsigned vector min
;; constructor.
(rule 1 (lower (has_type $I16X8 (umin a b))) (pulley_vmin16x8_u a b))

;;;; Rules for `smin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Types matched by `fits_in_32`: both operands go through `sext32` and the
;; 32-bit signed min constructor is used.
(rule 0 (lower (has_type (fits_in_32 _) (smin a b)))
(pulley_xmin32_s (sext32 a) (sext32 b)))
;; `$I64`: operands are used as-is with the 64-bit signed min constructor.
(rule 1 (lower (has_type $I64 (smin a b))) (pulley_xmin64_s a b))
;; `$I16X8`: operands are used as-is with the 16x8 signed vector min
;; constructor.
(rule 1 (lower (has_type $I16X8 (smin a b))) (pulley_vmin16x8_s a b))

;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Types matched by `fits_in_32`: both operands go through `zext32` and the
;; 32-bit unsigned max constructor is used.
(rule 0 (lower (has_type (fits_in_32 _) (umax a b)))
(pulley_xmax32_u (zext32 a) (zext32 b)))
;; `$I64`: operands are used as-is with the 64-bit unsigned max constructor.
(rule 1 (lower (has_type $I64 (umax a b))) (pulley_xmax64_u a b))
;; `$I16X8`: operands are used as-is with the 16x8 unsigned vector max
;; constructor.
(rule 1 (lower (has_type $I16X8 (umax a b))) (pulley_vmax16x8_u a b))

;;;; Rules for `smax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Types matched by `fits_in_32`: both operands go through `sext32` and the
;; 32-bit signed max constructor is used.
(rule 0 (lower (has_type (fits_in_32 _) (smax a b)))
(pulley_xmax32_s (sext32 a) (sext32 b)))
;; `$I64`: operands are used as-is with the 64-bit signed max constructor.
(rule 1 (lower (has_type $I64 (smax a b))) (pulley_xmax64_s a b))
;; `$I16X8`: operands are used as-is with the 16x8 signed vector max
;; constructor.
(rule 1 (lower (has_type $I16X8 (smax a b))) (pulley_vmax16x8_s a b))

;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1230,6 +1238,7 @@

;; Types matched by `fits_in_32`: the operand goes through `sext32` and the
;; 32-bit abs constructor is used.
(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
;; `$I64`: the operand is used as-is with the 64-bit abs constructor.
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))
;; `$I16X8`: the operand is used as-is with the 16x8 vector abs constructor.
(rule 1 (lower (has_type $I16X8 (iabs a))) (pulley_vabs16x8 a))

;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
1 change: 0 additions & 1 deletion crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,6 @@ impl WastTest {
"spec_testsuite/simd_f64x2_arith.wast",
"spec_testsuite/simd_f64x2_cmp.wast",
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
"spec_testsuite/simd_i16x8_arith2.wast",
"spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
"spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
"spec_testsuite/simd_i16x8_sat_arith.wast",
Expand Down
57 changes: 57 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4243,6 +4243,52 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// Lane-wise signed minimum of two `i16x8` vectors: `dst = min(src1, src2)`.
fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read in full before `dst` is written.
    let lhs = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    // Build the result one lane at a time, comparing lanes as signed values.
    let lanes = core::array::from_fn(|lane| lhs[lane].min(rhs[lane]));
    self.state[operands.dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

/// Lane-wise unsigned minimum of two `u16x8` vectors: `dst = min(src1, src2)`.
fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read in full before `dst` is written.
    let lhs = self.state[operands.src1].get_u16x8();
    let rhs = self.state[operands.src2].get_u16x8();
    // Build the result one lane at a time, comparing lanes as unsigned values.
    let lanes = core::array::from_fn(|lane| lhs[lane].min(rhs[lane]));
    self.state[operands.dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}

/// Lane-wise signed maximum of two `i16x8` vectors: `dst = max(src1, src2)`.
fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read in full before `dst` is written.
    let lhs = self.state[operands.src1].get_i16x8();
    let rhs = self.state[operands.src2].get_i16x8();
    // Build the result one lane at a time, comparing lanes as signed values.
    let lanes = core::array::from_fn(|lane| lhs[lane].max(rhs[lane]));
    self.state[operands.dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

/// Lane-wise unsigned maximum of two `u16x8` vectors: `dst = max(src1, src2)`.
fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read in full before `dst` is written.
    let lhs = self.state[operands.src1].get_u16x8();
    let rhs = self.state[operands.src2].get_u16x8();
    // Build the result one lane at a time, comparing lanes as unsigned values.
    let lanes = core::array::from_fn(|lane| lhs[lane].max(rhs[lane]));
    self.state[operands.dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}

/// Lane-wise absolute value of an `i16x8` vector: `dst = |src|`.
fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    // `wrapping_abs` leaves `i16::MIN` unchanged instead of overflowing.
    let lanes = self.state[src].get_i16x8().map(i16::wrapping_abs);
    self.state[dst].set_i16x8(lanes);
    ControlFlow::Continue(())
}

fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
let a = self.state[src].get_f32x4();
self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
Expand Down Expand Up @@ -4309,4 +4355,15 @@ impl ExtendedOpVisitor for Interpreter<'_> {
self.state[operands.dst].set_i8x16(dst);
ControlFlow::Continue(())
}

/// Lane-wise unsigned rounding average of two `u16x8` vectors:
/// `dst = (src1 + src2 + 1) / 2`.
fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read in full before `dst` is written.
    let lhs = self.state[operands.src1].get_u16x8();
    let rhs = self.state[operands.src2].get_u16x8();
    let lanes = core::array::from_fn(|lane| {
        // Widen to 32 bits so that `x + y + 1` cannot overflow.
        let sum = u32::from(lhs[lane]) + u32::from(rhs[lane]) + 1;
        // The halved sum is at most `u16::MAX`, so narrowing loses nothing.
        (sum / 2) as u16
    });
    self.state[operands.dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}
}
16 changes: 16 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,18 @@ macro_rules! for_each_extended_op {
/// `dst = -src`
vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };

/// `dst = min(src1, src2)` (signed)
vmin16x8_s = Vmin16x8S { operands: BinaryOperands<VReg> };
/// `dst = min(src1, src2)` (unsigned)
vmin16x8_u = Vmin16x8U { operands: BinaryOperands<VReg> };
/// `dst = max(src1, src2)` (signed)
vmax16x8_s = Vmax16x8S { operands: BinaryOperands<VReg> };
/// `dst = max(src1, src2)` (unsigned)
vmax16x8_u = Vmax16x8U { operands: BinaryOperands<VReg> };

/// `dst = |src|`
vabs16x8 = Vabs16x8 { dst: VReg, src: VReg };

/// `dst = |src|`
vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
/// `dst = |src|`
Expand All @@ -1167,8 +1179,12 @@ macro_rules! for_each_extended_op {
vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
/// `dst = ieee_minimum(src1, src2)`
vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };

/// `dst = swizzle(src1, src2)`
vswizzlei8x16 = Vswizzlei8x16 { operands: BinaryOperands<VReg> };

/// `dst = (src1 + src2 + 1) // 2`
vavground16x8 = Vavground16x8 { operands: BinaryOperands<VReg> };
}
};
}
Expand Down

0 comments on commit 01a43ed

Please sign in to comment.