diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 280bc770c228..1f77d9c42d6f 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -1121,16 +1121,19 @@
 
 (rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
 (rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
+(rule (lower (has_type $F32X4 (fsub a b))) (pulley_vsub32x4 a b))
 
 ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
 (rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
+(rule (lower (has_type $F32X4 (fmul a b))) (pulley_vmul32x4 a b))
 
 ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
 (rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
+(rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdiv32x4 a b))
 
 ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1193,6 +1196,7 @@
 
 (rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
 (rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
+(rule (lower (has_type $F32X4 (fneg a))) (pulley_vnegf32x4 a))
 
 ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index c700cf6aea2d..fcce84a32a2b 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -416,7 +416,6 @@ impl WastTest {
                 "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
-                "spec_testsuite/simd_f32x4_arith.wast",
                 "spec_testsuite/simd_f32x4_cmp.wast",
                 "spec_testsuite/simd_f32x4_pmin_pmax.wast",
                 "spec_testsuite/simd_f64x2_arith.wast",
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index d0e999ad8c2b..d2144cd27c8c 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -2981,6 +2981,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vsub32x4(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        for (lane, r) in lanes.iter_mut().zip(rhs) {
+            *lane -= r; // per-lane `src1 - src2`
+        }
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fmul32(&mut self, operands: BinaryOperands) -> ControlFlow {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
@@ -2988,6 +2998,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vmul32x4(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        for (lane, r) in lanes.iter_mut().zip(rhs) {
+            *lane *= r; // per-lane `src1 * src2`
+        }
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fdiv32(&mut self, operands: BinaryOperands) -> ControlFlow {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
@@ -2995,6 +3015,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vdiv32x4(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        for (lane, r) in lanes.iter_mut().zip(rhs) {
+            *lane /= r; // per-lane `src1 / src2`
+        }
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fmaximum32(&mut self, operands: BinaryOperands) -> ControlFlow {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
@@ -3137,6 +3167,15 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut lanes = self.state[src].get_f32x4();
+        for lane in lanes.iter_mut() {
+            *lane = -*lane; // negate each lane in place
+        }
+        self.state[dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_abs());
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index f83a24606e8d..87bdf304569f 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -811,10 +811,16 @@ macro_rules! for_each_extended_op {
             fadd32 = Fadd32 { operands: BinaryOperands };
             /// `low32(dst) = low32(src1) - low32(src2)`
             fsub32 = Fsub32 { operands: BinaryOperands };
+            /// `low128(dst) = low128(src1) - low128(src2)`, applied per `f32` lane (x4)
+            vsub32x4 = Vsub32x4 { operands: BinaryOperands };
             /// `low32(dst) = low32(src1) * low32(src2)`
             fmul32 = Fmul32 { operands: BinaryOperands };
+            /// `low128(dst) = low128(src1) * low128(src2)`, applied per `f32` lane (x4)
+            vmul32x4 = Vmul32x4 { operands: BinaryOperands };
             /// `low32(dst) = low32(src1) / low32(src2)`
             fdiv32 = Fdiv32 { operands: BinaryOperands };
+            /// `low128(dst) = low128(src1) / low128(src2)`, applied per `f32` lane (x4)
+            vdiv32x4 = Vdiv32x4 { operands: BinaryOperands };
             /// `low32(dst) = ieee_maximum(low32(src1), low32(src2))`
             fmaximum32 = Fmaximum32 { operands: BinaryOperands };
             /// `low32(dst) = ieee_minimum(low32(src1), low32(src2))`
@@ -847,6 +853,8 @@ macro_rules! for_each_extended_op {
             vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
             /// `low32(dst) = -low32(src)`
             fneg32 = Fneg32 { dst: FReg, src: FReg };
+            /// `low128(dst) = -low128(src)`, applied per `f32` lane (x4)
+            vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
             /// `low32(dst) = |low32(src)|`
             fabs32 = Fabs32 { dst: FReg, src: FReg };
 