bytecodealliance · tyoeer · Dec 23, 2024 · Dec 23, 2024 · alexcrichton · Dec 27, 2024
@@ -1121,16 +1121,19 @@
 
 ;; `fsub` lowering, selected by result type: `$F32` -> `pulley_fsub32`,
 ;; `$F64` -> `pulley_fsub64`, `$F32X4` -> `pulley_vsub32x4`.
 (rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
 (rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
+(rule (lower (has_type $F32X4 (fsub a b))) (pulley_vsub32x4 a b))
 
 ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `fmul` lowering, selected by result type: `$F32` -> `pulley_fmul32`,
 ;; `$F64` -> `pulley_fmul64`, `$F32X4` -> `pulley_vmul32x4`.
 (rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
 (rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
+(rule (lower (has_type $F32X4 (fmul a b))) (pulley_vmul32x4 a b))
 
 ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `fdiv` lowering, selected by result type: `$F32` -> `pulley_fdiv32`,
 ;; `$F64` -> `pulley_fdiv64`, `$F32X4` -> `pulley_vdiv32x4`.
 (rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
 (rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
+(rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdiv32x4 a b))
 
 ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1193,6 +1196,7 @@
 
 ;; `fneg` lowering, selected by result type: `$F32` -> `pulley_fneg32`,
 ;; `$F64` -> `pulley_fneg64`, `$F32X4` -> `pulley_vnegf32x4`.
 (rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
 (rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
+(rule (lower (has_type $F32X4 (fneg a))) (pulley_vnegf32x4 a))
 
 ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 

@@ -416,7 +416,6 @@ impl WastTest {
                 "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
-                "spec_testsuite/simd_f32x4_arith.wast",
                 "spec_testsuite/simd_f32x4_cmp.wast",
                 "spec_testsuite/simd_f32x4_pmin_pmax.wast",
                 "spec_testsuite/simd_f64x2_arith.wast",

@@ -2981,20 +2981,50 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Element-wise subtraction on the `f32x4` views of two vector registers.
+    ///
+    /// Every element read from `operands.src1` has the matching element of
+    /// `operands.src2` subtracted from it; the result is stored into
+    /// `operands.dst` and execution continues.
+    fn vsub32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        lanes
+            .iter_mut()
+            .zip(rhs)
+            .for_each(|(lane, sub)| *lane -= sub);
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     /// Multiplies the `f32` values read from `operands.src1` and
     /// `operands.src2`, writes the product to `operands.dst`, and lets
     /// execution continue.
     fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
         self.state[operands.dst].set_f32(a * b);
         ControlFlow::Continue(())
     }
 
+    /// Element-wise multiplication on the `f32x4` views of two vector registers.
+    ///
+    /// Every element read from `operands.src1` is multiplied by the matching
+    /// element of `operands.src2`; the result is stored into `operands.dst`
+    /// and execution continues.
+    fn vmul32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        lanes
+            .iter_mut()
+            .zip(rhs)
+            .for_each(|(lane, factor)| *lane *= factor);
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     /// Divides the `f32` value read from `operands.src1` by the one read from
     /// `operands.src2`, writes the quotient to `operands.dst`, and lets
     /// execution continue.
     fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
         self.state[operands.dst].set_f32(a / b);
         ControlFlow::Continue(())
     }
 
+    /// Element-wise division on the `f32x4` views of two vector registers.
+    ///
+    /// Every element read from `operands.src1` is divided by the matching
+    /// element of `operands.src2`; the result is stored into `operands.dst`
+    /// and execution continues.
+    fn vdiv32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let mut lanes = self.state[operands.src1].get_f32x4();
+        let rhs = self.state[operands.src2].get_f32x4();
+        lanes
+            .iter_mut()
+            .zip(rhs)
+            .for_each(|(lane, divisor)| *lane /= divisor);
+        self.state[operands.dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
@@ -3137,6 +3167,15 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Negates every element of the `f32x4` view of `src`, stores the result
+    /// into `dst`, and lets execution continue.
+    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
+        let mut lanes = self.state[src].get_f32x4();
+        lanes.iter_mut().for_each(|lane| *lane = -*lane);
+        self.state[dst].set_f32x4(lanes);
+        ControlFlow::Continue(())
+    }
+
     fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_abs());

@@ -811,10 +811,16 @@ macro_rules! for_each_extended_op {
             fadd32 = Fadd32 { operands: BinaryOperands<FReg> };
             /// `low32(dst) = low32(src1) - low32(src2)`
             fsub32 = Fsub32 { operands: BinaryOperands<FReg> };
+            /// `low128(dst) = low128(src1) - low128(src2)`
+            vsub32x4 = Vsub32x4 { operands: BinaryOperands<VReg> };
             /// `low32(dst) = low32(src1) * low32(src2)`
             fmul32 = Fmul32 { operands: BinaryOperands<FReg> };
+            /// `low128(dst) = low128(src1) * low128(src2)`
+            vmul32x4 = Vmul32x4 { operands: BinaryOperands<VReg> };
             /// `low32(dst) = low32(src1) / low32(src2)`
             fdiv32 = Fdiv32 { operands: BinaryOperands<FReg> };
+            /// `low128(dst) = low128(src1) / low128(src2)`
+            vdiv32x4 = Vdiv32x4 { operands: BinaryOperands<VReg> };
             /// `low32(dst) = ieee_maximum(low32(src1), low32(src2))`
             fmaximum32 = Fmaximum32 { operands: BinaryOperands<FReg> };
             /// `low32(dst) = ieee_minimum(low32(src1), low32(src2))`
@@ -847,6 +853,8 @@ macro_rules! for_each_extended_op {
             vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
             /// `low32(dst) = -low32(src)`
             fneg32 = Fneg32 { dst: FReg, src: FReg };
+            /// `low128(dst) = -low128(src)`
+            vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
             /// `low32(dst) = |low32(src)|`
             fabs32 = Fabs32 { dst: FReg, src: FReg };