diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 15133e563edc..7347545c5057 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -1131,26 +1131,36 @@
 
 (rule (lower (has_type $F32 (trunc a))) (pulley_ftrunc32 a))
 (rule (lower (has_type $F64 (trunc a))) (pulley_ftrunc64 a))
+(rule (lower (has_type $F32X4 (trunc a))) (pulley_vtrunc32x4 a))
+(rule (lower (has_type $F64X2 (trunc a))) (pulley_vtrunc64x2 a))
 
 ;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (floor a))) (pulley_ffloor32 a))
 (rule (lower (has_type $F64 (floor a))) (pulley_ffloor64 a))
+(rule (lower (has_type $F32X4 (floor a))) (pulley_vfloor32x4 a))
+(rule (lower (has_type $F64X2 (floor a))) (pulley_vfloor64x2 a))
 
 ;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (ceil a))) (pulley_fceil32 a))
 (rule (lower (has_type $F64 (ceil a))) (pulley_fceil64 a))
+(rule (lower (has_type $F32X4 (ceil a))) (pulley_vceil32x4 a))
+(rule (lower (has_type $F64X2 (ceil a))) (pulley_vceil64x2 a))
 
 ;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (nearest a))) (pulley_fnearest32 a))
 (rule (lower (has_type $F64 (nearest a))) (pulley_fnearest64 a))
+(rule (lower (has_type $F32X4 (nearest a))) (pulley_vnearest32x4 a))
+(rule (lower (has_type $F64X2 (nearest a))) (pulley_vnearest64x2 a))
 
 ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (sqrt a))) (pulley_fsqrt32 a))
 (rule (lower (has_type $F64 (sqrt a))) (pulley_fsqrt64 a))
+(rule (lower (has_type $F32X4 (sqrt a))) (pulley_vsqrt32x4 a))
+(rule (lower (has_type $F64X2 (sqrt a))) (pulley_vsqrt64x2 a))
 
 ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff 
--git a/pulley/src/interp.rs b/pulley/src/interp.rs
index cd1dbd1b4e15..e4333ac8d96c 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -2956,30 +2956,130 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise `wasm_trunc` over the `f32x4` in `src`; result goes to `dst`.
+    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f32x4();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_trunc();
+        }
+        self.state[dst].set_f32x4(a);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `wasm_trunc` over the `f64x2` in `src`; result goes to `dst`.
+    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f64x2();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_trunc();
+        }
+        self.state[dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_floor());
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise `wasm_floor` over the `f32x4` in `src`; result goes to `dst`.
+    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f32x4();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_floor();
+        }
+        self.state[dst].set_f32x4(a);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `wasm_floor` over the `f64x2` in `src`; result goes to `dst`.
+    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f64x2();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_floor();
+        }
+        self.state[dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_ceil());
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise `wasm_ceil` over the `f32x4` in `src`; result goes to `dst`.
+    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f32x4();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_ceil();
+        }
+        self.state[dst].set_f32x4(a);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `wasm_ceil` over the `f64x2` in `src`; result goes to `dst`.
+    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f64x2();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_ceil();
+        }
+        self.state[dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_nearest());
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise `wasm_nearest` over the `f32x4` in `src`; result goes to `dst`.
+    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f32x4();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_nearest();
+        }
+        self.state[dst].set_f32x4(a);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `wasm_nearest` over the `f64x2` in `src`; result goes to `dst`.
+    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f64x2();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_nearest();
+        }
+        self.state[dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(a.wasm_sqrt());
         ControlFlow::Continue(())
     }
 
+    /// Lane-wise `wasm_sqrt` over the `f32x4` in `src`; result goes to `dst`.
+    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f32x4();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_sqrt();
+        }
+        self.state[dst].set_f32x4(a);
+        ControlFlow::Continue(())
+    }
+
+    /// Lane-wise `wasm_sqrt` over the `f64x2` in `src`; result goes to `dst`.
+    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let mut a = self.state[src].get_f64x2();
+        for elem in a.iter_mut() {
+            *elem = elem.wasm_sqrt();
+        }
+        self.state[dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow {
         let a = self.state[src].get_f32();
         self.state[dst].set_f32(-a);
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index db951407b614..9a86314045b9 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -805,14 +805,30 @@ macro_rules! 
for_each_extended_op { fminimum32 = Fminimum32 { operands: BinaryOperands }; /// `low32(dst) = ieee_trunc(low32(src))` ftrunc32 = Ftrunc32 { dst: FReg, src: FReg }; + /// `low128(dst) = ieee_trunc(low128(src))` + vtrunc32x4 = Vtrunc32x4 { dst: VReg, src: VReg }; + /// `low128(dst) = ieee_trunc(low128(src))` + vtrunc64x2 = Vtrunc64x2 { dst: VReg, src: VReg }; /// `low32(dst) = ieee_floor(low32(src))` ffloor32 = Ffloor32 { dst: FReg, src: FReg }; + /// `low128(dst) = ieee_floor(low128(src))` + vfloor32x4 = Vfloor32x4 { dst: VReg, src: VReg }; + /// `low128(dst) = ieee_floor(low128(src))` + vfloor64x2 = Vfloor64x2 { dst: VReg, src: VReg }; /// `low32(dst) = ieee_ceil(low32(src))` fceil32 = Fceil32 { dst: FReg, src: FReg }; + /// `low128(dst) = ieee_ceil(low128(src))` + vceil32x4 = Vceil32x4 { dst: VReg, src: VReg }; + /// `low128(dst) = ieee_ceil(low128(src))` + vceil64x2 = Vceil64x2 { dst: VReg, src: VReg }; /// `low32(dst) = ieee_nearest(low32(src))` fnearest32 = Fnearest32 { dst: FReg, src: FReg }; /// `low32(dst) = ieee_sqrt(low32(src))` fsqrt32 = Fsqrt32 { dst: FReg, src: FReg }; + /// `low128(dst) = ieee_sqrt(low128(src))` + vsqrt32x4 = Vsqrt32x4 { dst: VReg, src: VReg }; + /// `low128(dst) = ieee_sqrt(low128(src))` + vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg }; /// `low32(dst) = -low32(src)` fneg32 = Fneg32 { dst: FReg, src: FReg }; /// `low32(dst) = |low32(src)|` @@ -838,6 +854,10 @@ macro_rules! for_each_extended_op { fceil64 = Fceil64 { dst: FReg, src: FReg }; /// `dst = ieee_nearest(src)` fnearest64 = Fnearest64 { dst: FReg, src: FReg }; + /// `low128(dst) = ieee_nearest(low128(src))` + vnearest32x4 = Vnearest32x4 { dst: VReg, src: VReg }; + /// `low128(dst) = ieee_nearest(low128(src))` + vnearest64x2 = Vnearest64x2 { dst: VReg, src: VReg }; /// `dst = ieee_sqrt(src)` fsqrt64 = Fsqrt64 { dst: FReg, src: FReg }; /// `dst = -src`