From e05f6af6c2f2ddcdb9869774741b9abbefd48b38 Mon Sep 17 00:00:00 2001
From: Julian Eager
Date: Mon, 23 Dec 2024 20:35:15 +0800
Subject: [PATCH 1/4] fcmp vector comparisons

---
 .../codegen/src/isa/pulley_shared/lower.isle |  27 +++
 pulley/src/interp.rs                         | 208 ++++++++++++++++++
 pulley/src/lib.rs                            |  33 +++
 3 files changed, 268 insertions(+)

diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 280bc770c228..6b52f49725ff 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -751,6 +751,33 @@
       (if-let true (floatcc_unordered cc))
       (pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))
 
+;; vector comparisons
+
+(rule 1 (lower (has_type (ty_vec128 _) (fcmp cc a b @ (value_type (ty_vec128 ty)))))
+  (lower_vfcmp ty cc a b))
+
+(decl lower_vfcmp (Type FloatCC Value Value) VReg)
+(rule (lower_vfcmp $F32X4 (FloatCC.Ordered) a b) (pulley_vordf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.Unordered) a b) (pulley_vunof32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.Equal) a b) (pulley_veqf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.NotEqual) a b) (pulley_vneqf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.OrderedNotEqual) a b) (pulley_vordneqf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.UnorderedOrEqual) a b) (pulley_vunoeqf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.LessThan) a b) (pulley_vltf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf32x4 a b))
+(rule (lower_vfcmp $F32X4 (FloatCC.GreaterThan) a b) (pulley_vltf32x4 b a))
+(rule (lower_vfcmp $F32X4 (FloatCC.GreaterThanOrEqual) a b) (pulley_vlteqf32x4 b a))
+(rule (lower_vfcmp $F64X2 (FloatCC.Ordered) a b) (pulley_vordf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.Unordered) a b) (pulley_vunof64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.Equal) a b) (pulley_veqf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.NotEqual) a b) (pulley_vneqf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.OrderedNotEqual) a b) (pulley_vordneqf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.UnorderedOrEqual) a b) (pulley_vunoeqf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.LessThan) a b) (pulley_vltf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf64x2 a b))
+(rule (lower_vfcmp $F64X2 (FloatCC.GreaterThan) a b) (pulley_vltf64x2 b a))
+(rule (lower_vfcmp $F64X2 (FloatCC.GreaterThanOrEqual) a b) (pulley_vlteqf64x2 b a))
+
 ;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl amode (Value Offset32) Amode)
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index d0e999ad8c2b..7452d4d7229d 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -4207,6 +4207,214 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vordf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a.is_nan() || b.is_nan() {
+                0
+            } else {
+                u32::MAX
+            };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vunof32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a.is_nan() || b.is_nan() {
+                u32::MAX
+            } else {
+                0
+            };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a != b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vordneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b || a.is_nan() || b.is_nan() {
+                0
+            } else {
+                u32::MAX
+            }
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vunoeqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b || a.is_nan() || b.is_nan() {
+                u32::MAX
+            } else {
+                0
+            }
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f32x4();
+        let b = self.state[operands.src2].get_f32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vordf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a.is_nan() || b.is_nan() {
+                0
+            } else {
+                u64::MAX
+            };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vunof64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a.is_nan() || b.is_nan() {
+                u64::MAX
+            } else {
+                0
+            };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a != b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vordneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b || a.is_nan() || b.is_nan() {
+                0
+            } else {
+                u64::MAX
+            }
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vunoeqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b || a.is_nan() || b.is_nan() {
+                u64::MAX
+            } else {
+                0
+            }
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
     fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
         let a = self.state[src].get_i8x16();
         self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index f83a24606e8d..d8336be2c9cf 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -1144,6 +1144,39 @@ macro_rules! for_each_extended_op {
         /// `dst = src <= dst` (unsigned)
         vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };
 
+        /// `dst = !src1.is_nan() && !src2.is_nan()`
+        vordf32x4 = Vordf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1.is_nan() || src2.is_nan()`
+        vunof32x4 = Vunof32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 == src2`
+        veqf32x4 = Veqf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneqf32x4 = Vneqf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = !src1.is_nan() && !src2.is_nan() && src1 != src2`
+        vordneqf32x4 = Vordneqf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1.is_nan() || src2.is_nan() || src1 == src2`
+        vunoeqf32x4 = Vunoeqf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2`
+        vltf32x4 = Vltf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2`
+        vlteqf32x4 = Vlteqf32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = !src1.is_nan() && !src2.is_nan()`
+        vordf64x2 = Vordf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1.is_nan() || src2.is_nan()`
+        vunof64x2 = Vunof64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 == src2`
+        veqf64x2 = Veqf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneqf64x2 = Vneqf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = !src1.is_nan() && !src2.is_nan() && src1 != src2`
+        vordneqf64x2 = Vordneqf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1.is_nan() || src2.is_nan() || src1 == src2`
+        vunoeqf64x2 = Vunoeqf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2`
+        vltf64x2 = Vltf64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2`
+        vlteqf64x2 = Vlteqf64x2 { operands: BinaryOperands<VReg> };
+
         /// `dst = -src`
         vneg8x16 = Vneg8x16 { dst: VReg, src: VReg };
         /// `dst = -src`

From 7851d5ee06dd4566a7dd3733f8858f1466ddf1c7 Mon Sep 17 00:00:00 2001
From: Julian Eager
Date: Mon, 23 Dec 2024 22:31:35 +0800
Subject: [PATCH 2/4] reenable canon-nan tests

---
 crates/wast-util/src/lib.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index c700cf6aea2d..694814806c4a 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -401,7 +401,6 @@ impl WastTest {
         // features in Pulley are implemented.
         if config.compiler == Compiler::CraneliftPulley {
             let unsupported = [
-                "misc_testsuite/simd/canonicalize-nan.wast",
                 "misc_testsuite/simd/issue_3327_bnot_lowering.wast",
                 "misc_testsuite/simd/v128-select.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",

From 35be5cb68597991f01a2aec958cbf37087c1d3f8 Mon Sep 17 00:00:00 2001
From: Julian Eager
Date: Tue, 24 Dec 2024 14:53:11 +0800
Subject: [PATCH 3/4] unflag more tests

---
 crates/wast-util/src/lib.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index 694814806c4a..598f81274b26 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -416,11 +416,7 @@ impl WastTest {
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
                 "spec_testsuite/simd_f32x4_arith.wast",
-                "spec_testsuite/simd_f32x4_cmp.wast",
-                "spec_testsuite/simd_f32x4_pmin_pmax.wast",
                 "spec_testsuite/simd_f64x2_arith.wast",
-                "spec_testsuite/simd_f64x2_cmp.wast",
-                "spec_testsuite/simd_f64x2_pmin_pmax.wast",
                 "spec_testsuite/simd_i16x8_arith2.wast",
                 "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
                 "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",

From 767feaf026902a234683559afd5ae4d3a445021e Mon Sep 17 00:00:00 2001
From: Julian Eager
Date: Tue, 24 Dec 2024 20:12:24 +0800
Subject: [PATCH 4/4] rev instead of negate

---
 pulley/src/interp.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index 7452d4d7229d..950009bb7232 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -4253,7 +4253,7 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         let b = self.state[operands.src2].get_f32x4();
         let mut c = [0; 4];
         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
-            *c = if a != b { u32::MAX } else { 0 };
+            *c = if a == b { 0 } else { u32::MAX };
         }
         self.state[operands.dst].set_u32x4(c);
         ControlFlow::Continue(())
@@ -4357,7 +4357,7 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         let b = self.state[operands.src2].get_f64x2();
         let mut c = [0; 2];
         for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
-            *c = if a != b { u64::MAX } else { 0 };
+            *c = if a == b { 0 } else { u64::MAX };
         }
         self.state[operands.dst].set_u64x2(c);
         ControlFlow::Continue(())