From 8c321f7a9557e945ab4338ef33ab10d2c169a7f2 Mon Sep 17 00:00:00 2001
From: Alex Crichton
Date: Wed, 11 Dec 2024 11:59:47 -0700
Subject: [PATCH] pulley: Get `block.wast` test suite passing (#9790)

Filling out float compares and some miscellaneous bit-related integer
instructions.
---
 .../codegen/src/isa/pulley_shared/lower.isle | 36 +++++++++
 crates/wast-util/src/lib.rs                  |  3 -
 pulley/src/interp.rs                         | 80 +++++++++++++++++++
 pulley/src/lib.rs                            | 27 +++++++
 4 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 621a6cc066f2..144915384cd2 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -236,6 +236,16 @@
 (rule 1 (lower (has_type $I64 (bor a b)))
   (pulley_xor64 a b))
 
+;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I32 (ctz a))) (pulley_xctz32 a))
+(rule (lower (has_type $I64 (ctz a))) (pulley_xctz64 a))
+
+;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I32 (clz a))) (pulley_xclz32 a))
+(rule (lower (has_type $I64 (clz a))) (pulley_xclz64 a))
+
 ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule 1 (lower (icmp cc a b @ (value_type $I64)))
@@ -292,6 +302,32 @@
 (rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b)
   (lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a))
 
+;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty))))
+  (lower_fcmp ty cc a b))
+
+(decl lower_fcmp (Type FloatCC Value Value) XReg)
+
+(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b))
+(rule (lower_fcmp $F64 (FloatCC.Equal) a b) (pulley_feq64 a b))
+(rule (lower_fcmp $F32 (FloatCC.NotEqual) a b) (pulley_fneq32 a b))
+(rule (lower_fcmp $F64 (FloatCC.NotEqual) a b) (pulley_fneq64 a b))
+(rule (lower_fcmp $F32 (FloatCC.LessThan) a b) (pulley_flt32 a b))
+(rule (lower_fcmp $F64 (FloatCC.LessThan) a b) (pulley_flt64 a b))
+(rule (lower_fcmp $F32 (FloatCC.LessThanOrEqual) a b) (pulley_flteq32 a b))
+(rule (lower_fcmp $F64 (FloatCC.LessThanOrEqual) a b) (pulley_flteq64 a b))
+
+;; NB: Pulley doesn't have lowerings for `Ordered` or `Unordered` `FloatCC`
+;; conditions as that's not needed by wasm at this time.
+
+;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the
+;; operation.
+(rule (lower_fcmp ty (FloatCC.GreaterThan) a b)
+  (lower_fcmp ty (FloatCC.LessThan) b a))
+(rule (lower_fcmp ty (FloatCC.GreaterThanOrEqual) a b)
+  (lower_fcmp ty (FloatCC.LessThanOrEqual) b a))
+
 ;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl amode (Value Offset32) Amode)
diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index 16c4c3863256..262dde6e8872 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -454,13 +454,11 @@ impl WastTest {
                 "misc_testsuite/winch/_simd_load.wast",
                 "misc_testsuite/winch/_simd_multivalue.wast",
                 "misc_testsuite/winch/_simd_store.wast",
-                "misc_testsuite/winch/float-comparison.wast",
                 "misc_testsuite/winch/global.wast",
                 "misc_testsuite/winch/select.wast",
                 "misc_testsuite/winch/table_fill.wast",
                 "misc_testsuite/winch/table_get.wast",
                 "misc_testsuite/winch/table_set.wast",
-                "spec_testsuite/block.wast",
                 "spec_testsuite/br_if.wast",
                 "spec_testsuite/bulk.wast",
                 "spec_testsuite/call.wast",
@@ -495,7 +493,6 @@ impl WastTest {
                 "spec_testsuite/loop.wast",
                 "spec_testsuite/memory.wast",
                 "spec_testsuite/memory_grow.wast",
-                "spec_testsuite/nop.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
                 "spec_testsuite/proposals/extended-const/elem.wast",
                 "spec_testsuite/proposals/extended-const/global.wast",
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index 061800853ac1..4a667dfef1ef 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -1609,6 +1609,86 @@ impl OpVisitor for Interpreter<'_> {
         self.state[dst].set_f64(f64::from_bits(bits));
         ControlFlow::Continue(())
     }
+
+    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f32();
+        let b = self.state[src2].get_f32();
+        self.state[dst].set_u32(u32::from(a == b));
+        ControlFlow::Continue(())
+    }
+
+    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f32();
+        let b = self.state[src2].get_f32();
+        self.state[dst].set_u32(u32::from(a != b));
+        ControlFlow::Continue(())
+    }
+
+    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f32();
+        let b = self.state[src2].get_f32();
+        self.state[dst].set_u32(u32::from(a < b));
+        ControlFlow::Continue(())
+    }
+
+    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f32();
+        let b = self.state[src2].get_f32();
+        self.state[dst].set_u32(u32::from(a <= b));
+        ControlFlow::Continue(())
+    }
+
+    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f64();
+        let b = self.state[src2].get_f64();
+        self.state[dst].set_u32(u32::from(a == b));
+        ControlFlow::Continue(())
+    }
+
+    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f64();
+        let b = self.state[src2].get_f64();
+        self.state[dst].set_u32(u32::from(a != b));
+        ControlFlow::Continue(())
+    }
+
+    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f64();
+        let b = self.state[src2].get_f64();
+        self.state[dst].set_u32(u32::from(a < b));
+        ControlFlow::Continue(())
+    }
+
+    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow {
+        let a = self.state[src1].get_f64();
+        let b = self.state[src2].get_f64();
+        self.state[dst].set_u32(u32::from(a <= b));
+        ControlFlow::Continue(())
+    }
+
+    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow {
+        let a = self.state[src].get_u32();
+        self.state[dst].set_u32(a.trailing_zeros());
+        ControlFlow::Continue(())
+    }
+
+    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow {
+        let a = self.state[src].get_u64();
+        self.state[dst].set_u64(a.trailing_zeros().into());
+        ControlFlow::Continue(())
+    }
+
+    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow {
+        let a = self.state[src].get_u32();
+        self.state[dst].set_u32(a.leading_zeros());
+        ControlFlow::Continue(())
+    }
+
+    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow {
+        let a = self.state[src].get_u64();
+        self.state[dst].set_u64(a.leading_zeros().into());
+        ControlFlow::Continue(())
+    }
 }
 
 impl ExtendedOpVisitor for Interpreter<'_> {
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index a345f480806f..cb805af65460 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -184,6 +184,16 @@ macro_rules! for_each_op {
             /// 64-bit wrapping subtraction: `dst = src1 - src2`.
             xsub64 = Xsub64 { operands: BinaryOperands };
 
+            /// `low32(dst) = trailing_zeros(low32(src))`
+            xctz32 = Xctz32 { dst: XReg, src: XReg };
+            /// `dst = trailing_zeros(src)`
+            xctz64 = Xctz64 { dst: XReg, src: XReg };
+
+            /// `low32(dst) = leading_zeros(low32(src))`
+            xclz32 = Xclz32 { dst: XReg, src: XReg };
+            /// `dst = leading_zeros(src)`
+            xclz64 = Xclz64 { dst: XReg, src: XReg };
+
             /// `low32(dst) = low32(src1) << low5(src2)`
             xshl32 = Xshl32 { operands: BinaryOperands };
             /// `low32(dst) = low32(src1) >> low5(src2)`
@@ -338,6 +348,23 @@ macro_rules! for_each_op {
             fconst32 = FConst32 { dst: FReg, bits: u32 };
             /// `dst = bits`
             fconst64 = FConst64 { dst: FReg, bits: u64 };
+
+            /// `low32(dst) = zext(src1 == src2)`
+            feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 != src2)`
+            fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 < src2)`
+            flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 <= src2)`
+            flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 == src2)`
+            feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 != src2)`
+            fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 < src2)`
+            flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg };
+            /// `low32(dst) = zext(src1 <= src2)`
+            flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg };
         }
     };
 }