Skip to content

Commit

Permalink
pulley: Fill out most remaining simd float ops
Browse files Browse the repository at this point in the history
Get most simd/float-related tests passing, mostly by reusing the preexisting
scalar ops for the simd implementation.
  • Loading branch information
alexcrichton committed Dec 20, 2024
1 parent 058d751 commit 2ecf13e
Show file tree
Hide file tree
Showing 21 changed files with 356 additions and 34 deletions.
44 changes: 43 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,9 @@
;; `fcmp` on a type matched by `ty_scalar_float` is handed to `lower_fcmp`.
(rule (lower (fcmp cond lhs rhs @ (value_type (ty_scalar_float ty))))
      (lower_fcmp ty cond lhs rhs))

;; `fcmp` on a type matched by `ty_vec128` is handed to `lower_vfcmp`. This
;; rule is given explicit priority 1 relative to the scalar rule above.
(rule 1 (lower (fcmp cond lhs rhs @ (value_type (ty_vec128 ty))))
      (lower_vfcmp ty cond lhs rhs))

(decl lower_fcmp (Type FloatCC Value Value) XReg)

(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b))
Expand Down Expand Up @@ -751,6 +754,32 @@
(if-let true (floatcc_unordered cc))
(pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))

;; Lowers a vector float comparison: given the vector type, the `FloatCC`
;; condition and the two operands, produces the comparison result in a `VReg`.
(decl lower_vfcmp (Type FloatCC Value Value) VReg)

;; Conditions that map directly onto a single Pulley instruction, for each of
;; `$F32X4` and `$F64X2`: `Equal`, `NotEqual`, `LessThan`, `LessThanOrEqual`.
(rule (lower_vfcmp $F32X4 (FloatCC.Equal) lhs rhs)
      (pulley_veqf32x4 lhs rhs))
(rule (lower_vfcmp $F64X2 (FloatCC.Equal) lhs rhs)
      (pulley_veqf64x2 lhs rhs))
(rule (lower_vfcmp $F32X4 (FloatCC.NotEqual) lhs rhs)
      (pulley_vneqf32x4 lhs rhs))
(rule (lower_vfcmp $F64X2 (FloatCC.NotEqual) lhs rhs)
      (pulley_vneqf64x2 lhs rhs))
(rule (lower_vfcmp $F32X4 (FloatCC.LessThan) lhs rhs)
      (pulley_vltf32x4 lhs rhs))
(rule (lower_vfcmp $F64X2 (FloatCC.LessThan) lhs rhs)
      (pulley_vltf64x2 lhs rhs))
(rule (lower_vfcmp $F32X4 (FloatCC.LessThanOrEqual) lhs rhs)
      (pulley_vlteqf32x4 lhs rhs))
(rule (lower_vfcmp $F64X2 (FloatCC.LessThanOrEqual) lhs rhs)
      (pulley_vlteqf64x2 lhs rhs))

;; `Unordered` is synthesized: each operand is compared `NotEqual` against
;; itself (only a NaN lane is not equal to itself) and the two results are
;; combined with `pulley_vbor128`.
(rule (lower_vfcmp ty (FloatCC.Unordered) lhs rhs)
      (pulley_vbor128
        (lower_vfcmp ty (FloatCC.NotEqual) lhs lhs)
        (lower_vfcmp ty (FloatCC.NotEqual) rhs rhs)))

;; NB: apart from the plain `Unordered` rule above, Pulley doesn't have
;; lowerings for `Ordered` or the compound `Unordered*` `FloatCC` conditions
;; (e.g. `UnorderedOrEqual`), as that's not needed by wasm at this time.

;; Pulley doesn't have instructions for `>` and `>=`, so those conditions are
;; lowered as `<` and `<=` with the operands swapped.
(rule (lower_vfcmp ty (FloatCC.GreaterThan) lhs rhs)
      (lower_vfcmp ty (FloatCC.LessThan) rhs lhs))
(rule (lower_vfcmp ty (FloatCC.GreaterThanOrEqual) lhs rhs)
      (lower_vfcmp ty (FloatCC.LessThanOrEqual) rhs lhs))

;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl amode (Value Offset32) Amode)
Expand Down Expand Up @@ -1121,16 +1150,22 @@

;; `fsub`: one Pulley instruction per supported scalar and vector float type.
(rule (lower (has_type $F32 (fsub x y)))
      (pulley_fsub32 x y))
(rule (lower (has_type $F64 (fsub x y)))
      (pulley_fsub64 x y))
(rule (lower (has_type $F32X4 (fsub x y)))
      (pulley_vsubf32x4 x y))
(rule (lower (has_type $F64X2 (fsub x y)))
      (pulley_vsubf64x2 x y))

;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `fmul`: one Pulley instruction per supported scalar and vector float type.
(rule (lower (has_type $F32 (fmul x y)))
      (pulley_fmul32 x y))
(rule (lower (has_type $F64 (fmul x y)))
      (pulley_fmul64 x y))
(rule (lower (has_type $F32X4 (fmul x y)))
      (pulley_vmulf32x4 x y))
(rule (lower (has_type $F64X2 (fmul x y)))
      (pulley_vmulf64x2 x y))

;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `fdiv`: one Pulley instruction per supported scalar and vector float type.
(rule (lower (has_type $F32 (fdiv x y)))
      (pulley_fdiv32 x y))
(rule (lower (has_type $F64 (fdiv x y)))
      (pulley_fdiv64 x y))
(rule (lower (has_type $F32X4 (fdiv x y)))
      (pulley_vdivf32x4 x y))
(rule (lower (has_type $F64X2 (fdiv x y)))
      (pulley_vdivf64x2 x y))

;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1161,6 +1196,7 @@
(pulley_vfloor32x4 a))
;; `floor` on `$F64X2` lowers to `pulley_vfloor64x2`.
(rule (lower (has_type $F64X2 (floor x)))
      (pulley_vfloor64x2 x))

;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `ceil` on `$F32` lowers to `pulley_fceil32`.
(rule (lower (has_type $F32 (ceil x)))
      (pulley_fceil32 x))
Expand Down Expand Up @@ -1188,11 +1224,12 @@
;; `sqrt` on `$F64X2` lowers to `pulley_vsqrt64x2`.
(rule (lower (has_type $F64X2 (sqrt x)))
      (pulley_vsqrt64x2 x))


;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `fneg`: one Pulley instruction per supported scalar and vector float type.
(rule (lower (has_type $F32 (fneg x)))
      (pulley_fneg32 x))
(rule (lower (has_type $F64 (fneg x)))
      (pulley_fneg64 x))
(rule (lower (has_type $F32X4 (fneg x)))
      (pulley_vnegf32x4 x))
(rule (lower (has_type $F64X2 (fneg x)))
      (pulley_vnegf64x2 x))

;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1358,3 +1395,8 @@
(pulley_vinsertf32 (pulley_vconst128 0) a 0))
;; `scalar_to_vector` of an `$F64`: start from the 128-bit constant 0 and
;; insert the scalar at lane index 0 with `pulley_vinsertf64`.
(rule (lower (scalar_to_vector x @ (value_type $F64)))
      (pulley_vinsertf64 (pulley_vconst128 0) x 0))

;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector `fma`: the three operands are passed through unchanged, in order, to
;; the matching Pulley instruction.
(rule (lower (has_type $F32X4 (fma x y z)))
      (pulley_vfma32x4 x y z))
(rule (lower (has_type $F64X2 (fma x y z)))
      (pulley_vfma64x2 x y z))
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fadd-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %splat_f32x4_2(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fadd.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %fadd_f32x4(f32x4, f32x4) -> f32x4 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-le.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 {
block0(v0: f32x4, v1: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fdiv.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %fdiv_f32x4(f32x4, f32x4) -> f32x4 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-floor.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target s390x
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %floor_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fma-neg.clif
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ target aarch64
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

;; This file is not enabled in the interpreter since SIMD fneg is currently broken
;; there.
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fma.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target aarch64
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fma_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0:f32x4, v1:f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fmul.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %fmul_f32x4(f32x4, f32x4) -> f32x4 {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fneg.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fneg_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fsub.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be


function %fsub_f32x4(f32x4, f32x4) -> f32x4 {
Expand Down
9 changes: 0 additions & 9 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,27 +401,18 @@ impl WastTest {
// features in Pulley are implemented.
if config.compiler == Compiler::CraneliftPulley {
let unsupported = [
"misc_testsuite/simd/canonicalize-nan.wast",
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
"misc_testsuite/simd/v128-select.wast",
"spec_testsuite/proposals/annotations/simd_lane.wast",
"spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/simd_lane.wast",
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
"spec_testsuite/simd_f32x4_arith.wast",
"spec_testsuite/simd_f32x4_cmp.wast",
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
"spec_testsuite/simd_f64x2_arith.wast",
"spec_testsuite/simd_f64x2_cmp.wast",
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
"spec_testsuite/simd_i16x8_arith2.wast",
"spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
"spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
Expand Down
Loading

0 comments on commit 2ecf13e

Please sign in to comment.