Skip to content

Commit 7fded6a

Browse files
committed
init
1 parent a179f95 commit 7fded6a

File tree

3 files changed

+131
-1
lines changed

3 files changed

+131
-1
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1131,26 +1131,44 @@
11311131

11321132
(rule (lower (has_type $F32 (trunc a))) (pulley_ftrunc32 a))
11331133
(rule (lower (has_type $F64 (trunc a))) (pulley_ftrunc64 a))
1134+
(rule (lower (has_type $F32X4 (trunc a))) (pulley_vtrunc32x4 a))
1135+
(rule (lower (has_type $F64X2 (trunc a))) (pulley_vtrunc64x2 a))
11341136

11351137
;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11361138

11371139
(rule (lower (has_type $F32 (floor a))) (pulley_ffloor32 a))
11381140
(rule (lower (has_type $F64 (floor a))) (pulley_ffloor64 a))
1139-
1141+
(rule (lower (has_type $F32X4 (floor a)))
1142+
(pulley_vfloor32x4 a))
1143+
(rule (lower (has_type $F64X2 (floor a)))
1144+
(pulley_vfloor64x2 a))
11401145
;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11411146

11421147
(rule (lower (has_type $F32 (ceil a))) (pulley_fceil32 a))
11431148
(rule (lower (has_type $F64 (ceil a))) (pulley_fceil64 a))
1149+
(rule (lower (has_type $F64X2 (ceil a)))
1150+
(pulley_vceil64x2 a))
1151+
(rule (lower (has_type $F32X4 (ceil a)))
1152+
(pulley_vceil32x4 a))
11441153

11451154
;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11461155

11471156
(rule (lower (has_type $F32 (nearest a))) (pulley_fnearest32 a))
11481157
(rule (lower (has_type $F64 (nearest a))) (pulley_fnearest64 a))
1158+
(rule (lower (has_type $F32X4 (nearest a)))
1159+
(pulley_vnearest32x4 a))
1160+
(rule (lower (has_type $F64X2 (nearest a)))
1161+
(pulley_vnearest64x2 a))
11491162

11501163
;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11511164

11521165
(rule (lower (has_type $F32 (sqrt a))) (pulley_fsqrt32 a))
11531166
(rule (lower (has_type $F64 (sqrt a))) (pulley_fsqrt64 a))
1167+
(rule (lower (has_type $F32X4 (sqrt a)))
1168+
(pulley_vsqrt32x4 a))
1169+
(rule (lower (has_type $F64X2 (sqrt a)))
1170+
(pulley_vsqrt64x2 a))
1171+
11541172

11551173
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11561174

pulley/src/interp.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2956,30 +2956,122 @@ impl ExtendedOpVisitor for Interpreter<'_> {
29562956
ControlFlow::Continue(())
29572957
}
29582958

2959+
fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
2960+
let mut a = self.state[src].get_f32x4();
2961+
for elem in a.iter_mut() {
2962+
*elem = elem.wasm_trunc();
2963+
}
2964+
self.state[dst].set_f32x4(a);
2965+
ControlFlow::Continue(())
2966+
}
2967+
2968+
fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
2969+
let mut a = self.state[src].get_f64x2();
2970+
for elem in a.iter_mut() {
2971+
*elem = elem.wasm_trunc();
2972+
}
2973+
self.state[dst].set_f64x2(a);
2974+
ControlFlow::Continue(())
2975+
}
2976+
29592977
fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
29602978
let a = self.state[src].get_f32();
29612979
self.state[dst].set_f32(a.wasm_floor());
29622980
ControlFlow::Continue(())
29632981
}
29642982

2983+
fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
2984+
let mut a = self.state[src].get_f32x4();
2985+
for elem in a.iter_mut() {
2986+
*elem = elem.wasm_floor();
2987+
}
2988+
self.state[dst].set_f32x4(a);
2989+
ControlFlow::Continue(())
2990+
}
2991+
2992+
fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
2993+
let mut a = self.state[src].get_f64x2();
2994+
for elem in a.iter_mut() {
2995+
*elem = elem.wasm_floor();
2996+
}
2997+
self.state[dst].set_f64x2(a);
2998+
ControlFlow::Continue(())
2999+
}
3000+
29653001
fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
29663002
let a = self.state[src].get_f32();
29673003
self.state[dst].set_f32(a.wasm_ceil());
29683004
ControlFlow::Continue(())
29693005
}
29703006

3007+
fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3008+
let mut a = self.state[src].get_f32x4();
3009+
for elem in a.iter_mut() {
3010+
*elem = elem.wasm_ceil();
3011+
}
3012+
self.state[dst].set_f32x4(a);
3013+
3014+
ControlFlow::Continue(())
3015+
}
3016+
3017+
fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3018+
let mut a = self.state[src].get_f64x2();
3019+
for elem in a.iter_mut() {
3020+
*elem = elem.wasm_ceil();
3021+
}
3022+
self.state[dst].set_f64x2(a);
3023+
3024+
ControlFlow::Continue(())
3025+
}
3026+
29713027
fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
29723028
let a = self.state[src].get_f32();
29733029
self.state[dst].set_f32(a.wasm_nearest());
29743030
ControlFlow::Continue(())
29753031
}
29763032

3033+
fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3034+
let mut a = self.state[src].get_f32x4();
3035+
for elem in a.iter_mut() {
3036+
*elem = elem.wasm_nearest();
3037+
}
3038+
self.state[dst].set_f32x4(a);
3039+
ControlFlow::Continue(())
3040+
}
3041+
3042+
fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3043+
let mut a = self.state[src].get_f64x2();
3044+
for elem in a.iter_mut() {
3045+
*elem = elem.wasm_nearest();
3046+
}
3047+
self.state[dst].set_f64x2(a);
3048+
ControlFlow::Continue(())
3049+
}
3050+
29773051
fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
29783052
let a = self.state[src].get_f32();
29793053
self.state[dst].set_f32(a.wasm_sqrt());
29803054
ControlFlow::Continue(())
29813055
}
29823056

3057+
fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3058+
let mut a = self.state[src].get_f32x4();
3059+
for elem in a.iter_mut() {
3060+
*elem = elem.wasm_sqrt();
3061+
}
3062+
self.state[dst].set_f32x4(a);
3063+
ControlFlow::Continue(())
3064+
}
3065+
3066+
fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3067+
let mut a = self.state[src].get_f64x2();
3068+
for elem in a.iter_mut() {
3069+
*elem = elem.wasm_sqrt();
3070+
}
3071+
self.state[dst].set_f64x2(a);
3072+
ControlFlow::Continue(())
3073+
}
3074+
29833075
fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
29843076
let a = self.state[src].get_f32();
29853077
self.state[dst].set_f32(-a);

pulley/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,14 +805,30 @@ macro_rules! for_each_extended_op {
805805
fminimum32 = Fminimum32 { operands: BinaryOperands<FReg> };
806806
/// `low32(dst) = ieee_trunc(low32(src))`
807807
ftrunc32 = Ftrunc32 { dst: FReg, src: FReg };
808+
/// `low128(dst) = ieee_trunc(low128(src))`
809+
vtrunc32x4 = Vtrunc32x4 { dst: VReg, src: VReg };
810+
/// `low128(dst) = ieee_trunc(low128(src))`
811+
vtrunc64x2 = Vtrunc64x2 { dst: VReg, src: VReg };
808812
/// `low32(dst) = ieee_floor(low32(src))`
809813
ffloor32 = Ffloor32 { dst: FReg, src: FReg };
814+
/// `low128(dst) = ieee_floor(low128(src))`
815+
vfloor32x4 = Vfloor32x4 { dst: VReg, src: VReg };
816+
/// `low128(dst) = ieee_floor(low128(src))`
817+
vfloor64x2 = Vfloor64x2 { dst: VReg, src: VReg };
810818
/// `low32(dst) = ieee_ceil(low32(src))`
811819
fceil32 = Fceil32 { dst: FReg, src: FReg };
820+
/// `low128(dst) = ieee_ceil(low128(src))`
821+
vceil32x4 = Vceil32x4 { dst: VReg, src: VReg };
822+
/// `low128(dst) = ieee_ceil(low128(src))`
823+
vceil64x2 = Vceil64x2 { dst: VReg, src: VReg };
812824
/// `low32(dst) = ieee_nearest(low32(src))`
813825
fnearest32 = Fnearest32 { dst: FReg, src: FReg };
814826
/// `low32(dst) = ieee_sqrt(low32(src))`
815827
fsqrt32 = Fsqrt32 { dst: FReg, src: FReg };
828+
/// `low128(dst) = ieee_sqrt(low128(src))`
829+
vsqrt32x4 = Vsqrt32x4 { dst: VReg, src: VReg };
830+
/// `low128(dst) = ieee_sqrt(low128(src))`
831+
vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
816832
/// `low32(dst) = -low32(src)`
817833
fneg32 = Fneg32 { dst: FReg, src: FReg };
818834
/// `low32(dst) = |low32(src)|`
@@ -838,6 +854,10 @@ macro_rules! for_each_extended_op {
838854
fceil64 = Fceil64 { dst: FReg, src: FReg };
839855
/// `dst = ieee_nearest(src)`
840856
fnearest64 = Fnearest64 { dst: FReg, src: FReg };
857+
/// `low128(dst) = ieee_nearest(low128(src))`
858+
vnearest32x4 = Vnearest32x4 { dst: VReg, src: VReg };
859+
/// `low128(dst) = ieee_nearest(low128(src))`
860+
vnearest64x2 = Vnearest64x2 { dst: VReg, src: VReg };
841861
/// `dst = ieee_sqrt(src)`
842862
fsqrt64 = Fsqrt64 { dst: FReg, src: FReg };
843863
/// `dst = -src`

0 commit comments

Comments
 (0)