diff --git a/crates/std_float/benches/bench_libm.rs b/crates/std_float/benches/bench_libm.rs
new file mode 100644
index 00000000000..5e3ceeaffdb
--- /dev/null
+++ b/crates/std_float/benches/bench_libm.rs
@@ -0,0 +1,177 @@
+#![feature(test)]
+#![feature(portable_simd)]
+#![feature(concat_idents)]
+
+extern crate test;
+use std_float::StdLibm;
+
+use test::{black_box, Bencher};
+
+use core_simd::{f32x16, f32x4, f64x4, f64x8};
+
+const N: usize = 1024;
+
+fn init_f32x4() -> Vec<f32x4> {
+    vec![f32x4::splat(black_box(0.5)); N / 4]
+}
+
+fn init_f32x16() -> Vec<f32x16> {
+    vec![f32x16::splat(black_box(0.5)); N / 16]
+}
+
+fn init_f32() -> Vec<f32> {
+    vec![black_box(0.5); N]
+}
+
+fn init_f64x4() -> Vec<f64x4> {
+    vec![f64x4::splat(black_box(0.5)); N / 4]
+}
+
+fn init_f64x8() -> Vec<f64x8> {
+    vec![f64x8::splat(black_box(0.5)); N / 8]
+}
+
+fn init_f64() -> Vec<f64> {
+    vec![black_box(1.0); N]
+}
+
+// These fuctions are not inlined to make it easier to check the asm.
+//
+// Build with:
+//
+// RUSTFLAGS="-C target-cpu=native --emit asm" cargo bench
+
+macro_rules! benchmark_libm {
+    (
+        functions ($(
+            $names : ident,
+            $functions : expr,
+            $init : expr
+        )*)
+    ) => {
+
+        $(
+            #[bench]
+            #[inline(never)]
+            fn $names(b: &mut Bencher) {
+                let x = $init;
+                let mut y = $init;
+                b.iter(|| {
+                    for (x, y) in x.iter().zip(y.iter_mut()) {
+                        *y = ($functions)(*x);
+                    }
+                })
+            }
+        )*
+    }
+}
+
+benchmark_libm! {
+    functions (
+        sin_f32x4,  |x : f32x4| x.sin(),  init_f32x4()
+        sin_f32x16, |x : f32x16| x.sin(), init_f32x16()
+        sin_f32,    |x : f32| x.sin(),    init_f32()
+        sin_f64x4,  |x : f64x4| x.sin(),  init_f64x4()
+        sin_f64x8,  |x : f64x8| x.sin(),  init_f64x8()
+        sin_f64,    |x : f64| x.sin(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        cos_f32x4,  |x : f32x4| x.cos(),  init_f32x4()
+        cos_f32x16, |x : f32x16| x.cos(), init_f32x16()
+        cos_f32,    |x : f32| x.cos(),    init_f32()
+        cos_f64x4,  |x : f64x4| x.cos(),  init_f64x4()
+        cos_f64x8,  |x : f64x8| x.cos(),  init_f64x8()
+        cos_f64,    |x : f64| x.cos(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        tan_f32x4,  |x : f32x4| x.tan(),  init_f32x4()
+        tan_f32x16, |x : f32x16| x.tan(), init_f32x16()
+        tan_f32,    |x : f32| x.tan(),    init_f32()
+        tan_f64x4,  |x : f64x4| x.tan(),  init_f64x4()
+        tan_f64x8,  |x : f64x8| x.tan(),  init_f64x8()
+        tan_f64,    |x : f64| x.tan(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        asin_f32x4,  |x : f32x4| x.asin(),  init_f32x4()
+        asin_f32x16, |x : f32x16| x.asin(), init_f32x16()
+        asin_f32,    |x : f32| x.asin(),    init_f32()
+        asin_f64x4,  |x : f64x4| x.asin(),  init_f64x4()
+        asin_f64x8,  |x : f64x8| x.asin(),  init_f64x8()
+        asin_f64,    |x : f64| x.asin(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        acos_f32x4,  |x : f32x4| x.acos(),  init_f32x4()
+        acos_f32x16, |x : f32x16| x.acos(), init_f32x16()
+        acos_f32,    |x : f32| x.acos(),    init_f32()
+        acos_f64x4,  |x : f64x4| x.acos(),  init_f64x4()
+        acos_f64x8,  |x : f64x8| x.acos(),  init_f64x8()
+        acos_f64,    |x : f64| x.acos(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        atan_f32x4,  |x : f32x4| x.atan(),  init_f32x4()
+        atan_f32x16, |x : f32x16| x.atan(), init_f32x16()
+        atan_f32,    |x : f32| x.atan(),    init_f32()
+        atan_f64x4,  |x : f64x4| x.atan(),  init_f64x4()
+        atan_f64x8,  |x : f64x8| x.atan(),  init_f64x8()
+        atan_f64,    |x : f64| x.atan(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        exp2_f32x4,  |x : f32x4| x.exp2(),  init_f32x4()
+        exp2_f32x16, |x : f32x16| x.exp2(), init_f32x16()
+        exp2_f32,    |x : f32| x.exp2(),    init_f32()
+        exp2_f64x4,  |x : f64x4| x.exp2(),  init_f64x4()
+        exp2_f64x8,  |x : f64x8| x.exp2(),  init_f64x8()
+        exp2_f64,    |x : f64| x.exp2(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        exp_f32x4,  |x : f32x4| x.exp(),  init_f32x4()
+        exp_f32x16, |x : f32x16| x.exp(), init_f32x16()
+        exp_f32,    |x : f32| x.exp(),    init_f32()
+        exp_f64x4,  |x : f64x4| x.exp(),  init_f64x4()
+        exp_f64x8,  |x : f64x8| x.exp(),  init_f64x8()
+        exp_f64,    |x : f64| x.exp(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        log2_f32x4,  |x : f32x4| x.log2(),  init_f32x4()
+        log2_f32x16, |x : f32x16| x.log2(), init_f32x16()
+        log2_f32,    |x : f32| x.log2(),    init_f32()
+        log2_f64x4,  |x : f64x4| x.log2(),  init_f64x4()
+        log2_f64x8,  |x : f64x8| x.log2(),  init_f64x8()
+        log2_f64,    |x : f64| x.log2(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        ln_f32x4,  |x : f32x4| x.ln(),  init_f32x4()
+        ln_f32x16, |x : f32x16| x.ln(), init_f32x16()
+        ln_f32,    |x : f32| x.ln(),    init_f32()
+        ln_f64x4,  |x : f64x4| x.ln(),  init_f64x4()
+        ln_f64x8,  |x : f64x8| x.ln(),  init_f64x8()
+        ln_f64,    |x : f64| x.ln(),    init_f64()
+    )
+}
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
index 4bd4d4c05e3..6bfd91d4730 100644
--- a/crates/std_float/src/lib.rs
+++ b/crates/std_float/src/lib.rs
@@ -11,6 +11,12 @@ use core_simd::simd;
 
 use simd::{LaneCount, Simd, SupportedLaneCount};
 
+mod libm32;
+mod libm64;
+
+#[cfg(test)]
+mod test_libm;
+
 #[cfg(feature = "as_crate")]
 mod experimental {
     pub trait Sealed {}
@@ -117,6 +123,102 @@ pub trait StdFloat: Sealed + Sized {
     fn fract(self) -> Self;
 }
 
+pub trait StdLibm: StdFloat {
+    /// Signed integer type with the same number of bits as this floating point type.
+    type IntType;
+
+    /// Unsigned integer type with the same number of bits as this floating point type.
+    type UintType;
+
+    /// Computes the sine of a number (in radians).
+    fn sin(self) -> Self;
+
+    /// Computes the cosine of a number (in radians).
+    fn cos(self) -> Self;
+
+    /// Computes the tangent of a number (in radians).
+    fn tan(self) -> Self;
+
+    /// Computes the arcsine of a number. Return value is in radians in
+    /// the range [-pi/2, pi/2] or NaN if the number is outside the range
+    /// [-1, 1].
+    fn asin(self) -> Self;
+
+    /// Computes the arccosine of a number. Return value is in radians in
+    /// the range [0, pi] or NaN if the number is outside the range
+    /// [-1, 1].
+    fn acos(self) -> Self;
+
+    /// Computes the arctangent of a number. Return value is in radians in the
+    /// range [-pi/2, pi/2];
+    fn atan(self) -> Self;
+
+    /// Computes the four quadrant arctangent of `self` (`y`) and `other` (`x`) in radians.
+    ///
+    /// * `x = 0`, `y = 0`: `0`
+    /// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
+    /// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
+    /// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
+    fn atan2(self, x: Self) -> Self;
+
+    /// Returns `2^(self)`.
+    fn exp2(self) -> Self;
+
+    /// Returns `e^(self)`, (the exponential function).
+    fn exp(self) -> Self;
+
+    /// Returns `e^(self) - 1` in a way that is accurate even if the
+    /// number is close to zero.
+    fn exp_m1(self) -> Self;
+
+    /// Returns the base 2 logarithm of the number.
+    fn log2(self) -> Self;
+
+    /// Returns `ln(1+n)` (natural logarithm) more accurately than if
+    /// the operations were performed separately.
+    fn ln_1p(self) -> Self;
+
+    /// Returns the natural logarithm of the number.
+    fn ln(self) -> Self;
+
+    /// Returns the base 10 logarithm of the number.
+    fn log10(self) -> Self;
+
+    /// Returns the logarithm of the number with respect to an arbitrary base.
+    fn log(self, base: Self) -> Self;
+
+    /// Raises a number to a floating point power.
+    fn powf(self, y: Self) -> Self;
+
+    /// Raises a number to an integer power.
+    fn powi(self, y: Self::IntType) -> Self;
+
+    /// Hyperbolic sine function.
+    fn sinh(self) -> Self;
+
+    /// Hyperbolic cosine function.
+    fn cosh(self) -> Self;
+
+    /// Hyperbolic tangent function.
+    fn tanh(self) -> Self;
+
+    /// Inverse hyperbolic sine function.
+    fn asinh(self) -> Self;
+
+    /// Inverse hyperbolic cosine function.
+    fn acosh(self) -> Self;
+
+    /// Inverse hyperbolic tangent function.
+    fn atanh(self) -> Self;
+
+    /// Returns the cube root of a number.
+    fn cbrt(self) -> Self;
+
+    /// Calculates the length of the hypotenuse of a right-angle triangle given
+    /// legs of length `x` and `y`.
+    fn hypot(self, other: Self) -> Self;
+}
+
 impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
 impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
 
@@ -161,5 +263,6 @@ mod tests {
         let _xfma = x.mul_add(x, x);
         let _xsqrt = x.sqrt();
         let _ = x2.abs() * x2;
+        let _ = x.sin();
     }
 }
diff --git a/crates/std_float/src/libm32.rs b/crates/std_float/src/libm32.rs
new file mode 100644
index 00000000000..41c19d5a9cf
--- /dev/null
+++ b/crates/std_float/src/libm32.rs
@@ -0,0 +1,324 @@
+#![allow(non_snake_case)]
+#![allow(clippy::excessive_precision)]
+#![allow(clippy::approx_constant)]
+use super::StdLibm;
+
+use super::StdFloat;
+
+use super::simd::{LaneCount, Simd, SupportedLaneCount};
+
+impl<const N: usize> StdLibm for Simd<f32, N>
+where
+    LaneCount<N>: SupportedLaneCount,
+{
+    type IntType = Simd<i32, N>;
+    type UintType = Simd<u32, N>;
+    #[inline]
+    fn sinh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
+        let x = self;
+        let a = x.mul_add(LOG2_E, -Self::splat(1.0));
+        let b = x.mul_add(-LOG2_E, -Self::splat(1.0));
+        (a).exp2() - (b).exp2()
+    }
+    #[inline]
+    fn cosh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
+        let x = self;
+        let a = x.mul_add(LOG2_E, -Self::splat(1.0));
+        let b = x.mul_add(-LOG2_E, -Self::splat(1.0));
+        (a).exp2() + (b).exp2()
+    }
+    #[inline]
+    fn tanh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
+        let x = self;
+        let exp2x = (x * (LOG2_E * Self::splat(2.0))).exp2();
+        (exp2x - Self::splat(1.0)) / (exp2x + Self::splat(1.0))
+    }
+    #[inline]
+    fn asinh(self) -> Self {
+        let x = self;
+        (x + (x * x + Self::splat(1.0)).sqrt()).ln()
+    }
+    #[inline]
+    fn acosh(self) -> Self {
+        let NAN = Self::splat(f32::NAN);
+        let x = self;
+        ((x).lanes_lt(Self::splat(1.0))).select(NAN, (x + (x * x - Self::splat(1.0)).sqrt()).ln())
+    }
+    #[inline]
+    fn atanh(self) -> Self {
+        let x = self;
+        ((Self::splat(1.0) + x).ln() - (Self::splat(1.0) - x).ln()) * Self::splat(0.5)
+    }
+    #[inline]
+    fn asin(self) -> Self {
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let arg = self;
+        let LIM = Self::splat(0.70710678118654752440);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
+        let s = ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
+        let x =
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
+        let y = (Self::splat(0.12778643f32))
+            .mul_add(x * x, -Self::splat(0.12145509f32))
+            .mul_add(x * x, Self::splat(0.09684546f32))
+            .mul_add(x * x, Self::splat(0.009571692f32))
+            .mul_add(x * x, Self::splat(0.047712374f32))
+            .mul_add(x * x, Self::splat(0.07478066f32))
+            .mul_add(x * x, Self::splat(0.1666726f32))
+            .mul_add(x * x, Self::splat(1f32))
+            * x;
+        ((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s)
+    }
+    #[inline]
+    fn acos(self) -> Self {
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let PI = Self::splat(3.14159265358979323846);
+        let arg = self;
+        let LIM = Self::splat(0.70710678118654752440);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
+        let s = ((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
+        let x =
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
+        let y = (Self::splat(0.12778643f32))
+            .mul_add(x * x, -Self::splat(0.12145509f32))
+            .mul_add(x * x, Self::splat(0.09684546f32))
+            .mul_add(x * x, Self::splat(0.009571692f32))
+            .mul_add(x * x, Self::splat(0.047712374f32))
+            .mul_add(x * x, Self::splat(0.07478066f32))
+            .mul_add(x * x, Self::splat(0.1666726f32))
+            .mul_add(x * x, Self::splat(1f32))
+            * x;
+        ((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
+    }
+    #[inline]
+    fn atan(self) -> Self {
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let arg = self;
+        let LIM = Self::splat(1.0);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
+        let x = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
+        let y = (-Self::splat(0.0039602574f32))
+            .mul_add(x * x, Self::splat(0.021659138f32))
+            .mul_add(x * x, -Self::splat(0.05587457f32))
+            .mul_add(x * x, Self::splat(0.09664151f32))
+            .mul_add(x * x, -Self::splat(0.13930209f32))
+            .mul_add(x * x, Self::splat(0.19954468f32))
+            .mul_add(x * x, -Self::splat(0.33331004f32))
+            .mul_add(x * x, Self::splat(0.9999998f32))
+            * x;
+        ((arg.abs()).lanes_lt(LIM)).select(y, c - y)
+    }
+    #[inline]
+    fn atan2(self, x: Self) -> Self {
+        let PI_BY_8 = Self::splat(0.39269908169872415481);
+        let TAN_PI_BY_8 =Self ::splat (0.4142135623730950488038860984269714564297583406547512313284044241429748710410687354958479747260924362);
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let PI = Self::splat(3.14159265358979323846);
+        let y = self;
+        let x1 = x.abs();
+        let y1 = y.abs();
+        let x2 = ((x1).lanes_gt(y1)).select(x1, y1);
+        let y2 = ((x1).lanes_gt(y1)).select(y1, x1);
+        let x3 = TAN_PI_BY_8 * y2 + x2;
+        let y3 = (-TAN_PI_BY_8) * x2 + y2;
+        let z = y3 / x3;
+        let a = (-Self::splat(0.036384862f32))
+            .mul_add(z * z, Self::splat(0.06897726f32))
+            .mul_add(z * z, -Self::splat(0.08974458f32))
+            .mul_add(z * z, Self::splat(0.11101332f32))
+            .mul_add(z * z, -Self::splat(0.14285259f32))
+            .mul_add(z * z, Self::splat(0.1999999f32))
+            .mul_add(z * z, -Self::splat(0.33333334f32))
+            .mul_add(z * z, Self::splat(1f32))
+            * z
+            + PI_BY_8;
+        let q1 = ((x1).lanes_gt(y1)).select(a, PI_BY_2 - a);
+        let q12 = ((x).lanes_ge(Self::splat(0.0))).select(q1, PI - q1);
+        ((y).lanes_ge(Self::splat(0.0))).select(q12, -q12)
+    }
+    #[inline]
+    fn exp2(self) -> Self {
+        let EXP2_MAX = Self::splat(127.0f32);
+        let EXP2_MIN = -Self::splat(127.0f32);
+        let EXP2_SCALE = Self::splat(8388608.0f32);
+        let EXP2_ONE = Self::splat(1065353216.0f32);
+        let INFINITY = Self::splat(f32::INFINITY);
+        let arg = self;
+        let r = arg.round();
+        let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u32>());
+        let x = arg - r;
+        let y = (Self::splat(0.000015310081f32))
+            .mul_add(x, Self::splat(0.0001547802f32))
+            .mul_add(x, Self::splat(0.0013333454f32))
+            .mul_add(x, Self::splat(0.009617995f32))
+            .mul_add(x, Self::splat(0.05550411f32))
+            .mul_add(x, Self::splat(0.24022652f32))
+            .mul_add(x, Self::splat(0.6931472f32))
+            .mul_add(x, Self::splat(1f32))
+            * mul;
+        let y1 = ((arg).lanes_gt(EXP2_MAX)).select(INFINITY, y);
+        ((r).lanes_lt(EXP2_MIN)).select(Self::splat(0.0), y1)
+    }
+    #[inline]
+    fn exp(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
+        let arg = self;
+        (arg * LOG2_E).exp2()
+    }
+    #[inline]
+    fn exp_m1(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408);
+        let EXP2_SCALE = Self::splat(8388608.0f32);
+        let EXP2_ONE = Self::splat(1065353216.0f32);
+        let arg = self;
+        let scaled = arg * LOG2_E;
+        let r = scaled.round();
+        let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u32>());
+        let x = scaled - r;
+        (Self::splat(0.000015310081f32))
+            .mul_add(x, Self::splat(0.0001547802f32))
+            .mul_add(x, Self::splat(0.0013333454f32))
+            .mul_add(x, Self::splat(0.009617995f32))
+            .mul_add(x, Self::splat(0.05550411f32))
+            .mul_add(x, Self::splat(0.24022652f32))
+            .mul_add(x, Self::splat(0.6931472f32))
+            .mul_add(x, -Self::splat(0.00000000008090348f32))
+            * mul
+            + (mul - Self::splat(1.0))
+    }
+    #[inline]
+    fn log2(self) -> Self {
+        let ONE_BITS = Self::UintType::splat(0x3f800000_u32);
+        let ONE_MASK = Self::UintType::splat(0x007fffff_u32);
+        let MIN_POSITIVE = Self::splat(f32::MIN_POSITIVE);
+        let INFINITY = Self::splat(f32::INFINITY);
+        let NAN = Self::splat(f32::NAN);
+        let LOG2_OFFSET = Self::IntType::splat(127_i32);
+        let LOG2_SHIFT = Self::IntType::splat(23_i32);
+        let arg = self;
+        let arg_bits = arg.to_bits();
+        let exponent = (arg_bits.cast::<i32>() >> LOG2_SHIFT) - LOG2_OFFSET;
+        let x = Self::from_bits((arg_bits & ONE_MASK) | ONE_BITS) - Self::splat(1.5);
+        let y = (Self::splat(0.005413892f32))
+            .mul_add(x, -Self::splat(0.009083694f32))
+            .mul_add(x, Self::splat(0.011741556f32))
+            .mul_add(x, -Self::splat(0.020581678f32))
+            .mul_add(x, Self::splat(0.038030382f32))
+            .mul_add(x, -Self::splat(0.07129922f32))
+            .mul_add(x, Self::splat(0.14248715f32))
+            .mul_add(x, -Self::splat(0.32059687f32))
+            .mul_add(x, Self::splat(0.9617967f32))
+            .mul_add(x, Self::splat(0.5849625f32));
+        ((arg).lanes_lt(Self::splat(0.0))).select(
+            -NAN,
+            ((arg).lanes_lt(MIN_POSITIVE)).select(-INFINITY, y + (exponent.cast::<f32>())),
+        )
+    }
+    #[inline]
+    fn ln_1p(self) -> Self {
+        let arg = self;
+        (Self::splat(1.0) + arg).ln()
+    }
+    #[inline]
+    fn ln(self) -> Self {
+        let RECIP_LOG2_E = Self::splat(0.69314718055994530942);
+        let arg = self;
+        (arg).log2() * RECIP_LOG2_E
+    }
+    #[inline]
+    fn log10(self) -> Self {
+        let RECIP_LOG2_10 = Self::splat(0.30102999566398119521);
+        let arg = self;
+        (arg).log2() * RECIP_LOG2_10
+    }
+    #[inline]
+    fn log(self, base: Self) -> Self {
+        let arg = self;
+        (arg).log2() / (base).log2()
+    }
+    #[inline]
+    fn powf(self, y: Self) -> Self {
+        let arg = self;
+        ((arg).log2() * y).exp2()
+    }
+    #[inline]
+    fn powi(self, y: Self::IntType) -> Self {
+        let x = self;
+        (x).powf(y.cast::<f32>())
+    }
+    #[inline]
+    fn cbrt(self) -> Self {
+        let TWO_THIRDS = Self::splat(0.66666667f32);
+        let ONE_THIRD = Self::splat(0.33333333f32);
+        let EXP2_ONE = Self::splat(1065353216.0f32);
+        let x = self;
+        let r = Self::from_bits(
+            ((x.abs().to_bits().cast::<f32>()).mul_add(ONE_THIRD, EXP2_ONE * TWO_THIRDS))
+                .cast::<u32>(),
+        );
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        r.copysign(x)
+    }
+    #[inline]
+    fn hypot(self, y: Self) -> Self {
+        let MIN_POSITIVE = Self::splat(f32::MIN_POSITIVE);
+        let x = self;
+        let xgty = (x.abs()).lanes_gt(y.abs());
+        let x2 = (xgty).select(x, y);
+        let y2 = (xgty).select(y, x);
+        ((x2.abs()).lanes_le(MIN_POSITIVE)).select(
+            x2,
+            x2.abs() * (Self::splat(1.0) + (y2 / x2) * (y2 / x2)).sqrt(),
+        )
+    }
+    #[inline]
+    fn sin(self) -> Self {
+        let RECIP_2PI = Self::splat(0.15915494309189533577);
+        let arg = self;
+        let scaled = arg * RECIP_2PI;
+        let x = scaled - scaled.round();
+        (-Self::splat(12.26886f32))
+            .mul_add(x * x, Self::splat(41.21624f32))
+            .mul_add(x * x, -Self::splat(76.58672f32))
+            .mul_add(x * x, Self::splat(81.59746f32))
+            .mul_add(x * x, -Self::splat(41.34151f32))
+            .mul_add(x * x, Self::splat(6.2831845f32))
+            * x
+    }
+    #[inline]
+    fn cos(self) -> Self {
+        let RECIP_2PI = Self::splat(0.15915494309189533577);
+        let arg = self;
+        let scaled = arg * RECIP_2PI;
+        let x = scaled - scaled.round();
+        (Self::splat(6.5286584f32))
+            .mul_add(x * x, -Self::splat(25.973276f32))
+            .mul_add(x * x, Self::splat(60.17118f32))
+            .mul_add(x * x, -Self::splat(85.45092f32))
+            .mul_add(x * x, Self::splat(64.939186f32))
+            .mul_add(x * x, -Self::splat(19.739206f32))
+            .mul_add(x * x, Self::splat(1f32))
+    }
+    #[inline]
+    fn tan(self) -> Self {
+        let RECIP_PI = Self::splat(0.31830988618379067154);
+        let arg = self;
+        let scaled = arg * RECIP_PI;
+        let x = scaled - scaled.round();
+        let recip = Self::splat(1.0) / (x * x - Self::splat(0.25));
+        let y = (Self::splat(0.014397301f32))
+            .mul_add(x * x, Self::splat(0.021017345f32))
+            .mul_add(x * x, Self::splat(0.05285888f32))
+            .mul_add(x * x, Self::splat(0.13475448f32))
+            .mul_add(x * x, Self::splat(0.55773664f32))
+            .mul_add(x * x, -Self::splat(0.7853982f32))
+            * x;
+        y * recip
+    }
+}
diff --git a/crates/std_float/src/libm64.rs b/crates/std_float/src/libm64.rs
new file mode 100644
index 00000000000..314a89c180e
--- /dev/null
+++ b/crates/std_float/src/libm64.rs
@@ -0,0 +1,399 @@
+#![allow(non_snake_case)]
+#![allow(clippy::excessive_precision)]
+#![allow(clippy::approx_constant)]
+use super::StdLibm;
+
+use super::StdFloat;
+
+use super::simd::{LaneCount, Simd, SupportedLaneCount};
+
+impl<const N: usize> StdLibm for Simd<f64, N>
+where
+    LaneCount<N>: SupportedLaneCount,
+{
+    type IntType = Simd<i64, N>;
+    type UintType = Simd<u64, N>;
+    #[inline]
+    fn sinh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359924681001892137426660660756662389692043961734752676409787833023288579071980359);
+        let x = self;
+        let a = x.mul_add(LOG2_E, -Self::splat(1.0));
+        let b = x.mul_add(-LOG2_E, -Self::splat(1.0));
+        (a).exp2() - (b).exp2()
+    }
+    #[inline]
+    fn cosh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359924681001892137426660660756662389692043961734752676409787833023288579071980359);
+        let x = self;
+        let a = x.mul_add(LOG2_E, -Self::splat(1.0));
+        let b = x.mul_add(-LOG2_E, -Self::splat(1.0));
+        (a).exp2() + (b).exp2()
+    }
+    #[inline]
+    fn tanh(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359924681001892137426660660756662389692043961734752676409787833023288579071980359);
+        let x = self;
+        let exp2x = (x * (LOG2_E * Self::splat(2.0))).exp2();
+        (exp2x - Self::splat(1.0)) / (exp2x + Self::splat(1.0))
+    }
+    #[inline]
+    fn asinh(self) -> Self {
+        let x = self;
+        (x + (x * x + Self::splat(1.0)).sqrt()).ln()
+    }
+    #[inline]
+    fn acosh(self) -> Self {
+        let NAN = Self::splat(f64::NAN);
+        let x = self;
+        ((x).lanes_lt(Self::splat(1.0))).select(NAN, (x + (x * x - Self::splat(1.0)).sqrt()).ln())
+    }
+    #[inline]
+    fn atanh(self) -> Self {
+        let x = self;
+        ((Self::splat(1.0) + x).ln() - (Self::splat(1.0) - x).ln()) * Self::splat(0.5)
+    }
+    #[inline]
+    fn asin(self) -> Self {
+        let PI_BY_2 = Self::splat(1.5707963267948966192313216916397514420986);
+        let arg = self;
+        let LIM = Self::splat(0.70710678118654752440);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
+        let s = ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
+        let x =
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
+        let y = (Self::splat(0.8373648093412319f64))
+            .mul_add(x * x, -Self::splat(2.980106295592163f64))
+            .mul_add(x * x, Self::splat(5.042442367613399f64))
+            .mul_add(x * x, -Self::splat(5.227353021050702f64))
+            .mul_add(x * x, Self::splat(3.7146677455757984f64))
+            .mul_add(x * x, -Self::splat(1.8827672802928515f64))
+            .mul_add(x * x, Self::splat(0.7180951142924303f64))
+            .mul_add(x * x, -Self::splat(0.19178725657932066f64))
+            .mul_add(x * x, Self::splat(0.05210781979238637f64))
+            .mul_add(x * x, Self::splat(0.00485554931570699f64))
+            .mul_add(x * x, Self::splat(0.014746118856810628f64))
+            .mul_add(x * x, Self::splat(0.017287003548468568f64))
+            .mul_add(x * x, Self::splat(0.022376015418082405f64))
+            .mul_add(x * x, Self::splat(0.030381795054318782f64))
+            .mul_add(x * x, Self::splat(0.04464286065908419f64))
+            .mul_add(x * x, Self::splat(0.07499999995639162f64))
+            .mul_add(x * x, Self::splat(0.1666666666668809f64))
+            .mul_add(x * x, Self::splat(0.9999999999999997f64))
+            * x;
+        ((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s)
+    }
+    #[inline]
+    fn acos(self) -> Self {
+        let PI_BY_2 = Self::splat(1.5707963267948966192313216916397514420986);
+        let PI = Self::splat(3.1415926535897932384626433832795028841972);
+        let arg = self;
+        let LIM = Self::splat(0.70710678118654752440);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
+        let s = ((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
+        let x =
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
+        let y = (Self::splat(0.6668533325236312f64))
+            .mul_add(x * x, -Self::splat(2.203633342583737f64))
+            .mul_add(x * x, Self::splat(3.4682293590554205f64))
+            .mul_add(x * x, -Self::splat(3.31825365991194f64))
+            .mul_add(x * x, Self::splat(2.1679686827931266f64))
+            .mul_add(x * x, -Self::splat(0.9934711561764131f64))
+            .mul_add(x * x, Self::splat(0.34673516466685284f64))
+            .mul_add(x * x, -Self::splat(0.07465114063751678f64))
+            .mul_add(x * x, Self::splat(0.02708987879711642f64))
+            .mul_add(x * x, Self::splat(0.011875258490214528f64))
+            .mul_add(x * x, Self::splat(0.01755397524017199f64))
+            .mul_add(x * x, Self::splat(0.022358737646075745f64))
+            .mul_add(x * x, Self::splat(0.03038253331569182f64))
+            .mul_add(x * x, Self::splat(0.04464284149373235f64))
+            .mul_add(x * x, Self::splat(0.07500000021866425f64))
+            .mul_add(x * x, Self::splat(0.16666666666545776f64))
+            .mul_add(x * x, Self::splat(1.000000000000001f64))
+            * x;
+        ((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
+    }
+    #[inline]
+    fn atan(self) -> Self {
+        let PI_BY_2 = Self::splat(1.5707963267948966192313216916397514420986);
+        let arg = self;
+        let LIM = Self::splat(1.0);
+        let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
+        let x = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
+        let y = (-Self::splat(0.000039339860635465445f64))
+            .mul_add(x * x, Self::splat(0.0004066164434836197f64))
+            .mul_add(x * x, -Self::splat(0.001986001768572495f64))
+            .mul_add(x * x, Self::splat(0.006143174006145858f64))
+            .mul_add(x * x, -Self::splat(0.013667536945096575f64))
+            .mul_add(x * x, Self::splat(0.023696745325204483f64))
+            .mul_add(x * x, -Self::splat(0.03413639435272701f64))
+            .mul_add(x * x, Self::splat(0.043317460873511335f64))
+            .mul_add(x * x, -Self::splat(0.05106904370972279f64))
+            .mul_add(x * x, Self::splat(0.058384099848191776f64))
+            .mul_add(x * x, -Self::splat(0.0665730796562759f64))
+            .mul_add(x * x, Self::splat(0.07690840682218662f64))
+            .mul_add(x * x, -Self::splat(0.09090746301914292f64))
+            .mul_add(x * x, Self::splat(0.11111099019519444f64))
+            .mul_add(x * x, -Self::splat(0.1428571373381894f64))
+            .mul_add(x * x, Self::splat(0.19999999986592576f64))
+            .mul_add(x * x, -Self::splat(0.3333333333320309f64))
+            .mul_add(x * x, Self::splat(0.9999999999999978f64))
+            * x;
+        ((arg.abs()).lanes_lt(LIM)).select(y, c - y)
+    }
+    #[inline]
+    fn atan2(self, x: Self) -> Self {
+        let PI_BY_8 = Self::splat(0.3926990816987241548078304229099378605247);
+        let TAN_PI_BY_8 =Self ::splat (0.4142135623730950488016887242096980785697285106532352473741716556853198927224463663590653778617382114);
+        let PI_BY_2 = Self::splat(1.5707963267948966192313216916397514420986);
+        let PI = Self::splat(3.1415926535897932384626433832795028841972);
+        let y = self;
+        let x1 = x.abs();
+        let y1 = y.abs();
+        let x2 = ((x1).lanes_gt(y1)).select(x1, y1);
+        let y2 = ((x1).lanes_gt(y1)).select(y1, x1);
+        let x3 = TAN_PI_BY_8 * y2 + x2;
+        let y3 = (-TAN_PI_BY_8) * x2 + y2;
+        let z = y3 / x3;
+        let a = (-Self::splat(0.006954938736046998f64))
+            .mul_add(z * z, Self::splat(0.018449085999592458f64))
+            .mul_add(z * z, -Self::splat(0.027729176254205356f64))
+            .mul_add(z * z, Self::splat(0.0332359412804085f64))
+            .mul_add(z * z, -Self::splat(0.03678285477572582f64))
+            .mul_add(z * z, Self::splat(0.03996091862436943f64))
+            .mul_add(z * z, -Self::splat(0.043473681490793725f64))
+            .mul_add(z * z, Self::splat(0.04761863716846753f64))
+            .mul_add(z * z, -Self::splat(0.05263155087567416f64))
+            .mul_add(z * z, Self::splat(0.05882352795939899f64))
+            .mul_add(z * z, -Self::splat(0.06666666661070236f64))
+            .mul_add(z * z, Self::splat(0.07692307692150929f64))
+            .mul_add(z * z, -Self::splat(0.0909090909090601f64))
+            .mul_add(z * z, Self::splat(0.1111111111111107f64))
+            .mul_add(z * z, -Self::splat(0.14285714285714282f64))
+            .mul_add(z * z, Self::splat(0.19999999999999998f64))
+            .mul_add(z * z, -Self::splat(0.33333333333333326f64))
+            .mul_add(z * z, Self::splat(1f64))
+            * z
+            + PI_BY_8;
+        let q1 = ((x1).lanes_gt(y1)).select(a, PI_BY_2 - a);
+        let q12 = ((x).lanes_ge(Self::splat(0.0))).select(q1, PI - q1);
+        ((y).lanes_ge(Self::splat(0.0))).select(q12, -q12)
+    }
+    #[inline]
+    fn exp2(self) -> Self {
+        let EXP2_MAX = Self::splat(1023.0f64);
+        let EXP2_MIN = -Self::splat(1023.0f64);
+        let EXP2_SCALE = Self::splat(4503599627370496.0f64);
+        let EXP2_ONE = Self::splat(4607182418800017408.0f64);
+        let INFINITY = Self::splat(f64::INFINITY);
+        let arg = self;
+        let r = arg.round();
+        let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u64>());
+        let x = arg - r;
+        let y = (Self::splat(0.00000000044566754636398603f64))
+            .mul_add(x, Self::splat(0.000000007075803175956986f64))
+            .mul_add(x, Self::splat(0.00000010178051728089911f64))
+            .mul_add(x, Self::splat(0.0000013215422480586546f64))
+            .mul_add(x, Self::splat(0.000015252733853280778f64))
+            .mul_add(x, Self::splat(0.00015403530485719912f64))
+            .mul_add(x, Self::splat(0.0013333558146396002f64))
+            .mul_add(x, Self::splat(0.009618129107567618f64))
+            .mul_add(x, Self::splat(0.05550410866482166f64))
+            .mul_add(x, Self::splat(0.2402265069591022f64))
+            .mul_add(x, Self::splat(0.6931471805599452f64))
+            .mul_add(x, Self::splat(1f64))
+            * mul;
+        let y1 = ((arg).lanes_gt(EXP2_MAX)).select(INFINITY, y);
+        ((r).lanes_lt(EXP2_MIN)).select(Self::splat(0.0), y1)
+    }
+    #[inline]
+    fn exp(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359924681001892137426660660756662389692043961734752676409787833023288579071980359);
+        let arg = self;
+        (arg * LOG2_E).exp2()
+    }
+    #[inline]
+    fn exp_m1(self) -> Self {
+        let LOG2_E =Self ::splat (1.442695040888963407359924681001892137426660660756662389692043961734752676409787833023288579071980359);
+        let EXP2_SCALE = Self::splat(4503599627370496.0f64);
+        let EXP2_ONE = Self::splat(4607182418800017408.0f64);
+        let arg = self;
+        let scaled = arg * LOG2_E;
+        let r = scaled.round();
+        let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u64>());
+        let x = scaled - r;
+        (Self::splat(0.00000000044566754636398603f64))
+            .mul_add(x, Self::splat(0.000000007075803175956986f64))
+            .mul_add(x, Self::splat(0.00000010178051728089911f64))
+            .mul_add(x, Self::splat(0.0000013215422480586546f64))
+            .mul_add(x, Self::splat(0.000015252733853280778f64))
+            .mul_add(x, Self::splat(0.00015403530485719912f64))
+            .mul_add(x, Self::splat(0.0013333558146396002f64))
+            .mul_add(x, Self::splat(0.009618129107567618f64))
+            .mul_add(x, Self::splat(0.05550410866482166f64))
+            .mul_add(x, Self::splat(0.2402265069591022f64))
+            .mul_add(x, Self::splat(0.6931471805599452f64))
+            .mul_add(x, -Self::splat(0.0000000000000000061353609179806035f64))
+            * mul
+            + (mul - Self::splat(1.0))
+    }
+    #[inline]
+    fn log2(self) -> Self {
+        let ONE_BITS = Self::UintType::splat(0x3ff0000000000000_u64);
+        let ONE_MASK = Self::UintType::splat(0x000fffffffffffff_u64);
+        let MIN_POSITIVE = Self::splat(f64::MIN_POSITIVE);
+        let INFINITY = Self::splat(f64::INFINITY);
+        let NAN = Self::splat(f64::NAN);
+        let LOG2_OFFSET = Self::IntType::splat(1023_i64);
+        let LOG2_SHIFT = Self::IntType::splat(52_i64);
+        let arg = self;
+        let arg_bits = arg.to_bits();
+        let exponent = (arg_bits.cast::<i64>() >> LOG2_SHIFT) - LOG2_OFFSET;
+        let x = Self::from_bits((arg_bits & ONE_MASK) | ONE_BITS) - Self::splat(1.5);
+        let y = (Self::splat(0.000059440811569894275f64))
+            .mul_add(x, -Self::splat(0.00009384549305785918f64))
+            .mul_add(x, Self::splat(0.00007056268243091807f64))
+            .mul_add(x, -Self::splat(0.00011279762643562555f64))
+            .mul_add(x, Self::splat(0.00022472329897976745f64))
+            .mul_add(x, -Self::splat(0.00036098242513245754f64))
+            .mul_add(x, Self::splat(0.0005692370613966115f64))
+            .mul_add(x, -Self::splat(0.0009250629378630191f64))
+            .mul_add(x, Self::splat(0.0015163928320102102f64))
+            .mul_add(x, -Self::splat(0.002502038922613527f64))
+            .mul_add(x, Self::splat(0.004169747986750192f64))
+            .mul_add(x, -Self::splat(0.007036450720529529f64))
+            .mul_add(x, Self::splat(0.01206251033391092f64))
+            .mul_add(x, -Self::splat(0.021109393020516138f64))
+            .mul_add(x, Self::splat(0.03799690641909651f64))
+            .mul_add(x, -Self::splat(0.07124419953811696f64))
+            .mul_add(x, Self::splat(0.14248839910020777f64))
+            .mul_add(x, -Self::splat(0.3205988979754245f64))
+            .mul_add(x, Self::splat(0.9617966939259754f64))
+            .mul_add(x, Self::splat(0.5849625007211563f64));
+        ((arg).lanes_lt(Self::splat(0.0))).select(
+            -NAN,
+            ((arg).lanes_lt(MIN_POSITIVE)).select(-INFINITY, y + (exponent.cast::<f64>())),
+        )
+    }
+    #[inline]
+    fn ln_1p(self) -> Self {
+        let arg = self;
+        (Self::splat(1.0) + arg).ln()
+    }
+    #[inline]
+    fn ln(self) -> Self {
+        let RECIP_LOG2_E = Self::splat(0.6931471805599453094172321214581765680755);
+        let arg = self;
+        (arg).log2() * RECIP_LOG2_E
+    }
+    #[inline]
+    fn log10(self) -> Self {
+        let RECIP_LOG2_10 = Self::splat(0.3010299956639811952137388947244930267682);
+        let arg = self;
+        (arg).log2() * RECIP_LOG2_10
+    }
+    #[inline]
+    fn log(self, base: Self) -> Self {
+        let arg = self;
+        (arg).log2() / (base).log2()
+    }
+    #[inline]
+    fn powf(self, y: Self) -> Self {
+        let arg = self;
+        ((arg).log2() * y).exp2()
+    }
+    #[inline]
+    fn powi(self, y: Self::IntType) -> Self {
+        let x = self;
+        (x).powf(y.cast::<f64>())
+    }
+    #[inline]
+    fn cbrt(self) -> Self {
+        let TWO_THIRDS = Self::splat(0.666666666666666667f64);
+        let ONE_THIRD = Self::splat(0.333333333333333333f64);
+        let EXP2_ONE = Self::splat(4607182418800017408.0f64);
+        let x = self;
+        let r = Self::from_bits(
+            ((x.abs().to_bits().cast::<f64>()).mul_add(ONE_THIRD, EXP2_ONE * TWO_THIRDS))
+                .cast::<u64>(),
+        );
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        let r = r + (x.abs() - r * r * r) / (Self::splat(3.0) * r * r);
+        r.copysign(x)
+    }
+    #[inline]
+    fn hypot(self, y: Self) -> Self {
+        let MIN_POSITIVE = Self::splat(f64::MIN_POSITIVE);
+        let x = self;
+        let xgty = (x.abs()).lanes_gt(y.abs());
+        let x2 = (xgty).select(x, y);
+        let y2 = (xgty).select(y, x);
+        ((x2.abs()).lanes_le(MIN_POSITIVE)).select(
+            x2,
+            x2.abs() * (Self::splat(1.0) + (y2 / x2) * (y2 / x2)).sqrt(),
+        )
+    }
+    #[inline]
+    fn sin(self) -> Self {
+        let RECIP_2PI = Self::splat(0.1591549430918953357688837633725143620345);
+        let arg = self;
+        let scaled = arg * RECIP_2PI;
+        let x = scaled - scaled.round();
+        (-Self::splat(0.00007959781355646816f64))
+            .mul_add(x * x, Self::splat(0.0011251039233483632f64))
+            .mul_add(x * x, -Self::splat(0.012029309381583758f64))
+            .mul_add(x * x, Self::splat(0.10422859417031961f64))
+            .mul_add(x * x, -Self::splat(0.718122207748485f64))
+            .mul_add(x * x, Self::splat(3.8199525744232106f64))
+            .mul_add(x * x, -Self::splat(15.094642576059076f64))
+            .mul_add(x * x, Self::splat(42.058693944862014f64))
+            .mul_add(x * x, -Self::splat(76.7058597530604f64))
+            .mul_add(x * x, Self::splat(81.60524927607504f64))
+            .mul_add(x * x, -Self::splat(41.34170224039976f64))
+            .mul_add(x * x, Self::splat(6.283185307179586f64))
+            * x
+    }
+    #[inline]
+    fn cos(self) -> Self {
+        let RECIP_2PI = Self::splat(0.1591549430918953357688837633725143620345);
+        let arg = self;
+        let scaled = arg * RECIP_2PI;
+        let x = scaled - scaled.round();
+        (Self::splat(0.00002092503869007534f64))
+            .mul_add(x * x, -Self::splat(0.0003214576104012376f64))
+            .mul_add(x * x, Self::splat(0.003779202401314546f64))
+            .mul_add(x * x, -Self::splat(0.03638267368288368f64))
+            .mul_add(x * x, Self::splat(0.28200593868080975f64))
+            .mul_add(x * x, -Self::splat(1.7143907074899654f64))
+            .mul_add(x * x, Self::splat(7.903536371025055f64))
+            .mul_add(x * x, -Self::splat(26.426256783358706f64))
+            .mul_add(x * x, Self::splat(60.244641371876135f64))
+            .mul_add(x * x, -Self::splat(85.45681720669371f64))
+            .mul_add(x * x, Self::splat(64.9393940226683f64))
+            .mul_add(x * x, -Self::splat(19.739208802178716f64))
+            .mul_add(x * x, Self::splat(1f64))
+    }
+    #[inline]
+    fn tan(self) -> Self {
+        let RECIP_PI = Self::splat(0.3183098861837906715377675267450287240689);
+        let arg = self;
+        let scaled = arg * RECIP_PI;
+        let x = scaled - scaled.round();
+        let recip = Self::splat(1.0) / (x * x - Self::splat(0.25));
+        let y = (Self::splat(0.00015634929503112444f64))
+            .mul_add(x * x, Self::splat(0.00010749666907629025f64))
+            .mul_add(x * x, Self::splat(0.00040923484089717195f64))
+            .mul_add(x * x, Self::splat(0.0008549505315816931f64))
+            .mul_add(x * x, Self::splat(0.0019412482440671268f64))
+            .mul_add(x * x, Self::splat(0.004371782765072613f64))
+            .mul_add(x * x, Self::splat(0.009879869124007078f64))
+            .mul_add(x * x, Self::splat(0.02251293831770456f64))
+            .mul_add(x * x, Self::splat(0.05263664423645279f64))
+            .mul_add(x * x, Self::splat(0.13476940059037382f64))
+            .mul_add(x * x, Self::splat(0.5577362635648092f64))
+            .mul_add(x * x, -Self::splat(0.7853981633974483f64))
+            * x;
+        y * recip
+    }
+}
diff --git a/crates/std_float/src/test_libm.rs b/crates/std_float/src/test_libm.rs
new file mode 100644
index 00000000000..55bd9f266fa
--- /dev/null
+++ b/crates/std_float/src/test_libm.rs
@@ -0,0 +1,715 @@
+const NUM_ITER: usize = 0x10000;
+
+macro_rules! test_vec {
+    (
+        scalar_type: $scalar_type: ty,
+        vector_type: $vector_type: ty,
+        int_vector_type: $int_vector_type: ty,
+        scalar_fn: $scalar_fn: expr,
+        vector_fn: $vector_fn: expr,
+        limit: $limit: expr,
+        x: $x: expr,
+    ) => ({
+        {
+            #![allow(non_camel_case_types)]
+            use crate::StdLibm;
+            type scalar_type = $scalar_type;
+            type vector_type = $vector_type;
+            let sf = $scalar_fn;
+            let vf = $vector_fn;
+            let yref = <$vector_type>::from_array([sf($x[0]), sf($x[1]), sf($x[2]), sf($x[3])]);
+            let y = vf($x);
+            let e = (y - yref);
+            // Note: is e is NaN then the comparison will fail.
+            let bad_val = e.abs().lanes_gt($limit).any();
+            if bad_val || y.is_nan() != yref.is_nan() || y.is_infinite() != yref.is_infinite() {
+                panic!("\nx     ={:20.16?}\ne     ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector={:020x?}\nscalar={:020x?}\nvector_fn={}",
+                    $x,
+                    e,
+                    $limit,
+                    y, yref,
+                    y.to_bits(), yref.to_bits(),
+                    stringify!($vector_fn)
+                );
+            }
+        }
+    });
+}
+
+macro_rules! test_range {
+    (
+        min: $min: expr,
+        max: $max: expr,
+        limit: $limit: expr,
+        scalar_fn: $scalar_fn: expr,
+        vector_fn: $vector_fn: expr,
+    ) => {{
+        #![allow(non_camel_case_types)]
+        #![allow(dead_code)]
+        #![allow(clippy::approx_constant)]
+        type scalar_type = f32;
+        type vector_type = core_simd::f32x4;
+        type int_vector_type = core_simd::i32x4;
+        const PI: scalar_type = 3.1415926535897932384626433832795028841972;
+
+        let limit = vector_type::splat($limit);
+        let b = (($max) - ($min)) * (1.0 / NUM_ITER as scalar_type);
+        let a = $min;
+        for i in (0..NUM_ITER / 4) {
+            let fi = (i * 4) as scalar_type;
+            let x = vector_type::from_array([
+                (fi + 0.0) * b + a,
+                (fi + 1.0) * b + a,
+                (fi + 2.0) * b + a,
+                (fi + 3.0) * b + a,
+            ]);
+            test_vec!(
+                scalar_type: f32,
+                vector_type: core_simd::f32x4,
+                int_vector_type: core_simd::i32x4,
+                scalar_fn: $scalar_fn,
+                vector_fn: $vector_fn,
+                limit: limit,
+                x: x,
+            )
+        }
+    }
+    {
+        #![allow(non_camel_case_types)]
+        #![allow(dead_code)]
+        #![allow(unused)]
+        #![allow(clippy::approx_constant)]
+        type scalar_type = f64;
+        type vector_type = core_simd::f64x4;
+        type int_vector_type = core_simd::i64x4;
+        const PI: scalar_type = 3.1415926535897932384626433832795028841972;
+
+        let limit = vector_type::splat($limit);
+        let b = (($max) - ($min)) * (1.0 / NUM_ITER as scalar_type);
+        let a = $min;
+        for i in (0..NUM_ITER / 4) {
+            let fi = (i * 4) as scalar_type;
+            let x = vector_type::from_array([
+                (fi + 0.0) * b + a,
+                (fi + 1.0) * b + a,
+                (fi + 2.0) * b + a,
+                (fi + 3.0) * b + a,
+            ]);
+            test_vec!(
+                scalar_type: f64,
+                vector_type: core_simd::f64x4,
+                int_vector_type: core_simd::i64x4,
+                scalar_fn: $scalar_fn,
+                vector_fn: $vector_fn,
+                limit: limit,
+                x: x,
+            )
+        }
+    }};
+    (
+        value: $value: expr,
+        limit: $limit: expr,
+        scalar_fn: $scalar_fn: expr,
+        vector_fn: $vector_fn: expr,
+    ) => {{
+        #![allow(non_camel_case_types)]
+        #![allow(dead_code)]
+        {
+            type scalar_type = f32;
+            type vector_type = core_simd::f32x4;
+            let limit = <core_simd::f32x4>::splat($limit);
+            let x = <core_simd::f32x4>::splat($value);
+            test_vec!(
+                scalar_type: f32,
+                vector_type: core_simd::f32x4,
+                int_vector_type: core_simd::i32x4,
+                scalar_fn: $scalar_fn,
+                vector_fn: $vector_fn,
+                limit: limit,
+                x: x,
+            )
+        }
+        {
+            type scalar_type = f64;
+            type vector_type = core_simd::f64x4;
+            let limit = <core_simd::f64x4>::splat($limit);
+            let x = <core_simd::f64x4>::splat($value);
+            test_vec!(
+                scalar_type: f64,
+                vector_type: core_simd::f64x4,
+                int_vector_type: core_simd::i64x4,
+                scalar_fn: $scalar_fn,
+                vector_fn: $vector_fn,
+                limit: limit,
+                x: x,
+            )
+        }
+    }};
+}
+
+#[test]
+fn sin() {
+    test_range!(
+        min: -PI/4.0,
+        max: PI/4.0,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.sin(),
+        vector_fn: |x : vector_type| x.sin(),
+    );
+
+    test_range!(
+        min: -PI/2.0,
+        max: PI/2.0,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.sin(),
+        vector_fn: |x : vector_type| x.sin(),
+    );
+
+    test_range!(
+        min: -PI,
+        max: PI,
+        limit: scalar_type::EPSILON * 10.0,
+        scalar_fn: |x : scalar_type| x.sin(),
+        vector_fn: |x : vector_type| x.sin(),
+    );
+}
+
+#[test]
+fn cos() {
+    // In the range +/- pi/4 the input has 1 ulp of error.
+    test_range!(
+        min: -PI/4.0,
+        max: PI/4.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.cos(),
+        vector_fn: |x : vector_type| x.cos(),
+    );
+
+    // In the range +/- pi/2 the input and output has 2 ulp of error.
+    test_range!(
+        min: -PI/2.0,
+        max: PI/2.0,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.cos(),
+        vector_fn: |x : vector_type| x.cos(),
+    );
+
+    // In the range +/- pi the input has 4 ulp of error and the output has 5.
+    // Note that the scalar cos also has this error but the implementation
+    // is different.
+    test_range!(
+        min: -PI,
+        max: PI,
+        limit: scalar_type::EPSILON * 10.0,
+        scalar_fn: |x : scalar_type| x.cos(),
+        vector_fn: |x : vector_type| x.cos(),
+    );
+}
+
+#[test]
+fn tan() {
+    // For the outsides, reciprocal accuracy is important.
+    // Note that the vector function correctly gets -inf for -PI/2
+    // but the scalar function does not.
+    test_range!(
+        min: -PI/2.0 + 0.00001,
+        max: -PI/4.0,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.tan().recip(),
+        vector_fn: |x : vector_type| x.tan().recip(),
+    );
+
+    // For the insides, absolute accuracy is important.
+    test_range!(
+        min: -PI/4.0,
+        max: PI/4.0,
+        limit: scalar_type::EPSILON * 3.0,
+        scalar_fn: |x : scalar_type| x.tan(),
+        vector_fn: |x : vector_type| x.tan(),
+    );
+
+    test_range!(
+        min: PI/4.0,
+        max: PI/2.0 - 0.00001,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.tan().recip(),
+        vector_fn: |x : vector_type| x.tan().recip(),
+    );
+}
+
+#[test]
+fn asin() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 8.0,
+        scalar_fn: |x : scalar_type| x.asin(),
+        vector_fn: |x : vector_type| x.asin(),
+    );
+
+    test_range!(
+        min: -0.5,
+        max: 0.5,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.asin(),
+        vector_fn: |x : vector_type| x.asin(),
+    );
+}
+
+#[test]
+fn atan() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 8.0,
+        scalar_fn: |x : scalar_type| x.atan(),
+        vector_fn: |x : vector_type| x.atan(),
+    );
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 8.0,
+        scalar_fn: |x : scalar_type| x.recip().atan(),
+        vector_fn: |x : vector_type| x.recip().atan(),
+    );
+}
+
+#[test]
+fn acos() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.acos(),
+        vector_fn: |x : vector_type| x.acos(),
+    );
+}
+
+#[test]
+fn exp2() {
+    test_range!(
+        value: -126.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.exp2(),
+        vector_fn: |x : vector_type| x.exp2(),
+    );
+
+    test_range!(
+        value: 127.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.exp2(),
+        vector_fn: |x : vector_type| x.exp2(),
+    );
+
+    // Denormals not supported.
+    //
+    // test_range!(
+    //     value: -127.0,
+    //     limit: scalar_type::EPSILON * 2.0,
+    //     scalar_fn: |x : scalar_type| x.exp2(),
+    //     vector_fn: |x : vector_type| x.exp2(),
+    // );
+
+    // Large negatives give zero
+    test_range!(
+        value: -200.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.exp2(),
+        vector_fn: |x : vector_type| x.exp2(),
+    );
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.exp2(),
+        vector_fn: |x : vector_type| x.exp2(),
+    );
+
+    // Accuracy is good over the entire range.
+    // (Range expanded because windows exp->log is less accurate)
+    test_range!(
+        min: -126.0,
+        max: 126.0,
+        limit: scalar_type::EPSILON * 4.0,
+        scalar_fn: |x : scalar_type| x.exp2().log2(),
+        vector_fn: |x : vector_type| x.exp2().log2(),
+    );
+}
+
+#[test]
+fn exp() {
+    test_range!(
+        min: -2.0,
+        max: 0.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.exp(),
+        vector_fn: |x : vector_type| x.exp(),
+    );
+
+    test_range!(
+        min: 0.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 8.0,
+        scalar_fn: |x : scalar_type| x.exp(),
+        vector_fn: |x : vector_type| x.exp(),
+    );
+}
+
+#[test]
+fn log2() {
+    // Both should give NaN.
+    test_range!(
+        value: -1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+
+    // Both should give Inf.
+    test_range!(
+        value: 0.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+
+    // Note that the std library may accept denormals.
+    test_range!(
+        value: scalar_type::MIN_POSITIVE,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+
+    test_range!(
+        min: 2.0,
+        max: 4.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+
+    test_range!(
+        min: 4.0,
+        max: 8.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log2(),
+        vector_fn: |x : vector_type| x.log2(),
+    );
+}
+
+#[test]
+fn ln() {
+    // test_range!(
+    //     value: -1.0,
+    //     limit: scalar_type::EPSILON * 2.0,
+    //     scalar_fn: |x : scalar_type| x.ln(),
+    //     vector_fn: |x : vector_type| x.ln(),
+    // );
+
+    // test_range!(
+    //     value: 0.0,
+    //     limit: scalar_type::EPSILON * 2.0,
+    //     scalar_fn: |x : scalar_type| x.ln(),
+    //     vector_fn: |x : vector_type| x.ln(),
+    // );
+
+    // test_range!(
+    //     value: scalar_type::MIN_POSITIVE,
+    //     limit: scalar_type::EPSILON * 2.0,
+    //     scalar_fn: |x : scalar_type| x.ln(),
+    //     vector_fn: |x : vector_type| x.ln(),
+    // );
+
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.ln(),
+        vector_fn: |x : vector_type| x.ln(),
+    );
+
+    test_range!(
+        min: 2.0,
+        max: 4.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.ln(),
+        vector_fn: |x : vector_type| x.ln(),
+    );
+
+    test_range!(
+        min: 4.0,
+        max: 8.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.ln(),
+        vector_fn: |x : vector_type| x.ln(),
+    );
+}
+
+#[test]
+fn log10() {
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log10(),
+        vector_fn: |x : vector_type| x.log10(),
+    );
+
+    test_range!(
+        min: 2.0,
+        max: 4.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log10(),
+        vector_fn: |x : vector_type| x.log10(),
+    );
+
+    test_range!(
+        min: 4.0,
+        max: 8.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log10(),
+        vector_fn: |x : vector_type| x.log10(),
+    );
+}
+
+#[test]
+fn ln_1p() {
+    test_range!(
+        min: 0.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.ln_1p(),
+        vector_fn: |x : vector_type| x.ln_1p(),
+    );
+}
+
+#[test]
+fn log() {
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.log(2.0),
+        vector_fn: |x : vector_type| x.log(vector_type::splat(2.0)),
+    );
+}
+
+#[test]
+fn powf() {
+    test_range!(
+        min: 0.5,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.powf(2.0),
+        vector_fn: |x : vector_type| x.powf(vector_type::splat(2.0)),
+    );
+
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 5.0,
+        scalar_fn: |x : scalar_type| x.powf(2.0),
+        vector_fn: |x : vector_type| x.powf(vector_type::splat(2.0)),
+    );
+}
+
+#[test]
+fn powi() {
+    test_range!(
+        min: 0.5,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.powi(2),
+        vector_fn: |x : vector_type| x.powi(int_vector_type::splat(2)),
+    );
+
+    test_range!(
+        min: 1.0,
+        max: 2.0,
+        limit: scalar_type::EPSILON * 5.0,
+        scalar_fn: |x : scalar_type| x.powi(2),
+        vector_fn: |x : vector_type| x.powi(int_vector_type::splat(2)),
+    );
+}
+
+#[test]
+fn sinh() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.sinh(),
+        vector_fn: |x : vector_type| x.sinh(),
+    );
+}
+
+#[test]
+fn cosh() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.cosh(),
+        vector_fn: |x : vector_type| x.cosh(),
+    );
+}
+
+#[test]
+fn tanh() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.tanh(),
+        vector_fn: |x : vector_type| x.tanh(),
+    );
+}
+
+#[test]
+fn asinh() {
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 3.0,
+        scalar_fn: |x : scalar_type| x.asinh(),
+        vector_fn: |x : vector_type| x.asinh(),
+    );
+}
+
+#[test]
+fn acosh() {
+    // Will be NAN in this range.
+    test_range!(
+        min: 0.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.acosh(),
+        vector_fn: |x : vector_type| x.acosh(),
+    );
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.acosh(),
+        vector_fn: |x : vector_type| x.acosh(),
+    );
+}
+
+#[test]
+fn atanh() {
+    test_range!(
+        value: -1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.atanh(),
+        vector_fn: |x : vector_type| x.atanh(),
+    );
+
+    test_range!(
+        value: 1.0,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.atanh(),
+        vector_fn: |x : vector_type| x.atanh(),
+    );
+
+    test_range!(
+        min: -0.75,
+        max: 0.75,
+        limit: scalar_type::EPSILON * 2.0,
+        scalar_fn: |x : scalar_type| x.atanh(),
+        vector_fn: |x : vector_type| x.atanh(),
+    );
+}
+
+#[test]
+fn cbrt() {
+    test_range!(
+        min: -8.0,
+        max: 8.0,
+        limit: scalar_type::EPSILON * 3.0,
+        scalar_fn: |x : scalar_type| x.cbrt(),
+        vector_fn: |x : vector_type| x.cbrt(),
+    );
+}
+
+#[test]
+fn hypot() {
+    test_range!(
+        min: 0.0,
+        max: 8.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.cos().hypot(x.sin()),
+        vector_fn: |x : vector_type| x.cos().hypot(x.sin()),
+    );
+
+    // Large values will mostly not overflow.
+    // test_range!(
+    //     value: scalar_type::MAX/8.0,
+    //     limit: scalar_type::MAX * scalar_type::EPSILON * 6.0,
+    //     scalar_fn: |x : scalar_type| x.hypot(x),
+    //     vector_fn: |x : vector_type| x.hypot(x),
+    // );
+
+    // Except for MAX.
+    // test_range!(
+    //     value: scalar_type::MAX,
+    //     limit: scalar_type::EPSILON * 6.0,
+    //     scalar_fn: |x : scalar_type| x.hypot(x),
+    //     vector_fn: |x : vector_type| x.hypot(x),
+    // );
+}
+
+#[test]
+fn atan2() {
+    // Studiously ignore -PI and PI where signs change erraticly.
+    test_range!(
+        min: -3.141,
+        max: 3.141,
+        limit: scalar_type::EPSILON * 8.0,
+        scalar_fn: |x : scalar_type| x.sin().atan2(x.cos()),
+        vector_fn: |x : vector_type| x.sin().atan2(x.cos()),
+    );
+
+    // East
+    test_range!(
+        value: 0.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.atan2(scalar_type::from(1.0)),
+        vector_fn: |x : vector_type| x.atan2(vector_type::splat(1.0)),
+    );
+
+    // West
+    test_range!(
+        value: 0.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.atan2(scalar_type::from(-1.0)),
+        vector_fn: |x : vector_type| x.atan2(vector_type::splat(-1.0)),
+    );
+
+    // North
+    test_range!(
+        value: 1.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.atan2(scalar_type::from(0.0)),
+        vector_fn: |x : vector_type| x.atan2(vector_type::splat(0.0)),
+    );
+
+    // South
+    test_range!(
+        value: -1.0,
+        limit: scalar_type::EPSILON * 6.0,
+        scalar_fn: |x : scalar_type| x.atan2(scalar_type::from(0.0)),
+        vector_fn: |x : vector_type| x.atan2(vector_type::splat(0.0)),
+    );
+}