From 1f03fdf048d2b4fda313bc55f2d24ab88effdf07 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 16 May 2024 23:38:13 +0100 Subject: [PATCH 1/8] Fix comment typos in AVX2 header --- Source/astcenc_vecmathlib_avx2_8.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 69b9c98c..966875a3 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -1146,7 +1146,7 @@ ASTCENC_SIMD_INLINE vint8 vtable_8bt_32bi(vint8 t0, vint8 t1, vint8 t2, vint8 t3 * @brief Return a vector of interleaved RGBA data. * * Input vectors have the value stored in the bottom 8 bits of each lane, - * with high bits set to zero. + * with high bits set to zero. * * Output vector stores a single RGBA texel packed in each lane. */ From d608da5a60ea921a8e67272ba0999a61a94507c7 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 17 May 2024 11:43:46 +0100 Subject: [PATCH 2/8] Remove arbitrary lane access from VLA code --- Source/UnitTest/test_simd.cpp | 1591 ++++++++++------- Source/astcenc_decompress_symbolic.cpp | 4 +- .../astcenc_ideal_endpoints_and_weights.cpp | 4 +- Source/astcenc_pick_best_endpoint_format.cpp | 2 +- Source/astcenc_vecmathlib_avx2_8.h | 36 +- 5 files changed, 964 insertions(+), 673 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index 9424c995..60b8e40f 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -199,14 +199,18 @@ TEST(vfloat, ChangeSign) vfloat a(-1.0f, 1.0f, -3.12f, 3.12f, -1.0f, 1.0f, -3.12f, 3.12f); vfloat b(-1.0f, -1.0f, 3.12f, 3.12f, -1.0f, -1.0f, 3.12f, 3.12f); vfloat r = change_sign(a, b); - EXPECT_EQ(r.lane<0>(), 1.0f); - EXPECT_EQ(r.lane<1>(), -1.0f); - EXPECT_EQ(r.lane<2>(), -3.12f); - EXPECT_EQ(r.lane<3>(), 3.12f); - EXPECT_EQ(r.lane<4>(), 1.0f); - EXPECT_EQ(r.lane<5>(), -1.0f); - EXPECT_EQ(r.lane<6>(), -3.12f); - EXPECT_EQ(r.lane<7>(), 3.12f); + + alignas(32) 
float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], -1.0f); + EXPECT_EQ(ra[2], -3.12f); + EXPECT_EQ(ra[3], 3.12f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], -1.0f); + EXPECT_EQ(ra[6], -3.12f); + EXPECT_EQ(ra[7], 3.12f); } /** @brief Test VLA atan. */ @@ -214,14 +218,18 @@ TEST(vfloat, Atan) { vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); vfloat r = atan(a); - EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f); - EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f); - EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f); - EXPECT_NEAR(r.lane<3>(), 1.123040f, 0.005f); - EXPECT_NEAR(r.lane<4>(), -0.149061f, 0.005f); - EXPECT_NEAR(r.lane<5>(), 0.000000f, 0.005f); - EXPECT_NEAR(r.lane<6>(), 0.733616f, 0.005f); - EXPECT_NEAR(r.lane<7>(), 1.123040f, 0.005f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_NEAR(ra[0], -0.149061f, 0.005f); + EXPECT_NEAR(ra[1], 0.000000f, 0.005f); + EXPECT_NEAR(ra[2], 0.733616f, 0.005f); + EXPECT_NEAR(ra[3], 1.123040f, 0.005f); + EXPECT_NEAR(ra[4], -0.149061f, 0.005f); + EXPECT_NEAR(ra[5], 0.000000f, 0.005f); + EXPECT_NEAR(ra[6], 0.733616f, 0.005f); + EXPECT_NEAR(ra[7], 1.123040f, 0.005f); } /** @brief Test VLA atan2. 
*/ @@ -230,14 +238,18 @@ TEST(vfloat, Atan2) vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f); vfloat r = atan2(a, b); - EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f); - EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f); - EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f); - EXPECT_NEAR(r.lane<3>(), 1.084357f, 0.005f); - EXPECT_NEAR(r.lane<4>(), -0.129816f, 0.005f); - EXPECT_NEAR(r.lane<5>(), 3.141592f, 0.005f); - EXPECT_NEAR(r.lane<6>(), 2.360342f, 0.005f); - EXPECT_NEAR(r.lane<7>(), 1.084357f, 0.005f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_NEAR(ra[0], -0.129816f, 0.005f); + EXPECT_NEAR(ra[1], 3.141592f, 0.005f); + EXPECT_NEAR(ra[2], 2.360342f, 0.005f); + EXPECT_NEAR(ra[3], 1.084357f, 0.005f); + EXPECT_NEAR(ra[4], -0.129816f, 0.005f); + EXPECT_NEAR(ra[5], 3.141592f, 0.005f); + EXPECT_NEAR(ra[6], 2.360342f, 0.005f); + EXPECT_NEAR(ra[7], 1.084357f, 0.005f); } #endif @@ -2020,42 +2032,54 @@ TEST(vint4, interleave_rgba8) TEST(vfloat8, UnalignedLoad) { vfloat8 a(&(f32_data[1])); - EXPECT_EQ(a.lane<0>(), 1.0f); - EXPECT_EQ(a.lane<1>(), 2.0f); - EXPECT_EQ(a.lane<2>(), 3.0f); - EXPECT_EQ(a.lane<3>(), 4.0f); - EXPECT_EQ(a.lane<4>(), 5.0f); - EXPECT_EQ(a.lane<5>(), 6.0f); - EXPECT_EQ(a.lane<6>(), 7.0f); - EXPECT_EQ(a.lane<7>(), 8.0f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 2.0f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 5.0f); + EXPECT_EQ(ra[5], 6.0f); + EXPECT_EQ(ra[6], 7.0f); + EXPECT_EQ(ra[7], 8.0f); } /** @brief Test scalar duplicated vfloat8 load. 
*/ TEST(vfloat8, ScalarDupLoad) { vfloat8 a(1.1f); - EXPECT_EQ(a.lane<0>(), 1.1f); - EXPECT_EQ(a.lane<1>(), 1.1f); - EXPECT_EQ(a.lane<2>(), 1.1f); - EXPECT_EQ(a.lane<3>(), 1.1f); - EXPECT_EQ(a.lane<4>(), 1.1f); - EXPECT_EQ(a.lane<5>(), 1.1f); - EXPECT_EQ(a.lane<6>(), 1.1f); - EXPECT_EQ(a.lane<7>(), 1.1f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.1f); + EXPECT_EQ(ra[1], 1.1f); + EXPECT_EQ(ra[2], 1.1f); + EXPECT_EQ(ra[3], 1.1f); + EXPECT_EQ(ra[4], 1.1f); + EXPECT_EQ(ra[5], 1.1f); + EXPECT_EQ(ra[6], 1.1f); + EXPECT_EQ(ra[7], 1.1f); } /** @brief Test scalar vfloat8 load. */ TEST(vfloat8, ScalarLoad) { vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); - EXPECT_EQ(a.lane<0>(), 1.1f); - EXPECT_EQ(a.lane<1>(), 2.2f); - EXPECT_EQ(a.lane<2>(), 3.3f); - EXPECT_EQ(a.lane<3>(), 4.4f); - EXPECT_EQ(a.lane<4>(), 5.5f); - EXPECT_EQ(a.lane<5>(), 6.6f); - EXPECT_EQ(a.lane<6>(), 7.7f); - EXPECT_EQ(a.lane<7>(), 8.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.1f); + EXPECT_EQ(ra[1], 2.2f); + EXPECT_EQ(ra[2], 3.3f); + EXPECT_EQ(ra[3], 4.4f); + EXPECT_EQ(ra[4], 5.5f); + EXPECT_EQ(ra[5], 6.6f); + EXPECT_EQ(ra[6], 7.7f); + EXPECT_EQ(ra[7], 8.8f); } /** @brief Test copy vfloat8 load. */ @@ -2063,28 +2087,36 @@ TEST(vfloat8, CopyLoad) { vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); vfloat8 a(s.m); - EXPECT_EQ(a.lane<0>(), 1.1f); - EXPECT_EQ(a.lane<1>(), 2.2f); - EXPECT_EQ(a.lane<2>(), 3.3f); - EXPECT_EQ(a.lane<3>(), 4.4f); - EXPECT_EQ(a.lane<4>(), 5.5f); - EXPECT_EQ(a.lane<5>(), 6.6f); - EXPECT_EQ(a.lane<6>(), 7.7f); - EXPECT_EQ(a.lane<7>(), 8.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.1f); + EXPECT_EQ(ra[1], 2.2f); + EXPECT_EQ(ra[2], 3.3f); + EXPECT_EQ(ra[3], 4.4f); + EXPECT_EQ(ra[4], 5.5f); + EXPECT_EQ(ra[5], 6.6f); + EXPECT_EQ(ra[6], 7.7f); + EXPECT_EQ(ra[7], 8.8f); } /** @brief Test vfloat8 zero. 
*/ TEST(vfloat8, Zero) { vfloat8 a = vfloat8::zero(); - EXPECT_EQ(a.lane<0>(), 0.0f); - EXPECT_EQ(a.lane<1>(), 0.0f); - EXPECT_EQ(a.lane<2>(), 0.0f); - EXPECT_EQ(a.lane<3>(), 0.0f); - EXPECT_EQ(a.lane<4>(), 0.0f); - EXPECT_EQ(a.lane<5>(), 0.0f); - EXPECT_EQ(a.lane<6>(), 0.0f); - EXPECT_EQ(a.lane<7>(), 0.0f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.0f); + EXPECT_EQ(ra[3], 0.0f); + EXPECT_EQ(ra[4], 0.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.0f); + EXPECT_EQ(ra[7], 0.0f); } /** @brief Test vfloat8 load1. */ @@ -2092,42 +2124,54 @@ TEST(vfloat8, Load1) { float s = 3.14f; vfloat8 a = vfloat8::load1(&s); - EXPECT_EQ(a.lane<0>(), 3.14f); - EXPECT_EQ(a.lane<1>(), 3.14f); - EXPECT_EQ(a.lane<2>(), 3.14f); - EXPECT_EQ(a.lane<3>(), 3.14f); - EXPECT_EQ(a.lane<4>(), 3.14f); - EXPECT_EQ(a.lane<5>(), 3.14f); - EXPECT_EQ(a.lane<6>(), 3.14f); - EXPECT_EQ(a.lane<7>(), 3.14f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 3.14f); + EXPECT_EQ(ra[1], 3.14f); + EXPECT_EQ(ra[2], 3.14f); + EXPECT_EQ(ra[3], 3.14f); + EXPECT_EQ(ra[4], 3.14f); + EXPECT_EQ(ra[5], 3.14f); + EXPECT_EQ(ra[6], 3.14f); + EXPECT_EQ(ra[7], 3.14f); } /** @brief Test vfloat8 loada. */ TEST(vfloat8, Loada) { vfloat8 a = vfloat8::loada(&(f32_data[0])); - EXPECT_EQ(a.lane<0>(), 0.0f); - EXPECT_EQ(a.lane<1>(), 1.0f); - EXPECT_EQ(a.lane<2>(), 2.0f); - EXPECT_EQ(a.lane<3>(), 3.0f); - EXPECT_EQ(a.lane<4>(), 4.0f); - EXPECT_EQ(a.lane<5>(), 5.0f); - EXPECT_EQ(a.lane<6>(), 6.0f); - EXPECT_EQ(a.lane<7>(), 7.0f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 1.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 5.0f); + EXPECT_EQ(ra[6], 6.0f); + EXPECT_EQ(ra[7], 7.0f); } /** @brief Test vfloat8 lane_id. 
*/ TEST(vfloat8, LaneID) { vfloat8 a = vfloat8::lane_id(); - EXPECT_EQ(a.lane<0>(), 0.0f); - EXPECT_EQ(a.lane<1>(), 1.0f); - EXPECT_EQ(a.lane<2>(), 2.0f); - EXPECT_EQ(a.lane<3>(), 3.0f); - EXPECT_EQ(a.lane<4>(), 4.0f); - EXPECT_EQ(a.lane<5>(), 5.0f); - EXPECT_EQ(a.lane<6>(), 6.0f); - EXPECT_EQ(a.lane<7>(), 7.0f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 1.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 5.0f); + EXPECT_EQ(ra[6], 6.0f); + EXPECT_EQ(ra[7], 7.0f); } /** @brief Test vfloat8 add. */ @@ -2136,14 +2180,18 @@ TEST(vfloat8, vadd) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a + b; - EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f); - EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f); - EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f); - EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f); - EXPECT_EQ(a.lane<4>(), 5.0f + 0.5f); - EXPECT_EQ(a.lane<5>(), 6.0f + 0.6f); - EXPECT_EQ(a.lane<6>(), 7.0f + 0.7f); - EXPECT_EQ(a.lane<7>(), 8.0f + 0.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f + 0.1f); + EXPECT_EQ(ra[1], 2.0f + 0.2f); + EXPECT_EQ(ra[2], 3.0f + 0.3f); + EXPECT_EQ(ra[3], 4.0f + 0.4f); + EXPECT_EQ(ra[4], 5.0f + 0.5f); + EXPECT_EQ(ra[5], 6.0f + 0.6f); + EXPECT_EQ(ra[6], 7.0f + 0.7f); + EXPECT_EQ(ra[7], 8.0f + 0.8f); } /** @brief Test vfloat8 sub. 
*/ @@ -2152,14 +2200,18 @@ TEST(vfloat8, vsub) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a - b; - EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f); - EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f); - EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f); - EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f); - EXPECT_EQ(a.lane<4>(), 5.0f - 0.5f); - EXPECT_EQ(a.lane<5>(), 6.0f - 0.6f); - EXPECT_EQ(a.lane<6>(), 7.0f - 0.7f); - EXPECT_EQ(a.lane<7>(), 8.0f - 0.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f - 0.1f); + EXPECT_EQ(ra[1], 2.0f - 0.2f); + EXPECT_EQ(ra[2], 3.0f - 0.3f); + EXPECT_EQ(ra[3], 4.0f - 0.4f); + EXPECT_EQ(ra[4], 5.0f - 0.5f); + EXPECT_EQ(ra[5], 6.0f - 0.6f); + EXPECT_EQ(ra[6], 7.0f - 0.7f); + EXPECT_EQ(ra[7], 8.0f - 0.8f); } /** @brief Test vfloat8 mul. */ @@ -2168,14 +2220,18 @@ TEST(vfloat8, vmul) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a * b; - EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f); - EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f); - EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f); - EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f); - EXPECT_EQ(a.lane<4>(), 5.0f * 0.5f); - EXPECT_EQ(a.lane<5>(), 6.0f * 0.6f); - EXPECT_EQ(a.lane<6>(), 7.0f * 0.7f); - EXPECT_EQ(a.lane<7>(), 8.0f * 0.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f * 0.1f); + EXPECT_EQ(ra[1], 2.0f * 0.2f); + EXPECT_EQ(ra[2], 3.0f * 0.3f); + EXPECT_EQ(ra[3], 4.0f * 0.4f); + EXPECT_EQ(ra[4], 5.0f * 0.5f); + EXPECT_EQ(ra[5], 6.0f * 0.6f); + EXPECT_EQ(ra[6], 7.0f * 0.7f); + EXPECT_EQ(ra[7], 8.0f * 0.8f); } /** @brief Test vfloat8 mul. 
*/ @@ -2184,14 +2240,18 @@ TEST(vfloat8, vsmul) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); float b = 3.14f; a = a * b; - EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f); - EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f); - EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f); - EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f); - EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f); - EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f); - EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f); - EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f * 3.14f); + EXPECT_EQ(ra[1], 2.0f * 3.14f); + EXPECT_EQ(ra[2], 3.0f * 3.14f); + EXPECT_EQ(ra[3], 4.0f * 3.14f); + EXPECT_EQ(ra[4], 5.0f * 3.14f); + EXPECT_EQ(ra[5], 6.0f * 3.14f); + EXPECT_EQ(ra[6], 7.0f * 3.14f); + EXPECT_EQ(ra[7], 8.0f * 3.14f); } /** @brief Test vfloat8 mul. */ @@ -2200,14 +2260,18 @@ TEST(vfloat8, svmul) float a = 3.14f; vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); b = a * b; - EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f); - EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f); - EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f); - EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f); - EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f); - EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f); - EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f); - EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f); + + alignas(32) float ra[8]; + storea(b, ra); + + EXPECT_EQ(ra[0], 3.14f * 1.0f); + EXPECT_EQ(ra[1], 3.14f * 2.0f); + EXPECT_EQ(ra[2], 3.14f * 3.0f); + EXPECT_EQ(ra[3], 3.14f * 4.0f); + EXPECT_EQ(ra[4], 3.14f * 5.0f); + EXPECT_EQ(ra[5], 3.14f * 6.0f); + EXPECT_EQ(ra[6], 3.14f * 7.0f); + EXPECT_EQ(ra[7], 3.14f * 8.0f); } /** @brief Test vfloat8 div. 
*/ @@ -2216,14 +2280,18 @@ TEST(vfloat8, vdiv) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a / b; - EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f); - EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f); - EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f); - EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f); - EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f); - EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f); - EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f); - EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 1.0f / 0.1f); + EXPECT_EQ(ra[1], 2.0f / 0.2f); + EXPECT_EQ(ra[2], 3.0f / 0.3f); + EXPECT_EQ(ra[3], 4.0f / 0.4f); + EXPECT_EQ(ra[4], 5.0f / 0.5f); + EXPECT_EQ(ra[5], 6.0f / 0.6f); + EXPECT_EQ(ra[6], 7.0f / 0.7f); + EXPECT_EQ(ra[7], 8.0f / 0.8f); } /** @brief Test vfloat8 div. */ @@ -2233,14 +2301,17 @@ TEST(vfloat8, vsdiv) float b = 3.14f; vfloat8 r = a / b; - EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f); - EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f); - EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f); - EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f); - EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f); - EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f); - EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f); - EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f); + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 0.1f / 3.14f); + EXPECT_EQ(ra[1], 0.2f / 3.14f); + EXPECT_EQ(ra[2], 0.3f / 3.14f); + EXPECT_EQ(ra[3], 0.4f / 3.14f); + EXPECT_EQ(ra[4], 0.5f / 3.14f); + EXPECT_EQ(ra[5], 0.6f / 3.14f); + EXPECT_EQ(ra[6], 0.7f / 3.14f); + EXPECT_EQ(ra[7], 0.8f / 3.14f); } /** @brief Test vfloat8 div. 
*/ @@ -2250,14 +2321,17 @@ TEST(vfloat8, svdiv) vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vfloat8 r = a / b; - EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f); - EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f); - EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f); - EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f); - EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f); - EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f); - EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f); - EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f); + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 3.14f / 0.1f); + EXPECT_EQ(ra[1], 3.14f / 0.2f); + EXPECT_EQ(ra[2], 3.14f / 0.3f); + EXPECT_EQ(ra[3], 3.14f / 0.4f); + EXPECT_EQ(ra[4], 3.14f / 0.5f); + EXPECT_EQ(ra[5], 3.14f / 0.6f); + EXPECT_EQ(ra[6], 3.14f / 0.7f); + EXPECT_EQ(ra[7], 3.14f / 0.8f); } /** @brief Test vfloat8 ceq. */ @@ -2364,14 +2438,18 @@ TEST(vfloat8, min) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vfloat8 r = min(a, b); - EXPECT_EQ(r.lane<0>(), 0.9f); - EXPECT_EQ(r.lane<1>(), 2.0f); - EXPECT_EQ(r.lane<2>(), 3.0f); - EXPECT_EQ(r.lane<3>(), 4.0f); - EXPECT_EQ(r.lane<4>(), 0.9f); - EXPECT_EQ(r.lane<5>(), 2.0f); - EXPECT_EQ(r.lane<6>(), 3.0f); - EXPECT_EQ(r.lane<7>(), 4.0f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 0.9f); + EXPECT_EQ(ra[1], 2.0f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 0.9f); + EXPECT_EQ(ra[5], 2.0f); + EXPECT_EQ(ra[6], 3.0f); + EXPECT_EQ(ra[7], 4.0f); } /** @brief Test vfloat8 max. 
*/ @@ -2380,14 +2458,18 @@ TEST(vfloat8, max) vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vfloat8 r = max(a, b); - EXPECT_EQ(r.lane<0>(), 1.0f); - EXPECT_EQ(r.lane<1>(), 2.1f); - EXPECT_EQ(r.lane<2>(), 3.0f); - EXPECT_EQ(r.lane<3>(), 4.1f); - EXPECT_EQ(r.lane<4>(), 1.0f); - EXPECT_EQ(r.lane<5>(), 2.1f); - EXPECT_EQ(r.lane<6>(), 3.0f); - EXPECT_EQ(r.lane<7>(), 4.1f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 2.1f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 4.1f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], 2.1f); + EXPECT_EQ(ra[6], 3.0f); + EXPECT_EQ(ra[7], 4.1f); } /** @brief Test vfloat8 clamp. */ @@ -2395,25 +2477,32 @@ TEST(vfloat8, clamp) { vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 r1 = clamp(2.1f, 3.0f, a1); - EXPECT_EQ(r1.lane<0>(), 2.1f); - EXPECT_EQ(r1.lane<1>(), 2.1f); - EXPECT_EQ(r1.lane<2>(), 3.0f); - EXPECT_EQ(r1.lane<3>(), 3.0f); - EXPECT_EQ(r1.lane<4>(), 2.1f); - EXPECT_EQ(r1.lane<5>(), 2.1f); - EXPECT_EQ(r1.lane<6>(), 3.0f); - EXPECT_EQ(r1.lane<7>(), 3.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 2.1f); + EXPECT_EQ(ra[1], 2.1f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 2.1f); + EXPECT_EQ(ra[5], 2.1f); + EXPECT_EQ(ra[6], 3.0f); + EXPECT_EQ(ra[7], 3.0f); vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f); vfloat8 r2 = clamp(2.1f, 3.0f, a2); - EXPECT_EQ(r2.lane<0>(), 2.1f); - EXPECT_EQ(r2.lane<1>(), 2.1f); - EXPECT_EQ(r2.lane<2>(), 2.1f); - EXPECT_EQ(r2.lane<3>(), 3.0f); - EXPECT_EQ(r2.lane<4>(), 2.1f); - EXPECT_EQ(r2.lane<5>(), 2.1f); - EXPECT_EQ(r2.lane<6>(), 2.1f); - EXPECT_EQ(r2.lane<7>(), 3.0f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 2.1f); + EXPECT_EQ(ra[1], 2.1f); + EXPECT_EQ(ra[2], 2.1f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 2.1f); + EXPECT_EQ(ra[5], 2.1f); + EXPECT_EQ(ra[6], 2.1f); + EXPECT_EQ(ra[7], 3.0f); } /** 
@brief Test vfloat8 clampz. */ @@ -2421,25 +2510,32 @@ TEST(vfloat8, clampz) { vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); vfloat8 r1 = clampz(3.0f, a1); - EXPECT_EQ(r1.lane<0>(), 0.0f); - EXPECT_EQ(r1.lane<1>(), 0.0f); - EXPECT_EQ(r1.lane<2>(), 0.1f); - EXPECT_EQ(r1.lane<3>(), 3.0f); - EXPECT_EQ(r1.lane<4>(), 0.0f); - EXPECT_EQ(r1.lane<5>(), 0.0f); - EXPECT_EQ(r1.lane<6>(), 0.1f); - EXPECT_EQ(r1.lane<7>(), 3.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.1f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 0.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.1f); + EXPECT_EQ(ra[7], 3.0f); vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); vfloat8 r2 = clampz(3.0f, a2); - EXPECT_EQ(r2.lane<0>(), 0.0f); - EXPECT_EQ(r2.lane<1>(), 0.0f); - EXPECT_EQ(r2.lane<2>(), 0.0f); - EXPECT_EQ(r2.lane<3>(), 3.0f); - EXPECT_EQ(r2.lane<4>(), 0.0f); - EXPECT_EQ(r2.lane<5>(), 0.0f); - EXPECT_EQ(r2.lane<6>(), 0.0f); - EXPECT_EQ(r2.lane<7>(), 3.0f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.0f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 0.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.0f); + EXPECT_EQ(ra[7], 3.0f); } /** @brief Test vfloat8 clampz. 
*/ @@ -2447,25 +2543,32 @@ TEST(vfloat8, clampzo) { vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); vfloat8 r1 = clampzo(a1); - EXPECT_EQ(r1.lane<0>(), 0.0f); - EXPECT_EQ(r1.lane<1>(), 0.0f); - EXPECT_EQ(r1.lane<2>(), 0.1f); - EXPECT_EQ(r1.lane<3>(), 1.0f); - EXPECT_EQ(r1.lane<4>(), 0.0f); - EXPECT_EQ(r1.lane<5>(), 0.0f); - EXPECT_EQ(r1.lane<6>(), 0.1f); - EXPECT_EQ(r1.lane<7>(), 1.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.1f); + EXPECT_EQ(ra[3], 1.0f); + EXPECT_EQ(ra[4], 0.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.1f); + EXPECT_EQ(ra[7], 1.0f); vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); vfloat8 r2 = clampzo(a2); - EXPECT_EQ(r2.lane<0>(), 0.0f); - EXPECT_EQ(r2.lane<1>(), 0.0f); - EXPECT_EQ(r2.lane<2>(), 0.0f); - EXPECT_EQ(r2.lane<3>(), 1.0f); - EXPECT_EQ(r2.lane<4>(), 0.0f); - EXPECT_EQ(r2.lane<5>(), 0.0f); - EXPECT_EQ(r2.lane<6>(), 0.0f); - EXPECT_EQ(r2.lane<7>(), 1.0f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.0f); + EXPECT_EQ(ra[3], 1.0f); + EXPECT_EQ(ra[4], 0.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.0f); + EXPECT_EQ(ra[7], 1.0f); } /** @brief Test vfloat8 abs. */ @@ -2473,14 +2576,18 @@ TEST(vfloat8, abs) { vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); vfloat8 r = abs(a); - EXPECT_EQ(r.lane<0>(), 1.0f); - EXPECT_EQ(r.lane<1>(), 0.0f); - EXPECT_EQ(r.lane<2>(), 0.1f); - EXPECT_EQ(r.lane<3>(), 4.0f); - EXPECT_EQ(r.lane<4>(), 1.0f); - EXPECT_EQ(r.lane<5>(), 0.0f); - EXPECT_EQ(r.lane<6>(), 0.1f); - EXPECT_EQ(r.lane<7>(), 4.0f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 0.1f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], 0.0f); + EXPECT_EQ(ra[6], 0.1f); + EXPECT_EQ(ra[7], 4.0f); } /** @brief Test vfloat8 round. 
*/ @@ -2488,14 +2595,18 @@ TEST(vfloat8, round) { vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r = round(a); - EXPECT_EQ(r.lane<0>(), 1.0f); - EXPECT_EQ(r.lane<1>(), 2.0f); - EXPECT_EQ(r.lane<2>(), 2.0f); - EXPECT_EQ(r.lane<3>(), 4.0f); - EXPECT_EQ(r.lane<4>(), 1.0f); - EXPECT_EQ(r.lane<5>(), 2.0f); - EXPECT_EQ(r.lane<6>(), 2.0f); - EXPECT_EQ(r.lane<7>(), 4.0f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 2.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], 2.0f); + EXPECT_EQ(ra[6], 2.0f); + EXPECT_EQ(ra[7], 4.0f); } /** @brief Test vfloat8 hmin. */ @@ -2503,25 +2614,32 @@ TEST(vfloat8, hmin) { vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r1 = hmin(a1); - EXPECT_EQ(r1.lane<0>(), 1.1f); - EXPECT_EQ(r1.lane<1>(), 1.1f); - EXPECT_EQ(r1.lane<2>(), 1.1f); - EXPECT_EQ(r1.lane<3>(), 1.1f); - EXPECT_EQ(r1.lane<4>(), 1.1f); - EXPECT_EQ(r1.lane<5>(), 1.1f); - EXPECT_EQ(r1.lane<6>(), 1.1f); - EXPECT_EQ(r1.lane<7>(), 1.1f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 1.1f); + EXPECT_EQ(ra[1], 1.1f); + EXPECT_EQ(ra[2], 1.1f); + EXPECT_EQ(ra[3], 1.1f); + EXPECT_EQ(ra[4], 1.1f); + EXPECT_EQ(ra[5], 1.1f); + EXPECT_EQ(ra[6], 1.1f); + EXPECT_EQ(ra[7], 1.1f); vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); vfloat8 r2 = hmin(a2); - EXPECT_EQ(r2.lane<0>(), 0.2f); - EXPECT_EQ(r2.lane<1>(), 0.2f); - EXPECT_EQ(r2.lane<2>(), 0.2f); - EXPECT_EQ(r2.lane<3>(), 0.2f); - EXPECT_EQ(r2.lane<4>(), 0.2f); - EXPECT_EQ(r2.lane<5>(), 0.2f); - EXPECT_EQ(r2.lane<6>(), 0.2f); - EXPECT_EQ(r2.lane<7>(), 0.2f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 0.2f); + EXPECT_EQ(ra[1], 0.2f); + EXPECT_EQ(ra[2], 0.2f); + EXPECT_EQ(ra[3], 0.2f); + EXPECT_EQ(ra[4], 0.2f); + EXPECT_EQ(ra[5], 0.2f); + EXPECT_EQ(ra[6], 0.2f); + EXPECT_EQ(ra[7], 0.2f); } /** @brief Test vfloat8 hmin_s. 
*/ @@ -2541,25 +2659,32 @@ TEST(vfloat8, hmax) { vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r1 = hmax(a1); - EXPECT_EQ(r1.lane<0>(), 4.0f); - EXPECT_EQ(r1.lane<1>(), 4.0f); - EXPECT_EQ(r1.lane<2>(), 4.0f); - EXPECT_EQ(r1.lane<3>(), 4.0f); - EXPECT_EQ(r1.lane<4>(), 4.0f); - EXPECT_EQ(r1.lane<5>(), 4.0f); - EXPECT_EQ(r1.lane<6>(), 4.0f); - EXPECT_EQ(r1.lane<7>(), 4.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 4.0f); + EXPECT_EQ(ra[1], 4.0f); + EXPECT_EQ(ra[2], 4.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 4.0f); + EXPECT_EQ(ra[6], 4.0f); + EXPECT_EQ(ra[7], 4.0f); vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); vfloat8 r2 = hmax(a2); - EXPECT_EQ(r2.lane<0>(), 1.6f); - EXPECT_EQ(r2.lane<1>(), 1.6f); - EXPECT_EQ(r2.lane<2>(), 1.6f); - EXPECT_EQ(r2.lane<3>(), 1.6f); - EXPECT_EQ(r2.lane<4>(), 1.6f); - EXPECT_EQ(r2.lane<5>(), 1.6f); - EXPECT_EQ(r2.lane<6>(), 1.6f); - EXPECT_EQ(r2.lane<7>(), 1.6f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 1.6f); + EXPECT_EQ(ra[1], 1.6f); + EXPECT_EQ(ra[2], 1.6f); + EXPECT_EQ(ra[3], 1.6f); + EXPECT_EQ(ra[4], 1.6f); + EXPECT_EQ(ra[5], 1.6f); + EXPECT_EQ(ra[6], 1.6f); + EXPECT_EQ(ra[7], 1.6f); } /** @brief Test vfloat8 hmax_s. 
*/ @@ -2588,14 +2713,18 @@ TEST(vfloat8, sqrt) { vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 r = sqrt(a); - EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f)); - EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f)); - EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f)); - EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f)); - EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f)); - EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f)); - EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f)); - EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f)); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], std::sqrt(1.0f)); + EXPECT_EQ(ra[1], std::sqrt(2.0f)); + EXPECT_EQ(ra[2], std::sqrt(3.0f)); + EXPECT_EQ(ra[3], std::sqrt(4.0f)); + EXPECT_EQ(ra[4], std::sqrt(1.0f)); + EXPECT_EQ(ra[5], std::sqrt(2.0f)); + EXPECT_EQ(ra[6], std::sqrt(3.0f)); + EXPECT_EQ(ra[7], std::sqrt(4.0f)); } /** @brief Test vfloat8 select. */ @@ -2610,25 +2739,32 @@ TEST(vfloat8, select) // Select in one direction vfloat8 r1 = select(a, b, cond); - EXPECT_EQ(r1.lane<0>(), 4.0f); - EXPECT_EQ(r1.lane<1>(), 3.0f); - EXPECT_EQ(r1.lane<2>(), 2.0f); - EXPECT_EQ(r1.lane<3>(), 1.0f); - EXPECT_EQ(r1.lane<4>(), 4.0f); - EXPECT_EQ(r1.lane<5>(), 3.0f); - EXPECT_EQ(r1.lane<6>(), 2.0f); - EXPECT_EQ(r1.lane<7>(), 1.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 4.0f); + EXPECT_EQ(ra[1], 3.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 1.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 3.0f); + EXPECT_EQ(ra[6], 2.0f); + EXPECT_EQ(ra[7], 1.0f); // Select in the other vfloat8 r2 = select(b, a, cond); - EXPECT_EQ(r2.lane<0>(), 1.0f); - EXPECT_EQ(r2.lane<1>(), 2.0f); - EXPECT_EQ(r2.lane<2>(), 3.0f); - EXPECT_EQ(r2.lane<3>(), 4.0f); - EXPECT_EQ(r2.lane<4>(), 1.0f); - EXPECT_EQ(r2.lane<5>(), 2.0f); - EXPECT_EQ(r2.lane<6>(), 3.0f); - EXPECT_EQ(r2.lane<7>(), 4.0f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 2.0f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], 2.0f); + EXPECT_EQ(ra[6], 
3.0f); + EXPECT_EQ(ra[7], 4.0f); } /** @brief Test vfloat8 select MSB only. */ @@ -2643,25 +2779,32 @@ TEST(vfloat8, select_msb) // Select in one direction vfloat8 r1 = select(a, b, cond); - EXPECT_EQ(r1.lane<0>(), 4.0f); - EXPECT_EQ(r1.lane<1>(), 3.0f); - EXPECT_EQ(r1.lane<2>(), 2.0f); - EXPECT_EQ(r1.lane<3>(), 1.0f); - EXPECT_EQ(r1.lane<4>(), 4.0f); - EXPECT_EQ(r1.lane<5>(), 3.0f); - EXPECT_EQ(r1.lane<6>(), 2.0f); - EXPECT_EQ(r1.lane<7>(), 1.0f); + + alignas(32) float ra[8]; + storea(r1, ra); + + EXPECT_EQ(ra[0], 4.0f); + EXPECT_EQ(ra[1], 3.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 1.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 3.0f); + EXPECT_EQ(ra[6], 2.0f); + EXPECT_EQ(ra[7], 1.0f); // Select in the other vfloat8 r2 = select(b, a, cond); - EXPECT_EQ(r2.lane<0>(), 1.0f); - EXPECT_EQ(r2.lane<1>(), 2.0f); - EXPECT_EQ(r2.lane<2>(), 3.0f); - EXPECT_EQ(r2.lane<3>(), 4.0f); - EXPECT_EQ(r2.lane<4>(), 1.0f); - EXPECT_EQ(r2.lane<5>(), 2.0f); - EXPECT_EQ(r2.lane<6>(), 3.0f); - EXPECT_EQ(r2.lane<7>(), 4.0f); + + storea(r2, ra); + + EXPECT_EQ(ra[0], 1.0f); + EXPECT_EQ(ra[1], 2.0f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 4.0f); + EXPECT_EQ(ra[4], 1.0f); + EXPECT_EQ(ra[5], 2.0f); + EXPECT_EQ(ra[6], 3.0f); + EXPECT_EQ(ra[7], 4.0f); } /** @brief Test vfloat8 gatherf. */ @@ -2669,46 +2812,54 @@ TEST(vfloat8, gatherf) { vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); vfloat8 r = gatherf(f32_data, indices); - EXPECT_EQ(r.lane<0>(), 0.0f); - EXPECT_EQ(r.lane<1>(), 4.0f); - EXPECT_EQ(r.lane<2>(), 3.0f); - EXPECT_EQ(r.lane<3>(), 2.0f); - EXPECT_EQ(r.lane<4>(), 7.0f); - EXPECT_EQ(r.lane<5>(), 4.0f); - EXPECT_EQ(r.lane<6>(), 3.0f); - EXPECT_EQ(r.lane<7>(), 2.0f); + + alignas(32) float ra[8]; + storea(r, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 4.0f); + EXPECT_EQ(ra[2], 3.0f); + EXPECT_EQ(ra[3], 2.0f); + EXPECT_EQ(ra[4], 7.0f); + EXPECT_EQ(ra[5], 4.0f); + EXPECT_EQ(ra[6], 3.0f); + EXPECT_EQ(ra[7], 2.0f); } /** @brief Test vfloat8 store. 
*/ TEST(vfloat8, store) { - alignas(32) float out[9]; vfloat8 a(f32_data); - store(a, &(out[1])); - EXPECT_EQ(out[1], 0.0f); - EXPECT_EQ(out[2], 1.0f); - EXPECT_EQ(out[3], 2.0f); - EXPECT_EQ(out[4], 3.0f); - EXPECT_EQ(out[5], 4.0f); - EXPECT_EQ(out[6], 5.0f); - EXPECT_EQ(out[7], 6.0f); - EXPECT_EQ(out[8], 7.0f); + + alignas(32) float ra[9]; + store(a, ra + 1); + + EXPECT_EQ(ra[1], 0.0f); + EXPECT_EQ(ra[2], 1.0f); + EXPECT_EQ(ra[3], 2.0f); + EXPECT_EQ(ra[4], 3.0f); + EXPECT_EQ(ra[5], 4.0f); + EXPECT_EQ(ra[6], 5.0f); + EXPECT_EQ(ra[7], 6.0f); + EXPECT_EQ(ra[8], 7.0f); } /** @brief Test vfloat8 storea. */ TEST(vfloat8, storea) { - alignas(32) float out[9]; vfloat8 a(f32_data); - store(a, out); - EXPECT_EQ(out[0], 0.0f); - EXPECT_EQ(out[1], 1.0f); - EXPECT_EQ(out[2], 2.0f); - EXPECT_EQ(out[3], 3.0f); - EXPECT_EQ(out[4], 4.0f); - EXPECT_EQ(out[5], 5.0f); - EXPECT_EQ(out[6], 6.0f); - EXPECT_EQ(out[7], 7.0f); + + alignas(32) float ra[8]; + storea(a, ra); + + EXPECT_EQ(ra[0], 0.0f); + EXPECT_EQ(ra[1], 1.0f); + EXPECT_EQ(ra[2], 2.0f); + EXPECT_EQ(ra[3], 3.0f); + EXPECT_EQ(ra[4], 4.0f); + EXPECT_EQ(ra[5], 5.0f); + EXPECT_EQ(ra[6], 6.0f); + EXPECT_EQ(ra[7], 7.0f); } /** @brief Test vfloat8 float_to_int. 
*/ @@ -2716,14 +2867,18 @@ TEST(vfloat8, float_to_int) { vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vint8 r = float_to_int(a); - EXPECT_EQ(r.lane<0>(), 1); - EXPECT_EQ(r.lane<1>(), 1); - EXPECT_EQ(r.lane<2>(), 1); - EXPECT_EQ(r.lane<3>(), 4); - EXPECT_EQ(r.lane<4>(), 1); - EXPECT_EQ(r.lane<5>(), 1); - EXPECT_EQ(r.lane<6>(), 1); - EXPECT_EQ(r.lane<7>(), 4); + + alignas(32) int ra[8]; + store(r, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 1); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 1); + EXPECT_EQ(ra[6], 1); + EXPECT_EQ(ra[7], 4); } // vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2732,56 +2887,72 @@ TEST(vfloat8, float_to_int) TEST(vint8, UnalignedLoad) { vint8 a(&(s32_data[1])); - EXPECT_EQ(a.lane<0>(), 1); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 3); - EXPECT_EQ(a.lane<3>(), 4); - EXPECT_EQ(a.lane<4>(), 5); - EXPECT_EQ(a.lane<5>(), 6); - EXPECT_EQ(a.lane<6>(), 7); - EXPECT_EQ(a.lane<7>(), 8); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 5); + EXPECT_EQ(ra[5], 6); + EXPECT_EQ(ra[6], 7); + EXPECT_EQ(ra[7], 8); } /** @brief Test unaligned vint8 data load. */ TEST(vint8, UnalignedLoad8) { vint8 a(&(u8_data[1])); - EXPECT_EQ(a.lane<0>(), 1); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 3); - EXPECT_EQ(a.lane<3>(), 4); - EXPECT_EQ(a.lane<4>(), 5); - EXPECT_EQ(a.lane<5>(), 6); - EXPECT_EQ(a.lane<6>(), 7); - EXPECT_EQ(a.lane<7>(), 8); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 5); + EXPECT_EQ(ra[5], 6); + EXPECT_EQ(ra[6], 7); + EXPECT_EQ(ra[7], 8); } /** @brief Test scalar duplicated vint8 load. 
*/ TEST(vint8, ScalarDupLoad) { vint8 a(42); - EXPECT_EQ(a.lane<0>(), 42); - EXPECT_EQ(a.lane<1>(), 42); - EXPECT_EQ(a.lane<2>(), 42); - EXPECT_EQ(a.lane<3>(), 42); - EXPECT_EQ(a.lane<4>(), 42); - EXPECT_EQ(a.lane<5>(), 42); - EXPECT_EQ(a.lane<6>(), 42); - EXPECT_EQ(a.lane<7>(), 42); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 42); + EXPECT_EQ(ra[1], 42); + EXPECT_EQ(ra[2], 42); + EXPECT_EQ(ra[3], 42); + EXPECT_EQ(ra[4], 42); + EXPECT_EQ(ra[5], 42); + EXPECT_EQ(ra[6], 42); + EXPECT_EQ(ra[7], 42); } /** @brief Test scalar vint8 load. */ TEST(vint8, ScalarLoad) { vint8 a(11, 22, 33, 44, 55, 66, 77, 88); - EXPECT_EQ(a.lane<0>(), 11); - EXPECT_EQ(a.lane<1>(), 22); - EXPECT_EQ(a.lane<2>(), 33); - EXPECT_EQ(a.lane<3>(), 44); - EXPECT_EQ(a.lane<4>(), 55); - EXPECT_EQ(a.lane<5>(), 66); - EXPECT_EQ(a.lane<6>(), 77); - EXPECT_EQ(a.lane<7>(), 88); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 11); + EXPECT_EQ(ra[1], 22); + EXPECT_EQ(ra[2], 33); + EXPECT_EQ(ra[3], 44); + EXPECT_EQ(ra[4], 55); + EXPECT_EQ(ra[5], 66); + EXPECT_EQ(ra[6], 77); + EXPECT_EQ(ra[7], 88); } /** @brief Test copy vint8 load. */ @@ -2789,28 +2960,36 @@ TEST(vint8, CopyLoad) { vint8 s(11, 22, 33, 44, 55, 66, 77, 88); vint8 a(s.m); - EXPECT_EQ(a.lane<0>(), 11); - EXPECT_EQ(a.lane<1>(), 22); - EXPECT_EQ(a.lane<2>(), 33); - EXPECT_EQ(a.lane<3>(), 44); - EXPECT_EQ(a.lane<4>(), 55); - EXPECT_EQ(a.lane<5>(), 66); - EXPECT_EQ(a.lane<6>(), 77); - EXPECT_EQ(a.lane<7>(), 88); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 11); + EXPECT_EQ(ra[1], 22); + EXPECT_EQ(ra[2], 33); + EXPECT_EQ(ra[3], 44); + EXPECT_EQ(ra[4], 55); + EXPECT_EQ(ra[5], 66); + EXPECT_EQ(ra[6], 77); + EXPECT_EQ(ra[7], 88); } /** @brief Test vint8 zero. 
*/ TEST(vint8, Zero) { vint8 a = vint8::zero(); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 0); - EXPECT_EQ(a.lane<2>(), 0); - EXPECT_EQ(a.lane<3>(), 0); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 0); - EXPECT_EQ(a.lane<6>(), 0); - EXPECT_EQ(a.lane<7>(), 0); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 0); + EXPECT_EQ(ra[2], 0); + EXPECT_EQ(ra[3], 0); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 0); + EXPECT_EQ(ra[6], 0); + EXPECT_EQ(ra[7], 0); } /** @brief Test vint8 load1. */ @@ -2818,42 +2997,54 @@ TEST(vint8, Load1) { int s = 42; vint8 a = vint8::load1(&s); - EXPECT_EQ(a.lane<0>(), 42); - EXPECT_EQ(a.lane<1>(), 42); - EXPECT_EQ(a.lane<2>(), 42); - EXPECT_EQ(a.lane<3>(), 42); - EXPECT_EQ(a.lane<4>(), 42); - EXPECT_EQ(a.lane<5>(), 42); - EXPECT_EQ(a.lane<6>(), 42); - EXPECT_EQ(a.lane<7>(), 42); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 42); + EXPECT_EQ(ra[1], 42); + EXPECT_EQ(ra[2], 42); + EXPECT_EQ(ra[3], 42); + EXPECT_EQ(ra[4], 42); + EXPECT_EQ(ra[5], 42); + EXPECT_EQ(ra[6], 42); + EXPECT_EQ(ra[7], 42); } /** @brief Test vint8 loada. */ TEST(vint8, Loada) { vint8 a = vint8::loada(&(s32_data[0])); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 1); - EXPECT_EQ(a.lane<2>(), 2); - EXPECT_EQ(a.lane<3>(), 3); - EXPECT_EQ(a.lane<4>(), 4); - EXPECT_EQ(a.lane<5>(), 5); - EXPECT_EQ(a.lane<6>(), 6); - EXPECT_EQ(a.lane<7>(), 7); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 2); + EXPECT_EQ(ra[3], 3); + EXPECT_EQ(ra[4], 4); + EXPECT_EQ(ra[5], 5); + EXPECT_EQ(ra[6], 6); + EXPECT_EQ(ra[7], 7); } /** @brief Test vint8 lane_id. 
*/ TEST(vint8, LaneID) { vint8 a = vint8::lane_id(); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 1); - EXPECT_EQ(a.lane<2>(), 2); - EXPECT_EQ(a.lane<3>(), 3); - EXPECT_EQ(a.lane<4>(), 4); - EXPECT_EQ(a.lane<5>(), 5); - EXPECT_EQ(a.lane<6>(), 6); - EXPECT_EQ(a.lane<7>(), 7); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 2); + EXPECT_EQ(ra[3], 3); + EXPECT_EQ(ra[4], 4); + EXPECT_EQ(ra[5], 5); + EXPECT_EQ(ra[6], 6); + EXPECT_EQ(ra[7], 7); } /** @brief Test vint8 add. */ @@ -2862,14 +3053,18 @@ TEST(vint8, vadd) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(2, 3, 4, 5, 2, 3, 4, 5); a = a + b; - EXPECT_EQ(a.lane<0>(), 1 + 2); - EXPECT_EQ(a.lane<1>(), 2 + 3); - EXPECT_EQ(a.lane<2>(), 3 + 4); - EXPECT_EQ(a.lane<3>(), 4 + 5); - EXPECT_EQ(a.lane<4>(), 1 + 2); - EXPECT_EQ(a.lane<5>(), 2 + 3); - EXPECT_EQ(a.lane<6>(), 3 + 4); - EXPECT_EQ(a.lane<7>(), 4 + 5); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1 + 2); + EXPECT_EQ(ra[1], 2 + 3); + EXPECT_EQ(ra[2], 3 + 4); + EXPECT_EQ(ra[3], 4 + 5); + EXPECT_EQ(ra[4], 1 + 2); + EXPECT_EQ(ra[5], 2 + 3); + EXPECT_EQ(ra[6], 3 + 4); + EXPECT_EQ(ra[7], 4 + 5); } @@ -2880,14 +3075,17 @@ TEST(vint8, vselfadd1) vint8 b(2, 3, 4, 5, 2, 3, 4, 5); a += b; - EXPECT_EQ(a.lane<0>(), 1 + 2); - EXPECT_EQ(a.lane<1>(), 2 + 3); - EXPECT_EQ(a.lane<2>(), 3 + 4); - EXPECT_EQ(a.lane<3>(), 4 + 5); - EXPECT_EQ(a.lane<4>(), 1 + 2); - EXPECT_EQ(a.lane<5>(), 2 + 3); - EXPECT_EQ(a.lane<6>(), 3 + 4); - EXPECT_EQ(a.lane<7>(), 4 + 5); + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1 + 2); + EXPECT_EQ(ra[1], 2 + 3); + EXPECT_EQ(ra[2], 3 + 4); + EXPECT_EQ(ra[3], 4 + 5); + EXPECT_EQ(ra[4], 1 + 2); + EXPECT_EQ(ra[5], 2 + 3); + EXPECT_EQ(ra[6], 3 + 4); + EXPECT_EQ(ra[7], 4 + 5); } /** @brief Test vint8 sub. 
*/ @@ -2896,14 +3094,18 @@ TEST(vint8, vsub) vint8 a(1, 2, 4, 4, 1, 2, 4, 4); vint8 b(2, 3, 3, 5, 2, 3, 3, 5); a = a - b; - EXPECT_EQ(a.lane<0>(), 1 - 2); - EXPECT_EQ(a.lane<1>(), 2 - 3); - EXPECT_EQ(a.lane<2>(), 4 - 3); - EXPECT_EQ(a.lane<3>(), 4 - 5); - EXPECT_EQ(a.lane<4>(), 1 - 2); - EXPECT_EQ(a.lane<5>(), 2 - 3); - EXPECT_EQ(a.lane<6>(), 4 - 3); - EXPECT_EQ(a.lane<7>(), 4 - 5); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1 - 2); + EXPECT_EQ(ra[1], 2 - 3); + EXPECT_EQ(ra[2], 4 - 3); + EXPECT_EQ(ra[3], 4 - 5); + EXPECT_EQ(ra[4], 1 - 2); + EXPECT_EQ(ra[5], 2 - 3); + EXPECT_EQ(ra[6], 4 - 3); + EXPECT_EQ(ra[7], 4 - 5); } /** @brief Test vint8 mul. */ @@ -2912,14 +3114,18 @@ TEST(vint8, vmul) vint8 a(1, 2, 4, 4, 1, 2, 4, 4); vint8 b(2, 3, 3, 5, 2, 3, 3, 5); a = a * b; - EXPECT_EQ(a.lane<0>(), 1 * 2); - EXPECT_EQ(a.lane<1>(), 2 * 3); - EXPECT_EQ(a.lane<2>(), 4 * 3); - EXPECT_EQ(a.lane<3>(), 4 * 5); - EXPECT_EQ(a.lane<4>(), 1 * 2); - EXPECT_EQ(a.lane<5>(), 2 * 3); - EXPECT_EQ(a.lane<6>(), 4 * 3); - EXPECT_EQ(a.lane<7>(), 4 * 5); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1 * 2); + EXPECT_EQ(ra[1], 2 * 3); + EXPECT_EQ(ra[2], 4 * 3); + EXPECT_EQ(ra[3], 4 * 5); + EXPECT_EQ(ra[4], 1 * 2); + EXPECT_EQ(ra[5], 2 * 3); + EXPECT_EQ(ra[6], 4 * 3); + EXPECT_EQ(ra[7], 4 * 5); } /** @brief Test vint8 bitwise invert. */ @@ -2927,14 +3133,18 @@ TEST(vint8, bit_invert) { vint8 a(-1, 0, 1, 2, -1, 0, 1, 2); a = ~a; - EXPECT_EQ(a.lane<0>(), ~-1); - EXPECT_EQ(a.lane<1>(), ~0); - EXPECT_EQ(a.lane<2>(), ~1); - EXPECT_EQ(a.lane<3>(), ~2); - EXPECT_EQ(a.lane<4>(), ~-1); - EXPECT_EQ(a.lane<5>(), ~0); - EXPECT_EQ(a.lane<6>(), ~1); - EXPECT_EQ(a.lane<7>(), ~2); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], ~-1); + EXPECT_EQ(ra[1], ~0); + EXPECT_EQ(ra[2], ~1); + EXPECT_EQ(ra[3], ~2); + EXPECT_EQ(ra[4], ~-1); + EXPECT_EQ(ra[5], ~0); + EXPECT_EQ(ra[6], ~1); + EXPECT_EQ(ra[7], ~2); } /** @brief Test vint8 bitwise or. 
*/ @@ -2943,14 +3153,18 @@ TEST(vint8, bit_vor) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(2, 3, 4, 5, 2, 3, 4, 5); a = a | b; - EXPECT_EQ(a.lane<0>(), 3); - EXPECT_EQ(a.lane<1>(), 3); - EXPECT_EQ(a.lane<2>(), 7); - EXPECT_EQ(a.lane<3>(), 5); - EXPECT_EQ(a.lane<4>(), 3); - EXPECT_EQ(a.lane<5>(), 3); - EXPECT_EQ(a.lane<6>(), 7); - EXPECT_EQ(a.lane<7>(), 5); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 3); + EXPECT_EQ(ra[1], 3); + EXPECT_EQ(ra[2], 7); + EXPECT_EQ(ra[3], 5); + EXPECT_EQ(ra[4], 3); + EXPECT_EQ(ra[5], 3); + EXPECT_EQ(ra[6], 7); + EXPECT_EQ(ra[7], 5); } /** @brief Test vint8 bitwise and. */ @@ -2959,14 +3173,18 @@ TEST(vint8, bit_vand) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(2, 3, 4, 5, 2, 3, 4, 5); a = a & b; - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 0); - EXPECT_EQ(a.lane<3>(), 4); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 2); - EXPECT_EQ(a.lane<6>(), 0); - EXPECT_EQ(a.lane<7>(), 4); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 0); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 0); + EXPECT_EQ(ra[7], 4); } /** @brief Test vint8 bitwise xor. */ @@ -2975,14 +3193,18 @@ TEST(vint8, bit_vxor) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(2, 3, 4, 5, 2, 3, 4, 5); a = a ^ b; - EXPECT_EQ(a.lane<0>(), 3); - EXPECT_EQ(a.lane<1>(), 1); - EXPECT_EQ(a.lane<2>(), 7); - EXPECT_EQ(a.lane<3>(), 1); - EXPECT_EQ(a.lane<4>(), 3); - EXPECT_EQ(a.lane<5>(), 1); - EXPECT_EQ(a.lane<6>(), 7); - EXPECT_EQ(a.lane<7>(), 1); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 3); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 7); + EXPECT_EQ(ra[3], 1); + EXPECT_EQ(ra[4], 3); + EXPECT_EQ(ra[5], 1); + EXPECT_EQ(ra[6], 7); + EXPECT_EQ(ra[7], 1); } /** @brief Test vint8 ceq. 
*/ @@ -3071,14 +3293,18 @@ TEST(vint8, min) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(0, 3, 3, 5, 0, 3, 3, 5); vint8 r = min(a, b); - EXPECT_EQ(r.lane<0>(), 0); - EXPECT_EQ(r.lane<1>(), 2); - EXPECT_EQ(r.lane<2>(), 3); - EXPECT_EQ(r.lane<3>(), 4); - EXPECT_EQ(r.lane<4>(), 0); - EXPECT_EQ(r.lane<5>(), 2); - EXPECT_EQ(r.lane<6>(), 3); - EXPECT_EQ(r.lane<7>(), 4); + + alignas(32) int ra[8]; + store(r, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 3); + EXPECT_EQ(ra[7], 4); } /** @brief Test vint8 max. */ @@ -3087,14 +3313,18 @@ TEST(vint8, max) vint8 a(1, 2, 3, 4, 1, 2, 3, 4); vint8 b(0, 3, 3, 5, 0, 3, 3, 5); vint8 r = max(a, b); - EXPECT_EQ(r.lane<0>(), 1); - EXPECT_EQ(r.lane<1>(), 3); - EXPECT_EQ(r.lane<2>(), 3); - EXPECT_EQ(r.lane<3>(), 5); - EXPECT_EQ(r.lane<4>(), 1); - EXPECT_EQ(r.lane<5>(), 3); - EXPECT_EQ(r.lane<6>(), 3); - EXPECT_EQ(r.lane<7>(), 5); + + alignas(32) int ra[8]; + store(r, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 3); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 5); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 3); + EXPECT_EQ(ra[6], 3); + EXPECT_EQ(ra[7], 5); } /** @brief Test vint8 lsl. 
*/ @@ -3102,35 +3332,44 @@ TEST(vint8, lsl) { vint8 a(1, 2, 4, -4, 1, 2, 4, -4); a = lsl<0>(a); - EXPECT_EQ(a.lane<0>(), 1); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 4); - EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC)); - EXPECT_EQ(a.lane<4>(), 1); - EXPECT_EQ(a.lane<5>(), 2); - EXPECT_EQ(a.lane<6>(), 4); - EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC)); + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 4); + EXPECT_EQ(ra[3], static_cast<int>(0xFFFFFFFC)); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 4); + EXPECT_EQ(ra[7], static_cast<int>(0xFFFFFFFC)); a = lsl<1>(a); - EXPECT_EQ(a.lane<0>(), 2); - EXPECT_EQ(a.lane<1>(), 4); - EXPECT_EQ(a.lane<2>(), 8); - EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFF8)); - EXPECT_EQ(a.lane<4>(), 2); - EXPECT_EQ(a.lane<5>(), 4); - EXPECT_EQ(a.lane<6>(), 8); - EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFF8)); + + store(a, ra); + + EXPECT_EQ(ra[0], 2); + EXPECT_EQ(ra[1], 4); + EXPECT_EQ(ra[2], 8); + EXPECT_EQ(ra[3], static_cast<int>(0xFFFFFFF8)); + EXPECT_EQ(ra[4], 2); + EXPECT_EQ(ra[5], 4); + EXPECT_EQ(ra[6], 8); + EXPECT_EQ(ra[7], static_cast<int>(0xFFFFFFF8)); a = lsl<2>(a); - EXPECT_EQ(a.lane<0>(), 8); - EXPECT_EQ(a.lane<1>(), 16); - EXPECT_EQ(a.lane<2>(), 32); - EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFE0)); - EXPECT_EQ(a.lane<4>(), 8); - EXPECT_EQ(a.lane<5>(), 16); - EXPECT_EQ(a.lane<6>(), 32); - EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFE0)); + + store(a, ra); + + EXPECT_EQ(ra[0], 8); + EXPECT_EQ(ra[1], 16); + EXPECT_EQ(ra[2], 32); + EXPECT_EQ(ra[3], static_cast<int>(0xFFFFFFE0)); + EXPECT_EQ(ra[4], 8); + EXPECT_EQ(ra[5], 16); + EXPECT_EQ(ra[6], 32); + EXPECT_EQ(ra[7], static_cast<int>(0xFFFFFFE0)); } /** @brief Test vint8 lsr. 
*/ @@ -3138,35 +3377,44 @@ TEST(vint8, lsr) { vint8 a(1, 2, 4, -4, 1, 2, 4, -4); a = lsr<0>(a); - EXPECT_EQ(a.lane<0>(), 1); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 4); - EXPECT_EQ(a.lane<3>(), static_cast<int>(0xFFFFFFFC)); - EXPECT_EQ(a.lane<4>(), 1); - EXPECT_EQ(a.lane<5>(), 2); - EXPECT_EQ(a.lane<6>(), 4); - EXPECT_EQ(a.lane<7>(), static_cast<int>(0xFFFFFFFC)); + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 4); + EXPECT_EQ(ra[3], static_cast<int>(0xFFFFFFFC)); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 4); + EXPECT_EQ(ra[7], static_cast<int>(0xFFFFFFFC)); a = lsr<1>(a); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 1); - EXPECT_EQ(a.lane<2>(), 2); - EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 1); - EXPECT_EQ(a.lane<6>(), 2); - EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE); + + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 2); + EXPECT_EQ(ra[3], 0x7FFFFFFE); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 1); + EXPECT_EQ(ra[6], 2); + EXPECT_EQ(ra[7], 0x7FFFFFFE); a = lsr<2>(a); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 0); - EXPECT_EQ(a.lane<2>(), 0); - EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 0); - EXPECT_EQ(a.lane<6>(), 0); - EXPECT_EQ(a.lane<7>(), 0x1FFFFFFF); + + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 0); + EXPECT_EQ(ra[2], 0); + EXPECT_EQ(ra[3], 0x1FFFFFFF); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 0); + EXPECT_EQ(ra[6], 0); + EXPECT_EQ(ra[7], 0x1FFFFFFF); } /** @brief Test vint8 asr. 
*/ @@ -3174,35 +3422,45 @@ TEST(vint8, asr) { vint8 a(1, 2, 4, -4, 1, 2, 4, -4); a = asr<0>(a); - EXPECT_EQ(a.lane<0>(), 1); - EXPECT_EQ(a.lane<1>(), 2); - EXPECT_EQ(a.lane<2>(), 4); - EXPECT_EQ(a.lane<3>(), -4); - EXPECT_EQ(a.lane<4>(), 1); - EXPECT_EQ(a.lane<5>(), 2); - EXPECT_EQ(a.lane<6>(), 4); - EXPECT_EQ(a.lane<7>(), -4); + + alignas(32) int ra[8]; + store(a, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 4); + EXPECT_EQ(ra[3], -4); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 4); + EXPECT_EQ(ra[7], -4); a = asr<1>(a); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 1); - EXPECT_EQ(a.lane<2>(), 2); - EXPECT_EQ(a.lane<3>(), -2); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 1); - EXPECT_EQ(a.lane<6>(), 2); - EXPECT_EQ(a.lane<7>(), -2); + + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 2); + EXPECT_EQ(ra[3], -2); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 1); + EXPECT_EQ(ra[6], 2); + EXPECT_EQ(ra[7], -2); // Note - quirk of asr is that you will get "stuck" at -1 a = asr<2>(a); - EXPECT_EQ(a.lane<0>(), 0); - EXPECT_EQ(a.lane<1>(), 0); - EXPECT_EQ(a.lane<2>(), 0); - EXPECT_EQ(a.lane<3>(), -1); - EXPECT_EQ(a.lane<4>(), 0); - EXPECT_EQ(a.lane<5>(), 0); - EXPECT_EQ(a.lane<6>(), 0); - EXPECT_EQ(a.lane<7>(), -1); + + store(a, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 0); + EXPECT_EQ(ra[2], 0); + EXPECT_EQ(ra[3], -1); + EXPECT_EQ(ra[4], 0); + EXPECT_EQ(ra[5], 0); + EXPECT_EQ(ra[6], 0); + EXPECT_EQ(ra[7], -1); } /** @brief Test vint8 hmin. 
*/ @@ -3210,25 +3468,32 @@ TEST(vint8, hmin) { vint8 a1(1, 2, 1, 2, 1, 2, 1, 2); vint8 r1 = hmin(a1); - EXPECT_EQ(r1.lane<0>(), 1); - EXPECT_EQ(r1.lane<1>(), 1); - EXPECT_EQ(r1.lane<2>(), 1); - EXPECT_EQ(r1.lane<3>(), 1); - EXPECT_EQ(r1.lane<4>(), 1); - EXPECT_EQ(r1.lane<5>(), 1); - EXPECT_EQ(r1.lane<6>(), 1); - EXPECT_EQ(r1.lane<7>(), 1); + + alignas(32) int ra[8]; + store(r1, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 1); + EXPECT_EQ(ra[2], 1); + EXPECT_EQ(ra[3], 1); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 1); + EXPECT_EQ(ra[6], 1); + EXPECT_EQ(ra[7], 1); vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); vint8 r2 = hmin(a2); - EXPECT_EQ(r2.lane<0>(), -1); - EXPECT_EQ(r2.lane<1>(), -1); - EXPECT_EQ(r2.lane<2>(), -1); - EXPECT_EQ(r2.lane<3>(), -1); - EXPECT_EQ(r2.lane<4>(), -1); - EXPECT_EQ(r2.lane<5>(), -1); - EXPECT_EQ(r2.lane<6>(), -1); - EXPECT_EQ(r2.lane<7>(), -1); + + store(r2, ra); + + EXPECT_EQ(ra[0], -1); + EXPECT_EQ(ra[1], -1); + EXPECT_EQ(ra[2], -1); + EXPECT_EQ(ra[3], -1); + EXPECT_EQ(ra[4], -1); + EXPECT_EQ(ra[5], -1); + EXPECT_EQ(ra[6], -1); + EXPECT_EQ(ra[7], -1); } /** @brief Test vint8 hmax. 
*/ @@ -3236,25 +3501,32 @@ TEST(vint8, hmax) { vint8 a1(1, 2, 1, 2, 1, 3, 1, 2); vint8 r1 = hmax(a1); - EXPECT_EQ(r1.lane<0>(), 3); - EXPECT_EQ(r1.lane<1>(), 3); - EXPECT_EQ(r1.lane<2>(), 3); - EXPECT_EQ(r1.lane<3>(), 3); - EXPECT_EQ(r1.lane<4>(), 3); - EXPECT_EQ(r1.lane<5>(), 3); - EXPECT_EQ(r1.lane<6>(), 3); - EXPECT_EQ(r1.lane<7>(), 3); + + alignas(32) int ra[8]; + store(r1, ra); + + EXPECT_EQ(ra[0], 3); + EXPECT_EQ(ra[1], 3); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 3); + EXPECT_EQ(ra[4], 3); + EXPECT_EQ(ra[5], 3); + EXPECT_EQ(ra[6], 3); + EXPECT_EQ(ra[7], 3); vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); vint8 r2 = hmax(a2); - EXPECT_EQ(r2.lane<0>(), 5); - EXPECT_EQ(r2.lane<1>(), 5); - EXPECT_EQ(r2.lane<2>(), 5); - EXPECT_EQ(r2.lane<3>(), 5); - EXPECT_EQ(r2.lane<4>(), 5); - EXPECT_EQ(r2.lane<5>(), 5); - EXPECT_EQ(r2.lane<6>(), 5); - EXPECT_EQ(r2.lane<7>(), 5); + + store(r2, ra); + + EXPECT_EQ(ra[0], 5); + EXPECT_EQ(ra[1], 5); + EXPECT_EQ(ra[2], 5); + EXPECT_EQ(ra[3], 5); + EXPECT_EQ(ra[4], 5); + EXPECT_EQ(ra[5], 5); + EXPECT_EQ(ra[6], 5); + EXPECT_EQ(ra[7], 5); } /** @brief Test vint8 storea. */ @@ -3276,17 +3548,19 @@ TEST(vint8, storea) /** @brief Test vint8 store. */ TEST(vint8, store) { - alignas(32) int out[9]; vint8 a(s32_data); - store(a, out + 1); - EXPECT_EQ(out[1], 0); - EXPECT_EQ(out[2], 1); - EXPECT_EQ(out[3], 2); - EXPECT_EQ(out[4], 3); - EXPECT_EQ(out[5], 4); - EXPECT_EQ(out[6], 5); - EXPECT_EQ(out[7], 6); - EXPECT_EQ(out[8], 7); + + alignas(32) int ra[9]; + store(a, ra + 1); + + EXPECT_EQ(ra[1], 0); + EXPECT_EQ(ra[2], 1); + EXPECT_EQ(ra[3], 2); + EXPECT_EQ(ra[4], 3); + EXPECT_EQ(ra[5], 4); + EXPECT_EQ(ra[6], 5); + EXPECT_EQ(ra[7], 6); + EXPECT_EQ(ra[8], 7); } /** @brief Test vint8 store_nbytes. 
*/ @@ -3370,14 +3644,18 @@ TEST(vint8, gatheri) { vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); vint8 r = gatheri(s32_data, indices); - EXPECT_EQ(r.lane<0>(), 0); - EXPECT_EQ(r.lane<1>(), 4); - EXPECT_EQ(r.lane<2>(), 3); - EXPECT_EQ(r.lane<3>(), 2); - EXPECT_EQ(r.lane<4>(), 7); - EXPECT_EQ(r.lane<5>(), 4); - EXPECT_EQ(r.lane<6>(), 3); - EXPECT_EQ(r.lane<7>(), 2); + + alignas(32) int ra[8]; + store(r, ra); + + EXPECT_EQ(ra[0], 0); + EXPECT_EQ(ra[1], 4); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 2); + EXPECT_EQ(ra[4], 7); + EXPECT_EQ(ra[5], 4); + EXPECT_EQ(ra[6], 3); + EXPECT_EQ(ra[7], 2); } /** @brief Test vint8 pack_low_bytes. */ @@ -3385,8 +3663,12 @@ TEST(vint8, pack_low_bytes) { vint8 a(1, 2, 3, 4, 2, 3, 4, 5); vint8 r = pack_low_bytes(a); - EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0)); - EXPECT_EQ(r.lane<1>(), (5 << 24) | (4 << 16) | (3 << 8) | (2 << 0)); + + alignas(32) int ra[8]; + store(r, ra); + + EXPECT_EQ(ra[0], (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0)); + EXPECT_EQ(ra[1], (5 << 24) | (4 << 16) | (3 << 8) | (2 << 0)); } /** @brief Test vint8 select. 
*/ @@ -3400,24 +3682,31 @@ TEST(vint8, select) vint8 b(4, 2, 2, 4, 4, 2, 2, 4); vint8 r1 = select(a, b, cond); - EXPECT_EQ(r1.lane<0>(), 4); - EXPECT_EQ(r1.lane<1>(), 3); - EXPECT_EQ(r1.lane<2>(), 2); - EXPECT_EQ(r1.lane<3>(), 1); - EXPECT_EQ(r1.lane<4>(), 4); - EXPECT_EQ(r1.lane<5>(), 3); - EXPECT_EQ(r1.lane<6>(), 2); - EXPECT_EQ(r1.lane<7>(), 1); + + alignas(32) int ra[8]; + store(r1, ra); + + EXPECT_EQ(ra[0], 4); + EXPECT_EQ(ra[1], 3); + EXPECT_EQ(ra[2], 2); + EXPECT_EQ(ra[3], 1); + EXPECT_EQ(ra[4], 4); + EXPECT_EQ(ra[5], 3); + EXPECT_EQ(ra[6], 2); + EXPECT_EQ(ra[7], 1); vint8 r2 = select(b, a, cond); - EXPECT_EQ(r2.lane<0>(), 1); - EXPECT_EQ(r2.lane<1>(), 2); - EXPECT_EQ(r2.lane<2>(), 3); - EXPECT_EQ(r2.lane<3>(), 4); - EXPECT_EQ(r2.lane<4>(), 1); - EXPECT_EQ(r2.lane<5>(), 2); - EXPECT_EQ(r2.lane<6>(), 3); - EXPECT_EQ(r2.lane<7>(), 4); + + store(r2, ra); + + EXPECT_EQ(ra[0], 1); + EXPECT_EQ(ra[1], 2); + EXPECT_EQ(ra[2], 3); + EXPECT_EQ(ra[3], 4); + EXPECT_EQ(ra[4], 1); + EXPECT_EQ(ra[5], 2); + EXPECT_EQ(ra[6], 3); + EXPECT_EQ(ra[7], 4); } // vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3507,14 +3796,17 @@ TEST(vint8, vtable_8bt_32bi_32entry) vint8 result = vtable_8bt_32bi(table0p, table1p, index); - EXPECT_EQ(result.lane<0>(), 3); - EXPECT_EQ(result.lane<1>(), 4); - EXPECT_EQ(result.lane<2>(), 7); - EXPECT_EQ(result.lane<3>(), 12); - EXPECT_EQ(result.lane<4>(), 19); - EXPECT_EQ(result.lane<5>(), 23); - EXPECT_EQ(result.lane<6>(), 20); - EXPECT_EQ(result.lane<7>(), 28); + alignas(32) int ra[8]; + store(result, ra); + + EXPECT_EQ(ra[0], 3); + EXPECT_EQ(ra[1], 4); + EXPECT_EQ(ra[2], 7); + EXPECT_EQ(ra[3], 12); + EXPECT_EQ(ra[4], 19); + EXPECT_EQ(ra[5], 23); + EXPECT_EQ(ra[6], 20); + EXPECT_EQ(ra[7], 28); } /** @brief Test vint4 table permute. 
*/ @@ -3532,14 +3824,17 @@ TEST(vint8, vtable_8bt_32bi_64entry) vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index); - EXPECT_EQ(result.lane<0>(), 3); - EXPECT_EQ(result.lane<1>(), 4); - EXPECT_EQ(result.lane<2>(), 7); - EXPECT_EQ(result.lane<3>(), 12); - EXPECT_EQ(result.lane<4>(), 19); - EXPECT_EQ(result.lane<5>(), 23); - EXPECT_EQ(result.lane<6>(), 37); - EXPECT_EQ(result.lane<7>(), 60); + alignas(32) int ra[8]; + store(result, ra); + + EXPECT_EQ(ra[0], 3); + EXPECT_EQ(ra[1], 4); + EXPECT_EQ(ra[2], 7); + EXPECT_EQ(ra[3], 12); + EXPECT_EQ(ra[4], 19); + EXPECT_EQ(ra[5], 23); + EXPECT_EQ(ra[6], 37); + EXPECT_EQ(ra[7], 60); } #endif diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp index 7463f7e2..902a3f3e 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -110,7 +110,7 @@ void unpack_weights( { vint summed_value(8); vint weight_count(di.texel_weight_count + i); - int max_weight_count = hmax(weight_count).lane<0>(); + int max_weight_count = hmax_s(weight_count); promise(max_weight_count > 0); for (int j = 0; j < max_weight_count; j++) @@ -145,7 +145,7 @@ void unpack_weights( vint sum_plane2(8); vint weight_count(di.texel_weight_count + i); - int max_weight_count = hmax(weight_count).lane<0>(); + int max_weight_count = hmax_s(weight_count); promise(max_weight_count > 0); for (int j = 0; j < max_weight_count; j++) diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 051782fd..9343a0ab 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -889,7 +889,7 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax(weight_texel_count).lane<0>(); + unsigned int max_texel_count = 
hmax_s(weight_texel_count); promise(max_texel_count > 0); for (unsigned int j = 0; j < max_texel_count; j++) @@ -947,7 +947,7 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax(weight_texel_count).lane<0>(); + unsigned int max_texel_count = hmax_s(weight_texel_count); promise(max_texel_count > 0); for (unsigned int j = 0; j < max_texel_count; j++) diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index f25140d4..51354d82 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -1307,7 +1307,7 @@ unsigned int compute_ideal_endpoint_formats( vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); vbest_error_index = hmin(vbest_error_index); - int best_error_index = vbest_error_index.lane<0>(); + int best_error_index = vbest_error_index.lane0(); best_error_weights[i] = best_error_index; diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 966875a3..cedbdedc 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -95,17 +95,11 @@ struct vfloat8 } /** - * @brief Get the scalar value of a single lane. + * @brief Get the scalar from the bottom lane. */ - template <int l> ASTCENC_SIMD_INLINE float lane() const + ASTCENC_SIMD_INLINE float lane0() const { - #if !defined(__clang__) && defined(_MSC_VER) - return m.m256_f32[l]; - #else - union { __m256 m; float f[8]; } cvt; - cvt.m = m; - return cvt.f[l]; - #endif + return _mm256_cvtss_f32(m); } /** @@ -211,17 +205,11 @@ struct vint8 } /** - * @brief Get the scalar from a single lane. + * @brief Get the scalar from the bottom lane. 
*/ - template <int l> ASTCENC_SIMD_INLINE int lane() const + ASTCENC_SIMD_INLINE int lane0() const { - #if !defined(__clang__) && defined(_MSC_VER) - return m.m256i_i32[l]; - #else - union { __m256i m; int f[8]; } cvt; - cvt.m = m; - return cvt.f[l]; - #endif + return _mm256_cvtsi256_si32(m); } /** @@ -543,6 +531,14 @@ ASTCENC_SIMD_INLINE vint8 hmax(vint8 a) return vmax; } +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE int hmax_s(vint8 a) +{ + return hmax(a).lane0(); +} + /** * @brief Store a vector to a 16B aligned memory address. */ @@ -857,7 +853,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmin(vfloat8 a) */ ASTCENC_SIMD_INLINE float hmin_s(vfloat8 a) { - return hmin(a).lane<0>(); + return hmin(a).lane0(); } /** @@ -887,7 +883,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmax(vfloat8 a) */ ASTCENC_SIMD_INLINE float hmax_s(vfloat8 a) { - return hmax(a).lane<0>(); + return hmax(a).lane0(); } /** From d34147f76fc2d5fc19fb86f7fbca0155a148cccd Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 7 Jun 2024 21:39:16 +0100 Subject: [PATCH 3/8] Remove unused clampz --- Source/UnitTest/test_simd.cpp | 51 -------------------- Source/astcenc_pick_best_endpoint_format.cpp | 2 +- Source/astcenc_vecmathlib_avx2_8.h | 13 ----- Source/astcenc_vecmathlib_common_4.h | 12 ----- 4 files changed, 1 insertion(+), 77 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index 60b8e40f..faaa39f9 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -714,24 +714,6 @@ TEST(vfloat4, clamp) EXPECT_EQ(r2.lane<3>(), 3.0f); } -/** @brief Test vfloat4 clampz. 
*/ -TEST(vfloat4, clampz) -{ - vfloat4 a1(-1.0f, 0.0f, 0.1f, 4.0f); - vfloat4 r1 = clampz(3.0f, a1); - EXPECT_EQ(r1.lane<0>(), 0.0f); - EXPECT_EQ(r1.lane<1>(), 0.0f); - EXPECT_EQ(r1.lane<2>(), 0.1f); - EXPECT_EQ(r1.lane<3>(), 3.0f); - - vfloat4 a2(-1.0f, 0.0f, qnan, 4.0f); - vfloat4 r2 = clampz(3.0f, a2); - EXPECT_EQ(r2.lane<0>(), 0.0f); - EXPECT_EQ(r2.lane<1>(), 0.0f); - EXPECT_EQ(r2.lane<2>(), 0.0f); - EXPECT_EQ(r2.lane<3>(), 3.0f); -} - /** @brief Test vfloat4 clampz. */ TEST(vfloat4, clampzo) { @@ -2505,39 +2487,6 @@ TEST(vfloat8, clamp) EXPECT_EQ(ra[7], 3.0f); } -/** @brief Test vfloat8 clampz. */ -TEST(vfloat8, clampz) -{ - vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); - vfloat8 r1 = clampz(3.0f, a1); - - alignas(32) float ra[8]; - storea(r1, ra); - - EXPECT_EQ(ra[0], 0.0f); - EXPECT_EQ(ra[1], 0.0f); - EXPECT_EQ(ra[2], 0.1f); - EXPECT_EQ(ra[3], 3.0f); - EXPECT_EQ(ra[4], 0.0f); - EXPECT_EQ(ra[5], 0.0f); - EXPECT_EQ(ra[6], 0.1f); - EXPECT_EQ(ra[7], 3.0f); - - vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); - vfloat8 r2 = clampz(3.0f, a2); - - storea(r2, ra); - - EXPECT_EQ(ra[0], 0.0f); - EXPECT_EQ(ra[1], 0.0f); - EXPECT_EQ(ra[2], 0.0f); - EXPECT_EQ(ra[3], 3.0f); - EXPECT_EQ(ra[4], 0.0f); - EXPECT_EQ(ra[5], 0.0f); - EXPECT_EQ(ra[6], 0.0f); - EXPECT_EQ(ra[7], 3.0f); -} - /** @brief Test vfloat8 clampz. */ TEST(vfloat8, clampzo) { diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index 51354d82..9000dcbb 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2022 Arm Limited +// Copyright 2011-2024 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index cedbdedc..8569f871 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -782,19 +782,6 @@ ASTCENC_SIMD_INLINE vfloat8 clamp(float min, float max, vfloat8 a) return a; } -/** - * @brief Return a clamped value between 0.0f and max. - * - * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will - * be returned for that lane. - */ -ASTCENC_SIMD_INLINE vfloat8 clampz(float max, vfloat8 a) -{ - a.m = _mm256_max_ps(a.m, _mm256_setzero_ps()); - a.m = _mm256_min_ps(a.m, _mm256_set1_ps(max)); - return a; -} - /** * @brief Return a clamped value between 0.0f and 1.0f. * diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h index 1e04367c..ebfee2d3 100644 --- a/Source/astcenc_vecmathlib_common_4.h +++ b/Source/astcenc_vecmathlib_common_4.h @@ -222,18 +222,6 @@ ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a) return min(max(a, minv), maxv); } -/** - * @brief Return the clamped value between 0.0f and max. - * - * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will - * be returned for that lane. - */ -ASTCENC_SIMD_INLINE vfloat4 clampz(float maxv, vfloat4 a) -{ - // Do not reorder - second operand will return if either is NaN - return min(max(a, vfloat4::zero()), maxv); -} - /** * @brief Return the clamped value between 0.0f and 1.0f. 
* From eb21c08e155b4efd227954a329aebeba36b30822 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 7 Jun 2024 22:03:21 +0100 Subject: [PATCH 4/8] Use scalar functions where needed --- Source/astcenc_pick_best_endpoint_format.cpp | 4 +-- Source/astcenc_vecmathlib_avx2_8.h | 30 +++++++------------- Source/astcenc_vecmathlib_common_4.h | 16 +++++++++++ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index 9000dcbb..6e41005b 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -1306,8 +1306,8 @@ unsigned int compute_ideal_endpoint_formats( // Pick best mode from the SIMD result, using lowest matching index to ensure invariance vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); - vbest_error_index = hmin(vbest_error_index); - int best_error_index = vbest_error_index.lane0(); + + int best_error_index = hmin_s(vbest_error_index); best_error_weights[i] = best_error_index; diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 8569f871..422c3a14 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -94,14 +94,6 @@ struct vfloat8 m = a; } - /** - * @brief Get the scalar from the bottom lane. - */ - ASTCENC_SIMD_INLINE float lane0() const - { - return _mm256_cvtss_f32(m); - } - /** * @brief Factory that returns a vector of zeros. */ @@ -204,14 +196,6 @@ struct vint8 m = a; } - /** - * @brief Get the scalar from the bottom lane. - */ - ASTCENC_SIMD_INLINE int lane0() const - { - return _mm256_cvtsi256_si32(m); - } - /** * @brief Factory that returns a vector of zeros. */ @@ -516,6 +500,14 @@ ASTCENC_SIMD_INLINE vint8 hmin(vint8 a) return vmin; } +/** + * @brief Return the horizontal minimum of a vector. 
+ */ +ASTCENC_SIMD_INLINE int hmin_s(vint8 a) +{ + return _mm256_cvtsi256_si32(hmin(a).m); +} + /** * @brief Return the horizontal maximum of a vector. */ @@ -536,7 +528,7 @@ ASTCENC_SIMD_INLINE vint8 hmax(vint8 a) */ ASTCENC_SIMD_INLINE int hmax_s(vint8 a) { - return hmax(a).lane0(); + return _mm256_cvtsi256_si32(hmax(a).m); } /** @@ -840,7 +832,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmin(vfloat8 a) */ ASTCENC_SIMD_INLINE float hmin_s(vfloat8 a) { - return hmin(a).lane0(); + return _mm256_cvtss_f32(hmin(a).m); } /** @@ -870,7 +862,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmax(vfloat8 a) */ ASTCENC_SIMD_INLINE float hmax_s(vfloat8 a) { - return hmax(a).lane0(); + return _mm256_cvtss_f32(hmax(a).m); } /** diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h index ebfee2d3..5e9b33d3 100644 --- a/Source/astcenc_vecmathlib_common_4.h +++ b/Source/astcenc_vecmathlib_common_4.h @@ -129,6 +129,22 @@ ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a) return a.lane<0>() + a.lane<1>() + a.lane<2>(); } +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE int hmin_s(vint4 a) +{ + return hmin(a).lane<0>(); +} + +/** + * @brief Return the horizontal maximum of a vector. 
+ */ +ASTCENC_SIMD_INLINE int hmax_s(vint4 a) +{ + return hmax(a).lane<0>(); +} + // ============================================================================ // vfloat4 operators and functions // ============================================================================ From 2758f64a6d475c8e43025e9bb7f1dd6f0048968f Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 1 Jul 2024 12:49:01 +0100 Subject: [PATCH 5/8] Remove vfloat*::lane_id() functions --- Source/UnitTest/test_simd.cpp | 28 ---------------------------- Source/astcenc_vecmathlib_avx2_8.h | 8 -------- Source/astcenc_vecmathlib_neon_4.h | 9 --------- Source/astcenc_vecmathlib_none_4.h | 8 -------- Source/astcenc_vecmathlib_sse_4.h | 8 -------- Source/astcenc_weight_align.cpp | 2 +- 6 files changed, 1 insertion(+), 62 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index faaa39f9..25215b59 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -372,16 +372,6 @@ TEST(vfloat4, Loada) EXPECT_EQ(a.lane<3>(), 3.0f); } -/** @brief Test vfloat4 lane_id. */ -TEST(vfloat4, LaneID) -{ - vfloat4 a = vfloat4::lane_id(); - EXPECT_EQ(a.lane<0>(), 0.0f); - EXPECT_EQ(a.lane<1>(), 1.0f); - EXPECT_EQ(a.lane<2>(), 2.0f); - EXPECT_EQ(a.lane<3>(), 3.0f); -} - /** @brief Test vfloat4 swz to float4. */ TEST(vfloat4, swz4) { @@ -2138,24 +2128,6 @@ TEST(vfloat8, Loada) EXPECT_EQ(ra[7], 7.0f); } -/** @brief Test vfloat8 lane_id. */ -TEST(vfloat8, LaneID) -{ - vfloat8 a = vfloat8::lane_id(); - - alignas(32) float ra[8]; - storea(a, ra); - - EXPECT_EQ(ra[0], 0.0f); - EXPECT_EQ(ra[1], 1.0f); - EXPECT_EQ(ra[2], 2.0f); - EXPECT_EQ(ra[3], 3.0f); - EXPECT_EQ(ra[4], 4.0f); - EXPECT_EQ(ra[5], 5.0f); - EXPECT_EQ(ra[6], 6.0f); - EXPECT_EQ(ra[7], 7.0f); -} - /** @brief Test vfloat8 add. 
*/ TEST(vfloat8, vadd) { diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 422c3a14..ee38d363 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -118,14 +118,6 @@ struct vfloat8 return vfloat8(_mm256_load_ps(p)); } - /** - * @brief Factory that returns a vector containing the lane IDs. - */ - static ASTCENC_SIMD_INLINE vfloat8 lane_id() - { - return vfloat8(_mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0)); - } - /** * @brief The vector ... */ diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index 42545e75..787d8f61 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -134,15 +134,6 @@ struct vfloat4 return vfloat4(vld1q_f32(p)); } - /** - * @brief Factory that returns a vector containing the lane IDs. - */ - static ASTCENC_SIMD_INLINE vfloat4 lane_id() - { - alignas(16) float data[4] { 0.0f, 1.0f, 2.0f, 3.0f }; - return vfloat4(vld1q_f32(data)); - } - /** * @brief Return a swizzled float 2. */ diff --git a/Source/astcenc_vecmathlib_none_4.h b/Source/astcenc_vecmathlib_none_4.h index be7348ef..6d0fcf21 100644 --- a/Source/astcenc_vecmathlib_none_4.h +++ b/Source/astcenc_vecmathlib_none_4.h @@ -139,14 +139,6 @@ struct vfloat4 return vfloat4(p); } - /** - * @brief Factory that returns a vector containing the lane IDs. - */ - static ASTCENC_SIMD_INLINE vfloat4 lane_id() - { - return vfloat4(0.0f, 1.0f, 2.0f, 3.0f); - } - /** * @brief Return a swizzled float 2. */ diff --git a/Source/astcenc_vecmathlib_sse_4.h b/Source/astcenc_vecmathlib_sse_4.h index d5058377..d56511dc 100644 --- a/Source/astcenc_vecmathlib_sse_4.h +++ b/Source/astcenc_vecmathlib_sse_4.h @@ -142,14 +142,6 @@ struct vfloat4 return vfloat4(_mm_load_ps(p)); } - /** - * @brief Factory that returns a vector containing the lane IDs. 
- */ - static ASTCENC_SIMD_INLINE vfloat4 lane_id() - { - return vfloat4(_mm_set_ps(3, 2, 1, 0)); - } - /** * @brief Return a swizzled float 2. */ diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp index 4e993e73..7233915a 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -164,7 +164,7 @@ static void compute_lowest_and_highest_weight( promise(weight_count > 0); promise(max_angular_steps > 0); - vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); + vfloat rcp_stepsize = int_to_float(vint::lane_id()) + vfloat(1.0f); // Arrays are ANGULAR_STEPS long, so always safe to run full vectors for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) From be619f80b320537154f856200b04c665b9fb8463 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 1 Jul 2024 20:14:08 +0100 Subject: [PATCH 6/8] Remove 8-wide literal loads --- Source/UnitTest/test_simd.cpp | 342 ++++++++++++++++------------- Source/astcenc_vecmathlib_avx2_8.h | 24 -- 2 files changed, 188 insertions(+), 178 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index 25215b59..5c7b0ed3 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -30,6 +30,40 @@ namespace astcenc { // Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +#if ASTCENC_SIMD_WIDTH == 8 +/** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ +vfloat8 vfloat8_lit( + float a, float b, float c, float d, + float e, float f, float g, float h +) { + alignas(32) float data[8] { + a, b, c, d, e, f, g, h + }; + + return vfloat8(data); +} + +/** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. 
+ */ +vint8 vint8_lit( + int a, int b, int c, int d, + int e, int f, int g, int h +) { + alignas(32) int data[8] { + a, b, c, d, e, f, g, h + }; + + return vint8(data); +} + +#endif static unsigned int round_down(unsigned int x) { @@ -157,9 +191,9 @@ TEST(vfloat, Atan2) /** @brief Test VLA change_sign. */ TEST(vfloat, ChangeSign) { - vfloat a(-1.0f, 1.0f, -3.12f, 3.12f); - vfloat b(-1.0f, -1.0f, 3.12f, 3.12f); - vfloat r = change_sign(a, b); + vfloat4 a(-1.0f, 1.0f, -3.12f, 3.12f); + vfloat4 b(-1.0f, -1.0f, 3.12f, 3.12f); + vfloat4 r = change_sign(a, b); EXPECT_EQ(r.lane<0>(), 1.0f); EXPECT_EQ(r.lane<1>(), -1.0f); EXPECT_EQ(r.lane<2>(), -3.12f); @@ -169,8 +203,8 @@ TEST(vfloat, ChangeSign) /** @brief Test VLA atan. */ TEST(vfloat, Atan) { - vfloat a(-0.15f, 0.0f, 0.9f, 2.1f); - vfloat r = atan(a); + vfloat4 a(-0.15f, 0.0f, 0.9f, 2.1f); + vfloat4 r = atan(a); EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f); EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f); EXPECT_NEAR(r.lane<2>(), 0.733616f, 0.005f); @@ -180,9 +214,9 @@ TEST(vfloat, Atan) /** @brief Test VLA atan2. */ TEST(vfloat, Atan2) { - vfloat a(-0.15f, 0.0f, 0.9f, 2.1f); - vfloat b(1.15f, -3.0f, -0.9f, 1.1f); - vfloat r = atan2(a, b); + vfloat4 a(-0.15f, 0.0f, 0.9f, 2.1f); + vfloat4 b(1.15f, -3.0f, -0.9f, 1.1f); + vfloat4 r = atan2(a, b); EXPECT_NEAR(r.lane<0>(), -0.129816f, 0.005f); EXPECT_NEAR(r.lane<1>(), 3.141592f, 0.005f); EXPECT_NEAR(r.lane<2>(), 2.360342f, 0.005f); @@ -196,9 +230,9 @@ TEST(vfloat, Atan2) /** @brief Test VLA change_sign. 
*/ TEST(vfloat, ChangeSign) { - vfloat a(-1.0f, 1.0f, -3.12f, 3.12f, -1.0f, 1.0f, -3.12f, 3.12f); - vfloat b(-1.0f, -1.0f, 3.12f, 3.12f, -1.0f, -1.0f, 3.12f, 3.12f); - vfloat r = change_sign(a, b); + vfloat8 a = vfloat8_lit(-1.0f, 1.0f, -3.12f, 3.12f, -1.0f, 1.0f, -3.12f, 3.12f); + vfloat8 b = vfloat8_lit(-1.0f, -1.0f, 3.12f, 3.12f, -1.0f, -1.0f, 3.12f, 3.12f); + vfloat8 r = change_sign(a, b); alignas(32) float ra[8]; storea(r, ra); @@ -216,8 +250,8 @@ TEST(vfloat, ChangeSign) /** @brief Test VLA atan. */ TEST(vfloat, Atan) { - vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); - vfloat r = atan(a); + vfloat8 a = vfloat8_lit(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); + vfloat8 r = atan(a); alignas(32) float ra[8]; storea(r, ra); @@ -235,9 +269,9 @@ TEST(vfloat, Atan) /** @brief Test VLA atan2. */ TEST(vfloat, Atan2) { - vfloat a(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); - vfloat b(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f); - vfloat r = atan2(a, b); + vfloat8 a = vfloat8_lit(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f); + vfloat8 b = vfloat8_lit(1.15f, -3.0f, -0.9f, 1.1f, 1.15f, -3.0f, -0.9f, 1.1f); + vfloat8 r = atan2(a, b); alignas(32) float ra[8]; storea(r, ra); @@ -2039,7 +2073,7 @@ TEST(vfloat8, ScalarDupLoad) /** @brief Test scalar vfloat8 load. */ TEST(vfloat8, ScalarLoad) { - vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); + vfloat8 a = vfloat8_lit(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); alignas(32) float ra[8]; storea(a, ra); @@ -2057,7 +2091,7 @@ TEST(vfloat8, ScalarLoad) /** @brief Test copy vfloat8 load. */ TEST(vfloat8, CopyLoad) { - vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); + vfloat8 s = vfloat8_lit(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); vfloat8 a(s.m); alignas(32) float ra[8]; @@ -2131,8 +2165,8 @@ TEST(vfloat8, Loada) /** @brief Test vfloat8 add. 
*/ TEST(vfloat8, vadd) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a + b; alignas(32) float ra[8]; @@ -2151,8 +2185,8 @@ TEST(vfloat8, vadd) /** @brief Test vfloat8 sub. */ TEST(vfloat8, vsub) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a - b; alignas(32) float ra[8]; @@ -2171,8 +2205,8 @@ TEST(vfloat8, vsub) /** @brief Test vfloat8 mul. */ TEST(vfloat8, vmul) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a * b; alignas(32) float ra[8]; @@ -2191,7 +2225,7 @@ TEST(vfloat8, vmul) /** @brief Test vfloat8 mul. */ TEST(vfloat8, vsmul) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); float b = 3.14f; a = a * b; @@ -2212,7 +2246,7 @@ TEST(vfloat8, vsmul) TEST(vfloat8, svmul) { float a = 3.14f; - vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); b = a * b; alignas(32) float ra[8]; @@ -2231,8 +2265,8 @@ TEST(vfloat8, svmul) /** @brief Test vfloat8 div. 
*/ TEST(vfloat8, vdiv) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); a = a / b; alignas(32) float ra[8]; @@ -2251,7 +2285,7 @@ TEST(vfloat8, vdiv) /** @brief Test vfloat8 div. */ TEST(vfloat8, vsdiv) { - vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); float b = 3.14f; vfloat8 r = a / b; @@ -2272,7 +2306,7 @@ TEST(vfloat8, vsdiv) TEST(vfloat8, svdiv) { float a = 3.14f; - vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 b = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vfloat8 r = a / b; alignas(32) float ra[8]; @@ -2291,28 +2325,28 @@ TEST(vfloat8, svdiv) /** @brief Test vfloat8 ceq. */ TEST(vfloat8, ceq) { - vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a1 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b1 = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vmask8 r1 = a1 == b1; EXPECT_EQ(0u, mask(r1)); EXPECT_EQ(false, any(r1)); EXPECT_EQ(false, all(r1)); - vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a2 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b2 = vfloat8_lit(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vmask8 r2 = a2 == b2; EXPECT_EQ(0x1u, mask(r2)); EXPECT_EQ(true, any(r2)); EXPECT_EQ(false, all(r2)); - vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a3 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b3 = vfloat8_lit(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); 
vmask8 r3 = a3 == b3; EXPECT_EQ(0x5u, mask(r3)); EXPECT_EQ(true, any(r3)); EXPECT_EQ(false, all(r3)); - vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 a4 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vmask8 r4 = a4 == a4; EXPECT_EQ(0xFFu, mask(r4)); EXPECT_EQ(true, any(r4)); @@ -2322,28 +2356,28 @@ TEST(vfloat8, ceq) /** @brief Test vfloat8 cne. */ TEST(vfloat8, cne) { - vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a1 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b1 = vfloat8_lit(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vmask8 r1 = a1 != b1; EXPECT_EQ(0xFFu, mask(r1)); EXPECT_EQ(true, any(r1)); EXPECT_EQ(true, all(r1)); - vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a2 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b2 = vfloat8_lit(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vmask8 r2 = a2 != b2; EXPECT_EQ(0xFEu, mask(r2)); EXPECT_EQ(true, any(r2)); EXPECT_EQ(false, all(r2)); - vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); - vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); + vfloat8 a3 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 b3 = vfloat8_lit(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); vmask8 r3 = a3 != b3; EXPECT_EQ(0xFAu, mask(r3)); EXPECT_EQ(true, any(r3)); EXPECT_EQ(false, all(r3)); - vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + vfloat8 a4 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); vmask8 r4 = a4 != a4; EXPECT_EQ(0u, mask(r4)); EXPECT_EQ(false, any(r4)); @@ -2353,8 +2387,8 @@ TEST(vfloat8, cne) /** @brief Test vfloat8 clt. 
*/ TEST(vfloat8, clt) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vmask8 r = a < b; EXPECT_EQ(0xAAu, mask(r)); } @@ -2362,8 +2396,8 @@ TEST(vfloat8, clt) /** @brief Test vfloat8 cle. */ TEST(vfloat8, cle) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vmask8 r = a <= b; EXPECT_EQ(0xEEu, mask(r)); } @@ -2371,8 +2405,8 @@ TEST(vfloat8, cle) /** @brief Test vfloat8 cgt. */ TEST(vfloat8, cgt) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vmask8 r = a > b; EXPECT_EQ(0x11u, mask(r)); } @@ -2380,8 +2414,8 @@ TEST(vfloat8, cgt) /** @brief Test vfloat8 cge. */ TEST(vfloat8, cge) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vmask8 r = a >= b; EXPECT_EQ(0x55u, mask(r)); } @@ -2389,8 +2423,8 @@ TEST(vfloat8, cge) /** @brief Test vfloat8 min. 
*/ TEST(vfloat8, min) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vfloat8 r = min(a, b); alignas(32) float ra[8]; @@ -2409,8 +2443,8 @@ TEST(vfloat8, min) /** @brief Test vfloat8 max. */ TEST(vfloat8, max) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 b = vfloat8_lit(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f); vfloat8 r = max(a, b); alignas(32) float ra[8]; @@ -2429,7 +2463,7 @@ TEST(vfloat8, max) /** @brief Test vfloat8 clamp. */ TEST(vfloat8, clamp) { - vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 r1 = clamp(2.1f, 3.0f, a1); alignas(32) float ra[8]; @@ -2444,7 +2478,7 @@ TEST(vfloat8, clamp) EXPECT_EQ(ra[6], 3.0f); EXPECT_EQ(ra[7], 3.0f); - vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f); + vfloat8 a2 = vfloat8_lit(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f); vfloat8 r2 = clamp(2.1f, 3.0f, a2); storea(r2, ra); @@ -2462,7 +2496,7 @@ TEST(vfloat8, clamp) /** @brief Test vfloat8 clampz. */ TEST(vfloat8, clampzo) { - vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); + vfloat8 a1 = vfloat8_lit(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); vfloat8 r1 = clampzo(a1); alignas(32) float ra[8]; @@ -2477,7 +2511,7 @@ TEST(vfloat8, clampzo) EXPECT_EQ(ra[6], 0.1f); EXPECT_EQ(ra[7], 1.0f); - vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); + vfloat8 a2 = vfloat8_lit(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f); vfloat8 r2 = clampzo(a2); storea(r2, ra); @@ -2495,7 +2529,7 @@ TEST(vfloat8, clampzo) /** @brief Test vfloat8 abs. 
*/ TEST(vfloat8, abs) { - vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); + vfloat8 a = vfloat8_lit(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f); vfloat8 r = abs(a); alignas(32) float ra[8]; @@ -2514,7 +2548,7 @@ TEST(vfloat8, abs) /** @brief Test vfloat8 round. */ TEST(vfloat8, round) { - vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r = round(a); alignas(32) float ra[8]; @@ -2533,7 +2567,7 @@ TEST(vfloat8, round) /** @brief Test vfloat8 hmin. */ TEST(vfloat8, hmin) { - vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r1 = hmin(a1); alignas(32) float ra[8]; @@ -2548,7 +2582,7 @@ TEST(vfloat8, hmin) EXPECT_EQ(ra[6], 1.1f); EXPECT_EQ(ra[7], 1.1f); - vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); + vfloat8 a2 = vfloat8_lit(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); vfloat8 r2 = hmin(a2); storea(r2, ra); @@ -2566,11 +2600,11 @@ TEST(vfloat8, hmin) /** @brief Test vfloat8 hmin_s. */ TEST(vfloat8, hmin_s) { - vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); float r1 = hmin_s(a1); EXPECT_EQ(r1, 1.1f); - vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); + vfloat8 a2 = vfloat8_lit(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); float r2 = hmin_s(a2); EXPECT_EQ(r2, 0.2f); } @@ -2578,7 +2612,7 @@ TEST(vfloat8, hmin_s) /** @brief Test vfloat8 hmax. 
*/ TEST(vfloat8, hmax) { - vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vfloat8 r1 = hmax(a1); alignas(32) float ra[8]; @@ -2593,7 +2627,7 @@ TEST(vfloat8, hmax) EXPECT_EQ(ra[6], 4.0f); EXPECT_EQ(ra[7], 4.0f); - vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); + vfloat8 a2 = vfloat8_lit(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); vfloat8 r2 = hmax(a2); storea(r2, ra); @@ -2611,11 +2645,11 @@ TEST(vfloat8, hmax) /** @brief Test vfloat8 hmax_s. */ TEST(vfloat8, hmax_s) { - vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); float r1 = hmax_s(a1); EXPECT_EQ(r1, 4.0f); - vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); + vfloat8 a2 = vfloat8_lit(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f); float r2 = hmax_s(a2); EXPECT_EQ(r2, 1.6f); } @@ -2623,7 +2657,7 @@ TEST(vfloat8, hmax_s) /** @brief Test vfloat8 hadd_s. */ TEST(vfloat8, hadd_s) { - vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a1 = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f; float r = hadd_s(a1); EXPECT_NEAR(r, sum, 0.005f); @@ -2632,7 +2666,7 @@ TEST(vfloat8, hadd_s) /** @brief Test vfloat8 sqrt. */ TEST(vfloat8, sqrt) { - vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + vfloat8 a = vfloat8_lit(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); vfloat8 r = sqrt(a); alignas(32) float ra[8]; @@ -2651,12 +2685,12 @@ TEST(vfloat8, sqrt) /** @brief Test vfloat8 select. 
*/ TEST(vfloat8, select) { - vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f); - vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f); + vfloat8 m1 = vfloat8_lit(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f); + vfloat8 m2 = vfloat8_lit(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f); vmask8 cond = m1 == m2; - vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0); - vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0); + vfloat8 a = vfloat8_lit(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f); + vfloat8 b = vfloat8_lit(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f); // Select in one direction vfloat8 r1 = select(a, b, cond); @@ -2692,11 +2726,11 @@ TEST(vfloat8, select) TEST(vfloat8, select_msb) { int msb_set = static_cast<int>(0x80000000); - vint8 msb(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0); + vint8 msb = vint8_lit(msb_set, 0, msb_set, 0, msb_set, 0, msb_set, 0); vmask8 cond(msb.m); - vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f); - vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f); + vfloat8 a = vfloat8_lit(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f); + vfloat8 b = vfloat8_lit(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f); // Select in one direction vfloat8 r1 = select(a, b, cond); @@ -2731,7 +2765,7 @@ TEST(vfloat8, select_msb) /** @brief Test vfloat8 gatherf. */ TEST(vfloat8, gatherf) { - vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); + vint8 indices = vint8_lit(0, 4, 3, 2, 7, 4, 3, 2); vfloat8 r = gatherf(f32_data, indices); alignas(32) float ra[8]; @@ -2786,7 +2820,7 @@ TEST(vfloat8, storea) /** @brief Test vfloat8 float_to_int. */ TEST(vfloat8, float_to_int) { - vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); + vfloat8 a = vfloat8_lit(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f); vint8 r = float_to_int(a); alignas(32) int ra[8]; @@ -2861,7 +2895,7 @@ TEST(vint8, ScalarDupLoad) /** @brief Test scalar vint8 load. 
*/ TEST(vint8, ScalarLoad) { - vint8 a(11, 22, 33, 44, 55, 66, 77, 88); + vint8 a = vint8_lit(11, 22, 33, 44, 55, 66, 77, 88); alignas(32) int ra[8]; store(a, ra); @@ -2879,7 +2913,7 @@ TEST(vint8, ScalarLoad) /** @brief Test copy vint8 load. */ TEST(vint8, CopyLoad) { - vint8 s(11, 22, 33, 44, 55, 66, 77, 88); + vint8 s = vint8_lit(11, 22, 33, 44, 55, 66, 77, 88); vint8 a(s.m); alignas(32) int ra[8]; @@ -2971,8 +3005,8 @@ TEST(vint8, LaneID) /** @brief Test vint8 add. */ TEST(vint8, vadd) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(2, 3, 4, 5, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(2, 3, 4, 5, 2, 3, 4, 5); a = a + b; alignas(32) int ra[8]; @@ -2992,8 +3026,8 @@ TEST(vint8, vadd) /** @brief Test vint8 self-add. */ TEST(vint8, vselfadd1) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(2, 3, 4, 5, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(2, 3, 4, 5, 2, 3, 4, 5); a += b; alignas(32) int ra[8]; @@ -3012,8 +3046,8 @@ TEST(vint8, vselfadd1) /** @brief Test vint8 sub. */ TEST(vint8, vsub) { - vint8 a(1, 2, 4, 4, 1, 2, 4, 4); - vint8 b(2, 3, 3, 5, 2, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 4, 4, 1, 2, 4, 4); + vint8 b = vint8_lit(2, 3, 3, 5, 2, 3, 3, 5); a = a - b; alignas(32) int ra[8]; @@ -3032,8 +3066,8 @@ TEST(vint8, vsub) /** @brief Test vint8 mul. */ TEST(vint8, vmul) { - vint8 a(1, 2, 4, 4, 1, 2, 4, 4); - vint8 b(2, 3, 3, 5, 2, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 4, 4, 1, 2, 4, 4); + vint8 b = vint8_lit(2, 3, 3, 5, 2, 3, 3, 5); a = a * b; alignas(32) int ra[8]; @@ -3052,7 +3086,7 @@ TEST(vint8, vmul) /** @brief Test vint8 bitwise invert. */ TEST(vint8, bit_invert) { - vint8 a(-1, 0, 1, 2, -1, 0, 1, 2); + vint8 a = vint8_lit(-1, 0, 1, 2, -1, 0, 1, 2); a = ~a; alignas(32) int ra[8]; @@ -3071,8 +3105,8 @@ TEST(vint8, bit_invert) /** @brief Test vint8 bitwise or. 
*/ TEST(vint8, bit_vor) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(2, 3, 4, 5, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(2, 3, 4, 5, 2, 3, 4, 5); a = a | b; alignas(32) int ra[8]; @@ -3091,8 +3125,8 @@ TEST(vint8, bit_vor) /** @brief Test vint8 bitwise and. */ TEST(vint8, bit_vand) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(2, 3, 4, 5, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(2, 3, 4, 5, 2, 3, 4, 5); a = a & b; alignas(32) int ra[8]; @@ -3111,8 +3145,8 @@ TEST(vint8, bit_vand) /** @brief Test vint8 bitwise xor. */ TEST(vint8, bit_vxor) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(2, 3, 4, 5, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(2, 3, 4, 5, 2, 3, 4, 5); a = a ^ b; alignas(32) int ra[8]; @@ -3131,28 +3165,28 @@ TEST(vint8, bit_vxor) /** @brief Test vint8 ceq. */ TEST(vint8, ceq) { - vint8 a1(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b1(0, 1, 2, 3, 0, 1, 2, 3); + vint8 a1 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b1 = vint8_lit(0, 1, 2, 3, 0, 1, 2, 3); vmask8 r1 = a1 == b1; EXPECT_EQ(0u, mask(r1)); EXPECT_EQ(false, any(r1)); EXPECT_EQ(false, all(r1)); - vint8 a2(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b2(1, 0, 0, 0, 1, 0, 0, 0); + vint8 a2 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b2 = vint8_lit(1, 0, 0, 0, 1, 0, 0, 0); vmask8 r2 = a2 == b2; EXPECT_EQ(0x11u, mask(r2)); EXPECT_EQ(true, any(r2)); EXPECT_EQ(false, all(r2)); - vint8 a3(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b3(1, 0, 3, 0, 1, 0, 3, 0); + vint8 a3 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b3 = vint8_lit(1, 0, 3, 0, 1, 0, 3, 0); vmask8 r3 = a3 == b3; EXPECT_EQ(0x55u, mask(r3)); EXPECT_EQ(true, any(r3)); EXPECT_EQ(false, all(r3)); - vint8 a4(1, 2, 3, 4, 1, 2, 3, 4); + vint8 a4 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); vmask8 r4 = a4 == a4; EXPECT_EQ(0xFFu, mask(r4)); EXPECT_EQ(true, any(r4)); @@ -3162,28 +3196,28 @@ TEST(vint8, ceq) /** @brief Test vint8 cne. 
*/ TEST(vint8, cne) { - vint8 a1(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b1(0, 1, 2, 3, 0, 1, 2, 3); + vint8 a1 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b1 = vint8_lit(0, 1, 2, 3, 0, 1, 2, 3); vmask8 r1 = a1 != b1; EXPECT_EQ(0xFFu, mask(r1)); EXPECT_EQ(true, any(r1)); EXPECT_EQ(true, all(r1)); - vint8 a2(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b2(1, 0, 0, 0, 1, 0, 0, 0); + vint8 a2 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b2 = vint8_lit(1, 0, 0, 0, 1, 0, 0, 0); vmask8 r2 = a2 != b2; EXPECT_EQ(0xEEu, mask(r2)); EXPECT_EQ(true, any(r2)); EXPECT_EQ(false, all(r2)); - vint8 a3(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b3(1, 0, 3, 0, 1, 0, 3, 0); + vint8 a3 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b3 = vint8_lit(1, 0, 3, 0, 1, 0, 3, 0); vmask8 r3 = a3 != b3; EXPECT_EQ(0xAAu, mask(r3)); EXPECT_EQ(true, any(r3)); EXPECT_EQ(false, all(r3)); - vint8 a4(1, 2, 3, 4, 1, 2, 3, 4); + vint8 a4 = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); vmask8 r4 = a4 != a4; EXPECT_EQ(0u, mask(r4)); EXPECT_EQ(false, any(r4)); @@ -3193,8 +3227,8 @@ TEST(vint8, cne) /** @brief Test vint8 clt. */ TEST(vint8, clt) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(0, 3, 3, 5, 0, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(0, 3, 3, 5, 0, 3, 3, 5); vmask8 r = a < b; EXPECT_EQ(0xAAu, mask(r)); } @@ -3202,8 +3236,8 @@ TEST(vint8, clt) /** @brief Test vint8 cgt. */ TEST(vint8, cgt) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(0, 3, 3, 5, 0, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(0, 3, 3, 5, 0, 3, 3, 5); vmask8 r = a > b; EXPECT_EQ(0x11u, mask(r)); } @@ -3211,8 +3245,8 @@ TEST(vint8, cgt) /** @brief Test vint8 min. */ TEST(vint8, min) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(0, 3, 3, 5, 0, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(0, 3, 3, 5, 0, 3, 3, 5); vint8 r = min(a, b); alignas(32) int ra[8]; @@ -3231,8 +3265,8 @@ TEST(vint8, min) /** @brief Test vint8 max. 
*/ TEST(vint8, max) { - vint8 a(1, 2, 3, 4, 1, 2, 3, 4); - vint8 b(0, 3, 3, 5, 0, 3, 3, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 1, 2, 3, 4); + vint8 b = vint8_lit(0, 3, 3, 5, 0, 3, 3, 5); vint8 r = max(a, b); alignas(32) int ra[8]; @@ -3251,7 +3285,7 @@ TEST(vint8, max) /** @brief Test vint8 lsl. */ TEST(vint8, lsl) { - vint8 a(1, 2, 4, -4, 1, 2, 4, -4); + vint8 a = vint8_lit(1, 2, 4, -4, 1, 2, 4, -4); a = lsl<0>(a); alignas(32) int ra[8]; @@ -3296,7 +3330,7 @@ TEST(vint8, lsl) /** @brief Test vint8 lsr. */ TEST(vint8, lsr) { - vint8 a(1, 2, 4, -4, 1, 2, 4, -4); + vint8 a = vint8_lit(1, 2, 4, -4, 1, 2, 4, -4); a = lsr<0>(a); alignas(32) int ra[8]; @@ -3341,7 +3375,7 @@ TEST(vint8, lsr) /** @brief Test vint8 asr. */ TEST(vint8, asr) { - vint8 a(1, 2, 4, -4, 1, 2, 4, -4); + vint8 a = vint8_lit(1, 2, 4, -4, 1, 2, 4, -4); a = asr<0>(a); alignas(32) int ra[8]; @@ -3387,7 +3421,7 @@ TEST(vint8, asr) /** @brief Test vint8 hmin. */ TEST(vint8, hmin) { - vint8 a1(1, 2, 1, 2, 1, 2, 1, 2); + vint8 a1 = vint8_lit(1, 2, 1, 2, 1, 2, 1, 2); vint8 r1 = hmin(a1); alignas(32) int ra[8]; @@ -3402,7 +3436,7 @@ TEST(vint8, hmin) EXPECT_EQ(ra[6], 1); EXPECT_EQ(ra[7], 1); - vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); + vint8 a2 = vint8_lit(1, 2, -1, 5, 1, 2, -1, 5); vint8 r2 = hmin(a2); store(r2, ra); @@ -3420,7 +3454,7 @@ TEST(vint8, hmin) /** @brief Test vint8 hmax. 
*/ TEST(vint8, hmax) { - vint8 a1(1, 2, 1, 2, 1, 3, 1, 2); + vint8 a1 = vint8_lit(1, 2, 1, 2, 1, 3, 1, 2); vint8 r1 = hmax(a1); alignas(32) int ra[8]; @@ -3435,7 +3469,7 @@ TEST(vint8, hmax) EXPECT_EQ(ra[6], 3); EXPECT_EQ(ra[7], 3); - vint8 a2(1, 2, -1, 5, 1, 2, -1, 5); + vint8 a2 = vint8_lit(1, 2, -1, 5, 1, 2, -1, 5); vint8 r2 = hmax(a2); store(r2, ra); @@ -3488,7 +3522,7 @@ TEST(vint8, store) TEST(vint8, store_nbytes) { alignas(32) int out[2]; - vint8 a(42, 314, 75, 90, 42, 314, 75, 90); + vint8 a = vint8_lit(42, 314, 75, 90, 42, 314, 75, 90); store_nbytes(a, reinterpret_cast<uint8_t*>(&out)); EXPECT_EQ(out[0], 42); EXPECT_EQ(out[1], 314); @@ -3509,12 +3543,12 @@ TEST(vint8, store_lanes_masked) EXPECT_TRUE(all(result1v == expect1v)); // Store half - vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); + vmask8 mask2 = vint8_lit(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); vint8 data2 = vint8(2); store_lanes_masked(resulta, data2, mask2); vint8 result2v = vint8::load(resulta); - vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0); + vint8 expect2v = vint8_lit(2, 2, 2, 2, 0, 0, 0, 0); EXPECT_TRUE(all(result2v == expect2v)); // Store all @@ -3542,12 +3576,12 @@ TEST(vint8, store_lanes_masked_unaligned) EXPECT_TRUE(all(result1v == expect1v)); // Store half - vmask8 mask2 = vint8(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); + vmask8 mask2 = vint8_lit(1, 1, 1, 1, 0, 0, 0, 0) == vint8(1); vint8 data2 = vint8(2); store_lanes_masked(resulta + 1, data2, mask2); vint8 result2v = vint8::load(resulta + 1); - vint8 expect2v = vint8(2, 2, 2, 2, 0, 0, 0, 0); + vint8 expect2v = vint8_lit(2, 2, 2, 2, 0, 0, 0, 0); EXPECT_TRUE(all(result2v == expect2v)); // Store all @@ -3563,7 +3597,7 @@ TEST(vint8, store_lanes_masked_unaligned) /** @brief Test vint8 gatheri. 
*/ TEST(vint8, gatheri) { - vint8 indices(0, 4, 3, 2, 7, 4, 3, 2); + vint8 indices = vint8_lit(0, 4, 3, 2, 7, 4, 3, 2); vint8 r = gatheri(s32_data, indices); alignas(32) int ra[8]; @@ -3582,7 +3616,7 @@ TEST(vint8, gatheri) /** @brief Test vint8 pack_low_bytes. */ TEST(vint8, pack_low_bytes) { - vint8 a(1, 2, 3, 4, 2, 3, 4, 5); + vint8 a = vint8_lit(1, 2, 3, 4, 2, 3, 4, 5); vint8 r = pack_low_bytes(a); alignas(32) int ra[8]; @@ -3595,12 +3629,12 @@ TEST(vint8, pack_low_bytes) /** @brief Test vint8 select. */ TEST(vint8, select) { - vint8 m1(1, 1, 1, 1, 1, 1, 1, 1); - vint8 m2(1, 2, 1, 2, 1, 2, 1, 2); + vint8 m1 = vint8_lit(1, 1, 1, 1, 1, 1, 1, 1); + vint8 m2 = vint8_lit(1, 2, 1, 2, 1, 2, 1, 2); vmask8 cond = m1 == m2; - vint8 a(1, 3, 3, 1, 1, 3, 3, 1); - vint8 b(4, 2, 2, 4, 4, 2, 2, 4); + vint8 a = vint8_lit(1, 3, 3, 1, 1, 3, 3, 1); + vint8 b = vint8_lit(4, 2, 2, 4, 4, 2, 2, 4); vint8 r1 = select(a, b, cond); @@ -3652,12 +3686,12 @@ TEST(vmask8, scalar_literal_construct) /** @brief Test vmask8 or. */ TEST(vmask8, or) { - vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); - vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m1a = vfloat8_lit(0, 1, 0, 1, 0, 1, 0, 1); + vfloat8 m1b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m1 = m1a == m1b; - vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); - vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m2a = vfloat8_lit(1, 1, 0, 0, 1, 1, 0, 0); + vfloat8 m2b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m2 = m2a == m2b; vmask8 r = m1 | m2; @@ -3667,12 +3701,12 @@ TEST(vmask8, or) /** @brief Test vmask8 and. 
*/ TEST(vmask8, and) { - vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); - vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m1a = vfloat8_lit(0, 1, 0, 1, 0, 1, 0, 1); + vfloat8 m1b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m1 = m1a == m1b; - vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); - vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m2a = vfloat8_lit(1, 1, 0, 0, 1, 1, 0, 0); + vfloat8 m2b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m2 = m2a == m2b; vmask8 r = m1 & m2; @@ -3682,12 +3716,12 @@ TEST(vmask8, and) /** @brief Test vmask8 xor. */ TEST(vmask8, xor) { - vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); - vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m1a = vfloat8_lit(0, 1, 0, 1, 0, 1, 0, 1); + vfloat8 m1b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m1 = m1a == m1b; - vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0); - vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m2a = vfloat8_lit(1, 1, 0, 0, 1, 1, 0, 0); + vfloat8 m2b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m2 = m2a == m2b; vmask8 r = m1 ^ m2; @@ -3697,8 +3731,8 @@ TEST(vmask8, xor) /** @brief Test vmask8 not. 
*/ TEST(vmask8, not) { - vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1); - vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1); + vfloat8 m1a = vfloat8_lit(0, 1, 0, 1, 0, 1, 0, 1); + vfloat8 m1b = vfloat8_lit(1, 1, 1, 1, 1, 1, 1, 1); vmask8 m1 = m1a == m1b; vmask8 r = ~m1; EXPECT_EQ(mask(r), 0x55u); @@ -3713,7 +3747,7 @@ TEST(vint8, vtable_8bt_32bi_32entry) vint8 table0p, table1p; vtable_prepare(table0, table1, table0p, table1p); - vint8 index(0, 7, 4, 15, 16, 20, 23, 31); + vint8 index = vint8_lit(0, 7, 4, 15, 16, 20, 23, 31); vint8 result = vtable_8bt_32bi(table0p, table1p, index); @@ -3741,7 +3775,7 @@ TEST(vint8, vtable_8bt_32bi_64entry) vint8 table0p, table1p, table2p, table3p; vtable_prepare(table0, table1, table2, table3, table0p, table1p, table2p, table3p); - vint8 index(0, 7, 4, 15, 16, 20, 38, 63); + vint8 index = vint8_lit(0, 7, 4, 15, 16, 20, 38, 63); vint8 result = vtable_8bt_32bi(table0p, table1p, table2p, table3p, index); diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index ee38d363..1c7c3af4 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -74,18 +74,6 @@ struct vfloat8 m = _mm256_set1_ps(a); } - /** - * @brief Construct from 8 scalar values. - * - * The value of @c a is stored to lane 0 (LSB) in the SIMD register. - */ - ASTCENC_SIMD_INLINE explicit vfloat8( - float a, float b, float c, float d, - float e, float f, float g, float h) - { - m = _mm256_set_ps(h, g, f, e, d, c, b, a); - } - /** * @brief Construct from an existing SIMD register. */ @@ -168,18 +156,6 @@ struct vint8 m = _mm256_set1_epi32(a); } - /** - * @brief Construct from 8 scalar values. - * - * The value of @c a is stored to lane 0 (LSB) in the SIMD register. - */ - ASTCENC_SIMD_INLINE explicit vint8( - int a, int b, int c, int d, - int e, int f, int g, int h) - { - m = _mm256_set_epi32(h, g, f, e, d, c, b, a); - } - /** * @brief Construct from an existing SIMD register. 
*/ From f64888045801a354a33249a20aab2bb228c04e8b Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Tue, 2 Jul 2024 08:46:03 +0100 Subject: [PATCH 7/8] Remove unused gatheri --- Source/UnitTest/test_simd.cpp | 30 ------------------------------ Source/astcenc_vecmathlib_avx2_8.h | 8 -------- Source/astcenc_vecmathlib_neon_4.h | 15 --------------- Source/astcenc_vecmathlib_none_4.h | 11 ----------- Source/astcenc_vecmathlib_sse_4.h | 14 -------------- 5 files changed, 78 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index 5c7b0ed3..0a0698a1 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -1839,17 +1839,6 @@ TEST(vint4, store_lanes_masked_unaligned) EXPECT_TRUE(all(result3v == expect3v)); } -/** @brief Test vint4 gatheri. */ -TEST(vint4, gatheri) -{ - vint4 indices(0, 4, 3, 2); - vint4 r = gatheri(s32_data, indices); - EXPECT_EQ(r.lane<0>(), 0); - EXPECT_EQ(r.lane<1>(), 4); - EXPECT_EQ(r.lane<2>(), 3); - EXPECT_EQ(r.lane<3>(), 2); -} - /** @brief Test vint4 pack_low_bytes. */ TEST(vint4, pack_low_bytes) { @@ -3594,25 +3583,6 @@ TEST(vint8, store_lanes_masked_unaligned) EXPECT_TRUE(all(result3v == expect3v)); } -/** @brief Test vint8 gatheri. */ -TEST(vint8, gatheri) -{ - vint8 indices = vint8_lit(0, 4, 3, 2, 7, 4, 3, 2); - vint8 r = gatheri(s32_data, indices); - - alignas(32) int ra[8]; - store(r, ra); - - EXPECT_EQ(ra[0], 0); - EXPECT_EQ(ra[1], 4); - EXPECT_EQ(ra[2], 3); - EXPECT_EQ(ra[3], 2); - EXPECT_EQ(ra[4], 7); - EXPECT_EQ(ra[5], 4); - EXPECT_EQ(ra[6], 3); - EXPECT_EQ(ra[7], 2); -} - /** @brief Test vint8 pack_low_bytes. 
*/ TEST(vint8, pack_low_bytes) { diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 1c7c3af4..4ef04a45 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -526,14 +526,6 @@ ASTCENC_SIMD_INLINE void store_nbytes(vint8 a, uint8_t* p) _mm_storel_epi64(reinterpret_cast<__m128i*>(p), _mm256_extracti128_si256(a.m, 0)); } -/** - * @brief Gather N (vector width) indices from the array. - */ -ASTCENC_SIMD_INLINE vint8 gatheri(const int* base, vint8 indices) -{ - return vint8(_mm256_i32gather_epi32(base, indices.m, 4)); -} - /** * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. */ diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index 787d8f61..b0187630 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -602,21 +602,6 @@ ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) vst1q_lane_s32(reinterpret_cast(p), a.m, 0); } -/** - * @brief Gather N (vector width) indices from the array. - */ -ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) -{ - alignas(16) int idx[4]; - storea(indices, idx); - alignas(16) int vals[4]; - vals[0] = base[idx[0]]; - vals[1] = base[idx[1]]; - vals[2] = base[idx[2]]; - vals[3] = base[idx[3]]; - return vint4(vals); -} - /** * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. */ diff --git a/Source/astcenc_vecmathlib_none_4.h b/Source/astcenc_vecmathlib_none_4.h index 6d0fcf21..34a2aae2 100644 --- a/Source/astcenc_vecmathlib_none_4.h +++ b/Source/astcenc_vecmathlib_none_4.h @@ -676,17 +676,6 @@ ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) std::memcpy(p, a.m, sizeof(uint8_t) * 4); } -/** - * @brief Gather N (vector width) indices from the array. 
- */ -ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) -{ - return vint4(base[indices.m[0]], - base[indices.m[1]], - base[indices.m[2]], - base[indices.m[3]]); -} - /** * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. */ diff --git a/Source/astcenc_vecmathlib_sse_4.h b/Source/astcenc_vecmathlib_sse_4.h index d56511dc..163171cf 100644 --- a/Source/astcenc_vecmathlib_sse_4.h +++ b/Source/astcenc_vecmathlib_sse_4.h @@ -655,20 +655,6 @@ ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) _mm_store_ss(reinterpret_cast(p), _mm_castsi128_ps(a.m)); } -/** - * @brief Gather N (vector width) indices from the array. - */ -ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) -{ -#if ASTCENC_AVX >= 2 - return vint4(_mm_i32gather_epi32(base, indices.m, 4)); -#else - alignas(16) int idx[4]; - storea(indices, idx); - return vint4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); -#endif -} - /** * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. */ From 08026e7e974bad086eb2b2054fa4a2138a4a8ea6 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Tue, 2 Jul 2024 08:48:22 +0100 Subject: [PATCH 8/8] Fix unit test typo --- Source/UnitTest/test_simd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index 0a0698a1..f86403d0 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -203,7 +203,7 @@ TEST(vfloat, ChangeSign) /** @brief Test VLA atan. */ TEST(vfloat, Atan) { - vfloa4 a(-0.15f, 0.0f, 0.9f, 2.1f); + vfloat4 a(-0.15f, 0.0f, 0.9f, 2.1f); vfloat4 r = atan(a); EXPECT_NEAR(r.lane<0>(), -0.149061f, 0.005f); EXPECT_NEAR(r.lane<1>(), 0.000000f, 0.005f);