Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,4 @@ __pycache__/
# Ignore Bazel generated files
bazel-*
MODULE.bazel.lock
*.vsidx
2 changes: 1 addition & 1 deletion external/OpenJPH/src/core/openjph/ojph_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
// preprocessor directives for architecture
///////////////////////////////////////////////////////////////////////////////
#if defined(__arm__) || defined(__TARGET_ARCH_ARM) \
|| defined(__aarch64__) || defined(_M_ARM64)
|| defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define OJPH_ARCH_ARM
#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
#define OJPH_ARCH_I386
Expand Down
2 changes: 1 addition & 1 deletion external/OpenJPH/src/core/others/ojph_arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ namespace ojph {

#else // Linux/FreeBSD/OpenBSD

#if defined(__aarch64__) || defined(_M_ARM64) // 64-bit ARM
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) // 64-bit ARM

#include <sys/auxv.h>
#ifdef OJPH_OS_LINUX
Expand Down
14 changes: 11 additions & 3 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
//

// GCC and Visual Studio SSE2 compiler flags
#if defined __SSE2__ || (_MSC_VER && (_M_IX86 || _M_X64))
#if defined __SSE2__ || (_MSC_VER && (_M_IX86 || _M_X64) && !defined(_M_ARM64EC))
# define IMF_HAVE_SSE2 1
#endif

#if defined __SSE4_1__ || (_MSC_VER && (_M_IX86 || _M_X64))
#if defined __SSE4_1__ || (_MSC_VER && (_M_IX86 || _M_X64) && !defined(_M_ARM64EC))
# define IMF_HAVE_SSE4_1 1
#endif

Expand All @@ -42,14 +42,22 @@
# define IMF_HAVE_F16C 1
#endif

#if defined(__ARM_NEON)
#if defined(__ARM_NEON) || defined(_M_ARM64) || defined(_M_ARM64EC)
# define IMF_HAVE_NEON
#endif

#if defined(__aarch64__)
# define IMF_HAVE_NEON_AARCH64 1
#endif

#if defined(_M_ARM64) || defined(_M_ARM64EC)
# define IMF_HAVE_NEON_WINDOWS_ARM64 1
#endif

#if defined(IMF_HAVE_NEON_AARCH64) || defined(IMF_HAVE_NEON_WINDOWS_ARM64)
# define IMF_HAVE_NEON_ARM64 1
#endif

extern "C" {
#ifdef IMF_HAVE_SSE2
# include <emmintrin.h>
Expand Down
6 changes: 3 additions & 3 deletions src/lib/OpenEXR/ImfZip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ reconstruct_sse41 (char* buf, size_t outSize)

#endif

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64

void
reconstruct_neon (char* buf, size_t outSize)
Expand Down Expand Up @@ -258,7 +258,7 @@ interleave_sse2 (const char* source, size_t outSize, char* out)

#endif

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64

void
interleave_neon (const char* source, size_t outSize, char* out)
Expand Down Expand Up @@ -370,7 +370,7 @@ Zip::initializeFuncs ()
if (cpuId.sse2) { interleave = interleave_sse2; }
#endif

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64
reconstruct = reconstruct_neon;
interleave = interleave_neon;
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/lib/OpenEXRCore/internal_cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "OpenEXRConfigInternal.h"

#if defined(i386) || defined(__i386__) || defined(__i386) || \
defined(_M_X86) || defined(__x86_64__) || defined(_M_X64)
defined(_M_X86) || defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
# define OPENEXR_ENABLE_X86_SIMD_CHECK 1
#else
# define OPENEXR_ENABLE_X86_SIMD_CHECK 0
Expand Down Expand Up @@ -132,7 +132,7 @@ has_native_half (void)
int sse2, avx, f16c;
check_for_x86_simd (&f16c, &avx, &sse2);
return avx && f16c;
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
return 1;
#else
return 0;
Expand Down
52 changes: 41 additions & 11 deletions src/lib/OpenEXRCore/internal_dwa_simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// aligned. Unaligned pointers may risk seg-faulting.
//

#if defined __SSE2__ || (_MSC_VER && (_M_IX86 || _M_X64))
#if defined __SSE2__ || (_MSC_VER && (_M_IX86 || _M_X64) && !defined(_M_ARM64EC))
# define IMF_HAVE_SSE2 1
# include <emmintrin.h>
# include <mmintrin.h>
Expand All @@ -35,6 +35,22 @@
# define IMF_HAVE_NEON_AARCH64 1
#endif

#if defined(_M_ARM64) || defined(_M_ARM64EC)
# define IMF_HAVE_NEON_WINDOWS_ARM64 1
#endif

#if defined(IMF_HAVE_NEON_AARCH64) || defined(IMF_HAVE_NEON_WINDOWS_ARM64)
# define IMF_HAVE_NEON_ARM64 1
#endif

#if defined(IMF_HAVE_NEON_ARM64)
# if defined(_MSC_VER)
# define NEON_RESTRICT __restrict
# else
# define NEON_RESTRICT __restrict__
# endif
#endif

#include "internal_coding.h"

#if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) && \
Expand Down Expand Up @@ -357,7 +373,7 @@ convertFloatToHalf64_scalar (uint16_t* dst, float* src)
dst[i] = float_to_half (src[i]);
}

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64

void
convertFloatToHalf64_neon (uint16_t* dst, float* src)
Expand Down Expand Up @@ -786,36 +802,50 @@ fromHalfZigZag_f16c (uint16_t* src, float* dst)
#endif /* defined IMF_HAVE_GCC_INLINEASM_X86_64 */
}

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64

void
fromHalfZigZag_neon (uint16_t* __restrict__ src, float* __restrict__ dst)
fromHalfZigZag_neon (uint16_t* NEON_RESTRICT src, float* NEON_RESTRICT dst)
{
# if defined(_MSC_VER)
static const uint8_t res_tbl_data[4][16] = {
{0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42},
{3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53},
{10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60},
{21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63}};

uint8x16_t res_tbl[4];
for (int i = 0; i < 4; i++)
{
res_tbl[i] = vld1q_u8 (res_tbl_data[i]);
}

# else
uint8x16_t res_tbl[4] = {
{0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42},
{3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53},
{10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60},
{21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63}};
# endif

uint8x16x4_t vec_input_l, vec_input_h;

for (int i = 0; i < 4; i++)
{
uint8x16x2_t vec_in_u8 = vld2q_u8 ((uint8_t*) (src + 16 * i));
vec_input_l.val[i] = vec_in_u8.val[0];
vec_input_h.val[i] = vec_in_u8.val[1];
}

# pragma unroll(4)
for (int i = 0; i < 4; i++)
{
uint8x16_t res_vec_l, res_vec_h;
res_vec_l = vqtbl4q_u8 (vec_input_l, res_tbl[i]);
res_vec_h = vqtbl4q_u8 (vec_input_h, res_tbl[i]);
uint8x16_t res_vec_l = vqtbl4q_u8 (vec_input_l, res_tbl[i]);
uint8x16_t res_vec_h = vqtbl4q_u8 (vec_input_h, res_tbl[i]);

float16x8_t res_vec_l_f16 =
vreinterpretq_f16_u8 (vzip1q_u8 (res_vec_l, res_vec_h));
float16x8_t res_vec_h_f16 =
vreinterpretq_f16_u8 (vzip2q_u8 (res_vec_l, res_vec_h));

vst1q_f32 (dst + i * 16, vcvt_f32_f16 (vget_low_f16 (res_vec_l_f16)));
vst1q_f32 (dst + i * 16 + 4, vcvt_high_f32_f16 (res_vec_l_f16));
vst1q_f32 (
Expand All @@ -824,7 +854,7 @@ fromHalfZigZag_neon (uint16_t* __restrict__ src, float* __restrict__ dst)
}
}

#endif // IMF_HAVE_NEON_AARCH64
#endif //IMF_HAVE_NEON_ARM64

//
// Inverse 8x8 DCT, only inverting the DC. This assumes that
Expand Down Expand Up @@ -2315,7 +2345,7 @@ initializeFuncs (void)
if (done) return;
done = 1;

#ifdef IMF_HAVE_NEON_AARCH64
#ifdef IMF_HAVE_NEON_ARM64
{
convertFloatToHalf64 = convertFloatToHalf64_neon;
fromHalfZigZag = fromHalfZigZag_neon;
Expand Down
25 changes: 20 additions & 5 deletions src/lib/OpenEXRCore/internal_zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,33 @@

#include "openexr_compression.h"

#if defined __SSE2__ || (_MSC_VER >= 1300 && (_M_IX86 || _M_X64))
#if defined __SSE2__ || (_MSC_VER >= 1300 && (_M_IX86 || _M_X64) && !defined(_M_ARM64EC))
# define IMF_HAVE_SSE2 1
# include <emmintrin.h>
# include <mmintrin.h>
#endif
#if defined __SSE4_1__ || (_MSC_VER >= 1300 && (_M_IX86 || _M_X64))
#if defined __SSE4_1__ || (_MSC_VER >= 1300 && (_M_IX86 || _M_X64) && !defined(_M_ARM64EC))
# define IMF_HAVE_SSE4_1 1
# include <smmintrin.h>
#endif
#if defined(__aarch64__)
# define IMF_HAVE_NEON_AARCH64 1
# include <arm_neon.h>
#endif

#if defined(_M_ARM64) || defined(_M_ARM64EC)
# define IMF_HAVE_NEON_WINDOWS_ARM64 1
#endif

#if defined(IMF_HAVE_NEON_AARCH64) || defined(IMF_HAVE_NEON_WINDOWS_ARM64)
# define IMF_HAVE_NEON_ARM64 1
#endif

#if defined(IMF_HAVE_NEON_ARM64)
# if defined(_MSC_VER)
# include <arm64_neon.h>
# else
# include <arm_neon.h>
# endif
#endif

/**************************************/
Expand Down Expand Up @@ -77,7 +92,7 @@ reconstruct (uint8_t* buf, const uint64_t outSize)
prev = d;
}
}
#elif defined(IMF_HAVE_NEON_AARCH64)
#elif defined(IMF_HAVE_NEON_ARM64)
static void
reconstruct (uint8_t* buf, const uint64_t outSize)
{
Expand Down Expand Up @@ -174,7 +189,7 @@ interleave (uint8_t* out, const uint8_t* const source, const uint64_t outSize)
*(sOut++) = (i % 2 == 0) ? *(t1++) : *(t2++);
}

#elif defined(IMF_HAVE_NEON_AARCH64)
#elif defined(IMF_HAVE_NEON_ARM64)
static void
interleave (uint8_t* out, const uint8_t* const source, const uint64_t outSize)
{
Expand Down
2 changes: 1 addition & 1 deletion src/test/OpenEXRCoreTest/base_units.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ testCPUIdent (const std::string& tempdir)
EXRCORE_TEST (false);
}

#if defined(__x86_64__) || defined(_M_X64)
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
if (has_native_half () != (hf16c && havx))
{
std::cerr << "CPU Id test has native half mismatch" << std::endl;
Expand Down