Skip to content

Fixes issue 47 (clxie) compile error Eigen:QInt32 #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: r1.15.5+nv21.12
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#ifndef CXX11_SRC_FIXEDPOINT_PACKETMATHAVX_H_
#define CXX11_SRC_FIXEDPOINT_PACKETMATHAVX_H_
#ifdef _MSC_VER

#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>

#endif

namespace Eigen {
namespace internal {

// Packet types for QInt8 in this header: a 256-bit integer register holding
// 32 lanes (Packet32q8i) and a 128-bit integer register holding 16 lanes
// (Packet16q8i), each wrapped in eigen_packet_wrapper.
// NOTE(review): the integer tags (10, 11) presumably exist only to make each
// wrapper a distinct type -- confirm against the eigen_packet_wrapper
// definition and check they do not collide with tags used elsewhere.
typedef eigen_packet_wrapper<__m256i, 10> Packet32q8i;
typedef eigen_packet_wrapper<__m128i, 11> Packet16q8i;

// Packet traits for the QInt8 scalar in this AVX header: the scalar is marked
// vectorizable with 32-lane full packets (Packet32q8i) and 16-lane half
// packets (Packet16q8i), while every arithmetic capability flag is disabled.
template <>
struct packet_traits<QInt8> : default_packet_traits {
  typedef Packet32q8i type;
  typedef Packet16q8i half;

  // Vectorization parameters.
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size = 32
  };

  // Capability flags: none of these packet operations is advertised for
  // QInt8 by this specialization.
  enum {
    HasAdd = 0,
    HasSub = 0,
    HasMul = 0,
    HasNegate = 0,

    HasAbs = 0,
    HasAbs2 = 0,

    HasMin = 0,
    HasMax = 0,

    HasConj = 0,
    HasSetLinear = 0
  };
};

// Reverse mapping for the 256-bit QInt8 packet: scalar type, half-packet type
// and layout constants (32 lanes, 32-byte alignment, no masked load/store).
template <>
struct unpacket_traits<Packet32q8i> {
  typedef QInt8 type;
  typedef Packet16q8i half;

  enum {
    size = 32,
    alignment = Aligned32,
    vectorizable = true,

    // Masked memory operations are not provided for this packet.
    masked_load_available = false,
    masked_store_available = false
  };
};

// Reverse mapping for the 128-bit QInt8 packet. The packet is declared as its
// own half type, i.e. no smaller packet is defined for it here.
template <>
struct unpacket_traits<Packet16q8i> {
  typedef QInt8 type;
  typedef Packet16q8i half;

  enum {
    size = 16,
    // NOTE(review): Aligned32 is declared for a 128-bit packet whose aligned
    // load/store in this file use _mm_load_si128 / _mm_store_si128. That is a
    // safe over-approximation, but Aligned16 may have been intended --
    // confirm before changing.
    alignment = Aligned32,
    vectorizable = true,

    // Masked memory operations are not provided for this packet.
    masked_load_available = false,
    masked_store_available = false
  };
};
// Builds a 256-bit packet with the 8-bit value of `from` replicated into
// every lane.
template <>
EIGEN_STRONG_INLINE Packet32q8i pset1<Packet32q8i>(const QInt8& from) {
  const __m256i broadcast = _mm256_set1_epi8(from.value);
  return broadcast;
}
// Loads one 256-bit packet from `from`; the pointer does not need to be
// aligned.
template <>
EIGEN_STRONG_INLINE Packet32q8i ploadu<Packet32q8i>(const QInt8* from) {
  const __m256i* src = reinterpret_cast<const __m256i*>(from);
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(src);
}
// Loads one 128-bit packet from `from`; the pointer does not need to be
// aligned.
template <>
EIGEN_STRONG_INLINE Packet16q8i ploadu<Packet16q8i>(const QInt8* from) {
  const __m128i* src = reinterpret_cast<const __m128i*>(from);
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(src);
}

// Loads one 256-bit packet from `from` using the aligned load intrinsic, so
// `from` must satisfy the alignment _mm256_load_si256 requires.
template <>
EIGEN_STRONG_INLINE Packet32q8i pload<Packet32q8i>(const QInt8* from) {
  const __m256i* src = reinterpret_cast<const __m256i*>(from);
  EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(src);
}
// Loads one 128-bit packet from `from` using the aligned load intrinsic, so
// `from` must satisfy the alignment _mm_load_si128 requires.
template <>
EIGEN_STRONG_INLINE Packet16q8i pload<Packet16q8i>(const QInt8* from) {
  const __m128i* src = reinterpret_cast<const __m128i*>(from);
  EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(src);
}

// Writes the 256-bit packet `from` to `to`; the destination does not need to
// be aligned.
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet32q8i& from) {
  __m256i* dst = reinterpret_cast<__m256i*>(to);
  EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(dst, from.m_val);
}
// Writes the 128-bit packet `from` to `to`; the destination does not need to
// be aligned.
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet16q8i& from) {
  __m128i* dst = reinterpret_cast<__m128i*>(to);
  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(dst, from.m_val);
}

// Writes the 256-bit packet `from` to `to` using the aligned store intrinsic,
// so `to` must satisfy the alignment _mm256_store_si256 requires.
template <>
EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet32q8i& from) {
  __m256i* dst = reinterpret_cast<__m256i*>(to);
  EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(dst, from.m_val);
}
// Writes the 128-bit packet `from` to `to` using the aligned store intrinsic,
// so `to` must satisfy the alignment _mm_store_si128 requires.
template <>
EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet16q8i& from) {
  __m128i* dst = reinterpret_cast<__m128i*>(to);
  EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(dst, from.m_val);
}

// 8-lane single-precision packet used as the source type of the cast below.
typedef __m256 Packet8f;

// Cast traits for float -> QInt8: the cast is vectorized and consumes four
// source packets for every target packet it produces (matching the
// four-argument pcast specialization that follows).
template <>
struct type_casting_traits<float, QInt8> {
  enum {
    VectorizedCast = 1,
    SrcCoeffRatio = 4,
    TgtCoeffRatio = 1
  };
};

// Converts four 8-lane float packets into one 32-lane QInt8 packet.
//
// Each input is first converted to 32-bit integers with _mm256_cvtps_epi32,
// then narrowed in two steps with the signed-saturating pack intrinsics
// (32 -> 16 bits via _mm_packs_epi32, 16 -> 8 bits via _mm_packs_epi16).
// Lane order of the result is a[0..7], b[0..7], c[0..7], d[0..7].
template <>
EIGEN_STRONG_INLINE Packet32q8i
pcast<Packet8f, Packet32q8i>(const Packet8f& a, const Packet8f& b,
                             const Packet8f& c, const Packet8f& d) {
  // Step 1: float -> int32, eight lanes per input.
  const __m256i a32 = _mm256_cvtps_epi32(a);
  const __m256i b32 = _mm256_cvtps_epi32(b);
  const __m256i c32 = _mm256_cvtps_epi32(c);
  const __m256i d32 = _mm256_cvtps_epi32(d);

  // Step 2: int32 -> int16. The pack works on 128-bit registers, so each
  // 256-bit value is split into its lower and upper halves first.
  const __m128i a16 = _mm_packs_epi32(_mm256_castsi256_si128(a32),
                                      _mm256_extractf128_si256(a32, 1));
  const __m128i b16 = _mm_packs_epi32(_mm256_castsi256_si128(b32),
                                      _mm256_extractf128_si256(b32, 1));
  const __m128i c16 = _mm_packs_epi32(_mm256_castsi256_si128(c32),
                                      _mm256_extractf128_si256(c32, 1));
  const __m128i d16 = _mm_packs_epi32(_mm256_castsi256_si128(d32),
                                      _mm256_extractf128_si256(d32, 1));

  // Step 3: int16 -> int8, pairing (a, b) and (c, d).
  const __m128i ab8 = _mm_packs_epi16(a16, b16);
  const __m128i cd8 = _mm_packs_epi16(c16, d16);

  // Step 4: (a, b) goes to the lower 128 bits, (c, d) to the upper 128 bits.
  return _mm256_insertf128_si256(_mm256_castsi128_si256(ab8), cd8, 1);
}

} // end namespace internal
} // end namespace Eigen

#endif // CXX11_SRC_FIXEDPOINT_PACKETMATHAVX_H_
Original file line number Diff line number Diff line change
Expand Up @@ -27,61 +27,14 @@ inline int _mm256_extract_epi8_N1(const __m256i X) {
namespace Eigen {
namespace internal {

typedef struct Packet32q8i {
__m256i val;
operator __m256i() const { return val; }
Packet32q8i() : val(_mm256_setzero_si256()){};
Packet32q8i(__m256i val) : val(val) {}
} Packet32q8i;

typedef struct Packet16q16i {
__m256i val;
operator __m256i() const { return val; }
Packet16q16i() : val(_mm256_setzero_si256()){};
Packet16q16i(__m256i val) : val(val) {}
} Packet16q16i;

typedef struct Packet32q8u {
__m256i val;
operator __m256i() const { return val; }
Packet32q8u() : val(_mm256_setzero_si256()){};
Packet32q8u(__m256i val) : val(val) {}
} Packet32q8u;

typedef struct Packet16q8i {
__m128i val;
operator __m128i() const { return val; }
Packet16q8i() : val(_mm_setzero_si128()) {}
Packet16q8i(__m128i val) : val(val) {}
} Packet16q8i;

typedef struct Packet16q8u {
__m128i val;
operator __m128i() const { return val; }
Packet16q8u() : val(_mm_setzero_si128()) {}
Packet16q8u(__m128i val) : val(val) {}
} Packet16q8u;

typedef struct Packet8q16i {
__m128i val;
operator __m128i() const { return val; }
Packet8q16i() : val(_mm_setzero_si128()) {}
Packet8q16i(__m128i val) : val(val) {}
} Packet8q16i;

typedef struct Packet8q32i {
__m256i val;
operator __m256i() const { return val; }
Packet8q32i() : val(_mm256_setzero_si256()){};
Packet8q32i(__m256i val) : val(val) {}
} Packet8q32i;

typedef struct Packet4q32i {
__m128i val;
operator __m128i() const { return val; }
Packet4q32i() : val(_mm_setzero_si128()) {}
Packet4q32i(__m128i val) : val(val) {}
} Packet4q32i;
typedef eigen_packet_wrapper<__m256i, 20> Packet32q8i;
typedef eigen_packet_wrapper<__m256i, 21> Packet16q16i;
typedef eigen_packet_wrapper<__m256i, 22> Packet32q8u;
typedef eigen_packet_wrapper<__m128i, 23> Packet16q8i;
typedef eigen_packet_wrapper<__m128i, 25> Packet16q8u;
typedef eigen_packet_wrapper<__m128i, 26> Packet8q16i;
typedef eigen_packet_wrapper<__m256i, 27> Packet8q32i;
typedef eigen_packet_wrapper<__m128i, 28> Packet4q32i;

#ifndef EIGEN_VECTORIZE_AVX512
template <>
Expand Down Expand Up @@ -315,64 +268,64 @@ EIGEN_STRONG_INLINE Packet8q32i pload<Packet8q32i>(const QInt32* from) {
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet32q8i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
reinterpret_cast<__m256i*>(to), from.val);
reinterpret_cast<__m256i*>(to), from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt8>(QInt8* to, const Packet16q8i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet32q8u& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
reinterpret_cast<__m256i*>(to), from.val);
reinterpret_cast<__m256i*>(to), from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet16q16i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
reinterpret_cast<__m256i*>(to), from.val);
reinterpret_cast<__m256i*>(to), from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet8q16i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet8q32i& from) {
EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
reinterpret_cast<__m256i*>(to), from.val);
reinterpret_cast<__m256i*>(to), from.m_val);
}

// Aligned store
template <>
EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet8q32i& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet16q16i& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet8q16i& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet32q8u& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet32q8i& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
from.val);
from.m_val);
}
template <>
EIGEN_STRONG_INLINE void pstore<QInt8>(QInt8* to, const Packet16q8i& from) {
EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to),
from.val);
from.m_val);
}

// Extract first element.
Expand All @@ -382,15 +335,15 @@ EIGEN_STRONG_INLINE QInt32 pfirst<Packet8q32i>(const Packet8q32i& a) {
}
template <>
EIGEN_STRONG_INLINE QInt16 pfirst<Packet16q16i>(const Packet16q16i& a) {
return _mm256_extract_epi16_N0(a.val);
return _mm256_extract_epi16_N0(a.m_val);
}
template <>
EIGEN_STRONG_INLINE QUInt8 pfirst<Packet32q8u>(const Packet32q8u& a) {
return static_cast<uint8_t>(_mm256_extract_epi8_N0(a.val));
return static_cast<uint8_t>(_mm256_extract_epi8_N0(a.m_val));
}
template <>
EIGEN_STRONG_INLINE QInt8 pfirst<Packet32q8i>(const Packet32q8i& a) {
return _mm256_extract_epi8_N0(a.val);
return _mm256_extract_epi8_N0(a.m_val);
}

// Initialize to constant value.
Expand All @@ -411,7 +364,7 @@ EIGEN_STRONG_INLINE Packet8q32i pset1<Packet8q32i>(const QInt32& from) {
template <>
EIGEN_STRONG_INLINE Packet8q32i padd<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_add_epi32(a.val, b.val);
return _mm256_add_epi32(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet16q16i pset1<Packet16q16i>(const QInt16& from) {
Expand All @@ -420,62 +373,62 @@ EIGEN_STRONG_INLINE Packet16q16i pset1<Packet16q16i>(const QInt16& from) {
template <>
EIGEN_STRONG_INLINE Packet8q32i psub<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_sub_epi32(a.val, b.val);
return _mm256_sub_epi32(a.m_val, b.m_val);
}
// Note: mullo truncates the result to 32 bits.
template <>
EIGEN_STRONG_INLINE Packet8q32i pmul<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_mullo_epi32(a.val, b.val);
return _mm256_mullo_epi32(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet8q32i pnegate<Packet8q32i>(const Packet8q32i& a) {
return _mm256_sub_epi32(_mm256_setzero_si256(), a.val);
return _mm256_sub_epi32(_mm256_setzero_si256(), a.m_val);
}

// Min and max.
template <>
EIGEN_STRONG_INLINE Packet8q32i pmin<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_min_epi32(a.val, b.val);
return _mm256_min_epi32(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet8q32i pmax<Packet8q32i>(const Packet8q32i& a,
const Packet8q32i& b) {
return _mm256_max_epi32(a.val, b.val);
return _mm256_max_epi32(a.m_val, b.m_val);
}

template <>
EIGEN_STRONG_INLINE Packet16q16i pmin<Packet16q16i>(const Packet16q16i& a,
const Packet16q16i& b) {
return _mm256_min_epi16(a.val, b.val);
return _mm256_min_epi16(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet16q16i pmax<Packet16q16i>(const Packet16q16i& a,
const Packet16q16i& b) {
return _mm256_max_epi16(a.val, b.val);
return _mm256_max_epi16(a.m_val, b.m_val);
}

template <>
EIGEN_STRONG_INLINE Packet32q8u pmin<Packet32q8u>(const Packet32q8u& a,
const Packet32q8u& b) {
return _mm256_min_epu8(a.val, b.val);
return _mm256_min_epu8(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet32q8u pmax<Packet32q8u>(const Packet32q8u& a,
const Packet32q8u& b) {
return _mm256_max_epu8(a.val, b.val);
return _mm256_max_epu8(a.m_val, b.m_val);
}

template <>
EIGEN_STRONG_INLINE Packet32q8i pmin<Packet32q8i>(const Packet32q8i& a,
const Packet32q8i& b) {
return _mm256_min_epi8(a.val, b.val);
return _mm256_min_epi8(a.m_val, b.m_val);
}
template <>
EIGEN_STRONG_INLINE Packet32q8i pmax<Packet32q8i>(const Packet32q8i& a,
const Packet32q8i& b) {
return _mm256_max_epi8(a.val, b.val);
return _mm256_max_epi8(a.m_val, b.m_val);
}

// Reductions.
Expand Down
Loading