Skip to content

Commit 2476fcb

Browse files
committed
fix aligned vec types and add matrix funcs
1 parent 0171d7b commit 2476fcb

File tree

2 files changed

+84
-68
lines changed

2 files changed

+84
-68
lines changed

lib/vecmat_external.h

Lines changed: 79 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -48,48 +48,51 @@
4848
#include <functional>
4949
#include <limits>
5050
#include <numeric>
51+
#include <cstdalign>
5152

52-
static constexpr inline bool VM_ISPOW2(uintmax_t x) { return (x && (!(x & (x-1)))); }
53-
54-
template<typename T>
55-
static constexpr inline int VM_BIT_SCAN_REVERSE_CONST(const T n) noexcept {
56-
if (n == 0) return -1;
57-
if (VM_ISPOW2(n)) return n;
58-
T a = n, b = 0, j = std::numeric_limits<T>::digits, k = 0;
59-
do {
60-
j >>= 1;
61-
k = (T)1 << j;
62-
if (a >= k) {
63-
a >>= j;
64-
b += j;
65-
}
66-
} while (j > 0);
67-
return int(b);
68-
}
53+
// Fallback used only when __alignas_is_defined is not defined at this point:
// alignas(n) is mapped onto a compiler-specific alignment attribute, or onto
// nothing at all when neither compiler branch matches.
// NOTE(review): the __declspec branch is taken only when _MSVC_LANG > 201402;
// an MSVC build at an older language level would fall through to the later
// branches and could end up with the empty definition — confirm intended.
// NOTE(review): alignas is a keyword in C++11 and later, so redefining it as a
// macro is questionable — confirm this fallback is still required.
#ifndef __alignas_is_defined
54+
#if _MSVC_LANG > 201402
55+
#define alignas(n) __declspec(align(n))
56+
#elif (__GNUC__ >= 3 || defined(__clang__))
57+
#define alignas(n) __attribute__((aligned(n)))
58+
#else
59+
#define alignas(n)
60+
#endif
61+
#endif
6962

70-
template<typename T>
71-
static constexpr inline T VM_BIT_CEIL(T x) noexcept { return T(1) << (VM_BIT_SCAN_REVERSE_CONST((T)x-1) + 1); };
63+
// Bit-smearing helpers: BITOP_RUPnn__(x) ORs x with copies of itself shifted
// right by 1, 2, 4, ... up to nn bits, each level building on the previous one.
// Every level pastes its argument twice, so the expansion doubles per level and
// the argument is evaluated many times — pass only side-effect-free expressions.
// NOTE(review): BITOP_RUP64__ shifts by 64, which is undefined behaviour for any
// operand of 64 bits or fewer; it is not referenced by the code visible here.
#define BITOP_RUP01__(x) ( (x) | ( (x) >> 1))
64+
#define BITOP_RUP02__(x) (BITOP_RUP01__(x) | (BITOP_RUP01__(x) >> 2))
65+
#define BITOP_RUP04__(x) (BITOP_RUP02__(x) | (BITOP_RUP02__(x) >> 4))
66+
#define BITOP_RUP08__(x) (BITOP_RUP04__(x) | (BITOP_RUP04__(x) >> 8))
67+
#define BITOP_RUP16__(x) (BITOP_RUP08__(x) | (BITOP_RUP08__(x) >> 16))
68+
#define BITOP_RUP32__(x) (BITOP_RUP16__(x) | (BITOP_RUP16__(x) >> 32))
69+
#define BITOP_RUP64__(x) (BITOP_RUP32__(x) | (BITOP_RUP32__(x) >> 64))
7270

73-
template<typename T>
74-
static constexpr inline T VM_BIT_FLOOR(T x) noexcept { return x == 0 || VM_ISPOW2(x) ? x : ((VM_BIT_FLOOR(x >> 1)) << 1); }
71+
// Rounds x up to the nearest power of two; a value that already is a power of
// two is returned unchanged.
//
// The computation runs over the full width of uintmax_t, whatever that width
// is, instead of switching on a fixed list of widths whose default case would
// truncate the argument to 32 bits.
//
// Edge cases (same results as before on a 64-bit uintmax_t):
//   - x == 0 yields 0 (x - 1 wraps to all-ones, and adding 1 wraps back to 0);
//   - x above the largest representable power of two also wraps to 0.
constexpr static inline uintmax_t VM_BIT_CEIL (uintmax_t x) {
    uintmax_t v = x - 1;
    // Smear the highest set bit of (x - 1) into every lower position.
    for (int shift = 1; shift < std::numeric_limits<uintmax_t>::digits; shift <<= 1)
        v |= v >> shift;
    return v + 1;
}
7581

76-
#ifdef _MSVC_LANG
77-
#define VM_ALIGN(n) __declspec(align(n))
78-
#else
79-
#define VM_ALIGN(n) __attribute__((aligned(n)))
80-
#endif
82+
// True when exactly one bit of x is set, i.e. x is a non-zero power of two.
constexpr static inline bool VM_ISPOW2 (uintmax_t x) { return (x != 0) && ((x & (x - 1)) == 0); }
83+
// Rounds x down to the nearest power of two. Zero and exact powers of two are
// returned unchanged.
constexpr static inline uintmax_t VM_BIT_FLOOR(uintmax_t x) {
    if (x == 0 || VM_ISPOW2(x))
        return x;
    // Take the floor of the halved value, then scale it back up.
    return VM_BIT_FLOOR(x >> 1) << 1;
}
8184

8285
// Alignment policy selector for vec<>. In vec's alignas expression the values
// are used as follows: scalar always keeps the element alignment T_S; linear
// and matrix always request N_POW2 * T_S; every other value (none, adaptive)
// requests N_POW2 * T_S only when N is already a power of two.
enum class align
8386
{
8487
none = 0 << 0,
85-
scalar = 1 << 1,
86-
vector = 2 << 1,
87-
matrix = 3 << 1,
88-
adaptive = 4 << 1,
88+
scalar = 1 << 0, // element alignment only
89+
linear = 1 << 1, // always padded to the power-of-two width
90+
matrix = 1 << 2, // treated like linear by vec's alignas expression
91+
adaptive = 1 << 3, // default template argument of vec
8992
};
9093

91-
template<typename T, size_t N, enum align A = align::adaptive, size_t N_POW2 = VM_BIT_CEIL(N)>
92-
struct alignas(N==N_POW2 && A != align::scalar || A == align::vector ? alignof(T) * N_POW2 : alignof(T)) vec : std::array<T,N> {
94+
template<typename T, size_t N, enum align A = align::adaptive, size_t N_POW2 = VM_BIT_CEIL(N), size_t T_S = std::max<size_t>(alignof(T), sizeof(T))>
95+
struct alignas((((N == N_POW2) && (A != align::scalar)) || ((A == align::linear) || (A == align::matrix))) ? N_POW2 * T_S : T_S) vec : std::array<T,N> {
9396

9497
template<size_t N_DST = N, enum align A_DST = align::adaptive>
9598
operator vec<T,N_DST,A_DST>() { return *reinterpret_cast<vec<T,N_DST,A_DST>*>(this); }
@@ -287,8 +290,8 @@ constexpr static inline scalar dot(const vec<T,N_A,A_A> &a, const vec<T,N_B,A_B>
287290
// 3-component cross product of a and b, using only their x(), y() and z()
// accessors: (a.y*b.z - b.y*a.z, a.z*b.x - b.z*a.x, a.x*b.y - b.x*a.y).
// The two product triples are built as align::linear temporaries and then
// subtracted; the result is returned as vec<T,3>.
template<size_t N_A = 3, size_t N_B = 3, enum align A_A = align::adaptive, enum align A_B = align::adaptive>
288291
constexpr static inline vec<T,3> cross3(const vec<T,N_A,A_A> &a, const vec<T,N_B,A_B> &b)
289292
{
290-
return vec<T,3,align::vector>{a.y()*b.z(), a.z()*b.x(), a.x()*b.y()}
291-
- vec<T,3,align::vector>{b.y()*a.z(), b.z()*a.x(), b.x()*a.y()};
293+
return vec<T,3,align::linear>{a.y()*b.z(), a.z()*b.x(), a.x()*b.y()}
294+
- vec<T,3,align::linear>{b.y()*a.z(), b.z()*a.x(), b.x()*a.y()};
292295
}
293296
constexpr inline scalar mag() const { return (scalar)sqrt(dot((*this),(*this))); }
294297

@@ -300,17 +303,17 @@ static constexpr inline scalar distance(const vec<T,N_A,A_A> &a, const vec<T,N_B
300303

301304
using vector = vec<scalar,3>;
302305
using vector_array = vector;
303-
using aligned_vector = vec<scalar,3,align::vector>;
306+
using aligned_vector = vec<scalar,3,align::linear>;
304307
using aligned_vector_array = aligned_vector;
305308

306309
using vector4 = vec<scalar,4>;
307310
using vector4_array = vector4;
308-
using aligned_vector4 = vec<scalar,4,align::vector>;
311+
using aligned_vector4 = vec<scalar,4,align::linear>;
309312
using aligned_vector4_array = aligned_vector4;
310313

311314
using angvec = vec<angle,3>;
312315
using angvec_array = angvec;
313-
using aligned_angvec = vec<angle,3,align::vector>;
316+
using aligned_angvec = vec<angle,3,align::linear>;
314317
using aligned_angvec_array = aligned_angvec;
315318

316319
// Set an angvec to {0,0,0}
@@ -339,9 +342,31 @@ constexpr static inline const matrix ne()
339342
{
340343
return matrix{ vector{}, vector{}, vector{} };
341344
}
345+
constexpr inline void set_col(size_t i, const vector v)
346+
{
347+
a2d[0][i % 3] = v[0];
348+
a2d[1][i % 3] = v[1];
349+
a2d[2][i % 3] = v[2];
350+
}
351+
// Returns column (i % 3) of the matrix as a vector, gathered from rows 0, 1
// and 2 of a2d in that order.
constexpr inline vector col(size_t i) const
{
    const size_t column = i % 3;
    return vector{ a2d[0][column], a2d[1][column], a2d[2][column] };
}
355+
// Returns a new matrix whose three vectors are this matrix's columns 0, 1
// and 2; the object itself is left untouched.
constexpr inline matrix transposed() const
{
    const vector first = col(0);
    const vector second = col(1);
    const vector third = col(2);
    return matrix{ first, second, third };
}
359+
// Determinant of the 3x3 matrix, expanded along row 0: each element of row 0
// is multiplied by the 2x2 minor formed from rows 1 and 2 at the two
// cyclically following columns, and the three products are summed.
constexpr inline scalar det() const
{
    scalar sum = scalar(0);
    for (size_t c = 0; c < 3; ++c)
    {
        const size_t next = (c + 1) % 3;
        const size_t after = (c + 2) % 3;
        sum += a2d[0][c] * (a2d[1][next] * a2d[2][after]
                          - a2d[1][after] * a2d[2][next]);
    }
    return sum;
}
342367
};
343368

344-
constexpr matrix IDENTITY_MATRIX = matrix::id();
369+
// Compile-time matrix constant assembled from vector::id(0), vector::id(1) and
// vector::id(2) — presumably the three unit axis vectors; confirm against vec::id.
constexpr matrix IDENTITY_MATRIX = { vector::id(0), vector::id(1), vector::id(2) };
345370

346371
struct alignas(alignof(vector4) * 4) matrix4 {
347372
constexpr static const size_t RIGHT_HAND = 0;
@@ -366,6 +391,21 @@ constexpr static inline const matrix4 ne()
366391
{
367392
return matrix4{ vector4{}, vector4{}, vector4{}, vector4{} };
368393
}
394+
constexpr inline void set_col(size_t i, const vector4 v)
395+
{
396+
a2d[0][i % 4] = v[0];
397+
a2d[1][i % 4] = v[1];
398+
a2d[2][i % 4] = v[2];
399+
a2d[3][i % 4] = v[3];
400+
}
401+
// Returns column (i % 4) of the matrix as a vector4, gathered from rows 0, 1,
// 2 and 3 of a2d in that order. The column index wraps modulo 4.
constexpr inline vector4 col(size_t i) const
{
    const size_t column = i % 4;
    // The last component must come from row 3; reading row 2 twice would drop
    // the bottom row and make transposed() return a wrong matrix.
    return vector4{ a2d[0][column], a2d[1][column], a2d[2][column], a2d[3][column] };
}
405+
// Returns a new matrix4 whose four vectors are this matrix's columns 0, 1, 2
// and 3; the object itself is left untouched.
constexpr inline matrix4 transposed() const
{
    const vector4 first = col(0);
    const vector4 second = col(1);
    const vector4 third = col(2);
    const vector4 fourth = col(3);
    return matrix4{ first, second, third, fourth };
}
369409
};
370410

371411
// Adds 2 matrices
@@ -436,19 +476,7 @@ static inline matrix operator/=(matrix &src, float n) { return (src = src / n);
436476

437477
// Matrix transpose
438478
// Returns the transpose of m. m is received by value, so the caller's matrix
// is not modified. The removed lines swapped the three off-diagonal pairs in
// place through a temporary; the added line delegates to matrix::transposed().
static inline matrix operator~(matrix m) {
439-
float t;
440-
441-
t = m.uvec.x();
442-
m.uvec.x() = m.rvec.y();
443-
m.rvec.y() = t;
444-
t = m.fvec.x();
445-
m.fvec.x() = m.rvec.z();
446-
m.rvec.z() = t;
447-
t = m.fvec.y();
448-
m.fvec.y() = m.uvec.z();
449-
m.uvec.z() = t;
450-
451-
return m;
479+
return m.transposed();
452480
}
453481

454482
// Apply a matrix to a vector

vecmat/vector.cpp

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,17 +273,9 @@ void vm_MatrixMulTMatrix(matrix *dest, const matrix *src0, const matrix *src1) {
273273

274274
ASSERT((dest != src0) && (dest != src1));
275275

276-
dest->rvec.x() = src0->rvec.x() * src1->rvec.x() + src0->uvec.x() * src1->uvec.x() + src0->fvec.x() * src1->fvec.x();
277-
dest->uvec.x() = src0->rvec.x() * src1->rvec.y() + src0->uvec.x() * src1->uvec.y() + src0->fvec.x() * src1->fvec.y();
278-
dest->fvec.x() = src0->rvec.x() * src1->rvec.z() + src0->uvec.x() * src1->uvec.z() + src0->fvec.x() * src1->fvec.z();
279-
280-
dest->rvec.y() = src0->rvec.y() * src1->rvec.x() + src0->uvec.y() * src1->uvec.x() + src0->fvec.y() * src1->fvec.x();
281-
dest->uvec.y() = src0->rvec.y() * src1->rvec.y() + src0->uvec.y() * src1->uvec.y() + src0->fvec.y() * src1->fvec.y();
282-
dest->fvec.y() = src0->rvec.y() * src1->rvec.z() + src0->uvec.y() * src1->uvec.z() + src0->fvec.y() * src1->fvec.z();
283-
284-
dest->rvec.z() = src0->rvec.z() * src1->rvec.x() + src0->uvec.z() * src1->uvec.x() + src0->fvec.z() * src1->fvec.x();
285-
dest->uvec.z() = src0->rvec.z() * src1->rvec.y() + src0->uvec.z() * src1->uvec.y() + src0->fvec.z() * src1->fvec.y();
286-
dest->fvec.z() = src0->rvec.z() * src1->rvec.z() + src0->uvec.z() * src1->uvec.z() + src0->fvec.z() * src1->fvec.z();
276+
dest->set_col(0, src0->rvec.x() * src1->rvec + src0->uvec.x() * src1->uvec + src0->fvec.x() * src1->fvec);
277+
dest->set_col(1, src0->rvec.y() * src1->rvec + src0->uvec.y() * src1->uvec + src0->fvec.y() * src1->fvec);
278+
dest->set_col(2, src0->rvec.z() * src1->rvec + src0->uvec.z() * src1->uvec + src0->fvec.z() * src1->fvec);
287279
}
288280

289281
matrix operator*(const matrix &src0, const matrix &src1) {
@@ -612,9 +604,7 @@ angvec *vm_ExtractAnglesFromMatrix(angvec *a, const matrix *m) {
612604

613605
// returns the value of a determinant
614606
// det is dereferenced without a null check, so callers must pass a valid
// matrix. The added line forwards to matrix::det(), which expands along the
// first row and produces the same six signed products as the removed expression.
scalar calc_det_value(const matrix *det) {
615-
return det->rvec.x() * det->uvec.y() * det->fvec.z() - det->rvec.x() * det->uvec.z() * det->fvec.y() -
616-
det->rvec.y() * det->uvec.x() * det->fvec.z() + det->rvec.y() * det->uvec.z() * det->fvec.x() +
617-
det->rvec.z() * det->uvec.x() * det->fvec.y() - det->rvec.z() * det->uvec.y() * det->fvec.x();
607+
return det->det();
618608
}
619609

620610
// computes the delta angle between two vectors.
@@ -762,9 +752,7 @@ scalar vm_GetCentroidFast(vector *centroid, const vector *src, int nv) {
762752

763753
// creates a completely random, non-normalized vector with a range of values from -1023 to +1024 values)
764754
// Writes three independently drawn random components into *vec, each computed
// as ps_rand() - D3_RAND_MAX / 2. vec is dereferenced without a null check.
//
// Each component needs its own ps_rand() call: fill() would evaluate the
// expression once and copy that single value into every component, so the
// result would always lie on the (1,1,1) diagonal. The calls are issued in
// x, y, z order so the sequence of draws matches the original behaviour.
void vm_MakeRandomVector(vector *vec) {
  vec->x() = ps_rand() - D3_RAND_MAX / 2;
  vec->y() = ps_rand() - D3_RAND_MAX / 2;
  vec->z() = ps_rand() - D3_RAND_MAX / 2;
}
769757

770758
// Given a set of points, computes the minimum bounding sphere of those points

0 commit comments

Comments
 (0)