@@ -9,6 +9,7 @@ extern "C" {
9
9
#include < Python.h>
10
10
11
11
#include " numpy/arrayobject.h"
12
+ #include " numpy/halffloat.h"
12
13
#include " numpy/ndarraytypes.h"
13
14
#include " numpy/dtype_api.h"
14
15
}
@@ -20,7 +21,7 @@ extern "C" {
20
21
#include " casts.h"
21
22
#include " dtype.h"
22
23
23
- #define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad
24
+ #define NUM_CASTS 33 // 16 to_casts + 16 from_casts + 1 quad_to_quad
24
25
25
26
static NPY_CASTING
26
27
quad_to_quad_resolve_descriptors (PyObject *NPY_UNUSED (self),
@@ -150,15 +151,27 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da
150
151
return 0 ;
151
152
}
152
153
154
+ // Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
155
+ // see e.g. https://stackoverflow.com/q/32522279
156
+ struct spec_npy_bool {};
157
+ struct spec_npy_half {};
158
+
159
+ template <typename T>
160
+ struct NpyType { typedef T TYPE; };
161
+ template <>
162
+ struct NpyType <spec_npy_bool>{ typedef npy_bool TYPE; };
163
+ template <>
164
+ struct NpyType <spec_npy_half>{ typedef npy_half TYPE; };
165
+
153
166
// Casting from other types to QuadDType
154
167
155
168
template <typename T>
156
169
static inline quad_value
157
- to_quad (T x, QuadBackendType backend);
170
+ to_quad (typename NpyType<T>::TYPE x, QuadBackendType backend);
158
171
159
172
template <>
160
173
inline quad_value
161
- to_quad<npy_bool >(npy_bool x, QuadBackendType backend)
174
+ to_quad<spec_npy_bool >(npy_bool x, QuadBackendType backend)
162
175
{
163
176
quad_value result;
164
177
if (backend == BACKEND_SLEEF) {
@@ -184,6 +197,20 @@ to_quad<npy_byte>(npy_byte x, QuadBackendType backend)
184
197
return result;
185
198
}
186
199
200
+ template <>
201
+ inline quad_value
202
+ to_quad<npy_ubyte>(npy_ubyte x, QuadBackendType backend)
203
+ {
204
+ quad_value result;
205
+ if (backend == BACKEND_SLEEF) {
206
+ result.sleef_value = Sleef_cast_from_uint64q1 (x);
207
+ }
208
+ else {
209
+ result.longdouble_value = (long double )x;
210
+ }
211
+ return result;
212
+ }
213
+
187
214
template <>
188
215
inline quad_value
189
216
to_quad<npy_short>(npy_short x, QuadBackendType backend)
@@ -295,6 +322,21 @@ to_quad<npy_ulonglong>(npy_ulonglong x, QuadBackendType backend)
295
322
}
296
323
return result;
297
324
}
325
+
326
+ template <>
327
+ inline quad_value
328
+ to_quad<spec_npy_half>(npy_half x, QuadBackendType backend)
329
+ {
330
+ quad_value result;
331
+ if (backend == BACKEND_SLEEF) {
332
+ result.sleef_value = Sleef_cast_from_doubleq1 (npy_half_to_double (x));
333
+ }
334
+ else {
335
+ result.longdouble_value = (long double )npy_half_to_double (x);
336
+ }
337
+ return result;
338
+ }
339
+
298
340
template <>
299
341
inline quad_value
300
342
to_quad<float >(float x, QuadBackendType backend)
@@ -374,10 +416,10 @@ numpy_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
374
416
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
375
417
376
418
while (N--) {
377
- T in_val;
419
+ typename NpyType<T>::TYPE in_val;
378
420
quad_value out_val;
379
421
380
- memcpy (&in_val, in_ptr, sizeof (T ));
422
+ memcpy (&in_val, in_ptr, sizeof (typename NpyType<T>::TYPE ));
381
423
out_val = to_quad<T>(in_val, backend);
382
424
memcpy (out_ptr, &out_val, elem_size);
383
425
@@ -401,7 +443,7 @@ numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
401
443
QuadBackendType backend = descr_out->backend ;
402
444
403
445
while (N--) {
404
- T in_val = *(T *)in_ptr;
446
+ typename NpyType<T>::TYPE in_val = *(typename NpyType<T>::TYPE *)in_ptr;
405
447
quad_value out_val = to_quad<T>(in_val, backend);
406
448
407
449
if (backend == BACKEND_SLEEF) {
@@ -420,12 +462,12 @@ numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
420
462
// Casting from QuadDType to other types
421
463
422
464
template <typename T>
423
- static inline T
465
+ static inline typename NpyType<T>::TYPE
424
466
from_quad (quad_value x, QuadBackendType backend);
425
467
426
468
template <>
427
469
inline npy_bool
428
- from_quad<npy_bool >(quad_value x, QuadBackendType backend)
470
+ from_quad<spec_npy_bool >(quad_value x, QuadBackendType backend)
429
471
{
430
472
if (backend == BACKEND_SLEEF) {
431
473
return Sleef_cast_to_int64q1 (x.sleef_value ) != 0 ;
@@ -447,6 +489,18 @@ from_quad<npy_byte>(quad_value x, QuadBackendType backend)
447
489
}
448
490
}
449
491
492
+ template <>
493
+ inline npy_ubyte
494
+ from_quad<npy_ubyte>(quad_value x, QuadBackendType backend)
495
+ {
496
+ if (backend == BACKEND_SLEEF) {
497
+ return (npy_ubyte)Sleef_cast_to_uint64q1 (x.sleef_value );
498
+ }
499
+ else {
500
+ return (npy_ubyte)x.longdouble_value ;
501
+ }
502
+ }
503
+
450
504
template <>
451
505
inline npy_short
452
506
from_quad<npy_short>(quad_value x, QuadBackendType backend)
@@ -543,6 +597,18 @@ from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
543
597
}
544
598
}
545
599
600
+ template <>
601
+ inline npy_half
602
+ from_quad<spec_npy_half>(quad_value x, QuadBackendType backend)
603
+ {
604
+ if (backend == BACKEND_SLEEF) {
605
+ return npy_double_to_half (Sleef_cast_to_doubleq1 (x.sleef_value ));
606
+ }
607
+ else {
608
+ return npy_double_to_half ((double )x.longdouble_value );
609
+ }
610
+ }
611
+
546
612
template <>
547
613
inline float
548
614
from_quad<float >(quad_value x, QuadBackendType backend)
@@ -611,8 +677,8 @@ quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
611
677
quad_value in_val;
612
678
memcpy (&in_val, in_ptr, elem_size);
613
679
614
- T out_val = from_quad<T>(in_val, backend);
615
- memcpy (out_ptr, &out_val, sizeof (T ));
680
+ typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
681
+ memcpy (out_ptr, &out_val, sizeof (typename NpyType<T>::TYPE ));
616
682
617
683
in_ptr += strides[0 ];
618
684
out_ptr += strides[1 ];
@@ -642,8 +708,8 @@ quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
642
708
in_val.longdouble_value = *(long double *)in_ptr;
643
709
}
644
710
645
- T out_val = from_quad<T>(in_val, backend);
646
- *(T *)(out_ptr) = out_val;
711
+ typename NpyType<T>::TYPE out_val = from_quad<T>(in_val, backend);
712
+ *(typename NpyType<T>::TYPE *)(out_ptr) = out_val;
647
713
648
714
in_ptr += strides[0 ];
649
715
out_ptr += strides[1 ];
@@ -739,8 +805,9 @@ init_casts_internal(void)
739
805
740
806
add_spec (quad2quad_spec);
741
807
742
- add_cast_to<npy_bool >(&PyArray_BoolDType);
808
+ add_cast_to<spec_npy_bool >(&PyArray_BoolDType);
743
809
add_cast_to<npy_byte>(&PyArray_ByteDType);
810
+ add_cast_to<npy_ubyte>(&PyArray_UByteDType);
744
811
add_cast_to<npy_short>(&PyArray_ShortDType);
745
812
add_cast_to<npy_ushort>(&PyArray_UShortDType);
746
813
add_cast_to<npy_int>(&PyArray_IntDType);
@@ -749,12 +816,14 @@ init_casts_internal(void)
749
816
add_cast_to<npy_ulong>(&PyArray_ULongDType);
750
817
add_cast_to<npy_longlong>(&PyArray_LongLongDType);
751
818
add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
819
+ add_cast_to<spec_npy_half>(&PyArray_HalfDType);
752
820
add_cast_to<float >(&PyArray_FloatDType);
753
821
add_cast_to<double >(&PyArray_DoubleDType);
754
822
add_cast_to<long double >(&PyArray_LongDoubleDType);
755
823
756
- add_cast_from<npy_bool >(&PyArray_BoolDType);
824
+ add_cast_from<spec_npy_bool >(&PyArray_BoolDType);
757
825
add_cast_from<npy_byte>(&PyArray_ByteDType);
826
+ add_cast_from<npy_ubyte>(&PyArray_UByteDType);
758
827
add_cast_from<npy_short>(&PyArray_ShortDType);
759
828
add_cast_from<npy_ushort>(&PyArray_UShortDType);
760
829
add_cast_from<npy_int>(&PyArray_IntDType);
@@ -763,6 +832,7 @@ init_casts_internal(void)
763
832
add_cast_from<npy_ulong>(&PyArray_ULongDType);
764
833
add_cast_from<npy_longlong>(&PyArray_LongLongDType);
765
834
add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
835
+ add_cast_from<spec_npy_half>(&PyArray_HalfDType);
766
836
add_cast_from<float >(&PyArray_FloatDType);
767
837
add_cast_from<double >(&PyArray_DoubleDType);
768
838
add_cast_from<long double >(&PyArray_LongDoubleDType);
0 commit comments