16
16
17
17
// ================================================================================
18
18
// this file has been auto-generated, do not modify its contents!
19
- // date: 2024-04-22 13:28:09.684538
20
- // git hash: fd4eadfbb0c8597276a6c12f972038cd1baff985
19
+ // date: 2024-04-26 10:06:43.573011
20
+ // git hash: a9c7d752a7329ae5187e3e9362a2b47c9f38371a
21
21
// ================================================================================
22
22
23
23
#ifndef KERNEL_FLOAT_MACROS_H
72
72
#define KERNEL_FLOAT_CALL (F, ...) F(__VA_ARGS__)
73
73
74
74
// TODO: check if this way is supported across all compilers
75
- // #if defined(__has_builtin) && __has_builtin(__builtin_assume_aligned)
76
- #if 0
77
- #define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) \
78
- static_cast<TYPE*>(__builtin_assume_aligned(static_cast<TYPE*>(PTR), (ALIGNMENT)))
75
+ #if defined(__has_builtin) && 0 // Seems that `__builtin_assume_aligned` leads to segfaults
76
+ #if __has_builtin(__builtin_assume_aligned)
77
+ #define KERNEL_FLOAT_ASSUME_ALIGNED (TYPE, PTR, ALIGNMENT ) static_cast <TYPE*>(
78
+ __builtin_assume_aligned (static_cast <TYPE*>(PTR), (ALIGNMENT)))
79
+ #else
80
+ #define KERNEL_FLOAT_ASSUME_ALIGNED (TYPE, PTR, ALIGNMENT ) (PTR)
81
+ #endif
79
82
#else
80
83
#define KERNEL_FLOAT_ASSUME_ALIGNED (TYPE, PTR, ALIGNMENT ) (PTR)
81
84
#endif
@@ -4321,8 +4324,8 @@ KERNEL_FLOAT_FP8_CAST(double)
4321
4324
namespace kernel_float {
4322
4325
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__half, __nv_fp8_e4m3)
4323
4326
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__half, __nv_fp8_e5m2)
4324
- KERNEL_FLOAT_FP8_CAST (__half)
4325
4327
4328
+ KERNEL_FLOAT_FP8_CAST (__half)
4326
4329
KERNEL_FLOAT_FP8_CAST2 (__half, __nv_fp8_e4m3, __NV_E4M3)
4327
4330
KERNEL_FLOAT_FP8_CAST2 (__half, __nv_fp8_e5m2, __NV_E5M2)
4328
4331
@@ -4335,8 +4338,8 @@ KERNEL_FLOAT_FP8_CAST2(__half, __nv_fp8_e5m2, __NV_E5M2)
4335
4338
namespace kernel_float {
4336
4339
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__nv_bfloat16, __nv_fp8_e4m3)
4337
4340
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__nv_bfloat16, __nv_fp8_e5m2)
4338
- KERNEL_FLOAT_FP8_CAST (__nv_bfloat16)
4339
4341
4342
+ KERNEL_FLOAT_FP8_CAST (__nv_bfloat16)
4340
4343
KERNEL_FLOAT_FP8_CAST2 (__nv_bfloat16, __nv_fp8_e4m3, __NV_E4M3)
4341
4344
KERNEL_FLOAT_FP8_CAST2 (__nv_bfloat16, __nv_fp8_e5m2, __NV_E5M2)
4342
4345
} // namespace kernel_float
0 commit comments