Skip to content

Commit 41246ab

Browse files
committed
Merge branch 'main' into dev
2 parents a9c7d75 + 5cc429e commit 41246ab

File tree

2 files changed

+18
-12
lines changed

2 files changed

+18
-12
lines changed

include/kernel_float/macros.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,13 @@
5050
#define KERNEL_FLOAT_CALL(F, ...) F(__VA_ARGS__)
5151

5252
// TODO: check whether this approach is supported across all compilers
53-
//#if defined(__has_builtin) && __has_builtin(__builtin_assume_aligned)
54-
#if 0
55-
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) \
56-
static_cast<TYPE*>(__builtin_assume_aligned(static_cast<TYPE*>(PTR), (ALIGNMENT)))
53+
#if defined(__has_builtin) && 0 // Seems that `__builtin_assume_aligned` leads to segfaults
54+
#if __has_builtin(__builtin_assume_aligned)
55+
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) static_cast <TYPE*>(
56+
__builtin_assume_aligned(static_cast <TYPE*>(PTR), (ALIGNMENT)))
57+
#else
58+
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
59+
#endif
5760
#else
5861
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
5962
#endif

single_include/kernel_float.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
//================================================================================
1818
// this file has been auto-generated, do not modify its contents!
19-
// date: 2024-04-22 13:28:09.684538
20-
// git hash: fd4eadfbb0c8597276a6c12f972038cd1baff985
19+
// date: 2024-04-26 10:06:43.573011
20+
// git hash: a9c7d752a7329ae5187e3e9362a2b47c9f38371a
2121
//================================================================================
2222

2323
#ifndef KERNEL_FLOAT_MACROS_H
@@ -72,10 +72,13 @@
7272
#define KERNEL_FLOAT_CALL(F, ...) F(__VA_ARGS__)
7373

7474
// TODO: check whether this approach is supported across all compilers
75-
//#if defined(__has_builtin) && __has_builtin(__builtin_assume_aligned)
76-
#if 0
77-
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) \
78-
static_cast<TYPE*>(__builtin_assume_aligned(static_cast<TYPE*>(PTR), (ALIGNMENT)))
75+
#if defined(__has_builtin) && 0 // Seems that `__builtin_assume_aligned` leads to segfaults
76+
#if __has_builtin(__builtin_assume_aligned)
77+
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) static_cast <TYPE*>(
78+
__builtin_assume_aligned(static_cast <TYPE*>(PTR), (ALIGNMENT)))
79+
#else
80+
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
81+
#endif
7982
#else
8083
#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
8184
#endif
@@ -4321,8 +4324,8 @@ KERNEL_FLOAT_FP8_CAST(double)
43214324
namespace kernel_float {
43224325
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE(__half, __nv_fp8_e4m3)
43234326
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE(__half, __nv_fp8_e5m2)
4324-
KERNEL_FLOAT_FP8_CAST(__half)
43254327

4328+
KERNEL_FLOAT_FP8_CAST(__half)
43264329
KERNEL_FLOAT_FP8_CAST2(__half, __nv_fp8_e4m3, __NV_E4M3)
43274330
KERNEL_FLOAT_FP8_CAST2(__half, __nv_fp8_e5m2, __NV_E5M2)
43284331

@@ -4335,8 +4338,8 @@ KERNEL_FLOAT_FP8_CAST2(__half, __nv_fp8_e5m2, __NV_E5M2)
43354338
namespace kernel_float {
43364339
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE(__nv_bfloat16, __nv_fp8_e4m3)
43374340
KERNEL_FLOAT_DEFINE_PROMOTED_TYPE(__nv_bfloat16, __nv_fp8_e5m2)
4338-
KERNEL_FLOAT_FP8_CAST(__nv_bfloat16)
43394341

4342+
KERNEL_FLOAT_FP8_CAST(__nv_bfloat16)
43404343
KERNEL_FLOAT_FP8_CAST2(__nv_bfloat16, __nv_fp8_e4m3, __NV_E4M3)
43414344
KERNEL_FLOAT_FP8_CAST2(__nv_bfloat16, __nv_fp8_e5m2, __NV_E5M2)
43424345
} // namespace kernel_float

0 commit comments

Comments
 (0)