diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_channel_descriptor.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_channel_descriptor.h new file mode 100644 index 0000000000..26d03abfb4 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_channel_descriptor.h @@ -0,0 +1,311 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#endif + +#ifdef __cplusplus + +extern "C" HIP_PUBLIC_API hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + hipChannelFormatKind f); + +static inline hipChannelFormatDesc hipCreateChannelDescHalf() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf1() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf2() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf4() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat); +} + +template static inline hipChannelFormatDesc hipCreateChannelDesc() { + return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc 
hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ // vector3 is the same as vector4 +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return 
hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() 
{ + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat); +} + +#ifndef __GNUC__ +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat); +} +#endif + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat); +} + +#if !defined(__LP64__) + +template <> 
inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} +#endif /* !__LP64__ */ + +#else + +struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + enum hipChannelFormatKind f); + 
+#endif /* __cplusplus */ + +#endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */ diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_device_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_device_functions.h new file mode 100644 index 0000000000..cf33122570 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_device_functions.h @@ -0,0 +1,925 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include "host_defines.h" +#include "math_fwd.h" +#include +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#if defined(__clang__) && defined(__HIP__) +extern "C" __device__ int printf(const char* fmt, ...); +#else +template static inline __device__ void printf(const char* format, All... 
all) {} +#endif + +extern "C" __device__ unsigned long long __ockl_steadyctr_u64(); + +/* +Integer Intrinsics +*/ + +// integer intrinsic function __poc __clz __ffs __brev +__device__ static inline unsigned int __popc(unsigned int input) { + return __builtin_popcount(input); +} +__device__ static inline unsigned int __popcll(unsigned long long int input) { + return __builtin_popcountll(input); +} + +__device__ static inline int __clz(int input) { return __ockl_clz_u32((uint)input); } + +__device__ static inline int __clzll(long long int input) { + return __ockl_clz_u64((__hip_uint64_t)input); +} + +__device__ static inline int __ffs(unsigned int input) { + return (input == 0 ? -1 : __builtin_ctz(input)) + 1; +} + +__device__ static inline int __ffsll(unsigned long long int input) { + return (input == 0 ? -1 : __builtin_ctzll(input)) + 1; +} + +__device__ static inline int __ffs(int input) { + return (input == 0 ? -1 : __builtin_ctz(input)) + 1; +} + +__device__ static inline int __ffsll(long long int input) { + return (input == 0 ? -1 : __builtin_ctzll(input)) + 1; +} + +// Given a 32/64-bit value exec mask and an integer value base (between 0 and WAVEFRONT_SIZE), +// find the n-th (given by offset) set bit in the exec mask from the base bit, and return the bit +// position. If not found, return -1. 
+// Locate the n-th set bit of a 64-bit mask relative to bit position `base`.
+//   offset > 0  : the offset-th set bit at or above `base` (scanning towards bit 63).
+//   offset < 0  : the |offset|-th set bit at or below `base` (scanning towards bit 0).
+//   offset == 0 : `base` itself, if that bit is set.
+// Returns the bit position, or -1 when the mask holds no such bit.
+// `base` must be below 64; it is used directly as a shift count.
+__device__ static __hip_int32_t __fns64(__hip_uint64_t mask, __hip_uint32_t base,
+                                        __hip_int32_t offset) {
+  __hip_uint64_t temp_mask = mask;
+  __hip_int32_t temp_offset = offset;
+
+  if (offset == 0) {
+    // Shift a 64-bit one: `1 << base` is an int shift, which is undefined for
+    // base >= 32 and sign-extends for base == 31, so bit `base` was not isolated.
+    temp_mask &= (1ULL << base);
+    temp_offset = 1;
+  } else if (offset < 0) {
+    // Search downwards by mirroring the mask and the base, then searching upwards.
+    temp_mask = __builtin_bitreverse64(mask);
+    base = 63 - base;
+    temp_offset = -offset;
+  }
+
+  // Discard every bit below the (possibly mirrored) base.
+  temp_mask = temp_mask & ((~0ULL) << base);
+  if (__builtin_popcountll(temp_mask) < temp_offset) return -1;
+  __hip_int32_t total = 0;
+  // Binary search: halve the window each step, descending into the half that
+  // contains the temp_offset-th remaining set bit.
+  for (int i = 0x20; i > 0; i >>= 1) {
+    __hip_uint64_t temp_mask_lo = temp_mask & ((1ULL << i) - 1);
+    __hip_int32_t pcnt = __builtin_popcountll(temp_mask_lo);
+    if (pcnt < temp_offset) {
+      temp_mask = temp_mask >> i;
+      temp_offset -= pcnt;
+      total += i;
+    } else {
+      temp_mask = temp_mask_lo;
+    }
+  }
+  // Undo the mirroring for downward searches.
+  if (offset < 0)
+    return 63 - total;
+  else
+    return total;
+}
+
+// 32-bit counterpart of __fns64(): only the low 32 bits of `mask` are searched
+// (the value is narrowed on entry). `base` must be below 32.
+// Returns the bit position, or -1 when the mask holds no such bit.
+__device__ static __hip_int32_t __fns32(__hip_uint64_t mask, __hip_uint32_t base,
+                                        __hip_int32_t offset) {
+  __hip_uint32_t temp_mask = mask;
+  __hip_int32_t temp_offset = offset;
+  if (offset == 0) {
+    // Unsigned one avoids the signed overflow of `1 << 31`.
+    temp_mask &= (1U << base);
+    temp_offset = 1;
+  } else if (offset < 0) {
+    // Search downwards by mirroring the mask and the base, then searching upwards.
+    temp_mask = __builtin_bitreverse32(mask);
+    base = 31 - base;
+    temp_offset = -offset;
+  }
+  // Discard every bit below the (possibly mirrored) base.
+  temp_mask = temp_mask & ((~0U) << base);
+  if (__builtin_popcount(temp_mask) < temp_offset) return -1;
+  __hip_int32_t total = 0;
+  // Binary search over window sizes 16, 8, 4, 2, 1.
+  for (int i = 0x10; i > 0; i >>= 1) {
+    __hip_uint32_t temp_mask_lo = temp_mask & ((1U << i) - 1);
+    __hip_int32_t pcnt = __builtin_popcount(temp_mask_lo);
+    if (pcnt < temp_offset) {
+      temp_mask = temp_mask >> i;
+      temp_offset -= pcnt;
+      total += i;
+    } else {
+      temp_mask = temp_mask_lo;
+    }
+  }
+  // Undo the mirroring for downward searches.
+  if (offset < 0)
+    return 31 - total;
+  else
+    return total;
+}
+
+// Wrapper around __fns32() to make porting from CUDA easier
+__device__ static __hip_int32_t __fns(unsigned int mask, unsigned int base, int offset) {
+  return __fns32(mask, base, offset);
+}
+
+__device__ static inline unsigned int __brev(unsigned
int input) {
+  return __builtin_bitreverse32(input);
+}
+
+// Reverse the bit order of a 64-bit value.
+__device__ static inline unsigned long long int __brevll(unsigned long long int input) {
+  return __builtin_bitreverse64(input);
+}
+
+// Index of the lowest set bit of a 64-bit value.
+// For input == 0 the result is -1 converted to unsigned int.
+__device__ static inline unsigned int __lastbit_u32_u64(__hip_uint64_t input) {
+  // Use the `long long` builtin: `long` is only 32 bits wide on LLP64 targets,
+  // where __builtin_ctzl would ignore the upper half of the input.
+  return input == 0 ? -1 : __builtin_ctzll(input);
+}
+
+// Extract `src2 & 31` bits of src0 starting at bit `src1 & 31`; a width of 0 yields 0.
+// NOTE(review): when offset + width exceeds 32 the left-shift count wraps around;
+// callers presumably keep the field inside the word - confirm.
+__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1,
+                                                       unsigned int src2) {
+  __hip_uint32_t offset = src1 & 31;
+  __hip_uint32_t width = src2 & 31;
+  return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width);
+}
+
+// 64-bit variant: extract `src2 & 63` bits of src0 starting at bit `src1 & 63`.
+__device__ static inline __hip_uint64_t __bitextract_u64(__hip_uint64_t src0, unsigned int src1,
+                                                         unsigned int src2) {
+  __hip_uint64_t offset = src1 & 63;
+  __hip_uint64_t width = src2 & 63;
+  return width == 0 ? 0 : (src0 << (64 - offset - width)) >> (64 - width);
+}
+
+// Replace `src3 & 31` bits of src0, starting at bit `src2 & 31`, with the low bits of src1.
+__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1,
+                                                      unsigned int src2, unsigned int src3) {
+  __hip_uint32_t offset = src2 & 31;
+  __hip_uint32_t width = src3 & 31;
+  // Unsigned one: `(1 << 31) - 1` overflows signed int when width == 31.
+  __hip_uint32_t mask = (1U << width) - 1;
+  return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
+}
+
+// 64-bit variant: replace `src3 & 63` bits of src0, starting at bit `src2 & 63`,
+// with the low bits of src1.
+__device__ static inline __hip_uint64_t __bitinsert_u64(__hip_uint64_t src0, __hip_uint64_t src1,
+                                                        unsigned int src2, unsigned int src3) {
+  __hip_uint64_t offset = src2 & 63;
+  __hip_uint64_t width = src3 & 63;
+  __hip_uint64_t mask = (1ULL << width) - 1;
+  return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
+}
+
+// Funnel shift left with the shift amount taken modulo 32; a zero shift returns `hi`.
+// NOTE(review): presumably the upper word of (hi:lo) << shift, as in CUDA - confirm
+// against the alignbit builtin's semantics.
+__device__ inline unsigned int __funnelshift_l(unsigned int lo, unsigned int hi,
+                                               unsigned int shift) {
+  __hip_uint32_t mask_shift = shift & 31;
+  return mask_shift == 0 ? hi : __builtin_amdgcn_alignbit(hi, lo, 32 - mask_shift);
+}
+
+// Funnel shift left with the shift amount clamped to 32 instead of wrapped.
+__device__ inline unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi,
+                                                unsigned int shift) {
+  __hip_uint32_t min_shift = shift >= 32 ? 32 : shift;
+  return min_shift == 0 ?
hi : __builtin_amdgcn_alignbit(hi, lo, 32 - min_shift); +} + +__device__ inline unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, + unsigned int shift) { + return __builtin_amdgcn_alignbit(hi, lo, shift); +} + +__device__ inline unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, + unsigned int shift) { + return shift >= 32 ? hi : __builtin_amdgcn_alignbit(hi, lo, shift); +} + +__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); +__device__ static int __hadd(int x, int y); +__device__ static int __mul24(int x, int y); +__device__ static long long int __mul64hi(long long int x, long long int y); +__device__ static int __mulhi(int x, int y); +__device__ static int __rhadd(int x, int y); +__device__ static unsigned int __sad(int x, int y, unsigned int z); +__device__ static unsigned int __uhadd(unsigned int x, unsigned int y); +__device__ static int __umul24(unsigned int x, unsigned int y); +__device__ static unsigned long long int __umul64hi(unsigned long long int x, + unsigned long long int y); +__device__ static unsigned int __umulhi(unsigned int x, unsigned int y); +__device__ static unsigned int __urhadd(unsigned int x, unsigned int y); +__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); + +struct ucharHolder { + union { + unsigned char c[4]; + unsigned int ui; + }; +} __attribute__((aligned(4))); + +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +} __attribute__((aligned(8))); + +__device__ static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { + struct uchar2Holder cHoldVal; + struct ucharHolder cHoldKey; + cHoldKey.ui = s; + cHoldVal.ui[0] = x; + cHoldVal.ui[1] = y; + unsigned int result; + result = cHoldVal.c[cHoldKey.c[0] & 0x07]; + result += (cHoldVal.c[(cHoldKey.c[0] & 0x70) >> 4] << 8); + result += (cHoldVal.c[cHoldKey.c[1] & 0x07] << 16); + result += (cHoldVal.c[(cHoldKey.c[1] & 0x70) >> 
4] << 24); + return result; +} + +__device__ static inline int __hadd(int x, int y) { return ((long long)x + (long long)y) >> 1; } + +__device__ static inline int __mul24(int x, int y) { return __ockl_mul24_i32(x, y); } + +__device__ static inline long long __mul64hi(long long int x, long long int y) { + unsigned long long x0 = (unsigned long long)x & 0xffffffffUL; + long long x1 = x >> 32; + unsigned long long y0 = (unsigned long long)y & 0xffffffffUL; + long long y1 = y >> 32; + unsigned long long z0 = x0 * y0; + long long t = x1 * y0 + (z0 >> 32); + long long z1 = t & 0xffffffffL; + long long z2 = t >> 32; + z1 = x0 * y1 + z1; + return x1 * y1 + z2 + (z1 >> 32); +} + +__device__ static inline int __mulhi(int x, int y) { return __ockl_mul_hi_i32(x, y); } + +__device__ static inline int __rhadd(int x, int y) { + return ((long long)x + (long long)y + 1) >> 1; +} + +__device__ static inline unsigned int __sad(int x, int y, unsigned int z) { + return x > y ? x - y + z : y - x + z; +} + +__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { + return ((unsigned long long)x + (unsigned long long)y) >> 1; +} + +__device__ static inline int __umul24(unsigned int x, unsigned int y) { + return __ockl_mul24_u32(x, y); +} + +__device__ static inline unsigned long long __umul64hi(unsigned long long int x, + unsigned long long int y) { + unsigned long long x0 = x & 0xffffffffUL; + unsigned long long x1 = x >> 32; + unsigned long long y0 = y & 0xffffffffUL; + unsigned long long y1 = y >> 32; + unsigned long long z0 = x0 * y0; + unsigned long long t = x1 * y0 + (z0 >> 32); + unsigned long long z1 = t & 0xffffffffUL; + unsigned long long z2 = t >> 32; + z1 = x0 * y1 + z1; + return x1 * y1 + z2 + (z1 >> 32); +} + +__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { + return __ockl_mul_hi_u32(x, y); +} + +__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { + return ((unsigned long long)x + (unsigned 
long long)y + 1) >> 1; +} + +__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { + return __ockl_sadd_u32(x, y, z); +} + +__device__ static inline unsigned int __mbcnt_lo(unsigned int x, unsigned int y) { + return __builtin_amdgcn_mbcnt_lo(x, y); +}; + +__device__ static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) { + return __builtin_amdgcn_mbcnt_hi(x, y); +}; + +/* +HIP specific device functions +*/ + +#if !defined(__HIPCC_RTC__) +#include "amd_warp_functions.h" +#include "amd_warp_sync_functions.h" +#endif + +#define MASK1 0x00ff00ff +#define MASK2 0xff00ff00 + +__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 + one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 + one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 - one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 - one2) & MASK2); + return out; +} + +__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { + char4 out; + unsigned one1 = in1.w & MASK1; + unsigned one2 = in2.w & MASK1; + out.w = (one1 * one2) & MASK1; + one1 = in1.w & MASK2; + one2 = in2.w & MASK2; + out.w = out.w | ((one1 * one2) & MASK2); + return out; +} + +__device__ static inline float __double2float_rd(double x) { return __ocml_cvtrtn_f32_f64(x); } +__device__ static inline float __double2float_rn(double x) { return x; } +__device__ static inline float __double2float_ru(double x) { return __ocml_cvtrtp_f32_f64(x); } +__device__ static inline float __double2float_rz(double x) { return __ocml_cvtrtz_f32_f64(x); } + +__device__ static inline int __double2hiint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + + 
int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); + + return tmp[1]; +} +__device__ static inline int __double2loint(double x) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &x, sizeof(tmp)); + + return tmp[0]; +} + +__device__ static inline int __double2int_rd(double x) { + return (int)__builtin_elementwise_floor(x); +} +__device__ static inline int __double2int_rn(double x) { + return (int)__builtin_elementwise_rint(x); +} +__device__ static inline int __double2int_ru(double x) { + return (int)__builtin_elementwise_ceil(x); +} +__device__ static inline int __double2int_rz(double x) { return (int)x; } + +__device__ static inline long long int __double2ll_rd(double x) { + return (long long)__builtin_elementwise_floor(x); +} +__device__ static inline long long int __double2ll_rn(double x) { + return (long long)__builtin_elementwise_rint(x); +} +__device__ static inline long long int __double2ll_ru(double x) { + return (long long)__builtin_elementwise_ceil(x); +} +__device__ static inline long long int __double2ll_rz(double x) { return (long long)x; } + +__device__ static inline unsigned int __double2uint_rd(double x) { + return (unsigned int)__builtin_elementwise_floor(x); +} +__device__ static inline unsigned int __double2uint_rn(double x) { + return (unsigned int)__builtin_elementwise_rint(x); +} +__device__ static inline unsigned int __double2uint_ru(double x) { + return (unsigned int)__builtin_elementwise_ceil(x); +} +__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; } + +__device__ static inline unsigned long long int __double2ull_rd(double x) { + return (unsigned long long int)__builtin_elementwise_floor(x); +} +__device__ static inline unsigned long long int __double2ull_rn(double x) { + return (unsigned long long int)__builtin_elementwise_rint(x); +} +__device__ static inline unsigned long long int __double2ull_ru(double x) { + return (unsigned long long 
int)__builtin_elementwise_ceil(x); +} +__device__ static inline unsigned long long int __double2ull_rz(double x) { + return (unsigned long long int)x; +} +__device__ static inline long long int __double_as_longlong(double x) { + static_assert(sizeof(long long) == sizeof(double), ""); + + long long tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +/* +__device__ unsigned short __float2half_rn(float x); +__device__ float __half2float(unsigned short); + +The above device function are not a valid . +Use +__device__ __half __float2half_rn(float x); +__device__ float __half2float(__half); +from hip_fp16.h + +CUDA implements half as unsigned short whereas, HIP doesn't. + +*/ + +__device__ static inline int __float2int_rd(float x) { return (int)__builtin_elementwise_floor(x); } +__device__ static inline int __float2int_rn(float x) { return (int)__builtin_elementwise_rint(x); } +__device__ static inline int __float2int_ru(float x) { return (int)__builtin_elementwise_ceil(x); } +__device__ static inline int __float2int_rz(float x) { return (int)__builtin_elementwise_trunc(x); } + +__device__ static inline long long int __float2ll_rd(float x) { + return (long long int)__builtin_elementwise_floor(x); +} +__device__ static inline long long int __float2ll_rn(float x) { + return (long long int)__builtin_elementwise_rint(x); +} +__device__ static inline long long int __float2ll_ru(float x) { + return (long long int)__builtin_elementwise_ceil(x); +} +__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; } + +__device__ static inline unsigned int __float2uint_rd(float x) { + return (unsigned int)__builtin_elementwise_floor(x); +} +__device__ static inline unsigned int __float2uint_rn(float x) { + return (unsigned int)__builtin_elementwise_rint(x); +} +__device__ static inline unsigned int __float2uint_ru(float x) { + return (unsigned int)__builtin_elementwise_ceil(x); +} +__device__ static inline unsigned int 
__float2uint_rz(float x) { return (unsigned int)x; } + +__device__ static inline unsigned long long int __float2ull_rd(float x) { + return (unsigned long long int)__builtin_elementwise_floor(x); +} +__device__ static inline unsigned long long int __float2ull_rn(float x) { + return (unsigned long long int)__builtin_elementwise_rint(x); +} +__device__ static inline unsigned long long int __float2ull_ru(float x) { + return (unsigned long long int)__builtin_elementwise_ceil(x); +} +__device__ static inline unsigned long long int __float2ull_rz(float x) { + return (unsigned long long int)x; +} + +__device__ static inline int __float_as_int(float x) { + static_assert(sizeof(int) == sizeof(float), ""); + + int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline unsigned int __float_as_uint(float x) { + static_assert(sizeof(unsigned int) == sizeof(float), ""); + + unsigned int tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __hiloint2double(int hi, int lo) { + static_assert(sizeof(double) == sizeof(__hip_uint64_t), ""); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(hi) << 32ull) | static_cast<__hip_uint32_t>(lo); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + + return tmp1; +} + +__device__ static inline double __int2double_rn(int x) { return (double)x; } + +__device__ static inline float __int2float_rd(int x) { return __ocml_cvtrtn_f32_s32(x); } +__device__ static inline float __int2float_rn(int x) { return (float)x; } +__device__ static inline float __int2float_ru(int x) { return __ocml_cvtrtp_f32_s32(x); } +__device__ static inline float __int2float_rz(int x) { return __ocml_cvtrtz_f32_s32(x); } + +__device__ static inline float __int_as_float(int x) { + static_assert(sizeof(float) == sizeof(int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ll2double_rd(long long int x) { 
return __ocml_cvtrtn_f64_s64(x); }
// Integer -> floating-point conversion helpers.
// In each family the _rn variant is a plain C-style cast, while _rd / _ru / _rz forward to the
// __ocml_cvtrtn_* / __ocml_cvtrtp_* / __ocml_cvtrtz_* helpers respectively.
// NOTE(review): the suffixes presumably select the rounding direction (down / up / toward zero /
// nearest) -- confirm against the OCML documentation.

// signed 64-bit integer -> double
__device__ static inline double __ll2double_rn(long long int x) { return (double)x; }
__device__ static inline double __ll2double_ru(long long int x) { return __ocml_cvtrtp_f64_s64(x); }
__device__ static inline double __ll2double_rz(long long int x) { return __ocml_cvtrtz_f64_s64(x); }

// signed 64-bit integer -> float
__device__ static inline float __ll2float_rd(long long int x) { return __ocml_cvtrtn_f32_s64(x); }
__device__ static inline float __ll2float_rn(long long int x) { return (float)x; }
__device__ static inline float __ll2float_ru(long long int x) { return __ocml_cvtrtp_f32_s64(x); }
__device__ static inline float __ll2float_rz(long long int x) { return __ocml_cvtrtz_f32_s64(x); }

// Reinterprets the object representation of a long long as a double (no numeric conversion).
// The bytes are copied with __builtin_memcpy; the static_assert checks that both types have the
// same size.
__device__ static inline double __longlong_as_double(long long int x) {
  static_assert(sizeof(double) == sizeof(long long), "");

  double tmp;
  __builtin_memcpy(&tmp, &x, sizeof(tmp));

  return tmp;
}

// unsigned 32-bit integer -> double (only the plain-cast _rn variant is defined here)
__device__ static inline double __uint2double_rn(unsigned int x) { return (double)x; }

// unsigned 32-bit integer -> float
__device__ static inline float __uint2float_rd(unsigned int x) { return __ocml_cvtrtn_f32_u32(x); }
__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; }
__device__ static inline float __uint2float_ru(unsigned int x) { return __ocml_cvtrtp_f32_u32(x); }
__device__ static inline float __uint2float_rz(unsigned int x) { return __ocml_cvtrtz_f32_u32(x); }

// Reinterprets the object representation of an unsigned int as a float (no numeric conversion).
// Same memcpy-based approach as __longlong_as_double.
__device__ static inline float __uint_as_float(unsigned int x) {
  static_assert(sizeof(float) == sizeof(unsigned int), "");

  float tmp;
  __builtin_memcpy(&tmp, &x, sizeof(tmp));

  return tmp;
}

// unsigned 64-bit integer -> double
__device__ static inline double __ull2double_rd(unsigned long long int x) {
  return __ocml_cvtrtn_f64_u64(x);
}
__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; }
__device__ static inline double __ull2double_ru(unsigned long long int x) {
  return __ocml_cvtrtp_f64_u64(x);
}
__device__ static inline double __ull2double_rz(unsigned long long int x) {
  return __ocml_cvtrtz_f64_u64(x);
}

// unsigned 64-bit integer -> float
__device__ static inline float __ull2float_rd(unsigned long long int x) {
  return __ocml_cvtrtn_f32_u64(x);
}
__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; }
__device__ static inline float __ull2float_ru(unsigned long long int x) {
  return __ocml_cvtrtp_f32_u64(x);
}
__device__ static inline float __ull2float_rz(unsigned long long int x) {
  return __ocml_cvtrtz_f32_u64(x);
}

#if defined(__clang__) && defined(__HIP__)

// Clock functions
// Declarations only; the inline definitions follow and are compiled only when
// __HIP_DEVICE_COMPILE__ is defined.
__device__ long long int __clock64();
__device__ long long int __clock();
__device__ long long int clock64();
__device__ long long int clock();
__device__ long long int wall_clock64();
// hip.amdgcn.bc - named sync
__device__ void __named_sync();

#ifdef __HIP_DEVICE_COMPILE__

// Clock function to return GPU core cycle count.
// GPU can change its core clock frequency at runtime. The maximum frequency can be queried
// through hipDeviceAttributeClockRate attribute.
__device__ inline __attribute((always_inline)) long long int __clock64() {
  return (long long int)__builtin_readcyclecounter();
}

// Same counter as __clock64(); simply forwards to it.
__device__ inline __attribute((always_inline)) long long int __clock() { return __clock64(); }

// Clock function to return wall clock count at a constant frequency that can be queried
// through hipDeviceAttributeWallClockRate attribute.
+__device__ inline __attribute__((always_inline)) long long int wall_clock64() { + return (long long int)__ockl_steadyctr_u64(); +} + +__device__ inline __attribute__((always_inline)) long long int clock64() { return __clock64(); } + +__device__ inline __attribute__((always_inline)) long long int clock() { return __clock(); } + +// hip.amdgcn.bc - named sync +__device__ inline void __named_sync() { __builtin_amdgcn_s_barrier(); } + +#endif // __HIP_DEVICE_COMPILE__ + +// hip.amdgcn.bc - lanemask +__device__ inline __hip_uint64_t __lanemask_gt() { + __hip_uint32_t lane = __ockl_lane_u32(); + if (lane == 63) return 0; + __hip_uint64_t ballot = __ballot64(1); + __hip_uint64_t mask = (~((__hip_uint64_t)0)) << (lane + 1); + return mask & ballot; +} + +__device__ inline __hip_uint64_t __lanemask_lt() { + __hip_uint32_t lane = __ockl_lane_u32(); + __hip_int64_t ballot = __ballot64(1); + __hip_uint64_t mask = ((__hip_uint64_t)1 << lane) - (__hip_uint64_t)1; + return mask & ballot; +} + +__device__ inline __hip_uint64_t __lanemask_eq() { + __hip_uint32_t lane = __ockl_lane_u32(); + __hip_int64_t mask = ((__hip_uint64_t)1 << lane); + return mask; +} + + +__device__ inline void* __local_to_generic(void* p) { return p; } + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ inline void* __get_dynamicgroupbaseptr() { + // Get group segment base pointer. 
+ return (char*)__local_to_generic((void*)__to_local(__builtin_amdgcn_groupstaticsize())); +} +#else +__device__ void* __get_dynamicgroupbaseptr(); +#endif // __HIP_DEVICE_COMPILE__ + +__device__ inline void* __amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } + +// Memory Fence Functions +__device__ inline static void __threadfence() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); } + +__device__ inline static void __threadfence_block() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); +} + +__device__ inline static void __threadfence_system() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); +} +__device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags) { + if (flags == (__CLK_GLOBAL_MEM_FENCE | __CLK_LOCAL_MEM_FENCE)) { + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup"); + __builtin_amdgcn_s_barrier(); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup"); + } else if (flags & (__CLK_GLOBAL_MEM_FENCE)) { + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup", "global"); + __builtin_amdgcn_s_barrier(); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup", "global"); + } else if (flags & (__CLK_LOCAL_MEM_FENCE)) { + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup", "local"); + __builtin_amdgcn_s_barrier(); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup", "local"); + } else { + __builtin_amdgcn_s_barrier(); + } +} + +__device__ inline static void __barrier(int n) { __work_group_barrier((__cl_mem_fence_flags)n); } + +__device__ +inline +__attribute__((convergent)) +void __syncthreads() +{ + __barrier(__CLK_GLOBAL_MEM_FENCE | __CLK_LOCAL_MEM_FENCE); +} + +__device__ inline __attribute__((convergent)) int __syncthreads_count(int predicate) { + return __ockl_wgred_add_i32(!!predicate); +} + +__device__ inline __attribute__((convergent)) int __syncthreads_and(int predicate) { + return __ockl_wgred_and_i32(!!predicate); +} + +__device__ inline __attribute__((convergent)) int __syncthreads_or(int 
predicate) { + return __ockl_wgred_or_i32(!!predicate); +} + +// hip.amdgcn.bc - device routine +/* + HW_ID Register bit structure for RDNA2 & RDNA3 + WAVE_ID 4:0 Wave id within the SIMD. + SIMD_ID 9:8 SIMD_ID within the WGP: [0] = row, [1] = column. + WGP_ID 13:10 Physical WGP ID. + SA_ID 16 Shader Array ID + SE_ID 20:18 Shader Engine the wave is assigned to for gfx11 + SE_ID 19:18 Shader Engine the wave is assigned to for gfx10 + DP_RATE 31:29 Number of double-precision float units per SIMD + + HW_ID Register bit structure for GCN and CDNA + WAVE_ID 3:0 Wave buffer slot number. 0-9. + SIMD_ID 5:4 SIMD which the wave is assigned to within the CU. + PIPE_ID 7:6 Pipeline from which the wave was dispatched. + CU_ID 11:8 Compute Unit the wave is assigned to. + SH_ID 12 Shader Array (within an SE) the wave is assigned to. + SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a + 14:13 Shader Engine the wave is assigned to for 942 + TG_ID 19:16 Thread-group ID + VM_ID 23:20 Virtual Memory ID + QUEUE_ID 26:24 Queue from which this wave was dispatched. + STATE_ID 29:27 State ID (graphics only, not compute). + ME_ID 31:30 Micro-engine ID. + + XCC_ID Register bit structure for 942/950 + XCC_ID 3:0 XCC the wave is assigned to. 
 */

// Register id used as the REG argument of GETREG_IMMED when reading HW_ID.
#if (defined(__GFX10__) || defined(__GFX11__))
#define HW_ID 23
#else
#define HW_ID 4
#endif

// Field widths and bit offsets inside HW_ID. __GFX10__/__GFX11__ targets expose a WGP_ID field
// and, only when __AMDGCN_CUMODE__ is defined, a 1-bit CU_ID; every other target exposes a 4-bit
// CU_ID.
#if (defined(__GFX10__) || defined(__GFX11__))
#define HW_ID_WGP_ID_SIZE 4
#define HW_ID_WGP_ID_OFFSET 10
#if (defined(__AMDGCN_CUMODE__))
#define HW_ID_CU_ID_SIZE 1
#define HW_ID_CU_ID_OFFSET 8
#endif
#else
#define HW_ID_CU_ID_SIZE 4
#define HW_ID_CU_ID_OFFSET 8
#endif

// Width of the SE_ID field: 3 bits on gfx908, gfx90a and __GFX11__, 2 bits everywhere else.
#if (defined(__gfx908__) || defined(__gfx90a__) || defined(__GFX11__))
#define HW_ID_SE_ID_SIZE 3
#else  // 4 SEs/XCC for 942
#define HW_ID_SE_ID_SIZE 2
#endif
// Offset of SE_ID; __GFX10__/__GFX11__ targets additionally define the SA_ID field.
#if (defined(__GFX10__) || defined(__GFX11__))
#define HW_ID_SE_ID_OFFSET 18
#define HW_ID_SA_ID_OFFSET 16
#define HW_ID_SA_ID_SIZE 1
#else
#define HW_ID_SE_ID_OFFSET 13
#endif

// gfx942 / gfx950: define the __gfx94plus_clr__ marker plus the XCC_ID register id and its single
// field.
#if (defined(__gfx942__) || defined(__gfx950__))
#define __gfx94plus_clr__
#define XCC_ID 20
#define XCC_ID_XCC_ID_SIZE 4
#define XCC_ID_XCC_ID_OFFSET 0
#endif

// __HIP_NO_IMAGE_SUPPORT is forced to 1 on the __gfx94plus_clr__ targets unless it is already
// defined.
#if !defined(__HIP_NO_IMAGE_SUPPORT) && defined(__gfx94plus_clr__)
#define __HIP_NO_IMAGE_SUPPORT 1
#endif

/*
   Encoding of parameter bitmask
   HW_ID        5:0     HW_ID
   OFFSET       10:6    Range: 0..31
   SIZE         15:11   Range: 1..32
 */

// Packs (size, offset, register id) into the immediate layout described above. Callers pass the
// field size minus one as SZ.
#define GETREG_IMMED(SZ, OFF, REG) (((SZ) << 11) | ((OFF) << 6) | (REG))

/*
  __smid returns the wave's assigned Compute Unit and Shader Engine.
  The Compute Unit, CU_ID returned in bits 3:0, and Shader Engine, SE_ID in bits 5:4.
  Note: the results vary over time.
  SZ minus 1 since SIZE is 1-based.
+*/ +__device__ inline unsigned __smid(void) { + unsigned se_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_SE_ID_SIZE - 1, HW_ID_SE_ID_OFFSET, HW_ID)); +#if (defined(__GFX10__) || defined(__GFX11__)) + unsigned wgp_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_WGP_ID_SIZE - 1, HW_ID_WGP_ID_OFFSET, HW_ID)); + unsigned sa_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_SA_ID_SIZE - 1, HW_ID_SA_ID_OFFSET, HW_ID)); +#if (defined(__AMDGCN_CUMODE__)) + unsigned cu_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_CU_ID_SIZE - 1, HW_ID_CU_ID_OFFSET, HW_ID)); +#endif +#else +#if defined(__gfx94plus_clr__) + unsigned xcc_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(XCC_ID_XCC_ID_SIZE - 1, XCC_ID_XCC_ID_OFFSET, XCC_ID)); +#endif + unsigned cu_id = + __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_CU_ID_SIZE - 1, HW_ID_CU_ID_OFFSET, HW_ID)); +#endif +#if (defined(__GFX10__) || defined(__GFX11__)) + unsigned temp = se_id; + temp = (temp << HW_ID_SA_ID_SIZE) | sa_id; + temp = (temp << HW_ID_WGP_ID_SIZE) | wgp_id; +#if (defined(__AMDGCN_CUMODE__)) + temp = (temp << HW_ID_CU_ID_SIZE) | cu_id; +#endif + return temp; + // TODO : CU Mode impl +#elif defined(__gfx94plus_clr__) + unsigned temp = xcc_id; + temp = (temp << HW_ID_SE_ID_SIZE) | se_id; + temp = (temp << HW_ID_CU_ID_SIZE) | cu_id; + return temp; +#else + return (se_id << HW_ID_CU_ID_SIZE) + cu_id; +#endif +} + +/** + * Map HIP_DYNAMIC_SHARED to "extern __shared__" for compatibility with old HIP applications + * To be removed in a future release. 
+ */ +#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; +#define HIP_DYNAMIC_SHARED_ATTRIBUTE + +#endif // defined(__clang__) && defined(__HIP__) + + +// loop unrolling +static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) { + auto dstPtr = static_cast(dst); + auto srcPtr = static_cast(src); + + while (size >= 4u) { + dstPtr[0] = srcPtr[0]; + dstPtr[1] = srcPtr[1]; + dstPtr[2] = srcPtr[2]; + dstPtr[3] = srcPtr[3]; + + size -= 4u; + srcPtr += 4u; + dstPtr += 4u; + } + switch (size) { + case 3: + dstPtr[2] = srcPtr[2]; + case 2: + dstPtr[1] = srcPtr[1]; + case 1: + dstPtr[0] = srcPtr[0]; + } + + return dst; +} + +static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) { + auto dstPtr = static_cast(dst); + + while (size >= 4u) { + dstPtr[0] = val; + dstPtr[1] = val; + dstPtr[2] = val; + dstPtr[3] = val; + + size -= 4u; + dstPtr += 4u; + } + switch (size) { + case 3: + dstPtr[2] = val; + case 2: + dstPtr[1] = val; + case 1: + dstPtr[0] = val; + } + + return dst; +} +#ifndef __OPENMP_AMDGCN__ +static inline __device__ void* memcpy(void* dst, const void* src, size_t size) { + return __hip_hc_memcpy(dst, src, size); +} + +static inline __device__ void* memset(void* ptr, int val, size_t size) { + unsigned char val8 = static_cast(val); + return __hip_hc_memset(ptr, val8, size); +} +#endif // !__OPENMP_AMDGCN__ + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_atomic.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_atomic.h new file mode 100644 index 0000000000..d9b79faeb9 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_atomic.h @@ -0,0 +1,746 @@ +/* +Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(__HIPCC_RTC__) +#include "amd_device_functions.h" +#endif + +#if !defined(__HIP_ATOMIC_BACKWARD_COMPAT) +#define __HIP_ATOMIC_BACKWARD_COMPAT 1 +#endif + +#if defined(__has_extension) && __has_extension(clang_atomic_attributes) && __HIP_ATOMIC_BACKWARD_COMPAT +#define __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY [[clang::atomic(fine_grained_memory, remote_memory)]] +#else +#define __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY +#endif + +template struct Cond_t; + +template struct Cond_t { + using type = T; +}; +template struct Cond_t { + using type = F; +}; + +#if !__HIP_DEVICE_COMPILE__ +// TODO: Remove this after compiler pre-defines the following Macros. 
+#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1 +#define __HIP_MEMORY_SCOPE_WAVEFRONT 2 +#define __HIP_MEMORY_SCOPE_WORKGROUP 3 +#define __HIP_MEMORY_SCOPE_AGENT 4 +#define __HIP_MEMORY_SCOPE_SYSTEM 5 +#endif + +#if !defined(__HIPCC_RTC__) +#include "amd_hip_unsafe_atomics.h" +#endif + +// Atomic expanders +template +inline __attribute__((always_inline, device)) T hip_cas_expander(T* p, T x, Op op, F f) noexcept { + using FP = __attribute__((address_space(0))) const void*; + + __device__ extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared"); + + if (is_shared_workaround((FP)p)) return f(); + + using U = + typename Cond_t::type; + + auto q = reinterpret_cast(p); + + U tmp0{__hip_atomic_load(q, mem_order, mem_scope)}; + U tmp1; + do { + tmp1 = tmp0; + + op(reinterpret_cast(tmp1), x); + } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order, mem_order, mem_scope)); + + return reinterpret_cast(tmp0); +} + +template +inline __attribute__((always_inline, device)) T hip_cas_extrema_expander(T* p, T x, Cmp cmp, + F f) noexcept { + using FP = __attribute__((address_space(0))) const void*; + + __device__ extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared"); + + if (is_shared_workaround((FP)p)) return f(); + + using U = + typename Cond_t::type; + + auto q = reinterpret_cast(p); + + U tmp{__hip_atomic_load(q, mem_order, mem_scope)}; + while (cmp(x, reinterpret_cast(tmp)) && + !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order, mem_scope)); + + return reinterpret_cast(tmp); +} + +__device__ inline unsigned short int atomicCAS(unsigned short int* address, + unsigned short int compare, unsigned short int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline unsigned short int atomicCAS_system(unsigned short int* address, + unsigned short int compare, + unsigned short int val) { + 
__HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline int atomicCAS(int* address, int compare, int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline int atomicCAS_system(int* address, int compare, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline unsigned int atomicCAS(unsigned int* address, unsigned int compare, + unsigned int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, + unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline unsigned long atomicCAS(unsigned long* address, unsigned long compare, + unsigned long val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, + unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline unsigned long long atomicCAS(unsigned long long* address, + unsigned long long compare, unsigned long long val) { + 
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline unsigned long long atomicCAS_system(unsigned long long* address, + unsigned long long compare, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline float atomicCAS(float* address, float compare, float val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline float atomicCAS_system(float* address, float compare, float val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline double atomicCAS(double* address, double compare, double val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ inline double atomicCAS_system(double* address, double compare, double val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + } + return compare; +} + +__device__ inline int atomicAdd(int* address, int val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicAdd_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicAdd(unsigned int* address, unsigned int val) { + return 
__hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicAdd_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicAdd(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicAdd_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicAdd(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicAdd_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline float atomicAdd(float* address, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline float atomicAdd_system(float* address, float val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +#if !defined(__HIPCC_RTC__) +HIP_DEPRECATED("use atomicAdd instead") +#endif // !defined(__HIPCC_RTC__) +__device__ inline void atomicAddNoRet(float* address, float val) { unsafeAtomicAdd(address, val); } + +__device__ inline double atomicAdd(double* address, double val) { +#if 
defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline double atomicAdd_system(double* address, double val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline int atomicSub(int* address, int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicSub_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicSub(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicSub_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicSub(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicSub_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicSub(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicSub_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY 
{ + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline float atomicSub(float* address, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, -val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline float atomicSub_system(float* address, float val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline double atomicSub(double* address, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, -val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline double atomicSub_system(double* address, double val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline int atomicExch(int* address, int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicExch_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicExch(unsigned int* address, unsigned int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicExch_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicExch(unsigned long* address, unsigned long 
val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicExch_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicExch(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicExch_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline float atomicExch(float* address, float val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline float atomicExch_system(float* address, float val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline double atomicExch(double* address, double val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline double atomicExch_system(double* address, double val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline int atomicMin(int* address, int val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicMin_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) { + return 
__hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicMin_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicMin(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicMin_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicMin(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicMin_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline long long atomicMin(long long* address, long long val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline long long atomicMin_system(long long* address, long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline float atomicMin(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMin(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline float atomicMin_system(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + 
return unsafeAtomicMin(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +#endif +} + +__device__ inline double atomicMin(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMin(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline double atomicMin_system(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMin(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_min(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +#endif +} + +__device__ inline int atomicMax(int* address, int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicMax_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicMax_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicMax(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicMax_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ 
inline unsigned long long atomicMax(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicMax_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} +__device__ inline long long atomicMax(long long* address, long long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline long long atomicMax_system(long long* address, long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline float atomicMax(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline float atomicMax_system(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +#endif +} + +__device__ inline double atomicMax(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(addr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#endif +} + +__device__ inline double atomicMax_system(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_max(addr, val, __ATOMIC_RELAXED, 
__HIP_MEMORY_SCOPE_SYSTEM); + } +#endif +} + +__device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) { + return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) { + return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ inline int atomicAnd(int* address, int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicAnd_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicAnd_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicAnd(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicAnd_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicAnd(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicAnd_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, 
__HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline int atomicOr(int* address, int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicOr_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicOr_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicOr(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicOr_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicOr(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicOr_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline int atomicXor(int* address, int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline int atomicXor_system(int* address, int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, 
__HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned int atomicXor_system(unsigned int* address, unsigned int val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long atomicXor(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long atomicXor_system(unsigned long* address, unsigned long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} + +__device__ inline unsigned long long atomicXor(unsigned long long* address, + unsigned long long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ inline unsigned long long atomicXor_system(unsigned long long* address, + unsigned long long val) { + __HIP_ATOMIC_BACKWARD_COMPAT_MEMORY { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +} diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_common.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_common.h new file mode 100644 index 0000000000..4fb7f6034a --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_common.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H + +#if defined(__clang__) && defined(__HIP__) +#define __HIP_CLANG_ONLY__ 1 +#else +#define __HIP_CLANG_ONLY__ 0 +#endif + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_gl_interop.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_gl_interop.h new file mode 100644 index 0000000000..72a8f63f61 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_gl_interop.h @@ -0,0 +1,110 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_AMD_HIP_GL_INTEROP_H +#define HIP_INCLUDE_AMD_HIP_GL_INTEROP_H + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * + * @addtogroup GlobalDefs + * @{ + * + */ + +/** + * HIP Devices used by current OpenGL Context. + */ +typedef enum hipGLDeviceList { + hipGLDeviceListAll = 1, ///< All hip devices used by current OpenGL context. + hipGLDeviceListCurrentFrame = 2, ///< Hip devices used by current OpenGL context in current + ///< frame + hipGLDeviceListNextFrame = 3 ///< Hip devices used by current OpenGL context in next + ///< frame. +} hipGLDeviceList; + + +/** GLuint as uint.*/ +typedef unsigned int GLuint; +/** GLenum as uint.*/ +typedef unsigned int GLenum; +/** + * @} + */ + +/** + * @defgroup GL OpenGL Interoperability + * @ingroup API + * @{ + * This section describes OpenGL interoperability functions of HIP runtime API. + */ + +/** + * @brief Queries devices associated with the current OpenGL context. 
+ * + * @param [out] pHipDeviceCount - Pointer to the number of devices on the current GL context. + * @param [out] pHipDevices - Pointer to the devices on the current OpenGL context. + * @param [in] hipDeviceCount - Size of device. + * @param [in] deviceList - The setting of devices. It can be either hipGLDeviceListCurrentFrame + * for the devices used to render the current frame, or hipGLDeviceListAll for all devices. + * The default setting is Invalid deviceList value. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + */ +hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, + unsigned int hipDeviceCount, hipGLDeviceList deviceList); +/** + * @brief Registers a GL Buffer for interop and returns the corresponding graphics resource. + * + * @param [out] resource - Returns a pointer to the graphics resource. + * @param [in] buffer - Buffer to be registered. + * @param [in] flags - Register flags. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle + * + */ +hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, + unsigned int flags); +/** + * @brief Registers a GL Image for interop and returns the corresponding graphics resource. + * + * @param [out] resource - Returns a pointer to the graphics resource. + * @param [in] image - Image to be registered. + * @param [in] target - Valid target value Id. + * @param [in] flags - Register flags. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle + * + */ +hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, + unsigned int flags); +/** + * @} + */ +#if defined(__cplusplus) +} +#endif /* __cplusplus */ +#endif /* HIP_INCLUDE_AMD_HIP_GL_INTEROP_H */ diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime.h new file mode 100644 index 0000000000..779eb621d8 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime.h @@ -0,0 +1,390 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/hip_runtime.h + * @brief Contains definitions of APIs for HIP runtime. 
+ */ + +// #pragma once +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H + +#include + +#if !defined(__HIPCC_RTC__) +#ifdef __cplusplus +#include +#else +#include +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Query the installed library build name. + * + * This function can be used even when the library is not initialized. + * + * @returns Returns a string describing the build version of the library. The + * string is owned by the library. + */ +const char* amd_dbgapi_get_build_name(); + +/** + * @brief Query the installed library git hash. + * + * This function can be used even when the library is not initialized. + * + * @returns Returns git hash of the library. + */ +const char* amd_dbgapi_get_git_hash(); + +/** + * @brief Query the installed library build ID. + * + * This function can be used even when the library is not initialized. + * + * @returns Returns build ID of the library. + */ +size_t amd_dbgapi_get_build_id(); + +#ifdef __cplusplus +} /* extern "c" */ +#endif + +//--- +// Top part of file can be compiled with any compiler + +#if !defined(__HIPCC_RTC__) +#ifdef __cplusplus +#include +#include +#include +#else +#include +#include +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) + +#if __HIP_CLANG_ONLY__ + +#if !defined(__align__) +#define __align__(x) __attribute__((aligned(x))) +#endif + +#define CUDA_SUCCESS hipSuccess + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#include +#include +extern int HIP_TRACE_API; +#endif // !defined(__HIPCC_RTC__) + +#ifdef __cplusplus +#include +#endif + +#include + +// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. 
+#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) +#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__ +#endif + +// Feature tests: +#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ +// Device compile and not host compile: + +// 32-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) + +// 64-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (1) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (1) + +// warp cross-lane operations: +#define __HIP_ARCH_HAS_WARP_VOTE__ (1) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (1) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (1) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) + +#endif /* Device feature flags */ + + +#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) +#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \ + amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) +#define select_impl_(_1, _2, impl_, ...) impl_ +#define __launch_bounds__(...) 
\ + select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0, )(__VA_ARGS__) + +#if !defined(__HIPCC_RTC__) +__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } +#endif // !defined(__HIPCC_RTC__) + +// End doxygen API: +/** + * @} + */ + +// +// hip-clang functions +// +#if !defined(__HIPCC_RTC__) +#define HIP_KERNEL_NAME(...) __VA_ARGS__ +#define HIP_SYMBOL(X) X + +typedef int hipLaunchParm; + +/** + * @brief Terminates the pArgs recursion once index \p n has reached the tuple size. + */ +template <std::size_t n, typename... Ts, + typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr> +void pArgs(const std::tuple<Ts...>&, void*) {} + +/** + * @brief Stores the address of tuple element \p n in \p _vargs[n], then recurses with the next index. + * + * Rejects reference-typed elements at compile time and, when HIP_STRICT is defined, + * elements that are not trivially copyable. + * + * @param [in] formals Tuple holding the kernel arguments. + * @param [out] _vargs Array that receives one pointer per tuple element. + */ +template <std::size_t n, typename... Ts, + typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr> +void pArgs(const std::tuple<Ts...>& formals, void** _vargs) { + using T = typename std::tuple_element<n, std::tuple<Ts...>>::type; + + static_assert(!std::is_reference<T>{}, + "A __global__ function cannot have a reference as one of its " + "arguments."); +#if defined(HIP_STRICT) + static_assert(std::is_trivially_copyable<T>{}, + "Only TriviallyCopyable types can be arguments to a __global__ " + "function"); +#endif + _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals))); + return pArgs<n + 1>(formals, _vargs); +} + +/** + * @brief Checks that the number of actual arguments matches the kernel signature and + * converts them to the kernel's formal parameter types. + * + * @param [in] kernel Kernel function pointer; used only to deduce \p Formals. + * @param [in] actuals Tuple of the arguments supplied by the caller. + * @return Tuple of the arguments converted to the formal parameter types. + */ +template <typename... Formals, typename... Actuals> +std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), + std::tuple<Actuals...>(actuals)) { + static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch"); + std::tuple<Formals...> to_formals{std::move(actuals)}; + return to_formals; +} + +#if defined(HIP_TEMPLATE_KERNEL_LAUNCH) +/** + * @brief Function-template kernel launch: validates the arguments against the kernel + * signature, collects their addresses and forwards everything to hipLaunchKernel. + */ +template <typename... Args, typename F = void (*)(Args...)> +void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple<Args...>{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast<void*>(kernel); + hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream); +} +#else +#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) 
\ + do { \ + kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \ + } while (0) + +#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__) +#endif + +#include +#endif // !defined(__HIPCC_RTC__) + +/** + * Definition of dim3 used when __HIPCC_RTC__ is defined: three __hip_uint32_t components; + * the C++ constructor defaults each of them to 1. + */ +#if defined(__HIPCC_RTC__) +typedef struct dim3 { + __hip_uint32_t x; ///< x + __hip_uint32_t y; ///< y + __hip_uint32_t z; ///< z +#ifdef __cplusplus + constexpr __device__ dim3(__hip_uint32_t _x = 1, __hip_uint32_t _y = 1, __hip_uint32_t _z = 1) + : x(_x), y(_y), z(_z) {}; +#endif +} dim3; +#endif // defined(__HIPCC_RTC__) + +// Save any existing __DEVICE__ definition and redefine it for the accessors that follow. +#pragma push_macro("__DEVICE__") +#define __DEVICE__ static __device__ __forceinline__ + +// Accessors behind threadIdx.{x,y,z}: forward to __ockl_get_local_id for dimensions 0, 1, 2. +// The size_t result is narrowed to unsigned int by the return type. +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int); +__DEVICE__ unsigned int __hip_get_thread_idx_x() { return __ockl_get_local_id(0); } +__DEVICE__ unsigned int __hip_get_thread_idx_y() { return __ockl_get_local_id(1); } +__DEVICE__ unsigned int __hip_get_thread_idx_z() { return __ockl_get_local_id(2); } + +// Accessors behind blockIdx.{x,y,z}: forward to __ockl_get_group_id. +extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int); +__DEVICE__ unsigned int __hip_get_block_idx_x() { return __ockl_get_group_id(0); } +__DEVICE__ unsigned int __hip_get_block_idx_y() { return __ockl_get_group_id(1); } +__DEVICE__ unsigned int __hip_get_block_idx_z() { return __ockl_get_group_id(2); } + +// Accessors behind blockDim.{x,y,z}: forward to __ockl_get_local_size. +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int); +__DEVICE__ unsigned int __hip_get_block_dim_x() { return __ockl_get_local_size(0); } +__DEVICE__ unsigned int __hip_get_block_dim_y() { return __ockl_get_local_size(1); } +__DEVICE__ unsigned int __hip_get_block_dim_z() { return __ockl_get_local_size(2); } + +// Accessors behind gridDim.{x,y,z}: forward to __ockl_get_num_groups. +extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int); +__DEVICE__ unsigned int __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); } +__DEVICE__ unsigned int __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); } +__DEVICE__ 
unsigned int __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); } + +#define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \ + __declspec(property(get = __get_##DIMENSION)) unsigned int DIMENSION; \ + __DEVICE__ unsigned int __get_##DIMENSION(void) { return FUNCTION; } + +struct __hip_builtin_threadIdx_t { + __HIP_DEVICE_BUILTIN(x, __hip_get_thread_idx_x()); + __HIP_DEVICE_BUILTIN(y, __hip_get_thread_idx_y()); + __HIP_DEVICE_BUILTIN(z, __hip_get_thread_idx_z()); +#ifdef __cplusplus + __device__ operator dim3() const { return dim3(x, y, z); } +#endif +}; + +struct __hip_builtin_blockIdx_t { + __HIP_DEVICE_BUILTIN(x, __hip_get_block_idx_x()); + __HIP_DEVICE_BUILTIN(y, __hip_get_block_idx_y()); + __HIP_DEVICE_BUILTIN(z, __hip_get_block_idx_z()); +#ifdef __cplusplus + __device__ operator dim3() const { return dim3(x, y, z); } +#endif +}; + +struct __hip_builtin_blockDim_t { + __HIP_DEVICE_BUILTIN(x, __hip_get_block_dim_x()); + __HIP_DEVICE_BUILTIN(y, __hip_get_block_dim_y()); + __HIP_DEVICE_BUILTIN(z, __hip_get_block_dim_z()); +#ifdef __cplusplus + __device__ operator dim3() const { return dim3(x, y, z); } +#endif +}; + +struct __hip_builtin_gridDim_t { + __HIP_DEVICE_BUILTIN(x, __hip_get_grid_dim_x()); + __HIP_DEVICE_BUILTIN(y, __hip_get_grid_dim_y()); + __HIP_DEVICE_BUILTIN(z, __hip_get_grid_dim_z()); +#ifdef __cplusplus + __device__ operator dim3() const { return dim3(x, y, z); } +#endif +}; + +#undef __HIP_DEVICE_BUILTIN +#pragma pop_macro("__DEVICE__") + +extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx; +extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx; +extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim; +extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; + +#define hipThreadIdx_x threadIdx.x +#define hipThreadIdx_y threadIdx.y +#define hipThreadIdx_z threadIdx.z + +#define hipBlockIdx_x blockIdx.x +#define hipBlockIdx_y blockIdx.y +#define 
hipBlockIdx_z blockIdx.z + +#define hipBlockDim_x blockDim.x +#define hipBlockDim_y blockDim.y +#define hipBlockDim_z blockDim.z + +#define hipGridDim_x gridDim.x +#define hipGridDim_y gridDim.y +#define hipGridDim_z gridDim.z + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if __HIP_HCC_COMPAT_MODE__ +// Define HCC work item functions in terms of HIP builtin variables. +#pragma push_macro("__DEFINE_HCC_FUNC") +#define __DEFINE_HCC_FUNC(hc_fun, hip_var) \ + inline __device__ __attribute__((always_inline)) unsigned int hc_get_##hc_fun(unsigned int i) { \ + if (i == 0) \ + return hip_var.x; \ + else if (i == 1) \ + return hip_var.y; \ + else \ + return hip_var.z; \ + } + +__DEFINE_HCC_FUNC(workitem_id, threadIdx) +__DEFINE_HCC_FUNC(group_id, blockIdx) +__DEFINE_HCC_FUNC(group_size, blockDim) +__DEFINE_HCC_FUNC(num_groups, gridDim) +#pragma pop_macro("__DEFINE_HCC_FUNC") + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(unsigned int); +inline __device__ __attribute__((always_inline)) unsigned int hc_get_workitem_absolute_id(int dim) { + return (unsigned int)__ockl_get_global_id(dim); +} + +#endif + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#if !defined(__HIPCC_RTC__) +// Support std::complex. +#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ +#pragma push_macro("__CUDA__") +#define __CUDA__ +#include <__clang_cuda_math_forward_declares.h> +#include <__clang_cuda_complex_builtins.h> +// Workaround for using libc++ with HIP-Clang. +// The following headers requires clang include path before standard C++ include path. +// However libc++ include path requires to be before clang include path. +// To workaround this, we pass -isystem with the parent directory of clang include +// path instead of the clang include path itself. 
+#include +#include +#include +#undef __CUDA__ +#pragma pop_macro("__CUDA__") +#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ +#endif // !defined(__HIPCC_RTC__) +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#endif // __HIP_CLANG_ONLY__ + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime_pt_api.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime_pt_api.h new file mode 100644 index 0000000000..e477ad8b67 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_runtime_pt_api.h @@ -0,0 +1,207 @@ +/* +Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/// hipStreamPerThread implementation +#if defined(HIP_API_PER_THREAD_DEFAULT_STREAM) +#define __HIP_STREAM_PER_THREAD +#define __HIP_API_SPT(api) api##_spt +#else +#define __HIP_API_SPT(api) api +#endif + +#if defined(__HIP_STREAM_PER_THREAD) +// Memory APIs +#define hipMemcpy __HIP_API_SPT(hipMemcpy) +#define hipMemcpyToSymbol __HIP_API_SPT(hipMemcpyToSymbol) +#define hipMemcpyFromSymbol __HIP_API_SPT(hipMemcpyFromSymbol) +#define hipMemcpy2D __HIP_API_SPT(hipMemcpy2D) +#define hipMemcpy2DFromArray __HIP_API_SPT(hipMemcpy2DFromArray) +#define hipMemcpy3D __HIP_API_SPT(hipMemcpy3D) +#define hipMemset __HIP_API_SPT(hipMemset) +#define hipMemset2D __HIP_API_SPT(hipMemset2D) +#define hipMemset3D __HIP_API_SPT(hipMemset3D) +#define hipMemcpyAsync __HIP_API_SPT(hipMemcpyAsync) +#define hipMemset3DAsync __HIP_API_SPT(hipMemset3DAsync) +#define hipMemset2DAsync __HIP_API_SPT(hipMemset2DAsync) +#define hipMemsetAsync __HIP_API_SPT(hipMemsetAsync) +#define hipMemcpy3DAsync __HIP_API_SPT(hipMemcpy3DAsync) +#define hipMemcpy2DAsync __HIP_API_SPT(hipMemcpy2DAsync) +#define hipMemcpyFromSymbolAsync __HIP_API_SPT(hipMemcpyFromSymbolAsync) +#define hipMemcpyToSymbolAsync __HIP_API_SPT(hipMemcpyToSymbolAsync) +#define hipMemcpyFromArray __HIP_API_SPT(hipMemcpyFromArray) +#define hipMemcpy2DToArray __HIP_API_SPT(hipMemcpy2DToArray) +#define hipMemcpy2DFromArrayAsync __HIP_API_SPT(hipMemcpy2DFromArrayAsync) +#define hipMemcpy2DToArrayAsync __HIP_API_SPT(hipMemcpy2DToArrayAsync) + +// Stream APIs +#define hipStreamSynchronize __HIP_API_SPT(hipStreamSynchronize) +#define hipStreamQuery __HIP_API_SPT(hipStreamQuery) +#define hipStreamGetFlags __HIP_API_SPT(hipStreamGetFlags) +#define hipStreamGetPriority __HIP_API_SPT(hipStreamGetPriority) +#define hipStreamWaitEvent 
__HIP_API_SPT(hipStreamWaitEvent) +#define hipStreamAddCallback __HIP_API_SPT(hipStreamAddCallback) +#define hipLaunchHostFunc __HIP_API_SPT(hipLaunchHostFunc) + +// Event APIs +#define hipEventRecord __HIP_API_SPT(hipEventRecord) + +// Launch APIs +#define hipLaunchKernel __HIP_API_SPT(hipLaunchKernel) +#define hipLaunchCooperativeKernel __HIP_API_SPT(hipLaunchCooperativeKernel) + +// Graph APIs +#define hipGraphLaunch __HIP_API_SPT(hipGraphLaunch) +#define hipStreamBeginCapture __HIP_API_SPT(hipStreamBeginCapture) +#define hipStreamEndCapture __HIP_API_SPT(hipStreamEndCapture) +#define hipStreamIsCapturing __HIP_API_SPT(hipStreamIsCapturing) +#define hipStreamGetCaptureInfo __HIP_API_SPT(hipStreamGetCaptureInfo) +#define hipStreamGetCaptureInfo_v2 __HIP_API_SPT(hipStreamGetCaptureInfo_v2) + +// Driver Entry Point API +#define hipGetDriverEntryPoint __HIP_API_SPT(hipGetDriverEntryPoint) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +hipError_t hipMemcpy_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); + +hipError_t hipMemcpyToSymbol_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); + +hipError_t hipMemcpyFromSymbol_spt(void* dst, const void* symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); + +hipError_t hipMemcpy2D_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpy2DFromArray_spt(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, + hipMemcpyKind kind); + +hipError_t hipMemcpy3D_spt(const struct hipMemcpy3DParms* p); + +hipError_t hipMemset_spt(void* dst, int value, size_t sizeBytes); + +hipError_t hipMemsetAsync_spt(void* dst, int value, size_t sizeBytes, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemset2D_spt(void* 
dst, size_t pitch, int value, size_t width, size_t height); + +hipError_t hipMemset2DAsync_spt(void* dst, size_t pitch, int value, size_t width, size_t height, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemset3DAsync_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); + +hipError_t hipMemcpyAsync_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpy3DAsync_spt(const hipMemcpy3DParms* p, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpy2DAsync_spt(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpyFromSymbolAsync_spt(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpyToSymbolAsync_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, + size_t hOffset, size_t count, hipMemcpyKind kind); + +hipError_t hipMemcpy2DToArray_spt(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpy2DFromArrayAsync_spt(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffsetSrc, size_t hOffsetSrc, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipMemcpy2DToArrayAsync_spt(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, size_t height, + hipMemcpyKind kind, + hipStream_t 
stream __dparm(hipStreamPerThread)); + +hipError_t hipStreamQuery_spt(hipStream_t stream); + +hipError_t hipStreamSynchronize_spt(hipStream_t stream); + +hipError_t hipStreamGetPriority_spt(hipStream_t stream, int* priority); + +hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, + unsigned int flags __dparm(0)); + +hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int* flags); + +hipError_t hipStreamAddCallback_spt(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags); + +hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream __dparm(hipStreamPerThread)); + +hipError_t hipLaunchKernel_spt(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, + hipStream_t stream __dparm(hipStreamPerThread)); + +hipError_t hipGraphLaunch_spt(hipGraphExec_t graphExec, hipStream_t stream); +hipError_t hipStreamBeginCapture_spt(hipStream_t stream, hipStreamCaptureMode mode); +hipError_t hipStreamEndCapture_spt(hipStream_t stream, hipGraph_t* pGraph); +hipError_t hipStreamIsCapturing_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus); +hipError_t hipStreamGetCaptureInfo_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); +hipError_t hipStreamGetCaptureInfo_v2_spt(hipStream_t stream, + hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out, hipGraph_t* graph_out, + const hipGraphNode_t** dependencies_out, + size_t* numDependencies_out); +hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userData); +hipError_t hipGetDriverEntryPoint_spt(const char* symbol, void** funcPtr, unsigned long long flags, + hipDriverEntryPointQueryResult* status); + + +#ifdef __cplusplus +} +#endif // extern "C" 
+ +#endif // defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#endif // HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_unsafe_atomics.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_unsafe_atomics.h new file mode 100644 index 0000000000..0366f278ed --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_unsafe_atomics.h @@ -0,0 +1,595 @@ +/* +Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#ifdef __cplusplus + +#pragma push_macro("__HIP_ATOMICS_IGNORE_DENORMAL_MODE") +#if defined(__has_extension) && __has_extension(clang_atomic_attributes) +#define __HIP_ATOMICS_IGNORE_DENORMAL_MODE [[clang::atomic(ignore_denormal_mode)]] +#else +#define __HIP_ATOMICS_IGNORE_DENORMAL_MODE +#endif + +/** + * @brief Unsafe floating point rmw atomic add. 
+ * + * Performs a relaxed read-modify-write floating point atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value. + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * the following conditions are met: + * + * - \p addr is at least 4 bytes aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and is not supported. + * + * @param [in,out] addr Pointer to value to be incremented by \p value. + * @param [in] value Value by which \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicAdd(float* addr, float value) { +#if defined(__gfx90a__) && __has_builtin(__builtin_amdgcn_is_shared) && \ + __has_builtin(__builtin_amdgcn_is_private) && \ + __has_builtin(__builtin_amdgcn_ds_atomic_fadd_f32) && \ + __has_builtin(__builtin_amdgcn_global_atomic_fadd_f32) + if (__builtin_amdgcn_is_shared((const __attribute__((address_space(0))) void*)addr)) + return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value); + else if (__builtin_amdgcn_is_private((const __attribute__((address_space(0))) void*)addr)) { + float temp = *addr; + *addr = temp + value; + return temp; + } else + return __builtin_amdgcn_global_atomic_fadd_f32(addr, value); +#elif __has_builtin(__hip_atomic_fetch_add) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Unsafe floating point rmw atomic max. + * + * Performs a relaxed read-modify-write floating point atomic max with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if greater. + * + * @note This operation is currently identical to that performed by + * atomicMax and is included for completeness. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicMax(float* addr, float val) { +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned int* uaddr = (unsigned int*)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + return __uint_as_float(value); +#endif +} + +/** + * @brief Unsafe floating point rmw atomic min. + * + * Performs a relaxed read-modify-write floating point atomic min with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if lesser. + * + * @note This operation is currently identical to that performed by + * atomicMin and is included for completeness. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicMin(float* addr, float val) { +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned int* uaddr = (unsigned int*)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + return __uint_as_float(value); +#endif +} + +/** + * @brief Unsafe double precision rmw atomic add. + * + * Performs a relaxed read-modify-write double precision atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * following condition are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and are not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] value Value by \p addr is to be incremented. 
+ * @return Original value contained in \p addr. + */ +__device__ inline double unsafeAtomicAdd(double* addr, double value) { +#if defined(__gfx90a__) && __has_builtin(__builtin_amdgcn_flat_atomic_fadd_f64) + return __builtin_amdgcn_flat_atomic_fadd_f64(addr, value); +#elif __has_builtin(__hip_atomic_fetch_add) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Unsafe double precision rmw atomic max. + * + * Performs a relaxed read-modify-write double precision atomic max with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if greater. + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * following condition are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and are not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to updated the contents at \p addr + * @return Original value contained at \p addr. 
+ */ +__device__ inline double unsafeAtomicMax(double* addr, double val) { +#if (defined(__gfx90a__) || defined(__gfx94plus_clr__)) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64) + return __builtin_amdgcn_flat_atomic_fmax_f64(addr, val); +#else +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned long long* uaddr = (unsigned long long*)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); +#endif +#endif +} + +/** + * @brief Unsafe double precision rmw atomic min. + * + * Performs a relaxed read-modify-write double precision atomic min with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if lesser. + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * following condition are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. 
+ * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and are not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to updated the contents at \p addr + * @return Original value contained at \p addr. + */ +__device__ inline double unsafeAtomicMin(double* addr, double val) { +#if (defined(__gfx90a__) || defined(__gfx94plus_clr__)) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64) + return __builtin_amdgcn_flat_atomic_fmin_f64(addr, val); +#else +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned long long* uaddr = (unsigned long long*)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); +#endif +#endif +} + +/** + * @brief Safe floating point rmw atomic add. + * + * Performs a relaxed read-modify-write floating point atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be increment by \p value. + * @param [in] value Value by \p addr is to be incremented. 
+ * @return Original value contained in \p addr. + */ +__device__ inline float safeAtomicAdd(float* addr, float value) { +#if defined(__gfx908__) || ((defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__)) && \ + !__has_builtin(__hip_atomic_fetch_add)) + // On gfx908, we can generate unsafe FP32 atomic add that does not follow all + // IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead. + // On gfx90a, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we + // need to force a CAS loop here. + float old_val; +#if __has_builtin(__hip_atomic_load) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else // !__has_builtin(__hip_atomic_load) + old_val = + __uint_as_float(__atomic_load_n(reinterpret_cast(addr), __ATOMIC_RELAXED)); +#endif // __has_builtin(__hip_atomic_load) + float expected, temp; + do { + temp = expected = old_val; +#if __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else // !__has_builtin(__hip_atomic_compare_exchange_strong) + __atomic_compare_exchange_n(addr, &expected, old_val + value, false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); +#endif // __has_builtin(__hip_atomic_compare_exchange_strong) + old_val = expected; + } while (__float_as_uint(temp) != __float_as_uint(old_val)); + return old_val; +#elif defined(__gfx90a__) + // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope + // atomics will produce safe CAS loops, but are otherwise not different than + // agent-scope atomics. This logic is only applicable for gfx90a, and should + // not be assumed on other architectures. 
+ __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +#elif __has_builtin(__hip_atomic_fetch_add) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Safe floating point rmw atomic max. + * + * Performs a relaxed read-modify-write floating point atomic max with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if greater. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. + */ +__device__ inline float safeAtomicMax(float* addr, float val) { +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned int* uaddr = (unsigned int*)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + return __uint_as_float(value); +#endif +} + +/** + * @brief Safe floating point rmw atomic min. + * + * Performs a relaxed read-modify-write floating point atomic min with + * device memory scope. 
The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if lesser. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. + */ +__device__ inline float safeAtomicMin(float* addr, float val) { +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned int* uaddr = (unsigned int*)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + return __uint_as_float(value); +#endif +} + +/** + * @brief Safe double precision rmw atomic add. + * + * Performs a relaxed read-modify-write double precision atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be increment by \p value. + * @param [in] value Value by \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline double safeAtomicAdd(double* addr, double value) { +#if defined(__gfx90a__) && __has_builtin(__hip_atomic_fetch_add) + // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope + // atomics will produce safe CAS loops, but are otherwise not different than + // agent-scope atomics. This logic is only applicable for gfx90a, and should + // not be assumed on other architectures. + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + } +#elif defined(__gfx90a__) + // On gfx90a, if we do not have the __hip_atomic_fetch_add builtin, we need to + // force a CAS loop here. + double old_val; +#if __has_builtin(__hip_atomic_load) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else // !__has_builtin(__hip_atomic_load) + old_val = __longlong_as_double( + __atomic_load_n(reinterpret_cast(addr), __ATOMIC_RELAXED)); +#endif // __has_builtin(__hip_atomic_load) + double expected, temp; + do { + temp = expected = old_val; +#if __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else // !__has_builtin(__hip_atomic_compare_exchange_strong) + __atomic_compare_exchange_n(addr, &expected, old_val + value, false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); +#endif // __has_builtin(__hip_atomic_compare_exchange_strong) + old_val = expected; + } while (__double_as_longlong(temp) != __double_as_longlong(old_val)); + return old_val; +#else // !defined(__gfx90a__) +#if __has_builtin(__hip_atomic_fetch_add) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } +#else // !__has_builtin(__hip_atomic_fetch_add) + return __atomic_fetch_add(addr, value, 
__ATOMIC_RELAXED); +#endif // __has_builtin(__hip_atomic_fetch_add) +#endif +} + +/** + * @brief Safe double precision rmw atomic max. + * + * Performs a relaxed read-modify-write double precision atomic max with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if greater. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to updated the contents at \p addr + * @return Original value contained at \p addr. + */ +__device__ inline double safeAtomicMax(double* addr, double val) { +#if __has_builtin(__builtin_amdgcn_is_private) + if (__builtin_amdgcn_is_private((const __attribute__((address_space(0))) void*)addr)) { + double old = *addr; + *addr = __builtin_fmax(old, val); + return old; + } else { +#endif +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned long long* uaddr = (unsigned long long*)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); +#endif +#if __has_builtin(__builtin_amdgcn_is_private) + } +#endif +} + +/** + * @brief Safe double precision rmw atomic min. 
+ * + * Performs a relaxed read-modify-write double precision atomic min with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if lesser. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to updated the contents at \p addr + * @return Original value contained at \p addr. + */ +__device__ inline double safeAtomicMin(double* addr, double val) { +#if __has_builtin(__builtin_amdgcn_is_private) + if (__builtin_amdgcn_is_private((const __attribute__((address_space(0))) void*)addr)) { + double old = *addr; + *addr = __builtin_fmin(old, val); + return old; + } else { +#endif +#if __has_builtin(__hip_atomic_load) && __has_builtin(__hip_atomic_compare_exchange_strong) + __HIP_ATOMICS_IGNORE_DENORMAL_MODE { + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + } +#else + unsigned long long* uaddr = (unsigned long long*)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); +#endif +#if __has_builtin(__builtin_amdgcn_is_private) + } +#endif +} + +#pragma pop_macro("__HIP_ATOMICS_IGNORE_DENORMAL_MODE") + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_vector_types.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_vector_types.h new file mode 100644 index 0000000000..b96bcc1e16 --- 
/dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_hip_vector_types.h @@ -0,0 +1,1606 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/hip_vector_types.h + * @brief Defines the different newt vector types for HIP runtime. 
+ */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H + +#include "hip/amd_detail/host_defines.h" + +#if defined(__HIPCC_RTC__) +#define __HOST_DEVICE__ __device__ +#else +#define __HOST_DEVICE__ __host__ __device__ +#endif + +#if defined(__has_attribute) +#if __has_attribute(ext_vector_type) +#define __HIP_USE_NATIVE_VECTOR__ 1 +#define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) +#else +#define __NATIVE_VECTOR__(n, T) alignas(n * sizeof(T)) T[n] +#endif + +#if defined(__cplusplus) +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#endif // defined(__HIPCC_RTC__) + +template struct HIP_vector_base; +template struct HIP_vector_type; + +namespace hip_impl { +template __attribute__((always_inline)) __HOST_DEVICE__ + typename HIP_vector_base::Native_vec_* + get_native_pointer(HIP_vector_base& base_vec) { + static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base::Native_vec_)); + static_assert( + (__hip_internal::alignment_of>::value % + __hip_internal::alignment_of::Native_vec_>::value) == 0); + return reinterpret_cast::Native_vec_*>(&base_vec); +}; + +template +__attribute__((always_inline)) __HOST_DEVICE__ const typename HIP_vector_base::Native_vec_* +get_native_pointer(const HIP_vector_base& base_vec) { + static_assert(sizeof(base_vec) == sizeof(typename HIP_vector_base::Native_vec_)); + static_assert( + (__hip_internal::alignment_of>::value % + __hip_internal::alignment_of::Native_vec_>::value) == 0); + return reinterpret_cast::Native_vec_*>(&base_vec); +}; +} // Namespace hip_impl. 
+ +template __attribute__((always_inline)) __HOST_DEVICE__ + typename HIP_vector_base::Native_vec_& + get_native_vector(HIP_vector_base& base_vec) { + return *hip_impl::get_native_pointer(base_vec); +}; + +template +__attribute__((always_inline)) __HOST_DEVICE__ const typename HIP_vector_base::Native_vec_& +get_native_vector(const HIP_vector_base& base_vec) { + return *hip_impl::get_native_pointer(base_vec); +}; + +template struct HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(1, T); + + T x; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + explicit constexpr HIP_vector_base(T x_) : x(x_) {} + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; +}; + +template struct alignas(2 * sizeof(T)) HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(2, T); + + T x, y; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(T x_, T y_ = T()) : x(x_), y(y_) {} + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; +}; + +template struct HIP_vector_base { + struct Native_vec_ { + T d[3]; + + __HOST_DEVICE__ + Native_vec_() = default; + + __HOST_DEVICE__ + explicit constexpr Native_vec_(T x_) noexcept : d{x_, x_, x_} {} + __HOST_DEVICE__ + constexpr Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {} + __HOST_DEVICE__ + constexpr Native_vec_(const Native_vec_&) = default; + __HOST_DEVICE__ + constexpr Native_vec_(Native_vec_&&) = default; + __HOST_DEVICE__ + ~Native_vec_() = 
default; + + __HOST_DEVICE__ + Native_vec_& operator=(const Native_vec_&) = default; + __HOST_DEVICE__ + Native_vec_& operator=(Native_vec_&&) = default; + + __HOST_DEVICE__ + T& operator[](unsigned int idx) noexcept { return d[idx]; } + __HOST_DEVICE__ + T operator[](unsigned int idx) const noexcept { return d[idx]; } + + __HOST_DEVICE__ + Native_vec_& operator+=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i]; + return *this; + } + __HOST_DEVICE__ + Native_vec_& operator-=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i]; + return *this; + } + + __HOST_DEVICE__ + Native_vec_& operator*=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i]; + return *this; + } + __HOST_DEVICE__ + Native_vec_& operator/=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i]; + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_ operator-() const noexcept { + auto r{*this}; + for (auto&& x : r.d) x = -x; + return r; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_ operator~() const noexcept { + auto r{*this}; + for (auto&& x : r.d) x = ~x; + return r; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_& operator%=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i]; + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_& operator^=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i]; + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_& operator|=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i]; + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_& operator&=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i]; + return *this; + } + template {}>::type* = nullptr> + 
__HOST_DEVICE__ Native_vec_& operator>>=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i]; + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ Native_vec_& operator<<=(const Native_vec_& x_) noexcept { + for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i]; + return *this; + } +#if defined(__INTEL_COMPILER) + typedef struct { + int values[4]; + } _Vec3_cmp; + using Vec3_cmp = _Vec3_cmp; +#else + using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int)))); +#endif // INTEL + __HOST_DEVICE__ + Vec3_cmp operator==(const Native_vec_& x_) const noexcept { + return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]}; + } + }; + + T x, y, z; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(T x_, T y_ = T(), T z_ = T()) : x(x_), y(y_), z(z_) {}; + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(HIP_vector_base&&) = default; +}; + +template struct alignas(4 * sizeof(T)) HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(4, T); + + T x, y, z, w; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_base(T x_, T y_ = T(), T z_ = T(), T w_ = T()) + : x(x_), y(y_), z(z_), w(w_) {}; + __HOST_DEVICE__ + constexpr HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; +}; + +template +constexpr inline __HOST_DEVICE__ HIP_vector_type make_vector_type_impl( + T val, 
__hip_internal::index_sequence) noexcept { + // Fills vec with vals, and ignores the indices + return HIP_vector_type{((void)indices, val)...}; +} + +template +constexpr inline __HOST_DEVICE__ HIP_vector_type make_vector_type(T val) { + return make_vector_type_impl( + val, __hip_internal::make_index_sequence_value(__hip_internal::make_index_sequence{})); +} + +template struct HIP_vector_type : public HIP_vector_base { + using typename HIP_vector_base::Native_vec_; + + __HOST_DEVICE__ + HIP_vector_type() = default; + template ::value>::type* = nullptr> + __HOST_DEVICE__ explicit constexpr HIP_vector_type(U x_) noexcept + : HIP_vector_base{static_cast(x_)} {} + template < // TODO: constrain based on type as well. + typename... Us, + typename __hip_internal::enable_if<(rank > 1) && sizeof...(Us) == rank>::type* = nullptr> + __HOST_DEVICE__ constexpr HIP_vector_type(Us... xs) noexcept + : HIP_vector_base{static_cast(xs)...} {} + __HOST_DEVICE__ + constexpr HIP_vector_type(const HIP_vector_type&) = default; + __HOST_DEVICE__ + constexpr HIP_vector_type(HIP_vector_type&&) = default; + __HOST_DEVICE__ + ~HIP_vector_type() = default; + + __HOST_DEVICE__ + HIP_vector_type& operator=(const HIP_vector_type&) = default; + __HOST_DEVICE__ + HIP_vector_type& operator=(HIP_vector_type&&) = default; + + // Operators + __HOST_DEVICE__ + T& operator[](size_t idx) noexcept { return reinterpret_cast(this)[idx]; } + __HOST_DEVICE__ + const T& operator[](size_t idx) const noexcept { return reinterpret_cast(this)[idx]; } + + __HOST_DEVICE__ + HIP_vector_type& operator++() noexcept { + HIP_vector_type unity = make_vector_type(1); + return *this += unity; + } + __HOST_DEVICE__ + HIP_vector_type operator++(int) noexcept { + auto tmp(*this); + ++*this; + return tmp; + } + + __HOST_DEVICE__ + HIP_vector_type& operator--() noexcept { + HIP_vector_type unity = make_vector_type(1); + return *this -= unity; + } + __HOST_DEVICE__ + HIP_vector_type operator--(int) noexcept { + auto tmp(*this); + 
--*this; + return tmp; + } + + __HOST_DEVICE__ HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) += get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] += get_native_vector(x)[i]; +#endif + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator+=(U x) noexcept { + return *this += make_vector_type(x); + } + + __HOST_DEVICE__ HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) -= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] -= get_native_vector(x)[i]; +#endif + return *this; + } + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator-=(U x) noexcept { + return *this -= make_vector_type(x); + } + + __HOST_DEVICE__ HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) *= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] *= get_native_vector(x)[i]; +#endif + return *this; + } + + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( + HIP_vector_type x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} *= y; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator*=(U x) noexcept { + return *this *= make_vector_type(x); + } + + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( + HIP_vector_type x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} /= y; + } + + __HOST_DEVICE__ HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) /= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] /= get_native_vector(x)[i]; +#endif + return *this; + } + template {}>::type* = nullptr> + 
__HOST_DEVICE__ HIP_vector_type& operator/=(U x) noexcept { + return *this /= make_vector_type(x); + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type operator-() const noexcept { + auto tmp(*this); +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(tmp) = -get_native_vector(tmp); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(tmp)[i] = -get_native_vector(tmp)[i]; +#endif + return tmp; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type operator~() const noexcept { + HIP_vector_type r{*this}; +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(r) = ~get_native_vector(r); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(r)[i] = ~get_native_vector(r)[i]; +#endif + return r; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) %= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] %= get_native_vector(x)[i]; +#endif + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) ^= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] ^= get_native_vector(x)[i]; +#endif + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) |= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] |= get_native_vector(x)[i]; +#endif + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) &= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) 
get_native_vector(*this)[i] &= get_native_vector(x)[i]; +#endif + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) >>= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] >>= get_native_vector(x)[i]; +#endif + return *this; + } + + template {}>::type* = nullptr> + __HOST_DEVICE__ HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept { +#if __HIP_USE_NATIVE_VECTOR__ + get_native_vector(*this) <<= get_native_vector(x); +#else + for (auto i = 0u; i != rank; ++i) get_native_vector(*this)[i] <<= get_native_vector(x)[i]; +#endif + return *this; + } +}; + +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator+( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} += y; +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator+(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} += make_vector_type(y); +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator+( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) += y; +} + +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator-( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} -= y; +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator-(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} -= make_vector_type(y); +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator-( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) -= y; +} + +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator*(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} *= make_vector_type(y); +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator*( + U x, const 
HIP_vector_type& y) noexcept { + return make_vector_type(x) *= y; +} + +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator/(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} /= make_vector_type(y); +} +template +__HOST_DEVICE__ inline constexpr HIP_vector_type operator/( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) /= y; +} + +template __HOST_DEVICE__ inline +#if __cplusplus >= 201402L && !defined(__HIPCC_RTC__) + constexpr +#endif + bool + operator==(const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + bool isTrue = true; + const auto& native_x = get_native_vector(x); + const auto& native_y = get_native_vector(y); + for (unsigned int i = 0; i < n; ++i) { + isTrue = (isTrue && (native_x[i] == native_y[i])); + } + return isTrue; +} + +template +__HOST_DEVICE__ inline constexpr bool operator==(const HIP_vector_type& x, U y) noexcept { + return x == make_vector_type(y); +} +template +__HOST_DEVICE__ inline constexpr bool operator==(U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) == y; +} + +template +__HOST_DEVICE__ inline constexpr bool operator!=(const HIP_vector_type& x, + const HIP_vector_type& y) noexcept { + return !(x == y); +} +template +__HOST_DEVICE__ inline constexpr bool operator!=(const HIP_vector_type& x, U y) noexcept { + return !(x == y); +} +template +__HOST_DEVICE__ inline constexpr bool operator!=(U x, const HIP_vector_type& y) noexcept { + return !(x == y); +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator%( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} %= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator%(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} %= make_vector_type(y); +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator%( + U x, const HIP_vector_type& y) 
noexcept { + return make_vector_type(x) %= y; +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator^( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} ^= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator^(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} ^= make_vector_type(y); +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator^( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) ^= y; +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator|( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} |= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator|(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} |= make_vector_type(y); +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator|( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) |= y; +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator&( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} &= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator&(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} &= make_vector_type(y); +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator&( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) &= y; +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator>>( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} >>= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator>>(const HIP_vector_type& x, + U y) 
noexcept { + return HIP_vector_type{x} >>= make_vector_type(y); +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator>>( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) >>= y; +} + +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator<<( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept { + return HIP_vector_type{x} <<= y; +} +template {}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator<<(const HIP_vector_type& x, + U y) noexcept { + return HIP_vector_type{x} <<= make_vector_type(y); +} +template ::value>::type, + typename __hip_internal::enable_if<__hip_internal::is_integral{}>* = nullptr> +__HOST_DEVICE__ inline constexpr HIP_vector_type operator<<( + U x, const HIP_vector_type& y) noexcept { + return make_vector_type(x) <<= y; +} + +/* + * Map HIP_vector_type to HIP_vector_type + */ +template +__forceinline__ __HOST_DEVICE__ + typename __hip_internal::enable_if<(rankT == 1 && rankU >= 1), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x)); +}; + +template +__forceinline__ __HOST_DEVICE__ + typename __hip_internal::enable_if<(rankT == 2 && rankU == 1), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x), static_cast(0)); +}; + +template +__forceinline__ __HOST_DEVICE__ + typename __hip_internal::enable_if<(rankT == 2 && rankU >= 2), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x), static_cast(u.y)); +}; + +template +__forceinline__ __HOST_DEVICE__ + typename __hip_internal::enable_if<(rankT == 4 && rankU == 1), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x), static_cast(0), static_cast(0), + static_cast(0)); +}; + +template +__forceinline__ __HOST_DEVICE__ + typename 
__hip_internal::enable_if<(rankT == 4 && rankU == 2), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x), static_cast(u.y), static_cast(0), + static_cast(0)); +}; + +template +__forceinline__ __HOST_DEVICE__ + typename __hip_internal::enable_if<(rankT == 4 && rankU == 4), + const HIP_vector_type>::type + __hipMapVector(const HIP_vector_type& u) { + return HIP_vector_type(static_cast(u.x), static_cast(u.y), static_cast(u.z), + static_cast(u.w)); +}; + +#define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + using CUDA_name##1 = HIP_vector_type; \ + using CUDA_name##2 = HIP_vector_type; \ + using CUDA_name##3 = HIP_vector_type; \ + using CUDA_name##4 = HIP_vector_type; +#else +#define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + typedef struct { \ + T x; \ + } CUDA_name##1; \ + typedef struct { \ + T x; \ + T y; \ + } CUDA_name##2; \ + typedef struct { \ + T x; \ + T y; \ + T z; \ + } CUDA_name##3; \ + typedef struct { \ + T x; \ + T y; \ + T z; \ + T w; \ + } CUDA_name##4; +#endif + +__MAKE_VECTOR_TYPE__(uchar, unsigned char); +__MAKE_VECTOR_TYPE__(char, char); +__MAKE_VECTOR_TYPE__(ushort, unsigned short); +__MAKE_VECTOR_TYPE__(short, short); +__MAKE_VECTOR_TYPE__(uint, unsigned int); +__MAKE_VECTOR_TYPE__(int, int); +__MAKE_VECTOR_TYPE__(ulong, unsigned long); +__MAKE_VECTOR_TYPE__(long, long); +__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long); +__MAKE_VECTOR_TYPE__(longlong, long long); +__MAKE_VECTOR_TYPE__(float, float); +__MAKE_VECTOR_TYPE__(double, double); + +#else // !defined(__has_attribute) + +#if defined(_MSC_VER) +#include +#include +#include +#include + +/* +this is for compatibility with CUDA as CUDA allows accessing vector components +in C++ program with MSVC +structs are wrapped with templates so that mangled names match templated implementation +*/ + +template struct HIP_vector_type; + +// One template per vector size +template struct HIP_vector_type { + union { + struct { + T x; + }; + T data; 
+ }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + }; + T data[2]; + }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + T z; + }; + T data[3]; + }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + T z; + T w; + }; + T data[4]; + }; +}; +// 8- and 16-length vectors do not have CUDA-style accessible components +template struct HIP_vector_type { + union { + T data[8]; + }; +}; +template struct HIP_vector_type { + union { + T data[16]; + }; +}; + +// Explicit specialization for vectors using MSVC-specific definitions +template <> struct HIP_vector_type { + union { + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data; + }; +}; + +template <> struct HIP_vector_type { + union { + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + short x; + short y; + short z; + short w; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + unsigned short x; + unsigned short y; + unsigned short z; + unsigned short w; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + int x; + int y; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + int x; + int y; + int z; + int w; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + unsigned int x; + unsigned int y; + }; + __m64 data; + }; 
+}; +template <> struct HIP_vector_type { + union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; + +// MSVC uses 32-bit longs and 64-bit long longs, explicitly defining for clarity +template <> struct HIP_vector_type { + union { + struct { + std::int32_t x; + }; + std::int32_t data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int32_t x; + std::int32_t y; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int32_t x; + std::int32_t y; + std::int32_t z; + }; + std::int32_t data[3]; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int32_t x; + std::int32_t y; + std::int32_t z; + std::int32_t w; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + std::uint32_t x; + }; + std::uint32_t data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint32_t x; + std::uint32_t y; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint32_t x; + std::uint32_t y; + std::uint32_t z; + }; + std::uint32_t data[3]; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint32_t x; + std::uint32_t y; + std::uint32_t z; + std::uint32_t w; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + std::int64_t x; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int64_t x; + 
std::int64_t y; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int64_t x; + std::int64_t y; + std::int64_t z; + }; + __m64 data[3]; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::int64_t x; + std::int64_t y; + std::int64_t z; + std::int64_t w; + }; + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[8]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + std::uint64_t x; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint64_t x; + std::uint64_t y; + }; + __m128i data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint64_t x; + std::uint64_t y; + std::uint64_t z; + }; + __m64 data[3]; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + std::uint64_t x; + std::uint64_t y; + std::uint64_t z; + std::uint64_t w; + }; + __m128i data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[4]; + }; +}; +template <> struct HIP_vector_type { + union { + __m128i data[8]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + float x; + float y; + }; + __m64 data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + float x; + float y; + float z; + float w; + }; + __m128 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m256 data; + }; +}; +template <> struct HIP_vector_type { + union { + __m256 data[2]; + }; +}; + +template <> struct HIP_vector_type { + union { + struct { + double x; + double y; + }; + __m128d data; + }; +}; +template <> struct HIP_vector_type { + union { + struct { + double x; + double y; + double z; + double w; + }; + __m256d data; + }; +}; +template <> struct HIP_vector_type { + union { + __m256d data[2]; + }; +}; +template <> struct HIP_vector_type { + union { + __m256d data[4]; + }; 
+}; + +// Type aliasing +using char1 = HIP_vector_type; +using char2 = HIP_vector_type; +using char3 = HIP_vector_type; +using char4 = HIP_vector_type; +using char8 = HIP_vector_type; +using char16 = HIP_vector_type; +using uchar1 = HIP_vector_type; +using uchar2 = HIP_vector_type; +using uchar3 = HIP_vector_type; +using uchar4 = HIP_vector_type; +using uchar8 = HIP_vector_type; +using uchar16 = HIP_vector_type; +using short1 = HIP_vector_type; +using short2 = HIP_vector_type; +using short3 = HIP_vector_type; +using short4 = HIP_vector_type; +using short8 = HIP_vector_type; +using short16 = HIP_vector_type; +using ushort1 = HIP_vector_type; +using ushort2 = HIP_vector_type; +using ushort3 = HIP_vector_type; +using ushort4 = HIP_vector_type; +using ushort8 = HIP_vector_type; +using ushort16 = HIP_vector_type; +using int1 = HIP_vector_type; +using int2 = HIP_vector_type; +using int3 = HIP_vector_type; +using int4 = HIP_vector_type; +using int8 = HIP_vector_type; +using int16 = HIP_vector_type; +using uint1 = HIP_vector_type; +using uint2 = HIP_vector_type; +using uint3 = HIP_vector_type; +using uint4 = HIP_vector_type; +using uint8 = HIP_vector_type; +using uint16 = HIP_vector_type; +using long1 = HIP_vector_type; +using long2 = HIP_vector_type; +using long3 = HIP_vector_type; +using long4 = HIP_vector_type; +using long8 = HIP_vector_type; +using long16 = HIP_vector_type; +using ulong1 = HIP_vector_type; +using ulong2 = HIP_vector_type; +using ulong3 = HIP_vector_type; +using ulong4 = HIP_vector_type; +using ulong8 = HIP_vector_type; +using ulong16 = HIP_vector_type; +using longlong1 = HIP_vector_type; +using longlong2 = HIP_vector_type; +using longlong3 = HIP_vector_type; +using longlong4 = HIP_vector_type; +using longlong8 = HIP_vector_type; +using longlong16 = HIP_vector_type; +using ulonglong1 = HIP_vector_type; +using ulonglong2 = HIP_vector_type; +using ulonglong3 = HIP_vector_type; +using ulonglong4 = HIP_vector_type; +using ulonglong8 = HIP_vector_type; 
+using ulonglong16 = HIP_vector_type; +using float1 = HIP_vector_type; +using float2 = HIP_vector_type; +using float3 = HIP_vector_type; +using float4 = HIP_vector_type; +using float8 = HIP_vector_type; +using float16 = HIP_vector_type; +using double1 = HIP_vector_type; +using double2 = HIP_vector_type; +using double3 = HIP_vector_type; +using double4 = HIP_vector_type; +using double8 = HIP_vector_type; +using double16 = HIP_vector_type; + +#else // !defined(_MSC_VER) + +/* +this is for compatibility with CUDA as CUDA allows accessing vector components +in C++ program with MSVC +structs are wrapped with templates so that mangled names match templated implementation +*/ + +template struct HIP_vector_type; + +// One template per vector size +template struct HIP_vector_type { + union { + struct { + T x; + }; + T data; + }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + }; + T data[2]; + }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + T z; + }; + T data[3]; + }; +}; +template struct HIP_vector_type { + union { + struct { + T x; + T y; + T z; + T w; + }; + T data[4]; + }; +}; +// 8- and 16-length vectors do not have CUDA-style accessible components +template struct HIP_vector_type { + union { + T data[8]; + }; +}; +template struct HIP_vector_type { + union { + T data[16]; + }; +}; + +// Type aliasing +using char1 = HIP_vector_type; +using char2 = HIP_vector_type; +using char3 = HIP_vector_type; +using char4 = HIP_vector_type; +using char8 = HIP_vector_type; +using char16 = HIP_vector_type; +using uchar1 = HIP_vector_type; +using uchar2 = HIP_vector_type; +using uchar3 = HIP_vector_type; +using uchar4 = HIP_vector_type; +using uchar8 = HIP_vector_type; +using uchar16 = HIP_vector_type; +using short1 = HIP_vector_type; +using short2 = HIP_vector_type; +using short3 = HIP_vector_type; +using short4 = HIP_vector_type; +using short8 = HIP_vector_type; +using short16 = HIP_vector_type; +using ushort1 = 
HIP_vector_type; +using ushort2 = HIP_vector_type; +using ushort3 = HIP_vector_type; +using ushort4 = HIP_vector_type; +using ushort8 = HIP_vector_type; +using ushort16 = HIP_vector_type; +using int1 = HIP_vector_type; +using int2 = HIP_vector_type; +using int3 = HIP_vector_type; +using int4 = HIP_vector_type; +using int8 = HIP_vector_type; +using int16 = HIP_vector_type; +using uint1 = HIP_vector_type; +using uint2 = HIP_vector_type; +using uint3 = HIP_vector_type; +using uint4 = HIP_vector_type; +using uint8 = HIP_vector_type; +using uint16 = HIP_vector_type; +using long1 = HIP_vector_type; +using long2 = HIP_vector_type; +using long3 = HIP_vector_type; +using long4 = HIP_vector_type; +using long8 = HIP_vector_type; +using long16 = HIP_vector_type; +using ulong1 = HIP_vector_type; +using ulong2 = HIP_vector_type; +using ulong3 = HIP_vector_type; +using ulong4 = HIP_vector_type; +using ulong8 = HIP_vector_type; +using ulong16 = HIP_vector_type; +using longlong1 = HIP_vector_type; +using longlong2 = HIP_vector_type; +using longlong3 = HIP_vector_type; +using longlong4 = HIP_vector_type; +using longlong8 = HIP_vector_type; +using longlong16 = HIP_vector_type; +using ulonglong1 = HIP_vector_type; +using ulonglong2 = HIP_vector_type; +using ulonglong3 = HIP_vector_type; +using ulonglong4 = HIP_vector_type; +using ulonglong8 = HIP_vector_type; +using ulonglong16 = HIP_vector_type; +using float1 = HIP_vector_type; +using float2 = HIP_vector_type; +using float3 = HIP_vector_type; +using float4 = HIP_vector_type; +using float8 = HIP_vector_type; +using float16 = HIP_vector_type; +using double1 = HIP_vector_type; +using double2 = HIP_vector_type; +using double3 = HIP_vector_type; +using double4 = HIP_vector_type; +using double8 = HIP_vector_type; +using double16 = HIP_vector_type; + +#endif // defined(_MSC_VER) +#endif // defined(__has_attribute) + +#ifdef __cplusplus +#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x) 
{ \ + type r{x}; \ + return r; \ + } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \ + type r{x, y}; \ + return r; \ + } + +#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \ + type r{x, y, z}; \ + return r; \ + } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \ + type r{x, y, z, w}; \ + return r; \ + } +#else +#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x) { \ + type r; \ + r.x = x; \ + return r; \ + } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \ + type r; \ + r.x = x; \ + r.y = y; \ + return r; \ + } + +#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \ + type r; \ + r.x = x; \ + r.y = y; \ + r.z = z; \ + return r; \ + } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \ + type r; \ + r.x = x; \ + r.y = y; \ + r.z = z; \ + r.w = w; \ + return r; \ + } +#endif + +DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); +DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2); +DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4); + +DECLOP_MAKE_ONE_COMPONENT(signed char, char1); +DECLOP_MAKE_TWO_COMPONENT(signed char, char2); +DECLOP_MAKE_THREE_COMPONENT(signed char, char3); +DECLOP_MAKE_FOUR_COMPONENT(signed char, char4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1); +DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2); +DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4); + +DECLOP_MAKE_ONE_COMPONENT(signed short, short1); 
+DECLOP_MAKE_TWO_COMPONENT(signed short, short2); +DECLOP_MAKE_THREE_COMPONENT(signed short, short3); +DECLOP_MAKE_FOUR_COMPONENT(signed short, short4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1); +DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2); +DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4); + +DECLOP_MAKE_ONE_COMPONENT(signed int, int1); +DECLOP_MAKE_TWO_COMPONENT(signed int, int2); +DECLOP_MAKE_THREE_COMPONENT(signed int, int3); +DECLOP_MAKE_FOUR_COMPONENT(signed int, int4); + +DECLOP_MAKE_ONE_COMPONENT(float, float1); +DECLOP_MAKE_TWO_COMPONENT(float, float2); +DECLOP_MAKE_THREE_COMPONENT(float, float3); +DECLOP_MAKE_FOUR_COMPONENT(float, float4); + +DECLOP_MAKE_ONE_COMPONENT(double, double1); +DECLOP_MAKE_TWO_COMPONENT(double, double2); +DECLOP_MAKE_THREE_COMPONENT(double, double3); +DECLOP_MAKE_FOUR_COMPONENT(double, double4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1); +DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long, long1); +DECLOP_MAKE_TWO_COMPONENT(signed long, long2); +DECLOP_MAKE_THREE_COMPONENT(signed long, long3); +DECLOP_MAKE_FOUR_COMPONENT(signed long, long4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1); +DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); +DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); +DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); +DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_math_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_math_functions.h new file mode 100644 index 0000000000..11e811d3b7 --- 
/dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_math_functions.h @@ -0,0 +1,85 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(__HIPCC_RTC__) +#include "hip_fp16_math_fwd.h" +#include "amd_hip_vector_types.h" +#include "math_fwd.h" + +#include + +#include +// assert.h is only for the host version of assert. +// The device version of assert is implemented in hip/amd_detail/hip_runtime.h. +// Users should include hip_runtime.h for the device version of assert. 
+#if !__HIP_DEVICE_COMPILE__ +#include +#endif +#include +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#pragma push_macro("__DEVICE__") +#pragma push_macro("__RETURN_TYPE") + +#define __DEVICE__ static __device__ +#define __RETURN_TYPE bool + +// DOT FUNCTIONS +#if defined(__clang__) && defined(__HIP__) +__DEVICE__ +inline int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { + return __ockl_sdot2(get_native_vector(a), get_native_vector(b), c, saturate); +} +__DEVICE__ +inline uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) { + return __ockl_udot2(get_native_vector(a), get_native_vector(b), c, saturate); +} +__DEVICE__ +inline int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { + return __ockl_sdot4(get_native_vector(a), get_native_vector(b), c, saturate); +} +__DEVICE__ +inline uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { + return __ockl_udot4(get_native_vector(a), get_native_vector(b), c, saturate); +} +__DEVICE__ +inline int amd_mixed_dot(int a, int b, int c, bool saturate) { + return __ockl_sdot8(a, b, c, saturate); +} +__DEVICE__ +inline uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { + return __ockl_udot8(a, b, c, saturate); +} +#endif + +#pragma pop_macro("__DEVICE__") +#pragma pop_macro("__RETURN_TYPE") +// For backward compatibility. +// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros +// defined after including math_functions.h. +#if !defined(__HIPCC_RTC__) +#include +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_surface_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_surface_functions.h new file mode 100644 index 0000000000..ac17b1bf79 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_surface_functions.h @@ -0,0 +1,394 @@ +/* +Copyright (c) 2018 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H + +#if defined(__cplusplus) + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#endif + +#if defined(__HIPCC_RTC__) +#define __HOST_DEVICE__ __device__ +#else +#define __HOST_DEVICE__ __host__ __device__ +#endif + +#define __HIP_SURFACE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj; + +/** + * @defgroup SurfaceAPI Surface API + * @{ + */ + +// CUDA is using byte address, need map to pixel address for HIP +static __HOST_DEVICE__ __forceinline__ int __hipGetPixelAddr(int x, int format, int order) { + /* + * use below format index to generate format LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0, + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14, + HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15 + } hsa_ext_image_channel_type_t; + */ + static const int FormatLUT[] = {0, 1, 0, 1, 3, 1, 1, 1, 0, 1, 2, 0, 1, 2, 1, 2}; + x = FormatLUT[format] == 3 ? 
x / FormatLUT[format] : x >> FormatLUT[format]; + + /* + * use below order index to generate order LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0, + HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1, + HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2, + HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4, + HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8, + HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9, + HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10, + HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14, + HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15, + HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16, + HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19 + } hsa_ext_image_channel_order_t; + */ + static const int OrderLUT[] = {0, 0, 1, 1, 3, 1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 2, 0, 0, 0, 0}; + return x = OrderLUT[order] == 3 ? x / OrderLUT[order] : x >> OrderLUT[order]; +} + +/** \brief Reads the value at coordinate x from the one-dimensional surface. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The coordinate where the value will be read out. + * \param boundaryMode [in] The boundary mode is currently ignored. 
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dread(T* data, hipSurfaceObject_t surfObj, int x, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT; + (void)boundaryMode; + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_1D(i, x); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the one-dimensional surface at coordinate x. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The coordinate where the data will be written. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dwrite(T data, hipSurfaceObject_t surfObj, int x) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapTo(data); + __ockl_image_store_1D(i, x, tmp); +} + + +/** \brief Reads the value from the two-dimensional surface at coordinate x, y. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the value will be read out. + * \param y [in] The y coordinate where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, + int y) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __ockl_image_load_2D(i, get_native_vector(coords)); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the two-dimensional surface at coordinate + * x, y. + * + * \tparam T The data type of the surface. 
+ * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param y [in] The y coordinate where the data will be written. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, + int y) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __hipMapTo(data); + __ockl_image_store_2D(i, get_native_vector(coords), tmp); +} + +/** \brief Reads the value from the three-dimensional surface at coordinate + * x, y, z. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the value will be read out. + * \param y [in] The y coordinate where the value will be read out. + * \param z [in] The z coordinate where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, + int z) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + int4 coords{x, y, z, 0}; + auto tmp = __ockl_image_load_3D(i, get_native_vector(coords)); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the three-dimensional surface at coordinate + * x, y, z. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param y [in] The y coordinate where the data will be written. + * \param z [in] The z coordinate where the data will be written. 
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, + int z) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + int4 coords{x, y, z, 0}; + auto tmp = __hipMapTo(data); + __ockl_image_store_3D(i, get_native_vector(coords), tmp); +} + +/** \brief Reads the value from the one-dimensional layered surface at + * coordinate x and layer index. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The coordinate where the value will be read out. + * \param layer [in] The layer index where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, + int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_lod_1D(i, x, layer); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the one-dimensional layered surface at + * coordinate x and layer index. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param layer [in] The layer index where the data will be written. 
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, + int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapTo(data); + __ockl_image_store_lod_1D(i, x, layer, tmp); +} + +/** \brief Reads the value from the two-dimensional layered surface at + * coordinate x, y and layer index. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the value will be read out. + * \param y [in] The y coordinate where the value will be read out. + * \param layer [in] The layer index where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, + int y, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __ockl_image_load_lod_2D(i, get_native_vector(coords), layer); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the two-dimensional layered surface at + * coordinate x, y and layer index. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param y [in] The y coordinate where the data will be written. + * \param layer [in] The layer index where the data will be written. 
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, + int y, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __hipMapTo(data); + __ockl_image_store_lod_2D(i, get_native_vector(coords), layer, tmp); +} + +/** \brief Reads the value from the cubemap surface at coordinate x, y and + * face index. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the value will be read out. + * \param y [in] The y coordinate where the value will be read out. + * \param face [in] The face index where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, + int y, int face) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __ockl_image_load_CM(i, get_native_vector(coords), face); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the cubemap surface at coordinate x, y and + * face index. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value is written to surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param y [in] The y coordinate where the data will be written. + * \param face [in] The face index where the data will be written. 
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, + int y, int face) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __hipMapTo(data); + __ockl_image_store_CM(i, get_native_vector(coords), face, tmp); +} + +/** \brief Reads the value from the layered cubemap surface at coordinate x, y + * and face, layer index. + * + * \tparam T The data type of the surface. + * \param data [out] The T type result is stored in this pointer. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the value will be read out. + * \param y [in] The y coordinate where the value will be read out. + * \param face [in] The face index where the value will be read out. + * \param layer [in] The layer index where the value will be read out. + */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, + int x, int y, int face, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __ockl_image_load_lod_CM(i, get_native_vector(coords), face, layer); + *data = __hipMapFrom(tmp); +} + +/** \brief Writes the value data to the layered cubemap surface at coordinate + * x, y and face, layer index. + * + * \tparam T The data type of the surface. + * \param data [in] The T type value to write to the surface. + * \param surfObj [in] The surface descriptor. + * \param x [in] The x coordinate where the data will be written. + * \param y [in] The y coordinate where the data will be written. + * \param face [in] The face index where the data will be written. + * \param layer [in] The layer index where the data will be written.
+ */ +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, + int x, int y, int face, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + int2 coords{x, y}; + auto tmp = __hipMapTo(data); + __ockl_image_store_lod_CM(i, get_native_vector(coords), face, layer, tmp); +} + +// Doxygen end group SurfaceAPI +/** + * @} + */ + +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_functions.h new file mode 100644 index 0000000000..efd13acdbe --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_functions.h @@ -0,0 +1,603 @@ +/* +Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H + +#if !defined(__HIPCC_RTC__) +#include "device_library_decls.h" // ockl warp functions +#endif // !defined(__HIPCC_RTC__) + +#if defined(__has_attribute) && __has_attribute(maybe_undef) +#define MAYBE_UNDEF __attribute__((maybe_undef)) +#else +#define MAYBE_UNDEF +#endif + +__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_bpermutef(int index, float src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.f; +} + +__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_permutef(int index, float src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.f; +} + +#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src)) +#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src)) + +template __device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.u; +} + +template __device__ static inline float __hip_ds_swizzlef_N(float src) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.f; +} + +#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \ + __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), 
(bound_ctrl)>((src)) + +template +__device__ static inline int __hip_move_dpp_N(int src) { + return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); +} + +inline __device__ const struct final { + __device__ __attribute__((always_inline, const)) operator int() const noexcept { + return __builtin_amdgcn_wavefrontsize(); + } +} warpSize{}; + +// warp vote function __all __any __ballot +__device__ inline int __all(int predicate) { return __ockl_wfall_i32(predicate); } + +__device__ inline int __any(int predicate) { return __ockl_wfany_i32(predicate); } + +__device__ inline unsigned long long int __ballot(int predicate) { + return __builtin_amdgcn_ballot_w64(predicate); +} + +__device__ inline unsigned long long int __ballot64(int predicate) { return __ballot(predicate); } + +// See amd_warp_sync_functions.h for an explanation of this preprocessor flag. +#if !defined(HIP_DISABLE_WARP_SYNC_BUILTINS) +// Since threads in a wave do not make independent progress, __activemask() +// always returns the exact active mask, i.e, all active threads in the wave. 
+__device__ inline unsigned long long __activemask() { return __ballot(true); } +#endif // HIP_DISABLE_WARP_SYNC_BUILTINS + +__device__ static inline unsigned int __lane_id() { + if (static_cast(warpSize) == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0); + return __builtin_amdgcn_mbcnt_hi(-1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} + +__device__ inline int __shfl(MAYBE_UNDEF int var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = (src_lane & (width - 1)) + (self & ~(width - 1)); + return __builtin_amdgcn_ds_bpermute(index << 2, var); +} +__device__ inline unsigned int __shfl(MAYBE_UNDEF unsigned int var, int src_lane, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.u; +} +__device__ inline float __shfl(MAYBE_UNDEF float var, int src_lane, int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.f; +} +__device__ inline double __shfl(MAYBE_UNDEF double var, int src_lane, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ inline long __shfl(MAYBE_UNDEF long var, int src_lane, int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + __hip_uint64_t tmp0 = + 
(static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl(static_cast(var), src_lane, width)); +#endif +} +__device__ inline unsigned long __shfl(MAYBE_UNDEF unsigned long var, int src_lane, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(__hip_uint64_t), ""); + + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl(static_cast(var), src_lane, width)); +#endif +} +__device__ inline long long __shfl(MAYBE_UNDEF long long var, int src_lane, int width = warpSize) { + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ inline unsigned long long __shfl(MAYBE_UNDEF unsigned long long var, int src_lane, + int width = warpSize) { + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(__hip_uint64_t), ""); + + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = 
__shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ inline int __shfl_up(MAYBE_UNDEF int var, unsigned int lane_delta, + int width = warpSize) { + int self = __lane_id(); + int index = self - lane_delta; + index = (index < (self & ~(width - 1))) ? self : index; + return __builtin_amdgcn_ds_bpermute(index << 2, var); +} +__device__ inline unsigned int __shfl_up(MAYBE_UNDEF unsigned int var, unsigned int lane_delta, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ inline float __shfl_up(MAYBE_UNDEF float var, unsigned int lane_delta, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ inline double __shfl_up(MAYBE_UNDEF double var, unsigned int lane_delta, + int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ inline long __shfl_up(MAYBE_UNDEF long var, unsigned int lane_delta, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); 
+ tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_up(static_cast(var), lane_delta, width)); +#endif +} + +__device__ inline unsigned long __shfl_up(MAYBE_UNDEF unsigned long var, unsigned int lane_delta, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(__hip_uint64_t), ""); + + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_up(static_cast(var), lane_delta, width)); +#endif +} + +__device__ inline long long __shfl_up(MAYBE_UNDEF long long var, unsigned int lane_delta, + int width = warpSize) { + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(__hip_uint64_t), ""); + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ inline unsigned long long __shfl_up(MAYBE_UNDEF unsigned long long var, + unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + 
static_assert(sizeof(unsigned long long) == sizeof(__hip_uint64_t), ""); + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ inline int __shfl_down(MAYBE_UNDEF int var, unsigned int lane_delta, + int width = warpSize) { + int self = __lane_id(); + int index = self + lane_delta; + index = (int)((self & (width - 1)) + lane_delta) >= width ? self : index; + return __builtin_amdgcn_ds_bpermute(index << 2, var); +} +__device__ inline unsigned int __shfl_down(MAYBE_UNDEF unsigned int var, unsigned int lane_delta, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ inline float __shfl_down(MAYBE_UNDEF float var, unsigned int lane_delta, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ inline double __shfl_down(MAYBE_UNDEF double var, unsigned int lane_delta, + int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ inline long __shfl_down(MAYBE_UNDEF long var, unsigned int lane_delta, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(long) 
== 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_down(static_cast(var), lane_delta, width)); +#endif +} +__device__ inline unsigned long __shfl_down(MAYBE_UNDEF unsigned long var, unsigned int lane_delta, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(__hip_uint64_t), ""); + + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_down(static_cast(var), lane_delta, width)); +#endif +} +__device__ inline long long __shfl_down(MAYBE_UNDEF long long var, unsigned int lane_delta, + int width = warpSize) { + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(__hip_uint64_t), ""); + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} 
+__device__ inline unsigned long long __shfl_down(MAYBE_UNDEF unsigned long long var, + unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(__hip_uint64_t), ""); + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ inline int __shfl_xor(MAYBE_UNDEF int var, int lane_mask, int width = warpSize) { + int self = __lane_id(); + int index = self ^ lane_mask; + index = index >= ((self + width) & ~(width - 1)) ? self : index; + return __builtin_amdgcn_ds_bpermute(index << 2, var); +} +__device__ inline unsigned int __shfl_xor(MAYBE_UNDEF unsigned int var, int lane_mask, + int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.u = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.u; +} +__device__ inline float __shfl_xor(MAYBE_UNDEF float var, int lane_mask, int width = warpSize) { + union { + int i; + unsigned u; + float f; + } tmp; + tmp.f = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.f; +} +__device__ inline double __shfl_xor(MAYBE_UNDEF double var, int lane_mask, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + double tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} 
+__device__ inline long __shfl_xor(MAYBE_UNDEF long var, int lane_mask, int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(__hip_uint64_t), ""); + + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); +#endif +} +__device__ inline unsigned long __shfl_xor(MAYBE_UNDEF unsigned long var, int lane_mask, + int width = warpSize) { +#ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(__hip_uint64_t), ""); + + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +#else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); +#endif +} +__device__ inline long long __shfl_xor(MAYBE_UNDEF long long var, int lane_mask, + int width = warpSize) { + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(__hip_uint64_t), ""); + int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | 
static_cast<__hip_uint32_t>(tmp[0]); + long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ inline unsigned long long __shfl_xor(MAYBE_UNDEF unsigned long long var, int lane_mask, + int width = warpSize) { + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(__hip_uint64_t), ""); + unsigned int tmp[2]; + __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + __hip_uint64_t tmp0 = + (static_cast<__hip_uint64_t>(tmp[1]) << 32ull) | static_cast<__hip_uint32_t>(tmp[0]); + unsigned long long tmp1; + __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_sync_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_sync_functions.h new file mode 100644 index 0000000000..aaad1827e2 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/amd_warp_sync_functions.h @@ -0,0 +1,682 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +// Warp sync builtins (with explicit mask argument) introduced in ROCm 6.2 as a +// preview to allow end-users to adapt to the new interface involving 64-bit +// masks. These are enabled by default, and can be disabled by setting the macro +// "HIP_DISABLE_WARP_SYNC_BUILTINS". This arrangement also applies to the +// __activemask() builtin defined in amd_warp_functions.h. +#if !defined(HIP_DISABLE_WARP_SYNC_BUILTINS) + +#if !defined(__HIPCC_RTC__) +#include "amd_warp_functions.h" +#include "amd_device_functions.h" +#include "hip_assert.h" +#include +#include +#endif + +extern "C" __device__ __attribute__((const)) int __ockl_wfred_add_i32(int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_add_u32(unsigned int); +extern "C" __device__ __attribute__((const)) int __ockl_wfred_min_i32(int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_min_u32(unsigned int); +extern "C" __device__ __attribute__((const)) int __ockl_wfred_max_i32(int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_max_u32(unsigned int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_and_u32(unsigned int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_or_u32(unsigned int); +extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_xor_u32(unsigned int); + +#ifdef HIP_ENABLE_EXTRA_WARP_SYNC_TYPES +// this macro enable types that are not in CUDA +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_add_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_add_u64( + unsigned long long); +extern "C" __device__ __attribute__((const)) 
float __ockl_wfred_add_f32(float); +extern "C" __device__ __attribute__((const)) double __ockl_wfred_add_f64(double); + +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_min_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_min_u64( + unsigned long long); +extern "C" __device__ __attribute__((const)) float __ockl_wfred_min_f32(float); +extern "C" __device__ __attribute__((const)) double __ockl_wfred_min_f64(double); + +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_max_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_max_u64( + unsigned long long); +extern "C" __device__ __attribute__((const)) float __ockl_wfred_max_f32(float); +extern "C" __device__ __attribute__((const)) double __ockl_wfred_max_f64(double); + +extern "C" __device__ __attribute__((const)) int __ockl_wfred_and_i32(int); +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_and_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_and_u64( + unsigned long long); + +extern "C" __device__ __attribute__((const)) int __ockl_wfred_or_i32(int); +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_or_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_or_u64( + unsigned long long); + +extern "C" __device__ __attribute__((const)) int __ockl_wfred_xor_i32(int); +extern "C" __device__ __attribute__((const)) long long __ockl_wfred_xor_i64(long long); +extern "C" __device__ __attribute__((const)) unsigned long long __ockl_wfred_xor_u64( + unsigned long long); + +#endif + +template __device__ inline T __hip_readfirstlane(T val) { + // In theory, behaviour is undefined when reading from a union member other + // than the member that was last assigned to, but it works in practice because + // we rely on the compiler to do the reasonable thing. 
+ union { + unsigned long long l; + T d; + } u; + u.d = val; + // NOTE: The builtin returns int, so we first cast it to unsigned int and only + // then extend it to 64 bits. + unsigned long long lower = (unsigned)__builtin_amdgcn_readfirstlane(u.l); + unsigned long long upper = (unsigned)__builtin_amdgcn_readfirstlane(u.l >> 32); + u.l = (upper << 32) | lower; + return u.d; +} + +// When compiling for wave32 mode, ignore the upper half of the 64-bit mask. +#define __hip_adjust_mask_for_wave32(MASK) \ + do { \ + if (static_cast(warpSize) == 32) MASK &= 0xFFFFFFFF; \ + } while (0) + +// We use a macro to expand each builtin into a waterfall that implements the +// mask semantics: +// +// 1. The mask argument may be divergent. +// 2. Each active thread must have its own bit set in its own mask value. +// 3. For a given mask value, all threads that are mentioned in the mask must +// execute the same static instance of the builtin with the same mask. +// 4. The union of all mask values supplied at a static instance must be equal +// to the activemask at the program point. +// +// Thus, the mask argument partitions the set of currently active threads in the +// wave into disjoint subsets that cover all active threads. +// +// Implementation notes: +// --------------------- +// +// We implement this as a waterfall loop that executes the builtin for each +// subset separately. The return value is a divergent value across the active +// threads. The value for inactive threads is defined by each builtin +// separately. +// +// As long as every mask value is non-zero, we don't need to check if a lane +// specifies itself in the mask; that is done by the later assertion where all +// chosen lanes must be in the chosen mask. 
+ +#define __hip_check_mask(MASK) \ + do { \ + __hip_assert(MASK && "mask must be non-zero"); \ + bool done = false; \ + while (__any(!done)) { \ + if (!done) { \ + auto chosen_mask = __hip_readfirstlane(MASK); \ + if (MASK == chosen_mask) { \ + __hip_assert(MASK == __ballot(true) && \ + "all threads specified in the mask" \ + " must execute the same operation with the same mask"); \ + done = true; \ + } \ + } \ + } \ + } while (0) + +#define __hip_do_sync(RETVAL, FUNC, MASK, ...) \ + do { \ + __hip_assert(MASK && "mask must be non-zero"); \ + bool done = false; \ + while (__any(!done)) { \ + if (!done) { \ + auto chosen_mask = __hip_readfirstlane(MASK); \ + if (MASK == chosen_mask) { \ + __hip_assert(MASK == __ballot(true) && \ + "all threads specified in the mask" \ + " must execute the same operation with the same mask"); \ + RETVAL = FUNC(__VA_ARGS__); \ + done = true; \ + } \ + } \ + } \ + } while (0) + +__device__ inline void __syncwarp() { + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "wavefront"); + __builtin_amdgcn_wave_barrier(); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "wavefront"); +} + +template __device__ inline void __syncwarp(MaskT mask) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_check_mask(mask); + return __syncwarp(); +} + +// __all_sync, __any_sync, __ballot_sync + +template +__device__ inline unsigned long long __ballot_sync(MaskT mask, int predicate) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. 
" + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __ballot(predicate) & mask; +} + +template __device__ inline int __all_sync(MaskT mask, int predicate) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + return __ballot_sync(mask, predicate) == mask; +} + +template __device__ inline int __any_sync(MaskT mask, int predicate) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + return __ballot_sync(mask, predicate) != 0; +} + +// __match_any, __match_all and sync variants + +template __device__ inline unsigned long long __match_any(T value) { + static_assert( + (__hip_internal::is_integral::value || __hip_internal::is_floating_point::value) && + (sizeof(T) == 4 || sizeof(T) == 8), + "T can be int, unsigned int, long, unsigned long, long long, unsigned " + "long long, float or double."); + bool done = false; + unsigned long long retval = 0; + + while (__any(!done)) { + if (!done) { + T chosen = __hip_readfirstlane(value); + if (chosen == value) { + retval = __activemask(); + done = true; + } + } + } + + return retval; +} + +template +__device__ inline unsigned long long __match_any_sync(MaskT mask, T value) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. 
" + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __match_any(value) & mask; +} + +template __device__ inline unsigned long long __match_all(T value, int* pred) { + static_assert( + (__hip_internal::is_integral::value || __hip_internal::is_floating_point::value) && + (sizeof(T) == 4 || sizeof(T) == 8), + "T can be int, unsigned int, long, unsigned long, long long, unsigned " + "long long, float or double."); + T first = __hip_readfirstlane(value); + if (__all(first == value)) { + *pred = true; + return __activemask(); + } else { + *pred = false; + return 0; + } +} + +template +__device__ inline unsigned long long __match_all_sync(MaskT mask, T value, int* pred) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + MaskT retval = 0; + __hip_adjust_mask_for_wave32(mask); + __hip_do_sync(retval, __match_all, mask, value, pred); + return retval; +} + +// various variants of shfl + +template +__device__ inline T __shfl_sync(MaskT mask, T var, int srcLane, int width = warpSize) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __shfl(var, srcLane, width); +} + +template +__device__ inline T __shfl_up_sync(MaskT mask, T var, unsigned int delta, int width = warpSize) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. 
" + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __shfl_up(var, delta, width); +} + +template +__device__ inline T __shfl_down_sync(MaskT mask, T var, unsigned int delta, int width = warpSize) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __shfl_down(var, delta, width); +} + +template +__device__ inline T __shfl_xor_sync(MaskT mask, T var, int laneMask, int width = warpSize) { + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. " + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + __hip_check_mask(mask); + return __shfl_xor(var, laneMask, width); +} + +template +__device__ inline T __reduce_op_sync(MaskT mask, T val, BinaryOp op, WfReduce wfReduce) { + using permuteType = + typename __hip_internal::conditional::type; + static constexpr auto kMaskNumBits = sizeof(MaskT) * 8; + static_assert(__hip_internal::is_integral::value && sizeof(MaskT) == 8, + "The mask must be a 64-bit integer. 
" + "Implicitly promoting a smaller integer is almost always an error."); + __hip_adjust_mask_for_wave32(mask); + unsigned int laneId; + unsigned int maskIdx; + // next bit to aggregate with + int nextBit; + + // if doing the binary reduction tree, this will increase by two in every iteration + int modulo = 1; + int leadingZeroes = __clzll(mask); + int firstLane; + int lastLane = kMaskNumBits - leadingZeroes - 1; + int maskNumBits; + int numIterations; + // unsigned int[2] is used when T is 64-bit wide + typename __hip_internal::conditional::type result, + permuteResult; + auto backwardPermute = [](int index, permuteType val) { + if constexpr (__hip_internal::is_integral::value || + __hip_internal::is_same::value) + return __hip_ds_bpermute(index, val); + else + return __hip_ds_bpermutef(index, val); + }; + + __hip_check_mask(mask); + maskNumBits = __popcll(mask); + +#ifdef __OPTIMIZE__ // at the time of this writing the ockl wfred functions do not compile when + // using -O0 + if (maskNumBits == lastLane + 1) + // this means the mask "does not have holes", and starts from 0; we can use a specific intrinsic + // to calculate the aggregated result + return wfReduce(val); +#endif + + firstLane = __builtin_ctzll(mask); + laneId = __ockl_lane_u32(); + nextBit = laneId; + // the number of iterations needs to be at least log2(number of bits on) + numIterations = sizeof(int) * 8 - __clz(maskNumBits); + + if (!(maskNumBits & (maskNumBits - 1))) + // the number of bits in the mask is a power of 2 + numIterations -= 1; + + maskIdx = __popcll(((1ul << laneId) - 1) & mask); + mask >>= laneId; + mask >>= 1ul; + + if constexpr (sizeof(T) == 4 || sizeof(T) == 2) + result = val; + else + __builtin_memcpy(&result, &val, sizeof(T)); + + // add the values from the lanes using a reduction tree (first the threads with even-numbered + // lanes, then multiples of 4, then 8, ... 
+ while (numIterations) { + int offset = modulo >> 1; + int increment = modulo - offset; + int nextPos = maskIdx + offset + increment; + bool insideLanes = nextPos < maskNumBits; + + if (insideLanes) { + int next; + + // find the position to aggregate with; although we could just call fns64() that will probably + // be very slow when called multiple times in this for loop; this is equivalent + for (int i = 0; i < increment; i++) { + next = __builtin_ctzll(mask) + 1; + mask >>= next; + nextBit += next; + } + } + + if constexpr (sizeof(T) == 2) { + union { + int i; + T f; + } tmp; + + tmp.f = result; + tmp.i = __hip_ds_bpermute(nextBit << 2, tmp.i); + permuteResult = tmp.f; + } else if constexpr (sizeof(T) == 4) + permuteResult = backwardPermute(nextBit << 2, result); + else { + // ds_bpermute only deals with 32-bit sizes, so for 64-bit types + // we need to call the permute twice for each half + permuteResult[0] = backwardPermute(nextBit << 2, result[0]); + permuteResult[1] = backwardPermute(nextBit << 2, result[1]); + } + + if (insideLanes) { + if constexpr (sizeof(T) == 4 || sizeof(T) == 2) + result = op(result, permuteResult); + else { + T tmp; + unsigned long long rhs = + (static_cast(permuteResult[1]) << 32) | permuteResult[0]; + + __builtin_memcpy(&tmp, &result, sizeof(T)); + tmp = op(tmp, *reinterpret_cast(&rhs)); + __builtin_memcpy(&result, &tmp, sizeof(T)); + } + } + + modulo <<= 1; + numIterations--; + } + + if constexpr (sizeof(T) == 2) { + union { + int i; + T f; + } tmp; + tmp.f = result; + tmp.i = __hip_ds_bpermute(firstLane << 2, tmp.i); + return tmp.f; + } else if constexpr (sizeof(T) == 4) + return backwardPermute(firstLane << 2, result); + else { + auto tmp = (static_cast(backwardPermute(firstLane << 2, result[1])) << 32) | + static_cast(backwardPermute(firstLane << 2, result[0])); + return *reinterpret_cast(&tmp); + } +} + +template __device__ inline int __reduce_add_sync(MaskT mask, int val) { + // although C++ has std::plus and other functors, 
we do not use them because + // they are in the header and they were causing problem with hipRTC + // at this time + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_add_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline int __reduce_min_sync(MaskT mask, int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_min_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline int __reduce_max_sync(MaskT mask, int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_max_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? 
rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_or_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs || rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_or_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_and_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs && rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_and_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned int __reduce_xor_sync(MaskT mask, unsigned int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return (!lhs) != (!rhs) == 1; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_xor_u32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +#ifdef HIP_ENABLE_EXTRA_WARP_SYNC_TYPES +template __device__ inline long long __reduce_add_sync(MaskT mask, long long val) { + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_add_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline float __reduce_add_sync(MaskT mask, float val) { + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_f32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template 
__device__ inline double __reduce_add_sync(MaskT mask, double val) { + auto op = [](decltype(val)& a, decltype(val)& b) { return a + b; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_add_f64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline long long __reduce_min_sync(MaskT mask, long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_min_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline float __reduce_min_sync(MaskT mask, float val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_f32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline double __reduce_min_sync(MaskT mask, double val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return rhs < lhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_min_f64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline long long __reduce_max_sync(MaskT mask, long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_max_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? 
rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline float __reduce_max_sync(MaskT mask, float val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_f32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline double __reduce_max_sync(MaskT mask, double val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs < rhs ? rhs : lhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_max_f64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline int __reduce_and_sync(MaskT mask, int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs && rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_and_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline long long __reduce_and_sync(MaskT mask, long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs && rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_and_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_and_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs && rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_and_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline int __reduce_or_sync(MaskT mask, int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs || rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_or_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline long long __reduce_or_sync(MaskT mask, long 
long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs || rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_or_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_or_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return lhs || rhs; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_or_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline int __reduce_xor_sync(MaskT mask, int val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return (!lhs) != (!rhs) == 1; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_xor_i32(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template __device__ inline long long __reduce_xor_sync(MaskT mask, long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return (!lhs) != (!rhs) == 1; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_xor_i64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +template +__device__ inline unsigned long long __reduce_xor_sync(MaskT mask, unsigned long long val) { + auto op = [](decltype(val) lhs, decltype(val) rhs) { return (!lhs) != (!rhs) == 1; }; + auto wfReduce = [](decltype(val) v) { return __ockl_wfred_xor_u64(v); }; + + return __reduce_op_sync(mask, val, op, wfReduce); +} + +#undef __hip_do_sync +#undef __hip_check_mask +#undef __hip_adjust_mask_for_wave32 + +#endif // HIP_ENABLE_EXTRA_WARP_SYNC_TYPES +#endif // HIP_DISABLE_WARP_SYNC_BUILTINS diff --git a/3rdparty/hip-headers/include/hip/amd_detail/device_library_decls.h b/3rdparty/hip-headers/include/hip/amd_detail/device_library_decls.h new file mode 100644 index 0000000000..33623f5881 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/device_library_decls.h @@ -0,0 +1,135 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/device_library_decls.h + * @brief Contains declarations for types and functions in device library. + * Uses __hip_int64_t and __hip_uint64_t instead of long, long long, unsigned + * long and unsigned long long types for device library API + * declarations. 
+ */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H + +#if !defined(__HIPCC_RTC__) +#include "hip/amd_detail/host_defines.h" +#if __cplusplus +#include +#else +#include +#endif +#endif + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long ullong; + +extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int); +extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int); +extern "C" __device__ uint __ockl_activelane_u32(void); + +extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint); + +extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); +extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); +extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint); +extern "C" __device__ __attribute__((const)) __hip_uint64_t __ockl_clz_u64(__hip_uint64_t); + +extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); + +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_f64(double); + +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 
__ocml_cvtrtz_f16_f32(float); + +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u32(__hip_uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u32(__hip_uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u32(__hip_uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u64(__hip_uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u64(__hip_uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u64(__hip_uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_s64(__hip_int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_u64(__hip_uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_u64(__hip_uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_u64(__hip_uint64_t); + +extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid); +extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid); + +extern "C" __device__ __attribute__((const)) __hip_uint32_t __ockl_lane_u32(); +extern "C" __device__ __attribute__((const)) int __ockl_grid_is_valid(void); +extern 
"C" __device__ __attribute__((convergent)) void __ockl_grid_sync(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_num_grids(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_grid_rank(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void); +extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); +extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); + +extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float); + +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_add_i32(int a); +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_and_i32(int a); +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a); + +extern "C" __device__ __hip_uint64_t __ockl_fprintf_stderr_begin(); +extern "C" __device__ __hip_uint64_t __ockl_fprintf_append_args( + __hip_uint64_t msg_desc, __hip_uint32_t num_args, __hip_uint64_t value0, __hip_uint64_t value1, + __hip_uint64_t value2, __hip_uint64_t value3, __hip_uint64_t value4, __hip_uint64_t value5, + __hip_uint64_t value6, __hip_uint32_t is_last); +extern "C" __device__ __hip_uint64_t __ockl_fprintf_append_string_n(__hip_uint64_t msg_desc, + const char* data, + __hip_uint64_t length, + __hip_uint32_t is_last); + +// Introduce local address space +#define __local __attribute__((address_space(3))) + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } +#endif //__HIP_DEVICE_COMPILE__ + +// Using hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +#define __CLK_GLOBAL_MEM_FENCE 0x02 +typedef unsigned __cl_mem_fence_flags; + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_api_trace.hpp 
b/3rdparty/hip-headers/include/hip/amd_detail/hip_api_trace.hpp new file mode 100644 index 0000000000..ece40ae64a --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_api_trace.hpp @@ -0,0 +1,1774 @@ +/* + * Copyright (c) Advanced Micro Devices, Inc., or its affiliates. + * + * SPDX-License-Identifier: MIT + */ +#pragma once + +#include + +// Forward-declare types that rocprofiler-sdk/hip/api_args.h still +// references but were removed/renamed in newer HIP headers. +// Only pointer-to-struct usage exists, so incomplete types suffice. +struct HIP_MEMSET_NODE_PARAMS; +struct hipArrayMemoryRequirements; + +// Define some version macros for the API table. Use similar naming conventions to HSA-runtime +// (MAJOR and STEP versions). Three groups at this time: +// +// (A) HIP_API_TABLE_* defines for versioning the API table structure +// (B) HIP_RUNTIME_API_TABLE_* defines for versioning the HipDispatchTable struct +// (C) HIP_COMPILER_API_TABLE_* defines for versioning the HipCompilerDispatchTable struct +// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// 1. When new functions are added to the API table, always add the new function pointer to the +// end of the table and increment the dispatch table's step version number. NEVER re-arrange +// the order of the member variables in a dispatch table. This will break the ABI. +// 2. In dire circumstances, if the type of an existing member variable in a dispatch +// table has to be changed because a data type has been changed/removed, increment the dispatch +// table's major version number. If the function pointer type can no longer be declared, DO +// NOT REMOVE IT! Make the function pointer type void* and have it always be set to a nullptr. +// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// The major version number should (ideally) never need to be incremented. 
+// - Increment the HIP_API_TABLE_MAJOR_VERSION for fundamental changes to the API table structs. +// - Increment the HIP_RUNTIME_API_TABLE_MAJOR_VERSION for fundamental changes to the +// HipDispatchTable struct, such as a *change* to the type/name of an existing member variable. DO NOT +// REMOVE IT. +// - Increment the HIP_COMPILER_API_TABLE_MAJOR_VERSION for fundamental changes to the +// HipCompilerDispatchTable struct, such as a *change* to the type/name of an existing member variable. +// DO NOT REMOVE IT. +#define HIP_API_TABLE_MAJOR_VERSION 0 +#define HIP_COMPILER_API_TABLE_MAJOR_VERSION 0 +#define HIP_TOOLS_API_TABLE_MAJOR_VERSION 0 +#define HIP_RUNTIME_API_TABLE_MAJOR_VERSION 0 + +// The step version number should be changed whenever the size of the API table struct(s) change. +// - Increment the HIP_API_TABLE_STEP_VERSION when/if new API table structs are added +// - Increment the HIP_RUNTIME_API_TABLE_STEP_VERSION when new runtime API functions are added +// - Increment the HIP_COMPILER_API_TABLE_STEP_VERSION when new compiler API functions are added +// - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases +#define HIP_API_TABLE_STEP_VERSION 0 +#define HIP_COMPILER_API_TABLE_STEP_VERSION 0 +#define HIP_TOOLS_API_TABLE_STEP_VERSION 0 +#define HIP_RUNTIME_API_TABLE_STEP_VERSION 25 + +// HIP API interface +// HIP compiler dispatch functions +typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream); +typedef hipError_t (*t___hipPushCallConfiguration)(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream); +typedef void** (*t___hipRegisterFatBinary)(const void* data); +typedef void (*t___hipRegisterFunction)(void** modules, const void* hostFunction, + char* deviceFunction, const char* deviceName, + unsigned int threadLimit, uint3* tid, uint3* bid, + dim3* blockDim, dim3* gridDim, int* wSize); +typedef void (*t___hipRegisterManagedVar)(void* 
hipModule, void** pointer, void* init_value, + const char* name, size_t size, unsigned align); +typedef void (*t___hipRegisterSurface)(void** modules, void* var, char* hostVar, char* deviceVar, + int type, int ext); +typedef void (*t___hipRegisterTexture)(void** modules, void* var, char* hostVar, char* deviceVar, + int type, int norm, int ext); +typedef void (*t___hipRegisterVar)(void** modules, void* var, char* hostVar, char* deviceVar, + int ext, size_t size, int constant, int global); +typedef void (*t___hipUnregisterFatBinary)(void** modules); + +// HIP tools dispatch functions +typedef void (*t___hipReportDevices)(size_t numDevices, const hipUUID* uuids); + +// HIP runtime dispatch functions +typedef const char* (*t_hipApiName)(uint32_t id); +typedef hipError_t (*t_hipArray3DCreate)(hipArray_t* array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray); +typedef hipError_t (*t_hipArray3DGetDescriptor)(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, + hipArray_t array); +typedef hipError_t (*t_hipArrayCreate)(hipArray_t* pHandle, + const HIP_ARRAY_DESCRIPTOR* pAllocateArray); +typedef hipError_t (*t_hipArrayDestroy)(hipArray_t array); +typedef hipError_t (*t_hipArrayGetDescriptor)(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, + hipArray_t array); +typedef hipError_t (*t_hipArrayGetInfo)(hipChannelFormatDesc* desc, hipExtent* extent, + unsigned int* flags, hipArray_t array); +typedef hipError_t (*t_hipBindTexture)(size_t* offset, const textureReference* tex, + const void* devPtr, const hipChannelFormatDesc* desc, + size_t size); +typedef hipError_t (*t_hipBindTexture2D)(size_t* offset, const textureReference* tex, + const void* devPtr, const hipChannelFormatDesc* desc, + size_t width, size_t height, size_t pitch); +typedef hipError_t (*t_hipBindTextureToArray)(const textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc); +typedef hipError_t (*t_hipBindTextureToMipmappedArray)(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + 
const hipChannelFormatDesc* desc); +typedef hipError_t (*t_hipChooseDevice)(int* device, const hipDeviceProp_t* prop); +typedef hipError_t (*t_hipChooseDeviceR0000)(int* device, const hipDeviceProp_tR0000* properties); +typedef hipError_t (*t_hipConfigureCall)(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream); +typedef hipError_t (*t_hipCreateSurfaceObject)(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc); +typedef hipError_t (*t_hipCreateTextureObject)(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const struct hipResourceViewDesc* pResViewDesc); +typedef hipError_t (*t_hipCtxCreate)(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); +typedef hipError_t (*t_hipCtxDestroy)(hipCtx_t ctx); +typedef hipError_t (*t_hipCtxDisablePeerAccess)(hipCtx_t peerCtx); +typedef hipError_t (*t_hipCtxEnablePeerAccess)(hipCtx_t peerCtx, unsigned int flags); +typedef hipError_t (*t_hipCtxGetApiVersion)(hipCtx_t ctx, unsigned int* apiVersion); +typedef hipError_t (*t_hipCtxGetCacheConfig)(hipFuncCache_t* cacheConfig); +typedef hipError_t (*t_hipCtxGetCurrent)(hipCtx_t* ctx); +typedef hipError_t (*t_hipCtxGetDevice)(hipDevice_t* device); +typedef hipError_t (*t_hipCtxGetFlags)(unsigned int* flags); +typedef hipError_t (*t_hipCtxGetSharedMemConfig)(hipSharedMemConfig* pConfig); +typedef hipError_t (*t_hipCtxPopCurrent)(hipCtx_t* ctx); +typedef hipError_t (*t_hipCtxPushCurrent)(hipCtx_t ctx); +typedef hipError_t (*t_hipCtxSetCacheConfig)(hipFuncCache_t cacheConfig); +typedef hipError_t (*t_hipCtxSetCurrent)(hipCtx_t ctx); +typedef hipError_t (*t_hipCtxSetSharedMemConfig)(hipSharedMemConfig config); +typedef hipError_t (*t_hipCtxSynchronize)(void); +typedef hipError_t (*t_hipDestroyExternalMemory)(hipExternalMemory_t extMem); +typedef hipError_t (*t_hipDestroyExternalSemaphore)(hipExternalSemaphore_t extSem); +typedef hipError_t (*t_hipDestroySurfaceObject)(hipSurfaceObject_t surfaceObject); 
+typedef hipError_t (*t_hipDestroyTextureObject)(hipTextureObject_t textureObject); +typedef hipError_t (*t_hipDeviceCanAccessPeer)(int* canAccessPeer, int deviceId, int peerDeviceId); +typedef hipError_t (*t_hipDeviceComputeCapability)(int* major, int* minor, hipDevice_t device); +typedef hipError_t (*t_hipDeviceDisablePeerAccess)(int peerDeviceId); +typedef hipError_t (*t_hipDeviceEnablePeerAccess)(int peerDeviceId, unsigned int flags); +typedef hipError_t (*t_hipDeviceGet)(hipDevice_t* device, int ordinal); +typedef hipError_t (*t_hipDeviceGetAttribute)(int* pi, hipDeviceAttribute_t attr, int deviceId); +typedef hipError_t (*t_hipDeviceGetByPCIBusId)(int* device, const char* pciBusId); +typedef hipError_t (*t_hipDeviceGetCacheConfig)(hipFuncCache_t* cacheConfig); +typedef hipError_t (*t_hipDeviceGetDefaultMemPool)(hipMemPool_t* mem_pool, int device); +typedef hipError_t (*t_hipDeviceGetGraphMemAttribute)(int device, hipGraphMemAttributeType attr, + void* value); +typedef hipError_t (*t_hipDeviceGetLimit)(size_t* pValue, enum hipLimit_t limit); +typedef hipError_t (*t_hipDeviceGetMemPool)(hipMemPool_t* mem_pool, int device); +typedef hipError_t (*t_hipDeviceGetName)(char* name, int len, hipDevice_t device); +typedef hipError_t (*t_hipDeviceGetP2PAttribute)(int* value, hipDeviceP2PAttr attr, int srcDevice, + int dstDevice); +typedef hipError_t (*t_hipDeviceGetPCIBusId)(char* pciBusId, int len, int device); +typedef hipError_t (*t_hipDeviceGetSharedMemConfig)(hipSharedMemConfig* pConfig); +typedef hipError_t (*t_hipDeviceGetStreamPriorityRange)(int* leastPriority, int* greatestPriority); +typedef hipError_t (*t_hipDeviceGetUuid)(hipUUID* uuid, hipDevice_t device); +typedef hipError_t (*t_hipDeviceGraphMemTrim)(int device); +typedef hipError_t (*t_hipDevicePrimaryCtxGetState)(hipDevice_t dev, unsigned int* flags, + int* active); +typedef hipError_t (*t_hipDevicePrimaryCtxRelease)(hipDevice_t dev); +typedef hipError_t (*t_hipDevicePrimaryCtxReset)(hipDevice_t dev); 
+typedef hipError_t (*t_hipDevicePrimaryCtxRetain)(hipCtx_t* pctx, hipDevice_t dev); +typedef hipError_t (*t_hipDevicePrimaryCtxSetFlags)(hipDevice_t dev, unsigned int flags); +typedef hipError_t (*t_hipDeviceReset)(void); +typedef hipError_t (*t_hipDeviceSetCacheConfig)(hipFuncCache_t cacheConfig); +typedef hipError_t (*t_hipDeviceSetGraphMemAttribute)(int device, hipGraphMemAttributeType attr, + void* value); +typedef hipError_t (*t_hipDeviceSetLimit)(enum hipLimit_t limit, size_t value); +typedef hipError_t (*t_hipDeviceSetMemPool)(int device, hipMemPool_t mem_pool); +typedef hipError_t (*t_hipDeviceSetSharedMemConfig)(hipSharedMemConfig config); +typedef hipError_t (*t_hipDeviceSynchronize)(void); +typedef hipError_t (*t_hipDeviceTotalMem)(size_t* bytes, hipDevice_t device); +typedef hipError_t (*t_hipDriverGetVersion)(int* driverVersion); +typedef hipError_t (*t_hipDrvGetErrorName)(hipError_t hipError, const char** errorString); +typedef hipError_t (*t_hipDrvGetErrorString)(hipError_t hipError, const char** errorString); +typedef hipError_t (*t_hipDrvGraphAddMemcpyNode)(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, + size_t numDependencies, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); +typedef hipError_t (*t_hipDrvMemcpy2DUnaligned)(const hip_Memcpy2D* pCopy); +typedef hipError_t (*t_hipDrvMemcpy3D)(const HIP_MEMCPY3D* pCopy); +typedef hipError_t (*t_hipDrvMemcpy3DAsync)(const HIP_MEMCPY3D* pCopy, hipStream_t stream); +typedef hipError_t (*t_hipDrvPointerGetAttributes)(unsigned int numAttributes, + hipPointer_attribute* attributes, void** data, + hipDeviceptr_t ptr); +typedef hipError_t (*t_hipEventCreate)(hipEvent_t* event); +typedef hipError_t (*t_hipEventCreateWithFlags)(hipEvent_t* event, unsigned flags); +typedef hipError_t (*t_hipEventDestroy)(hipEvent_t event); +typedef hipError_t (*t_hipEventElapsedTime)(float* ms, hipEvent_t start, hipEvent_t stop); +typedef hipError_t (*t_hipEventQuery)(hipEvent_t event); 
+typedef hipError_t (*t_hipEventRecord)(hipEvent_t event, hipStream_t stream); +typedef hipError_t (*t_hipEventSynchronize)(hipEvent_t event); +typedef hipError_t (*t_hipExtGetLinkTypeAndHopCount)(int device1, int device2, uint32_t* linktype, + uint32_t* hopcount); +typedef hipError_t (*t_hipExtLaunchKernel)(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags); +typedef hipError_t (*t_hipExtLaunchMultiKernelMultiDevice)(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); +typedef hipError_t (*t_hipExtMallocWithFlags)(void** ptr, size_t sizeBytes, unsigned int flags); +typedef hipError_t (*t_hipExtStreamCreateWithCUMask)(hipStream_t* stream, uint32_t cuMaskSize, + const uint32_t* cuMask); +typedef hipError_t (*t_hipExtStreamGetCUMask)(hipStream_t stream, uint32_t cuMaskSize, + uint32_t* cuMask); +typedef hipError_t (*t_hipExternalMemoryGetMappedBuffer)( + void** devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc* bufferDesc); +typedef hipError_t (*t_hipFree)(void* ptr); +typedef hipError_t (*t_hipFreeArray)(hipArray_t array); +typedef hipError_t (*t_hipFreeAsync)(void* dev_ptr, hipStream_t stream); +typedef hipError_t (*t_hipFreeHost)(void* ptr); +typedef hipError_t (*t_hipFreeMipmappedArray)(hipMipmappedArray_t mipmappedArray); +typedef hipError_t (*t_hipFuncGetAttribute)(int* value, hipFunction_attribute attrib, + hipFunction_t hfunc); +typedef hipError_t (*t_hipFuncGetAttributes)(struct hipFuncAttributes* attr, const void* func); +typedef hipError_t (*t_hipFuncSetAttribute)(const void* func, hipFuncAttribute attr, int value); +typedef hipError_t (*t_hipFuncSetCacheConfig)(const void* func, hipFuncCache_t config); +typedef hipError_t (*t_hipFuncSetSharedMemConfig)(const void* func, hipSharedMemConfig config); +typedef hipError_t (*t_hipGLGetDevices)(unsigned int* pHipDeviceCount, int* pHipDevices, + 
unsigned int hipDeviceCount, hipGLDeviceList deviceList); +typedef hipError_t (*t_hipGetChannelDesc)(hipChannelFormatDesc* desc, hipArray_const_t array); +typedef hipError_t (*t_hipGetDevice)(int* deviceId); +typedef hipError_t (*t_hipGetDeviceCount)(int* count); +typedef hipError_t (*t_hipGetDeviceFlags)(unsigned int* flags); +typedef hipError_t (*t_hipGetDevicePropertiesR0600)(hipDeviceProp_tR0600* prop, int device); +typedef hipError_t (*t_hipGetDevicePropertiesR0000)(hipDeviceProp_tR0000* prop, int device); +typedef const char* (*t_hipGetErrorName)(hipError_t hip_error); +typedef const char* (*t_hipGetErrorString)(hipError_t hipError); +typedef hipError_t (*t_hipGetLastError)(void); +typedef hipError_t (*t_hipGetMipmappedArrayLevel)(hipArray_t* levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level); +typedef hipError_t (*t_hipGetSymbolAddress)(void** devPtr, const void* symbol); +typedef hipError_t (*t_hipGetSymbolSize)(size_t* size, const void* symbol); +typedef hipError_t (*t_hipGetTextureAlignmentOffset)(size_t* offset, + const textureReference* texref); +typedef hipError_t (*t_hipGetTextureObjectResourceDesc)(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); +typedef hipError_t (*t_hipGetTextureObjectResourceViewDesc)( + struct hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject); +typedef hipError_t (*t_hipGetTextureObjectTextureDesc)(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); +typedef hipError_t (*t_hipGetTextureReference)(const textureReference** texref, const void* symbol); +typedef hipError_t (*t_hipGraphAddChildGraphNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipGraph_t childGraph); +typedef hipError_t (*t_hipGraphAddDependencies)(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); +typedef hipError_t (*t_hipGraphAddEmptyNode)(hipGraphNode_t* pGraphNode, 
hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies); +typedef hipError_t (*t_hipGraphAddEventRecordNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipEvent_t event); +typedef hipError_t (*t_hipGraphAddEventWaitNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipEvent_t event); +typedef hipError_t (*t_hipGraphAddHostNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipHostNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphAddKernelNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipKernelNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphAddMemAllocNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + hipMemAllocNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphAddMemFreeNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dev_ptr); +typedef hipError_t (*t_hipGraphAddMemcpyNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipMemcpy3DParms* pCopyParams); +typedef hipError_t (*t_hipGraphAddMemcpyNode1D)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, const void* src, + size_t count, hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphAddMemcpyNodeFromSymbol)(hipGraphNode_t* pGraphNode, + hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, + const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphAddMemcpyNodeToSymbol)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + 
const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphAddMemsetNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipMemsetParams* pMemsetParams); + +typedef hipError_t (*t_hipGraphChildGraphNodeGetGraph)(hipGraphNode_t node, hipGraph_t* pGraph); +typedef hipError_t (*t_hipGraphClone)(hipGraph_t* pGraphClone, hipGraph_t originalGraph); +typedef hipError_t (*t_hipGraphCreate)(hipGraph_t* pGraph, unsigned int flags); +typedef hipError_t (*t_hipGraphDebugDotPrint)(hipGraph_t graph, const char* path, + unsigned int flags); +typedef hipError_t (*t_hipGraphDestroy)(hipGraph_t graph); +typedef hipError_t (*t_hipGraphDestroyNode)(hipGraphNode_t node); +typedef hipError_t (*t_hipGraphEventRecordNodeGetEvent)(hipGraphNode_t node, hipEvent_t* event_out); +typedef hipError_t (*t_hipGraphEventRecordNodeSetEvent)(hipGraphNode_t node, hipEvent_t event); +typedef hipError_t (*t_hipGraphEventWaitNodeGetEvent)(hipGraphNode_t node, hipEvent_t* event_out); +typedef hipError_t (*t_hipGraphEventWaitNodeSetEvent)(hipGraphNode_t node, hipEvent_t event); +typedef hipError_t (*t_hipGraphExecChildGraphNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + hipGraph_t childGraph); +typedef hipError_t (*t_hipGraphExecDestroy)(hipGraphExec_t graphExec); +typedef hipError_t (*t_hipGraphExecEventRecordNodeSetEvent)(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, hipEvent_t event); +typedef hipError_t (*t_hipGraphExecEventWaitNodeSetEvent)(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, hipEvent_t event); +typedef hipError_t (*t_hipGraphExecHostNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const hipHostNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphExecKernelNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const 
hipKernelNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphExecMemcpyNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams); +typedef hipError_t (*t_hipGraphExecMemcpyNodeSetParams1D)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, void* dst, + const void* src, size_t count, + hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphExecMemcpyNodeSetParamsFromSymbol)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, void* dst, + const void* symbol, size_t count, + size_t offset, + hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphExecMemcpyNodeSetParamsToSymbol)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, + hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphExecMemsetNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const hipMemsetParams* pNodeParams); +typedef hipError_t (*t_hipGraphExecUpdate)(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out); +typedef hipError_t (*t_hipGraphGetEdges)(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, + size_t* numEdges); +typedef hipError_t (*t_hipGraphGetNodes)(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes); +typedef hipError_t (*t_hipGraphGetRootNodes)(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes); +typedef hipError_t (*t_hipGraphHostNodeGetParams)(hipGraphNode_t node, + hipHostNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphHostNodeSetParams)(hipGraphNode_t node, + const hipHostNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphInstantiate)(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, + size_t bufferSize); +typedef hipError_t (*t_hipGraphInstantiateWithFlags)(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags); +typedef hipError_t 
(*t_hipGraphKernelNodeCopyAttributes)(hipGraphNode_t hSrc, hipGraphNode_t hDst); +typedef hipError_t (*t_hipGraphKernelNodeGetAttribute)(hipGraphNode_t hNode, + hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value); +typedef hipError_t (*t_hipGraphKernelNodeGetParams)(hipGraphNode_t node, + hipKernelNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphKernelNodeSetAttribute)(hipGraphNode_t hNode, + hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value); +typedef hipError_t (*t_hipGraphKernelNodeSetParams)(hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphLaunch)(hipGraphExec_t graphExec, hipStream_t stream); +typedef hipError_t (*t_hipGraphMemAllocNodeGetParams)(hipGraphNode_t node, + hipMemAllocNodeParams* pNodeParams); +typedef hipError_t (*t_hipGraphMemFreeNodeGetParams)(hipGraphNode_t node, void* dev_ptr); +typedef hipError_t (*t_hipGraphMemcpyNodeGetParams)(hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams); +typedef hipError_t (*t_hipGraphMemcpyNodeSetParams)(hipGraphNode_t node, + const hipMemcpy3DParms* pNodeParams); +typedef hipError_t (*t_hipGraphMemcpyNodeSetParams1D)(hipGraphNode_t node, void* dst, + const void* src, size_t count, + hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphMemcpyNodeSetParamsFromSymbol)(hipGraphNode_t node, void* dst, + const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphMemcpyNodeSetParamsToSymbol)(hipGraphNode_t node, const void* symbol, + const void* src, size_t count, + size_t offset, hipMemcpyKind kind); +typedef hipError_t (*t_hipGraphMemsetNodeGetParams)(hipGraphNode_t node, + hipMemsetParams* pNodeParams); +typedef hipError_t (*t_hipGraphMemsetNodeSetParams)(hipGraphNode_t node, + const hipMemsetParams* pNodeParams); +typedef hipError_t (*t_hipGraphNodeFindInClone)(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph); +typedef hipError_t 
(*t_hipGraphNodeGetDependencies)(hipGraphNode_t node, + hipGraphNode_t* pDependencies, + size_t* pNumDependencies); +typedef hipError_t (*t_hipGraphNodeGetDependentNodes)(hipGraphNode_t node, + hipGraphNode_t* pDependentNodes, + size_t* pNumDependentNodes); +typedef hipError_t (*t_hipGraphNodeGetEnabled)(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled); +typedef hipError_t (*t_hipGraphNodeGetType)(hipGraphNode_t node, hipGraphNodeType* pType); +typedef hipError_t (*t_hipGraphNodeSetEnabled)(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int isEnabled); +typedef hipError_t (*t_hipGraphReleaseUserObject)(hipGraph_t graph, hipUserObject_t object, + unsigned int count); +typedef hipError_t (*t_hipGraphRemoveDependencies)(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, + size_t numDependencies); +typedef hipError_t (*t_hipGraphRetainUserObject)(hipGraph_t graph, hipUserObject_t object, + unsigned int count, unsigned int flags); +typedef hipError_t (*t_hipGraphUpload)(hipGraphExec_t graphExec, hipStream_t stream); +typedef hipError_t (*t_hipGraphicsGLRegisterBuffer)(hipGraphicsResource** resource, GLuint buffer, + unsigned int flags); +typedef hipError_t (*t_hipGraphicsGLRegisterImage)(hipGraphicsResource** resource, GLuint image, + GLenum target, unsigned int flags); +typedef hipError_t (*t_hipGraphicsMapResources)(int count, hipGraphicsResource_t* resources, + hipStream_t stream); +typedef hipError_t (*t_hipGraphicsResourceGetMappedPointer)(void** devPtr, size_t* size, + hipGraphicsResource_t resource); +typedef hipError_t (*t_hipGraphicsSubResourceGetMappedArray)(hipArray_t* array, + hipGraphicsResource_t resource, + unsigned int arrayIndex, + unsigned int mipLevel); +typedef hipError_t (*t_hipGraphicsUnmapResources)(int count, hipGraphicsResource_t* resources, + hipStream_t stream); +typedef hipError_t (*t_hipGraphicsUnregisterResource)(hipGraphicsResource_t resource); +typedef hipError_t 
(*t_hipHostAlloc)(void** ptr, size_t size, unsigned int flags); +typedef hipError_t (*t_hipHostFree)(void* ptr); +typedef hipError_t (*t_hipHostGetDevicePointer)(void** devPtr, void* hstPtr, unsigned int flags); +typedef hipError_t (*t_hipHostGetFlags)(unsigned int* flagsPtr, void* hostPtr); +typedef hipError_t (*t_hipHostMalloc)(void** ptr, size_t size, unsigned int flags); +typedef hipError_t (*t_hipHostRegister)(void* hostPtr, size_t sizeBytes, unsigned int flags); +typedef hipError_t (*t_hipHostUnregister)(void* hostPtr); +typedef hipError_t (*t_hipImportExternalMemory)(hipExternalMemory_t* extMem_out, + const hipExternalMemoryHandleDesc* memHandleDesc); +typedef hipError_t (*t_hipImportExternalSemaphore)( + hipExternalSemaphore_t* extSem_out, const hipExternalSemaphoreHandleDesc* semHandleDesc); +typedef hipError_t (*t_hipInit)(unsigned int flags); +typedef hipError_t (*t_hipIpcCloseMemHandle)(void* devPtr); +typedef hipError_t (*t_hipIpcGetEventHandle)(hipIpcEventHandle_t* handle, hipEvent_t event); +typedef hipError_t (*t_hipIpcGetMemHandle)(hipIpcMemHandle_t* handle, void* devPtr); +typedef hipError_t (*t_hipIpcOpenEventHandle)(hipEvent_t* event, hipIpcEventHandle_t handle); +typedef hipError_t (*t_hipIpcOpenMemHandle)(void** devPtr, hipIpcMemHandle_t handle, + unsigned int flags); +typedef const char* (*t_hipKernelNameRef)(const hipFunction_t f); +typedef const char* (*t_hipKernelNameRefByPtr)(const void* hostFunction, hipStream_t stream); +typedef hipError_t (*t_hipLaunchByPtr)(const void* func); +typedef hipError_t (*t_hipLaunchCooperativeKernel)(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); +typedef hipError_t (*t_hipLaunchCooperativeKernelMultiDevice)(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); +typedef hipError_t (*t_hipLaunchHostFunc)(hipStream_t stream, hipHostFn_t fn, void* userData); +typedef hipError_t (*t_hipLaunchKernel)(const void* 
function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream); +typedef hipError_t (*t_hipMalloc)(void** ptr, size_t size); +typedef hipError_t (*t_hipMalloc3D)(hipPitchedPtr* pitchedDevPtr, hipExtent extent); +typedef hipError_t (*t_hipMalloc3DArray)(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +typedef hipError_t (*t_hipMallocArray)(hipArray_t* array, const hipChannelFormatDesc* desc, + size_t width, size_t height, unsigned int flags); +typedef hipError_t (*t_hipMallocAsync)(void** dev_ptr, size_t size, hipStream_t stream); +typedef hipError_t (*t_hipMallocFromPoolAsync)(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream); +typedef hipError_t (*t_hipMallocHost)(void** ptr, size_t size); +typedef hipError_t (*t_hipMallocManaged)(void** dev_ptr, size_t size, unsigned int flags); +typedef hipError_t (*t_hipMallocMipmappedArray)(hipMipmappedArray_t* mipmappedArray, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int numLevels, + unsigned int flags); +typedef hipError_t (*t_hipMallocPitch)(void** ptr, size_t* pitch, size_t width, size_t height); +typedef hipError_t (*t_hipMemAddressFree)(void* devPtr, size_t size); +typedef hipError_t (*t_hipMemAddressReserve)(void** ptr, size_t size, size_t alignment, void* addr, + unsigned long long flags); +typedef hipError_t (*t_hipMemAdvise)(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + int device); +typedef hipError_t (*t_hipMemAdvise_v2)(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + hipMemLocation device); +typedef hipError_t (*t_hipMemAllocHost)(void** ptr, size_t size); +typedef hipError_t (*t_hipMemAllocPitch)(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes); +typedef hipError_t (*t_hipMemCreate)(hipMemGenericAllocationHandle_t* handle, size_t size, + const hipMemAllocationProp* 
prop, unsigned long long flags); +typedef hipError_t (*t_hipMemExportToShareableHandle)(void* shareableHandle, + hipMemGenericAllocationHandle_t handle, + hipMemAllocationHandleType handleType, + unsigned long long flags); +typedef hipError_t (*t_hipMemGetAccess)(unsigned long long* flags, const hipMemLocation* location, + void* ptr); +typedef hipError_t (*t_hipMemGetAddressRange)(hipDeviceptr_t* pbase, size_t* psize, + hipDeviceptr_t dptr); +typedef hipError_t (*t_hipMemGetAllocationGranularity)(size_t* granularity, + const hipMemAllocationProp* prop, + hipMemAllocationGranularity_flags option); +typedef hipError_t (*t_hipMemGetAllocationPropertiesFromHandle)( + hipMemAllocationProp* prop, hipMemGenericAllocationHandle_t handle); +typedef hipError_t (*t_hipMemGetInfo)(size_t* free, size_t* total); +typedef hipError_t (*t_hipMemImportFromShareableHandle)(hipMemGenericAllocationHandle_t* handle, + void* osHandle, + hipMemAllocationHandleType shHandleType); +typedef hipError_t (*t_hipMemMap)(void* ptr, size_t size, size_t offset, + hipMemGenericAllocationHandle_t handle, unsigned long long flags); +typedef hipError_t (*t_hipMemMapArrayAsync)(hipArrayMapInfo* mapInfoList, unsigned int count, + hipStream_t stream); +typedef hipError_t (*t_hipMemPoolCreate)(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props); +typedef hipError_t (*t_hipMemPoolDestroy)(hipMemPool_t mem_pool); +typedef hipError_t (*t_hipMemPoolExportPointer)(hipMemPoolPtrExportData* export_data, + void* dev_ptr); +typedef hipError_t (*t_hipMemPoolExportToShareableHandle)(void* shared_handle, + hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags); +typedef hipError_t (*t_hipMemPoolGetAccess)(hipMemAccessFlags* flags, hipMemPool_t mem_pool, + hipMemLocation* location); +typedef hipError_t (*t_hipMemPoolGetAttribute)(hipMemPool_t mem_pool, hipMemPoolAttr attr, + void* value); +typedef hipError_t (*t_hipMemPoolImportFromShareableHandle)(hipMemPool_t* mem_pool, + void* 
shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags); +typedef hipError_t (*t_hipMemPoolImportPointer)(void** dev_ptr, hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data); +typedef hipError_t (*t_hipMemPoolSetAccess)(hipMemPool_t mem_pool, + const hipMemAccessDesc* desc_list, size_t count); +typedef hipError_t (*t_hipMemPoolSetAttribute)(hipMemPool_t mem_pool, hipMemPoolAttr attr, + void* value); +typedef hipError_t (*t_hipMemPoolTrimTo)(hipMemPool_t mem_pool, size_t min_bytes_to_hold); +typedef hipError_t (*t_hipMemPrefetchAsync)(const void* dev_ptr, size_t count, int device, + hipStream_t stream); +typedef hipError_t (*t_hipMemPrefetchAsync_v2)(const void* dev_ptr, size_t count, + hipMemLocation location, unsigned int flags, + hipStream_t stream); +typedef hipError_t (*t_hipMemPtrGetInfo)(void* ptr, size_t* size); +typedef hipError_t (*t_hipMemRangeGetAttribute)(void* data, size_t data_size, + hipMemRangeAttribute attribute, const void* dev_ptr, + size_t count); +typedef hipError_t (*t_hipMemRangeGetAttributes)(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, + size_t num_attributes, const void* dev_ptr, + size_t count); +typedef hipError_t (*t_hipMemRelease)(hipMemGenericAllocationHandle_t handle); +typedef hipError_t (*t_hipMemRetainAllocationHandle)(hipMemGenericAllocationHandle_t* handle, + void* addr); +typedef hipError_t (*t_hipMemSetAccess)(void* ptr, size_t size, const hipMemAccessDesc* desc, + size_t count); +typedef hipError_t (*t_hipMemUnmap)(void* ptr, size_t size); +typedef hipError_t (*t_hipMemcpy)(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpy2D)(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpy2DAsync)(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream); +typedef 
hipError_t (*t_hipMemcpy2DFromArray)(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffset, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpy2DFromArrayAsync)(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffset, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpy2DToArray)(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpy2DToArrayAsync)(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpy3D)(const struct hipMemcpy3DParms* p); +typedef hipError_t (*t_hipMemcpy3DAsync)(const struct hipMemcpy3DParms* p, hipStream_t stream); +typedef hipError_t (*t_hipMemcpyAsync)(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream); +typedef hipError_t (*t_hipMemcpyAtoH)(void* dst, hipArray_t srcArray, size_t srcOffset, + size_t count); +typedef hipError_t (*t_hipMemcpyDtoD)(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); +typedef hipError_t (*t_hipMemcpyDtoDAsync)(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpyDtoH)(void* dst, hipDeviceptr_t src, size_t sizeBytes); +typedef hipError_t (*t_hipMemcpyDtoHAsync)(void* dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpyFromArray)(void* dst, hipArray_const_t srcArray, size_t wOffset, + size_t hOffset, size_t count, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpyFromSymbol)(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpyFromSymbolAsync)(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, 
hipMemcpyKind kind, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpyHtoA)(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t count); +typedef hipError_t (*t_hipMemcpyHtoD)(hipDeviceptr_t dst, const void* src, size_t sizeBytes); +typedef hipError_t (*t_hipMemcpyHtoDAsync)(hipDeviceptr_t dst, const void* src, size_t sizeBytes, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpyParam2D)(const hip_Memcpy2D* pCopy); +typedef hipError_t (*t_hipMemcpyParam2DAsync)(const hip_Memcpy2D* pCopy, hipStream_t stream); +typedef hipError_t (*t_hipMemcpyPeer)(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); +typedef hipError_t (*t_hipMemcpyPeerAsync)(void* dst, int dstDeviceId, const void* src, + int srcDevice, size_t sizeBytes, hipStream_t stream); +typedef hipError_t (*t_hipMemcpyToArray)(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t count, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpyToSymbol)(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind); +typedef hipError_t (*t_hipMemcpyToSymbolAsync)(const void* symbol, const void* src, + size_t sizeBytes, size_t offset, hipMemcpyKind kind, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpyWithStream)(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream); +typedef hipError_t (*t_hipMemset)(void* dst, int value, size_t sizeBytes); +typedef hipError_t (*t_hipMemset2D)(void* dst, size_t pitch, int value, size_t width, + size_t height); +typedef hipError_t (*t_hipMemset2DAsync)(void* dst, size_t pitch, int value, size_t width, + size_t height, hipStream_t stream); +typedef hipError_t (*t_hipMemset3D)(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); +typedef hipError_t (*t_hipMemset3DAsync)(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream); +typedef hipError_t (*t_hipMemsetAsync)(void* dst, int value, size_t sizeBytes, 
hipStream_t stream); +typedef hipError_t (*t_hipMemsetD16)(hipDeviceptr_t dest, unsigned short value, size_t count); +typedef hipError_t (*t_hipMemsetD16Async)(hipDeviceptr_t dest, unsigned short value, size_t count, + hipStream_t stream); +typedef hipError_t (*t_hipMemsetD32)(hipDeviceptr_t dest, int value, size_t count); +typedef hipError_t (*t_hipMemsetD32Async)(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream); +typedef hipError_t (*t_hipMemsetD8)(hipDeviceptr_t dest, unsigned char value, size_t count); +typedef hipError_t (*t_hipMemsetD8Async)(hipDeviceptr_t dest, unsigned char value, size_t count, + hipStream_t stream); +typedef hipError_t (*t_hipMipmappedArrayCreate)(hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels); +typedef hipError_t (*t_hipMipmappedArrayDestroy)(hipMipmappedArray_t hMipmappedArray); +typedef hipError_t (*t_hipMipmappedArrayGetLevel)(hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level); +typedef hipError_t (*t_hipModuleGetFunction)(hipFunction_t* function, hipModule_t module, + const char* kname); +typedef hipError_t (*t_hipModuleGetFunctionCount)(unsigned int* count, hipModule_t module); +typedef hipError_t (*t_hipModuleGetGlobal)(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); +typedef hipError_t (*t_hipModuleGetTexRef)(textureReference** texRef, hipModule_t hmod, + const char* name); +typedef hipError_t (*t_hipModuleLaunchCooperativeKernel)( + hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, void** kernelParams); +typedef hipError_t (*t_hipModuleLaunchCooperativeKernelMultiDevice)( + hipFunctionLaunchParams* launchParamsList, unsigned int numDevices, unsigned int flags); +typedef hipError_t (*t_hipModuleLaunchKernel)(hipFunction_t f, unsigned 
int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams, + void** extra); +typedef hipError_t (*t_hipModuleLoad)(hipModule_t* module, const char* fname); +typedef hipError_t (*t_hipModuleLoadData)(hipModule_t* module, const void* image); +typedef hipError_t (*t_hipModuleLoadDataEx)(hipModule_t* module, const void* image, + unsigned int numOptions, hipJitOption* options, + void** optionValues); +typedef hipError_t (*t_hipLinkAddData)(hipLinkState_t state, hipJitInputType type, void* data, + size_t size, const char* name, unsigned int numOptions, + hipJitOption* options, void** optionValues); +typedef hipError_t (*t_hipLinkAddFile)(hipLinkState_t state, hipJitInputType type, const char* path, + unsigned int numOptions, hipJitOption* options, + void** optionValues); +typedef hipError_t (*t_hipLinkComplete)(hipLinkState_t state, void** hipBinOut, size_t* sizeOut); +typedef hipError_t (*t_hipLinkCreate)(unsigned int numOptions, hipJitOption* options, + void** optionValues, hipLinkState_t* stateOut); +typedef hipError_t (*t_hipLinkDestroy)(hipLinkState_t state); + +typedef hipError_t (*t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor)( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); +typedef hipError_t (*t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +typedef hipError_t (*t_hipModuleOccupancyMaxPotentialBlockSize)(int* gridSize, int* blockSize, + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit); +typedef hipError_t (*t_hipModuleOccupancyMaxPotentialBlockSizeWithFlags)( + int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, + unsigned int flags); +typedef hipError_t (*t_hipModuleUnload)(hipModule_t module); +typedef 
hipError_t (*t_hipOccupancyAvailableDynamicSMemPerBlock)(size_t* dynamicSmemSize, const void* f, + int numBlocks, int blockSize); +typedef hipError_t (*t_hipOccupancyMaxActiveBlocksPerMultiprocessor)(int* numBlocks, const void* f, + int blockSize, + size_t dynSharedMemPerBlk); +typedef hipError_t (*t_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +typedef hipError_t (*t_hipOccupancyMaxPotentialBlockSize)(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit); +typedef hipError_t (*t_hipPeekAtLastError)(void); +typedef hipError_t (*t_hipPointerGetAttribute)(void* data, hipPointer_attribute attribute, + hipDeviceptr_t ptr); +typedef hipError_t (*t_hipPointerGetAttributes)(hipPointerAttribute_t* attributes, const void* ptr); +typedef hipError_t (*t_hipPointerSetAttribute)(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr); +typedef hipError_t (*t_hipProfilerStart)(); +typedef hipError_t (*t_hipProfilerStop)(); +typedef hipError_t (*t_hipRuntimeGetVersion)(int* runtimeVersion); +typedef hipError_t (*t_hipSetDevice)(int deviceId); +typedef hipError_t (*t_hipSetDeviceFlags)(unsigned flags); +typedef hipError_t (*t_hipSetupArgument)(const void* arg, size_t size, size_t offset); +typedef hipError_t (*t_hipSignalExternalSemaphoresAsync)( + const hipExternalSemaphore_t* extSemArray, const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +typedef hipError_t (*t_hipStreamAddCallback)(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags); +typedef hipError_t (*t_hipStreamAttachMemAsync)(hipStream_t stream, void* dev_ptr, size_t length, + unsigned int flags); +typedef hipError_t (*t_hipStreamBeginCapture)(hipStream_t stream, hipStreamCaptureMode mode); +typedef hipError_t (*t_hipStreamCopyAttributes)(hipStream_t dst, hipStream_t src); 
+typedef hipError_t (*t_hipStreamCreate)(hipStream_t* stream); +typedef hipError_t (*t_hipStreamCreateWithFlags)(hipStream_t* stream, unsigned int flags); +typedef hipError_t (*t_hipStreamCreateWithPriority)(hipStream_t* stream, unsigned int flags, + int priority); +typedef hipError_t (*t_hipStreamDestroy)(hipStream_t stream); +typedef hipError_t (*t_hipStreamEndCapture)(hipStream_t stream, hipGraph_t* pGraph); +typedef hipError_t (*t_hipStreamGetCaptureInfo)(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); +typedef hipError_t (*t_hipStreamGetCaptureInfo_v2)( + hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, + hipGraph_t* graph_out, const hipGraphNode_t** dependencies_out, size_t* numDependencies_out); +typedef hipError_t (*t_hipStreamGetDevice)(hipStream_t stream, hipDevice_t* device); +typedef hipError_t (*t_hipStreamGetFlags)(hipStream_t stream, unsigned int* flags); +typedef hipError_t (*t_hipStreamGetId)(hipStream_t stream, unsigned long long* streamId); +typedef hipError_t (*t_hipStreamGetPriority)(hipStream_t stream, int* priority); +typedef hipError_t (*t_hipStreamIsCapturing)(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus); +typedef hipError_t (*t_hipStreamQuery)(hipStream_t stream); +typedef hipError_t (*t_hipStreamSynchronize)(hipStream_t stream); +typedef hipError_t (*t_hipStreamUpdateCaptureDependencies)(hipStream_t stream, + hipGraphNode_t* dependencies, + size_t numDependencies, + unsigned int flags); +typedef hipError_t (*t_hipStreamWaitEvent)(hipStream_t stream, hipEvent_t event, + unsigned int flags); +typedef hipError_t (*t_hipStreamWaitValue32)(hipStream_t stream, void* ptr, uint32_t value, + unsigned int flags, uint32_t mask); +typedef hipError_t (*t_hipStreamWaitValue64)(hipStream_t stream, void* ptr, uint64_t value, + unsigned int flags, uint64_t mask); +typedef hipError_t (*t_hipStreamWriteValue32)(hipStream_t stream, void* ptr, uint32_t value, 
+ unsigned int flags); +typedef hipError_t (*t_hipStreamWriteValue64)(hipStream_t stream, void* ptr, uint64_t value, + unsigned int flags); +typedef hipError_t (*t_hipStreamBatchMemOp)(hipStream_t stream, unsigned int count, + hipStreamBatchMemOpParams* paramArray, + unsigned int flags); +typedef hipError_t (*t_hipTexObjectCreate)(hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); +typedef hipError_t (*t_hipTexObjectDestroy)(hipTextureObject_t texObject); +typedef hipError_t (*t_hipTexObjectGetResourceDesc)(HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject); +typedef hipError_t (*t_hipTexObjectGetResourceViewDesc)(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); +typedef hipError_t (*t_hipTexObjectGetTextureDesc)(HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject); +typedef hipError_t (*t_hipTexRefGetAddress)(hipDeviceptr_t* dev_ptr, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetAddressMode)(enum hipTextureAddressMode* pam, + const textureReference* texRef, int dim); +typedef hipError_t (*t_hipTexRefGetFilterMode)(enum hipTextureFilterMode* pfm, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetFlags)(unsigned int* pFlags, const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetFormat)(hipArray_Format* pFormat, int* pNumChannels, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetMaxAnisotropy)(int* pmaxAnsio, const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetMipMappedArray)(hipMipmappedArray_t* pArray, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetMipmapFilterMode)(enum hipTextureFilterMode* pfm, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetMipmapLevelBias)(float* pbias, const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetMipmapLevelClamp)(float* 
pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefSetAddress)(size_t* ByteOffset, textureReference* texRef, + hipDeviceptr_t dptr, size_t bytes); +typedef hipError_t (*t_hipTexRefSetAddress2D)(textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t dptr, + size_t Pitch); +typedef hipError_t (*t_hipTexRefSetAddressMode)(textureReference* texRef, int dim, + enum hipTextureAddressMode am); +typedef hipError_t (*t_hipTexRefSetArray)(textureReference* tex, hipArray_const_t array, + unsigned int flags); +typedef hipError_t (*t_hipTexRefSetBorderColor)(textureReference* texRef, float* pBorderColor); +typedef hipError_t (*t_hipTexRefSetFilterMode)(textureReference* texRef, + enum hipTextureFilterMode fm); +typedef hipError_t (*t_hipTexRefSetFlags)(textureReference* texRef, unsigned int Flags); +typedef hipError_t (*t_hipTexRefSetFormat)(textureReference* texRef, hipArray_Format fmt, + int NumPackedComponents); +typedef hipError_t (*t_hipTexRefSetMaxAnisotropy)(textureReference* texRef, unsigned int maxAniso); +typedef hipError_t (*t_hipTexRefSetMipmapFilterMode)(textureReference* texRef, + enum hipTextureFilterMode fm); +typedef hipError_t (*t_hipTexRefSetMipmapLevelBias)(textureReference* texRef, float bias); +typedef hipError_t (*t_hipTexRefSetMipmapLevelClamp)(textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp); +typedef hipError_t (*t_hipTexRefSetMipmappedArray)(textureReference* texRef, + struct hipMipmappedArray* mipmappedArray, + unsigned int Flags); +typedef hipError_t (*t_hipThreadExchangeStreamCaptureMode)(hipStreamCaptureMode* mode); +typedef hipError_t (*t_hipUnbindTexture)(const textureReference* tex); +typedef hipError_t (*t_hipUserObjectCreate)(hipUserObject_t* object_out, void* ptr, + hipHostFn_t destroy, unsigned int initialRefcount, + unsigned int flags); +typedef hipError_t (*t_hipUserObjectRelease)(hipUserObject_t object, unsigned 
int count); +typedef hipError_t (*t_hipUserObjectRetain)(hipUserObject_t object, unsigned int count); +typedef hipError_t (*t_hipWaitExternalSemaphoresAsync)( + const hipExternalSemaphore_t* extSemArray, const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); + +typedef hipError_t (*t_hipMemcpy_spt)(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpyToSymbol_spt)(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpyFromSymbol_spt)(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpy2D_spt)(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpy2DFromArray_spt)(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffset, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpy3D_spt)(const struct hipMemcpy3DParms* p); + +typedef hipError_t (*t_hipMemset_spt)(void* dst, int value, size_t sizeBytes); + +typedef hipError_t (*t_hipMemsetAsync_spt)(void* dst, int value, size_t sizeBytes, + hipStream_t stream); + +typedef hipError_t (*t_hipMemset2D_spt)(void* dst, size_t pitch, int value, size_t width, + size_t height); + +typedef hipError_t (*t_hipMemset2DAsync_spt)(void* dst, size_t pitch, int value, size_t width, + size_t height, hipStream_t stream); + +typedef hipError_t (*t_hipMemset3DAsync_spt)(hipPitchedPtr pitchedDevPtr, int value, + hipExtent extent, hipStream_t stream); + +typedef hipError_t (*t_hipMemset3D_spt)(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); + +typedef hipError_t (*t_hipMemcpyAsync_spt)(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream); + +typedef hipError_t (*t_hipMemcpy3DAsync_spt)(const hipMemcpy3DParms* p, 
hipStream_t stream); + +typedef hipError_t (*t_hipMemcpy2DAsync_spt)(void* dst, size_t dpitch, const void* src, + size_t spitch, size_t width, size_t height, + hipMemcpyKind kind, hipStream_t stream); + +typedef hipError_t (*t_hipMemcpyFromSymbolAsync_spt)(void* dst, const void* symbol, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream); + +typedef hipError_t (*t_hipMemcpyToSymbolAsync_spt)(const void* symbol, const void* src, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream); + +typedef hipError_t (*t_hipMemcpyFromArray_spt)(void* dst, hipArray_const_t src, size_t wOffsetSrc, + size_t hOffset, size_t count, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpy2DToArray_spt)(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); + +typedef hipError_t (*t_hipMemcpy2DFromArrayAsync_spt)(void* dst, size_t dpitch, + hipArray_const_t src, size_t wOffsetSrc, + size_t hOffsetSrc, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream); + +typedef hipError_t (*t_hipMemcpy2DToArrayAsync_spt)(hipArray_t dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream); + +typedef hipError_t (*t_hipStreamQuery_spt)(hipStream_t stream); + +typedef hipError_t (*t_hipStreamSynchronize_spt)(hipStream_t stream); + +typedef hipError_t (*t_hipStreamGetPriority_spt)(hipStream_t stream, int* priority); + +typedef hipError_t (*t_hipStreamWaitEvent_spt)(hipStream_t stream, hipEvent_t event, + unsigned int flags); + +typedef hipError_t (*t_hipStreamGetFlags_spt)(hipStream_t stream, unsigned int* flags); + +typedef hipError_t (*t_hipStreamAddCallback_spt)(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags); +typedef hipError_t (*t_hipEventRecord_spt)(hipEvent_t event, hipStream_t stream); +typedef hipError_t 
(*t_hipLaunchCooperativeKernel_spt)(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream); + +typedef hipError_t (*t_hipLaunchKernel_spt)(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream); + +typedef hipError_t (*t_hipGraphLaunch_spt)(hipGraphExec_t graphExec, hipStream_t stream); +typedef hipError_t (*t_hipStreamBeginCapture_spt)(hipStream_t stream, hipStreamCaptureMode mode); +typedef hipError_t (*t_hipStreamEndCapture_spt)(hipStream_t stream, hipGraph_t* pGraph); +typedef hipError_t (*t_hipStreamIsCapturing_spt)(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus); +typedef hipError_t (*t_hipStreamGetCaptureInfo_spt)(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); +typedef hipError_t (*t_hipStreamGetCaptureInfo_v2_spt)( + hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, + hipGraph_t* graph_out, const hipGraphNode_t** dependencies_out, size_t* numDependencies_out); +typedef hipError_t (*t_hipLaunchHostFunc_spt)(hipStream_t stream, hipHostFn_t fn, void* userData); +typedef hipChannelFormatDesc (*t_hipCreateChannelDesc)(int x, int y, int z, int w, + hipChannelFormatKind f); +typedef hipError_t (*t_hipExtModuleLaunchKernel)(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent, + hipEvent_t stopEvent, uint32_t flags); +typedef hipError_t (*t_hipHccModuleLaunchKernel)(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, 
hipEvent_t startEvent, + hipEvent_t stopEvent); +typedef int (*t_hipGetStreamDeviceId)(hipStream_t stream); +typedef hipError_t (*t_hipDrvGraphAddMemsetNode)(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, + size_t numDependencies, + const hipMemsetParams* memsetParams, hipCtx_t ctx); +typedef hipError_t (*t_hipGraphAddExternalSemaphoresWaitNode)( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphAddExternalSemaphoresSignalNode)( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreSignalNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphExternalSemaphoresSignalNodeSetParams)( + hipGraphNode_t hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphExternalSemaphoresWaitNodeSetParams)( + hipGraphNode_t hNode, const hipExternalSemaphoreWaitNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphExternalSemaphoresSignalNodeGetParams)( + hipGraphNode_t hNode, hipExternalSemaphoreSignalNodeParams* params_out); +typedef hipError_t (*t_hipGraphExternalSemaphoresWaitNodeGetParams)( + hipGraphNode_t hNode, hipExternalSemaphoreWaitNodeParams* params_out); +typedef hipError_t (*t_hipGraphExecExternalSemaphoresSignalNodeSetParams)( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreSignalNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphExecExternalSemaphoresWaitNodeSetParams)( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreWaitNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphAddNode)(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraphNodeParams* nodeParams); +typedef hipError_t 
(*t_hipGraphInstantiateWithParams)(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphInstantiateParams* instantiateParams); +typedef hipError_t (*t_hipExtGetLastError)(); +typedef hipError_t (*t_hipTexRefGetBorderColor)(float* pBorderColor, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetArray)(hipArray_t* pArray, const textureReference* texRef); + +typedef hipError_t (*t_hipTexRefGetBorderColor)(float* pBorderColor, + const textureReference* texRef); +typedef hipError_t (*t_hipTexRefGetArray)(hipArray_t* pArray, const textureReference* texRef); +typedef hipError_t (*t_hipGetProcAddress)(const char* symbol, void** pfn, int hipVersion, + uint64_t flags, + hipDriverProcAddressQueryResult* symbolStatus); +typedef hipError_t (*t_hipStreamBeginCaptureToGraph)(hipStream_t stream, hipGraph_t graph, + const hipGraphNode_t* dependencies, + const hipGraphEdgeData* dependencyData, + size_t numDependencies, + hipStreamCaptureMode mode); +typedef hipError_t (*t_hipGetFuncBySymbol)(hipFunction_t* functionPtr, const void* symbolPtr); +typedef hipError_t (*t_hipDrvGraphAddMemFreeNode)(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, + size_t numDependencies, hipDeviceptr_t dptr); + +typedef hipError_t (*t_hipDrvGraphExecMemcpyNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, + const HIP_MEMCPY3D* copyParams, + hipCtx_t ctx); + +typedef hipError_t (*t_hipDrvGraphExecMemsetNodeSetParams)(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, + const hipMemsetParams* memsetParams, + hipCtx_t ctx); +typedef hipError_t (*t_hipSetValidDevices)(int* device_arr, int len); +typedef hipError_t (*t_hipMemcpyAtoD)(hipDeviceptr_t dstDevice, hipArray_t srcArray, + size_t srcOffset, size_t ByteCount); +typedef hipError_t (*t_hipMemcpyDtoA)(hipArray_t dstArray, size_t dstOffset, + hipDeviceptr_t srcDevice, size_t ByteCount); +typedef hipError_t (*t_hipMemcpyAtoA)(hipArray_t dstArray, size_t dstOffset, hipArray_t 
srcArray, + size_t srcOffset, size_t ByteCount); +typedef hipError_t (*t_hipMemcpyAtoHAsync)(void* dstHost, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount, hipStream_t stream); +typedef hipError_t (*t_hipMemcpyHtoAAsync)(hipArray_t dstArray, size_t dstOffset, + const void* srcHost, size_t ByteCount, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpy2DArrayToArray)(hipArray_t dst, size_t wOffsetDst, + size_t hOffsetDst, hipArray_const_t src, + size_t wOffsetSrc, size_t hOffsetSrc, size_t width, + size_t height, hipMemcpyKind kind); + + +typedef hipError_t (*t_hipGraphExecGetFlags)(hipGraphExec_t graphExec, unsigned long long* flags); +typedef hipError_t (*t_hipGraphNodeSetParams)(hipGraphNode_t node, hipGraphNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphExecNodeSetParams)(hipGraphExec_t graphExec, hipGraphNode_t node, + hipGraphNodeParams* nodeParams); + + +typedef hipError_t (*t_hipExternalMemoryGetMappedMipmappedArray)( + hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem, + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc); +typedef hipError_t (*t_hipDrvGraphMemcpyNodeGetParams)(hipGraphNode_t hNode, + HIP_MEMCPY3D* nodeParams); + +typedef hipError_t (*t_hipDrvGraphMemcpyNodeSetParams)(hipGraphNode_t hNode, + const HIP_MEMCPY3D* nodeParams); + +typedef hipError_t (*t_hipExtHostAlloc)(void** ptr, size_t size, unsigned int flags); + +typedef hipError_t (*t_hipDeviceGetTexture1DLinearMaxWidth)(size_t* maxWidthInElements, + const hipChannelFormatDesc* fmtDesc, + int device); + +typedef hipError_t (*t_hipGraphAddBatchMemOpNode)(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, + size_t numDependencies, + const hipBatchMemOpNodeParams* nodeParams); +typedef hipError_t (*t_hipGraphBatchMemOpNodeGetParams)(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams_out); +typedef hipError_t (*t_hipGraphBatchMemOpNodeSetParams)(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams); +typedef 
hipError_t (*t_hipGraphExecBatchMemOpNodeSetParams)( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, const hipBatchMemOpNodeParams* nodeParams); +typedef hipError_t (*t_hipEventRecordWithFlags)(hipEvent_t event, hipStream_t stream, + unsigned int flags); +typedef hipError_t (*t_hipLaunchKernelExC)(const hipLaunchConfig_t* config, const void* fPtr, + void** args); +typedef hipError_t (*t_hipDrvLaunchKernelEx)(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, + void** params, void** extra); + +typedef hipError_t (*t_hipMemGetHandleForAddressRange)(void* handle, hipDeviceptr_t dptr, + size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags); +typedef hipError_t (*t_hipMemsetD2D8)(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, + size_t width, size_t height); +typedef hipError_t (*t_hipMemsetD2D8Async)(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, + size_t width, size_t height, hipStream_t stream); +typedef hipError_t (*t_hipMemsetD2D16)(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, + size_t width, size_t height); +typedef hipError_t (*t_hipMemsetD2D16Async)(hipDeviceptr_t dst, size_t dstPitch, + unsigned short value, size_t width, size_t height, + hipStream_t stream); +typedef hipError_t (*t_hipMemsetD2D32)(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, + size_t width, size_t height); +typedef hipError_t (*t_hipMemsetD2D32Async)(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, + size_t width, size_t height, hipStream_t stream); +typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue* value); +typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue* value_out); +typedef hipError_t (*t_hipModuleLoadFatBinary)(hipModule_t* module, const void* fatbin); +typedef hipError_t (*t_hipMemcpyBatchAsync)(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, 
size_t* attrsIdxs, + size_t numAttrs, size_t* failIdx, hipStream_t stream); +typedef hipError_t (*t_hipMemcpy3DBatchAsync)(size_t numOps, struct hipMemcpy3DBatchOp* opList, + size_t* failIdx, unsigned long long flags, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpy3DPeer)(hipMemcpy3DPeerParms* p); +typedef hipError_t (*t_hipMemcpy3DPeerAsync)(hipMemcpy3DPeerParms* p, hipStream_t stream); + +typedef hipError_t (*t_hipGetDriverEntryPoint)(const char* symbol, void** funcPtr, + unsigned long long flags, + hipDriverEntryPointQueryResult* status); +typedef hipError_t (*t_hipGetDriverEntryPoint_spt)(const char* symbol, void** funcPtr, + unsigned long long flags, + hipDriverEntryPointQueryResult* status); +typedef hipError_t (*t_hipLibraryLoadData)(hipLibrary_t* library, const void* code, + hipJitOption* jitOptions, void** jitOptionsValues, + unsigned int numJitOptions, + hipLibraryOption* libraryOptions, + void** libraryOptionValues, + unsigned int numLibraryOptions); +typedef hipError_t (*t_hipLibraryLoadFromFile)(hipLibrary_t* library, const char* fileName, + hipJitOption* jitOptions, void** jitOptionsValues, + unsigned int numJitOptions, + hipLibraryOption* libraryOptions, + void** libraryOptionValues, + unsigned int numLibraryOptions); +typedef hipError_t (*t_hipLibraryUnload)(hipLibrary_t library); +typedef hipError_t (*t_hipLibraryGetKernel)(hipKernel_t* pKernel, hipLibrary_t library, + const char* name); +typedef hipError_t (*t_hipLibraryGetKernelCount)(unsigned int *count, + hipLibrary_t library); +typedef hipError_t (*t_hipLibraryEnumerateKernels)(hipKernel_t* kernels, unsigned int numKernels, + hipLibrary_t library); +typedef hipError_t (*t_hipKernelGetLibrary)(hipLibrary_t* library, hipKernel_t kernel); +typedef hipError_t (*t_hipKernelGetName)(const char** name, hipKernel_t kernel); +typedef hipError_t (*t_hipGetProcAddress_spt)(const char* symbol, void** pfn, int hipVersion, uint64_t flags, + hipDriverProcAddressQueryResult* symbolStatus); +typedef 
hipError_t (*t_hipExtDisableLogging)(); +typedef hipError_t (*t_hipExtEnableLogging)(); +typedef hipError_t (*t_hipExtSetLoggingParams)(size_t log_level, size_t log_size, size_t log_mask); +typedef hipError_t (*t_hipKernelGetAttribute)(int* pi, hipFunction_attribute attrib, hipKernel_t kernel, + hipDevice_t dev); +typedef hipError_t (*t_hipKernelSetAttribute)(hipFunction_attribute attrib, + int value, hipKernel_t kernel, hipDevice_t dev); + +typedef hipError_t (*t_hipKernelGetFunction)(hipFunction_t* pFunc, hipKernel_t kernel); + + +typedef hipError_t (*t_hipKernelGetParamInfo)(hipKernel_t kernel, size_t paramIndex, + size_t* paramOffset, size_t* paramSize); +typedef hipError_t (*t_hipMemSetMemPool)(hipMemLocation* location, hipMemAllocationType type, + hipMemPool_t pool); +typedef hipError_t (*t_hipMemGetMemPool)(hipMemPool_t* pool, hipMemLocation* location, + hipMemAllocationType type); +typedef hipError_t (*t_hipMipmappedArrayGetMemoryRequirements)( + hipArrayMemoryRequirements* memoryRequirements, hipMipmappedArray_t mipmap, hipDevice_t device); +// HIP Compiler dispatch table: `size` first, then one `_fn` slot per __hip* entry, grouped by step version +struct HipCompilerDispatchTable { + // HIP_COMPILER_API_TABLE_STEP_VERSION == 0 + size_t size; + t___hipPopCallConfiguration __hipPopCallConfiguration_fn; + t___hipPushCallConfiguration __hipPushCallConfiguration_fn; + t___hipRegisterFatBinary __hipRegisterFatBinary_fn; + t___hipRegisterFunction __hipRegisterFunction_fn; + t___hipRegisterManagedVar __hipRegisterManagedVar_fn; + t___hipRegisterSurface __hipRegisterSurface_fn; + t___hipRegisterTexture __hipRegisterTexture_fn; + t___hipRegisterVar __hipRegisterVar_fn; + t___hipUnregisterFatBinary __hipUnregisterFatBinary_fn; + + // DO NOT EDIT ABOVE!
+ // HIP_COMPILER_API_TABLE_STEP_VERSION == 1 + + // ******************************************************************************************* // + // + // READ BELOW + // + // ******************************************************************************************* // + // KEEP AT END OF STRUCT + // 1) DO NOT REORDER ANY EXISTING MEMBERS + // 2) INCREASE STEP VERSION DEFINE BEFORE ADDING NEW MEMBERS + // 3) INSERT NEW MEMBERS UNDER APPROPRIATE STEP VERSION COMMENT + // 4) GENERATE COMMENT FOR NEXT STEP VERSION + // 5) ADD "DO NOT EDIT ABOVE!" COMMENT + // ******************************************************************************************* // +}; + +// HIP API dispatch table +struct HipDispatchTable { + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 0 + size_t size; + t_hipApiName hipApiName_fn; + t_hipArray3DCreate hipArray3DCreate_fn; + t_hipArray3DGetDescriptor hipArray3DGetDescriptor_fn; + t_hipArrayCreate hipArrayCreate_fn; + t_hipArrayDestroy hipArrayDestroy_fn; + t_hipArrayGetDescriptor hipArrayGetDescriptor_fn; + t_hipArrayGetInfo hipArrayGetInfo_fn; + t_hipBindTexture hipBindTexture_fn; + t_hipBindTexture2D hipBindTexture2D_fn; + t_hipBindTextureToArray hipBindTextureToArray_fn; + t_hipBindTextureToMipmappedArray hipBindTextureToMipmappedArray_fn; + t_hipChooseDevice hipChooseDevice_fn; + t_hipChooseDeviceR0000 hipChooseDeviceR0000_fn; + t_hipConfigureCall hipConfigureCall_fn; + t_hipCreateSurfaceObject hipCreateSurfaceObject_fn; + t_hipCreateTextureObject hipCreateTextureObject_fn; + t_hipCtxCreate hipCtxCreate_fn; + t_hipCtxDestroy hipCtxDestroy_fn; + t_hipCtxDisablePeerAccess hipCtxDisablePeerAccess_fn; + t_hipCtxEnablePeerAccess hipCtxEnablePeerAccess_fn; + t_hipCtxGetApiVersion hipCtxGetApiVersion_fn; + t_hipCtxGetCacheConfig hipCtxGetCacheConfig_fn; + t_hipCtxGetCurrent hipCtxGetCurrent_fn; + t_hipCtxGetDevice hipCtxGetDevice_fn; + t_hipCtxGetFlags hipCtxGetFlags_fn; + t_hipCtxGetSharedMemConfig hipCtxGetSharedMemConfig_fn; + t_hipCtxPopCurrent
hipCtxPopCurrent_fn; + t_hipCtxPushCurrent hipCtxPushCurrent_fn; + t_hipCtxSetCacheConfig hipCtxSetCacheConfig_fn; + t_hipCtxSetCurrent hipCtxSetCurrent_fn; + t_hipCtxSetSharedMemConfig hipCtxSetSharedMemConfig_fn; + t_hipCtxSynchronize hipCtxSynchronize_fn; + t_hipDestroyExternalMemory hipDestroyExternalMemory_fn; + t_hipDestroyExternalSemaphore hipDestroyExternalSemaphore_fn; + t_hipDestroySurfaceObject hipDestroySurfaceObject_fn; + t_hipDestroyTextureObject hipDestroyTextureObject_fn; + t_hipDeviceCanAccessPeer hipDeviceCanAccessPeer_fn; + t_hipDeviceComputeCapability hipDeviceComputeCapability_fn; + t_hipDeviceDisablePeerAccess hipDeviceDisablePeerAccess_fn; + t_hipDeviceEnablePeerAccess hipDeviceEnablePeerAccess_fn; + t_hipDeviceGet hipDeviceGet_fn; + t_hipDeviceGetAttribute hipDeviceGetAttribute_fn; + t_hipDeviceGetByPCIBusId hipDeviceGetByPCIBusId_fn; + t_hipDeviceGetCacheConfig hipDeviceGetCacheConfig_fn; + t_hipDeviceGetDefaultMemPool hipDeviceGetDefaultMemPool_fn; + t_hipDeviceGetGraphMemAttribute hipDeviceGetGraphMemAttribute_fn; + t_hipDeviceGetLimit hipDeviceGetLimit_fn; + t_hipDeviceGetMemPool hipDeviceGetMemPool_fn; + t_hipDeviceGetName hipDeviceGetName_fn; + t_hipDeviceGetP2PAttribute hipDeviceGetP2PAttribute_fn; + t_hipDeviceGetPCIBusId hipDeviceGetPCIBusId_fn; + t_hipDeviceGetSharedMemConfig hipDeviceGetSharedMemConfig_fn; + t_hipDeviceGetStreamPriorityRange hipDeviceGetStreamPriorityRange_fn; + t_hipDeviceGetUuid hipDeviceGetUuid_fn; + t_hipDeviceGraphMemTrim hipDeviceGraphMemTrim_fn; + t_hipDevicePrimaryCtxGetState hipDevicePrimaryCtxGetState_fn; + t_hipDevicePrimaryCtxRelease hipDevicePrimaryCtxRelease_fn; + t_hipDevicePrimaryCtxReset hipDevicePrimaryCtxReset_fn; + t_hipDevicePrimaryCtxRetain hipDevicePrimaryCtxRetain_fn; + t_hipDevicePrimaryCtxSetFlags hipDevicePrimaryCtxSetFlags_fn; + t_hipDeviceReset hipDeviceReset_fn; + t_hipDeviceSetCacheConfig hipDeviceSetCacheConfig_fn; + t_hipDeviceSetGraphMemAttribute hipDeviceSetGraphMemAttribute_fn; 
+ t_hipDeviceSetLimit hipDeviceSetLimit_fn; + t_hipDeviceSetMemPool hipDeviceSetMemPool_fn; + t_hipDeviceSetSharedMemConfig hipDeviceSetSharedMemConfig_fn; + t_hipDeviceSynchronize hipDeviceSynchronize_fn; + t_hipDeviceTotalMem hipDeviceTotalMem_fn; + t_hipDriverGetVersion hipDriverGetVersion_fn; + t_hipDrvGetErrorName hipDrvGetErrorName_fn; + t_hipDrvGetErrorString hipDrvGetErrorString_fn; + t_hipDrvGraphAddMemcpyNode hipDrvGraphAddMemcpyNode_fn; + t_hipDrvMemcpy2DUnaligned hipDrvMemcpy2DUnaligned_fn; + t_hipDrvMemcpy3D hipDrvMemcpy3D_fn; + t_hipDrvMemcpy3DAsync hipDrvMemcpy3DAsync_fn; + t_hipDrvPointerGetAttributes hipDrvPointerGetAttributes_fn; + t_hipEventCreate hipEventCreate_fn; + t_hipEventCreateWithFlags hipEventCreateWithFlags_fn; + t_hipEventDestroy hipEventDestroy_fn; + t_hipEventElapsedTime hipEventElapsedTime_fn; + t_hipEventQuery hipEventQuery_fn; + t_hipEventRecord hipEventRecord_fn; + t_hipEventSynchronize hipEventSynchronize_fn; + t_hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount_fn; + t_hipExtLaunchKernel hipExtLaunchKernel_fn; + t_hipExtLaunchMultiKernelMultiDevice hipExtLaunchMultiKernelMultiDevice_fn; + t_hipExtMallocWithFlags hipExtMallocWithFlags_fn; + t_hipExtStreamCreateWithCUMask hipExtStreamCreateWithCUMask_fn; + t_hipExtStreamGetCUMask hipExtStreamGetCUMask_fn; + t_hipExternalMemoryGetMappedBuffer hipExternalMemoryGetMappedBuffer_fn; + t_hipFree hipFree_fn; + t_hipFreeArray hipFreeArray_fn; + t_hipFreeAsync hipFreeAsync_fn; + t_hipFreeHost hipFreeHost_fn; + t_hipFreeMipmappedArray hipFreeMipmappedArray_fn; + t_hipFuncGetAttribute hipFuncGetAttribute_fn; + t_hipFuncGetAttributes hipFuncGetAttributes_fn; + t_hipFuncSetAttribute hipFuncSetAttribute_fn; + t_hipFuncSetCacheConfig hipFuncSetCacheConfig_fn; + t_hipFuncSetSharedMemConfig hipFuncSetSharedMemConfig_fn; + t_hipGLGetDevices hipGLGetDevices_fn; + t_hipGetChannelDesc hipGetChannelDesc_fn; + t_hipGetDevice hipGetDevice_fn; + t_hipGetDeviceCount hipGetDeviceCount_fn; + 
t_hipGetDeviceFlags hipGetDeviceFlags_fn; + t_hipGetDevicePropertiesR0600 hipGetDevicePropertiesR0600_fn; + t_hipGetDevicePropertiesR0000 hipGetDevicePropertiesR0000_fn; + t_hipGetErrorName hipGetErrorName_fn; + t_hipGetErrorString hipGetErrorString_fn; + t_hipGetLastError hipGetLastError_fn; + t_hipGetMipmappedArrayLevel hipGetMipmappedArrayLevel_fn; + t_hipGetSymbolAddress hipGetSymbolAddress_fn; + t_hipGetSymbolSize hipGetSymbolSize_fn; + t_hipGetTextureAlignmentOffset hipGetTextureAlignmentOffset_fn; + t_hipGetTextureObjectResourceDesc hipGetTextureObjectResourceDesc_fn; + t_hipGetTextureObjectResourceViewDesc hipGetTextureObjectResourceViewDesc_fn; + t_hipGetTextureObjectTextureDesc hipGetTextureObjectTextureDesc_fn; + t_hipGetTextureReference hipGetTextureReference_fn; + t_hipGraphAddChildGraphNode hipGraphAddChildGraphNode_fn; + t_hipGraphAddDependencies hipGraphAddDependencies_fn; + t_hipGraphAddEmptyNode hipGraphAddEmptyNode_fn; + t_hipGraphAddEventRecordNode hipGraphAddEventRecordNode_fn; + t_hipGraphAddEventWaitNode hipGraphAddEventWaitNode_fn; + t_hipGraphAddHostNode hipGraphAddHostNode_fn; + t_hipGraphAddKernelNode hipGraphAddKernelNode_fn; + t_hipGraphAddMemAllocNode hipGraphAddMemAllocNode_fn; + t_hipGraphAddMemFreeNode hipGraphAddMemFreeNode_fn; + t_hipGraphAddMemcpyNode hipGraphAddMemcpyNode_fn; + t_hipGraphAddMemcpyNode1D hipGraphAddMemcpyNode1D_fn; + t_hipGraphAddMemcpyNodeFromSymbol hipGraphAddMemcpyNodeFromSymbol_fn; + t_hipGraphAddMemcpyNodeToSymbol hipGraphAddMemcpyNodeToSymbol_fn; + t_hipGraphAddMemsetNode hipGraphAddMemsetNode_fn; + t_hipGraphChildGraphNodeGetGraph hipGraphChildGraphNodeGetGraph_fn; + t_hipGraphClone hipGraphClone_fn; + t_hipGraphCreate hipGraphCreate_fn; + t_hipGraphDebugDotPrint hipGraphDebugDotPrint_fn; + t_hipGraphDestroy hipGraphDestroy_fn; + t_hipGraphDestroyNode hipGraphDestroyNode_fn; + t_hipGraphEventRecordNodeGetEvent hipGraphEventRecordNodeGetEvent_fn; + t_hipGraphEventRecordNodeSetEvent 
hipGraphEventRecordNodeSetEvent_fn; + t_hipGraphEventWaitNodeGetEvent hipGraphEventWaitNodeGetEvent_fn; + t_hipGraphEventWaitNodeSetEvent hipGraphEventWaitNodeSetEvent_fn; + t_hipGraphExecChildGraphNodeSetParams hipGraphExecChildGraphNodeSetParams_fn; + t_hipGraphExecDestroy hipGraphExecDestroy_fn; + t_hipGraphExecEventRecordNodeSetEvent hipGraphExecEventRecordNodeSetEvent_fn; + t_hipGraphExecEventWaitNodeSetEvent hipGraphExecEventWaitNodeSetEvent_fn; + t_hipGraphExecHostNodeSetParams hipGraphExecHostNodeSetParams_fn; + t_hipGraphExecKernelNodeSetParams hipGraphExecKernelNodeSetParams_fn; + t_hipGraphExecMemcpyNodeSetParams hipGraphExecMemcpyNodeSetParams_fn; + t_hipGraphExecMemcpyNodeSetParams1D hipGraphExecMemcpyNodeSetParams1D_fn; + t_hipGraphExecMemcpyNodeSetParamsFromSymbol hipGraphExecMemcpyNodeSetParamsFromSymbol_fn; + t_hipGraphExecMemcpyNodeSetParamsToSymbol hipGraphExecMemcpyNodeSetParamsToSymbol_fn; + t_hipGraphExecMemsetNodeSetParams hipGraphExecMemsetNodeSetParams_fn; + t_hipGraphExecUpdate hipGraphExecUpdate_fn; + t_hipGraphGetEdges hipGraphGetEdges_fn; + t_hipGraphGetNodes hipGraphGetNodes_fn; + t_hipGraphGetRootNodes hipGraphGetRootNodes_fn; + t_hipGraphHostNodeGetParams hipGraphHostNodeGetParams_fn; + t_hipGraphHostNodeSetParams hipGraphHostNodeSetParams_fn; + t_hipGraphInstantiate hipGraphInstantiate_fn; + t_hipGraphInstantiateWithFlags hipGraphInstantiateWithFlags_fn; + t_hipGraphKernelNodeCopyAttributes hipGraphKernelNodeCopyAttributes_fn; + t_hipGraphKernelNodeGetAttribute hipGraphKernelNodeGetAttribute_fn; + t_hipGraphKernelNodeGetParams hipGraphKernelNodeGetParams_fn; + t_hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute_fn; + t_hipGraphKernelNodeSetParams hipGraphKernelNodeSetParams_fn; + t_hipGraphLaunch hipGraphLaunch_fn; + t_hipGraphMemAllocNodeGetParams hipGraphMemAllocNodeGetParams_fn; + t_hipGraphMemFreeNodeGetParams hipGraphMemFreeNodeGetParams_fn; + t_hipGraphMemcpyNodeGetParams hipGraphMemcpyNodeGetParams_fn; + 
t_hipGraphMemcpyNodeSetParams hipGraphMemcpyNodeSetParams_fn; + t_hipGraphMemcpyNodeSetParams1D hipGraphMemcpyNodeSetParams1D_fn; + t_hipGraphMemcpyNodeSetParamsFromSymbol hipGraphMemcpyNodeSetParamsFromSymbol_fn; + t_hipGraphMemcpyNodeSetParamsToSymbol hipGraphMemcpyNodeSetParamsToSymbol_fn; + t_hipGraphMemsetNodeGetParams hipGraphMemsetNodeGetParams_fn; + t_hipGraphMemsetNodeSetParams hipGraphMemsetNodeSetParams_fn; + t_hipGraphNodeFindInClone hipGraphNodeFindInClone_fn; + t_hipGraphNodeGetDependencies hipGraphNodeGetDependencies_fn; + t_hipGraphNodeGetDependentNodes hipGraphNodeGetDependentNodes_fn; + t_hipGraphNodeGetEnabled hipGraphNodeGetEnabled_fn; + t_hipGraphNodeGetType hipGraphNodeGetType_fn; + t_hipGraphNodeSetEnabled hipGraphNodeSetEnabled_fn; + t_hipGraphReleaseUserObject hipGraphReleaseUserObject_fn; + t_hipGraphRemoveDependencies hipGraphRemoveDependencies_fn; + t_hipGraphRetainUserObject hipGraphRetainUserObject_fn; + t_hipGraphUpload hipGraphUpload_fn; + t_hipGraphicsGLRegisterBuffer hipGraphicsGLRegisterBuffer_fn; + t_hipGraphicsGLRegisterImage hipGraphicsGLRegisterImage_fn; + t_hipGraphicsMapResources hipGraphicsMapResources_fn; + t_hipGraphicsResourceGetMappedPointer hipGraphicsResourceGetMappedPointer_fn; + t_hipGraphicsSubResourceGetMappedArray hipGraphicsSubResourceGetMappedArray_fn; + t_hipGraphicsUnmapResources hipGraphicsUnmapResources_fn; + t_hipGraphicsUnregisterResource hipGraphicsUnregisterResource_fn; + t_hipHostAlloc hipHostAlloc_fn; + t_hipHostFree hipHostFree_fn; + t_hipHostGetDevicePointer hipHostGetDevicePointer_fn; + t_hipHostGetFlags hipHostGetFlags_fn; + t_hipHostMalloc hipHostMalloc_fn; + t_hipHostRegister hipHostRegister_fn; + t_hipHostUnregister hipHostUnregister_fn; + t_hipImportExternalMemory hipImportExternalMemory_fn; + t_hipImportExternalSemaphore hipImportExternalSemaphore_fn; + t_hipInit hipInit_fn; + t_hipIpcCloseMemHandle hipIpcCloseMemHandle_fn; + t_hipIpcGetEventHandle hipIpcGetEventHandle_fn; + 
t_hipIpcGetMemHandle hipIpcGetMemHandle_fn; + t_hipIpcOpenEventHandle hipIpcOpenEventHandle_fn; + t_hipIpcOpenMemHandle hipIpcOpenMemHandle_fn; + t_hipKernelNameRef hipKernelNameRef_fn; + t_hipKernelNameRefByPtr hipKernelNameRefByPtr_fn; + t_hipLaunchByPtr hipLaunchByPtr_fn; + t_hipLaunchCooperativeKernel hipLaunchCooperativeKernel_fn; + t_hipLaunchCooperativeKernelMultiDevice hipLaunchCooperativeKernelMultiDevice_fn; + t_hipLaunchHostFunc hipLaunchHostFunc_fn; + t_hipLaunchKernel hipLaunchKernel_fn; + t_hipMalloc hipMalloc_fn; + t_hipMalloc3D hipMalloc3D_fn; + t_hipMalloc3DArray hipMalloc3DArray_fn; + t_hipMallocArray hipMallocArray_fn; + t_hipMallocAsync hipMallocAsync_fn; + t_hipMallocFromPoolAsync hipMallocFromPoolAsync_fn; + t_hipMallocHost hipMallocHost_fn; + t_hipMallocManaged hipMallocManaged_fn; + t_hipMallocMipmappedArray hipMallocMipmappedArray_fn; + t_hipMallocPitch hipMallocPitch_fn; + t_hipMemAddressFree hipMemAddressFree_fn; + t_hipMemAddressReserve hipMemAddressReserve_fn; + t_hipMemAdvise hipMemAdvise_fn; + t_hipMemAllocHost hipMemAllocHost_fn; + t_hipMemAllocPitch hipMemAllocPitch_fn; + t_hipMemCreate hipMemCreate_fn; + t_hipMemExportToShareableHandle hipMemExportToShareableHandle_fn; + t_hipMemGetAccess hipMemGetAccess_fn; + t_hipMemGetAddressRange hipMemGetAddressRange_fn; + t_hipMemGetAllocationGranularity hipMemGetAllocationGranularity_fn; + t_hipMemGetAllocationPropertiesFromHandle hipMemGetAllocationPropertiesFromHandle_fn; + t_hipMemGetInfo hipMemGetInfo_fn; + t_hipMemImportFromShareableHandle hipMemImportFromShareableHandle_fn; + t_hipMemMap hipMemMap_fn; + t_hipMemMapArrayAsync hipMemMapArrayAsync_fn; + t_hipMemPoolCreate hipMemPoolCreate_fn; + t_hipMemPoolDestroy hipMemPoolDestroy_fn; + t_hipMemPoolExportPointer hipMemPoolExportPointer_fn; + t_hipMemPoolExportToShareableHandle hipMemPoolExportToShareableHandle_fn; + t_hipMemPoolGetAccess hipMemPoolGetAccess_fn; + t_hipMemPoolGetAttribute hipMemPoolGetAttribute_fn; + 
t_hipMemPoolImportFromShareableHandle hipMemPoolImportFromShareableHandle_fn; + t_hipMemPoolImportPointer hipMemPoolImportPointer_fn; + t_hipMemPoolSetAccess hipMemPoolSetAccess_fn; + t_hipMemPoolSetAttribute hipMemPoolSetAttribute_fn; + t_hipMemPoolTrimTo hipMemPoolTrimTo_fn; + t_hipMemPrefetchAsync hipMemPrefetchAsync_fn; + t_hipMemPtrGetInfo hipMemPtrGetInfo_fn; + t_hipMemRangeGetAttribute hipMemRangeGetAttribute_fn; + t_hipMemRangeGetAttributes hipMemRangeGetAttributes_fn; + t_hipMemRelease hipMemRelease_fn; + t_hipMemRetainAllocationHandle hipMemRetainAllocationHandle_fn; + t_hipMemSetAccess hipMemSetAccess_fn; + t_hipMemUnmap hipMemUnmap_fn; + t_hipMemcpy hipMemcpy_fn; + t_hipMemcpy2D hipMemcpy2D_fn; + t_hipMemcpy2DAsync hipMemcpy2DAsync_fn; + t_hipMemcpy2DFromArray hipMemcpy2DFromArray_fn; + t_hipMemcpy2DFromArrayAsync hipMemcpy2DFromArrayAsync_fn; + t_hipMemcpy2DToArray hipMemcpy2DToArray_fn; + t_hipMemcpy2DToArrayAsync hipMemcpy2DToArrayAsync_fn; + t_hipMemcpy3D hipMemcpy3D_fn; + t_hipMemcpy3DAsync hipMemcpy3DAsync_fn; + t_hipMemcpyAsync hipMemcpyAsync_fn; + t_hipMemcpyAtoH hipMemcpyAtoH_fn; + t_hipMemcpyDtoD hipMemcpyDtoD_fn; + t_hipMemcpyDtoDAsync hipMemcpyDtoDAsync_fn; + t_hipMemcpyDtoH hipMemcpyDtoH_fn; + t_hipMemcpyDtoHAsync hipMemcpyDtoHAsync_fn; + t_hipMemcpyFromArray hipMemcpyFromArray_fn; + t_hipMemcpyFromSymbol hipMemcpyFromSymbol_fn; + t_hipMemcpyFromSymbolAsync hipMemcpyFromSymbolAsync_fn; + t_hipMemcpyHtoA hipMemcpyHtoA_fn; + t_hipMemcpyHtoD hipMemcpyHtoD_fn; + t_hipMemcpyHtoDAsync hipMemcpyHtoDAsync_fn; + t_hipMemcpyParam2D hipMemcpyParam2D_fn; + t_hipMemcpyParam2DAsync hipMemcpyParam2DAsync_fn; + t_hipMemcpyPeer hipMemcpyPeer_fn; + t_hipMemcpyPeerAsync hipMemcpyPeerAsync_fn; + t_hipMemcpyToArray hipMemcpyToArray_fn; + t_hipMemcpyToSymbol hipMemcpyToSymbol_fn; + t_hipMemcpyToSymbolAsync hipMemcpyToSymbolAsync_fn; + t_hipMemcpyWithStream hipMemcpyWithStream_fn; + t_hipMemset hipMemset_fn; + t_hipMemset2D hipMemset2D_fn; + t_hipMemset2DAsync 
hipMemset2DAsync_fn; + t_hipMemset3D hipMemset3D_fn; + t_hipMemset3DAsync hipMemset3DAsync_fn; + t_hipMemsetAsync hipMemsetAsync_fn; + t_hipMemsetD16 hipMemsetD16_fn; + t_hipMemsetD16Async hipMemsetD16Async_fn; + t_hipMemsetD32 hipMemsetD32_fn; + t_hipMemsetD32Async hipMemsetD32Async_fn; + t_hipMemsetD8 hipMemsetD8_fn; + t_hipMemsetD8Async hipMemsetD8Async_fn; + t_hipMipmappedArrayCreate hipMipmappedArrayCreate_fn; + t_hipMipmappedArrayDestroy hipMipmappedArrayDestroy_fn; + t_hipMipmappedArrayGetLevel hipMipmappedArrayGetLevel_fn; + t_hipModuleGetFunction hipModuleGetFunction_fn; + t_hipModuleGetGlobal hipModuleGetGlobal_fn; + t_hipModuleGetTexRef hipModuleGetTexRef_fn; + t_hipModuleLaunchCooperativeKernel hipModuleLaunchCooperativeKernel_fn; + t_hipModuleLaunchCooperativeKernelMultiDevice hipModuleLaunchCooperativeKernelMultiDevice_fn; + t_hipModuleLaunchKernel hipModuleLaunchKernel_fn; + t_hipModuleLoad hipModuleLoad_fn; + t_hipModuleLoadData hipModuleLoadData_fn; + t_hipModuleLoadDataEx hipModuleLoadDataEx_fn; + t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_fn; + t_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_fn; + t_hipModuleOccupancyMaxPotentialBlockSize hipModuleOccupancyMaxPotentialBlockSize_fn; + t_hipModuleOccupancyMaxPotentialBlockSizeWithFlags + hipModuleOccupancyMaxPotentialBlockSizeWithFlags_fn; + t_hipModuleUnload hipModuleUnload_fn; + t_hipOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor_fn; + t_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags + hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_fn; + t_hipOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize_fn; + t_hipPeekAtLastError hipPeekAtLastError_fn; + t_hipPointerGetAttribute hipPointerGetAttribute_fn; + t_hipPointerGetAttributes hipPointerGetAttributes_fn; + t_hipPointerSetAttribute 
hipPointerSetAttribute_fn; + t_hipProfilerStart hipProfilerStart_fn; + t_hipProfilerStop hipProfilerStop_fn; + t_hipRuntimeGetVersion hipRuntimeGetVersion_fn; + t_hipSetDevice hipSetDevice_fn; + t_hipSetDeviceFlags hipSetDeviceFlags_fn; + t_hipSetupArgument hipSetupArgument_fn; + t_hipSignalExternalSemaphoresAsync hipSignalExternalSemaphoresAsync_fn; + t_hipStreamAddCallback hipStreamAddCallback_fn; + t_hipStreamAttachMemAsync hipStreamAttachMemAsync_fn; + t_hipStreamBeginCapture hipStreamBeginCapture_fn; + t_hipStreamCreate hipStreamCreate_fn; + t_hipStreamCreateWithFlags hipStreamCreateWithFlags_fn; + t_hipStreamCreateWithPriority hipStreamCreateWithPriority_fn; + t_hipStreamDestroy hipStreamDestroy_fn; + t_hipStreamEndCapture hipStreamEndCapture_fn; + t_hipStreamGetCaptureInfo hipStreamGetCaptureInfo_fn; + t_hipStreamGetCaptureInfo_v2 hipStreamGetCaptureInfo_v2_fn; + t_hipStreamGetDevice hipStreamGetDevice_fn; + t_hipStreamGetFlags hipStreamGetFlags_fn; + t_hipStreamGetPriority hipStreamGetPriority_fn; + t_hipStreamIsCapturing hipStreamIsCapturing_fn; + t_hipStreamQuery hipStreamQuery_fn; + t_hipStreamSynchronize hipStreamSynchronize_fn; + t_hipStreamUpdateCaptureDependencies hipStreamUpdateCaptureDependencies_fn; + t_hipStreamWaitEvent hipStreamWaitEvent_fn; + t_hipStreamWaitValue32 hipStreamWaitValue32_fn; + t_hipStreamWaitValue64 hipStreamWaitValue64_fn; + t_hipStreamWriteValue32 hipStreamWriteValue32_fn; + t_hipStreamWriteValue64 hipStreamWriteValue64_fn; + t_hipTexObjectCreate hipTexObjectCreate_fn; + t_hipTexObjectDestroy hipTexObjectDestroy_fn; + t_hipTexObjectGetResourceDesc hipTexObjectGetResourceDesc_fn; + t_hipTexObjectGetResourceViewDesc hipTexObjectGetResourceViewDesc_fn; + t_hipTexObjectGetTextureDesc hipTexObjectGetTextureDesc_fn; + t_hipTexRefGetAddress hipTexRefGetAddress_fn; + t_hipTexRefGetAddressMode hipTexRefGetAddressMode_fn; + t_hipTexRefGetFilterMode hipTexRefGetFilterMode_fn; + t_hipTexRefGetFlags hipTexRefGetFlags_fn; + 
t_hipTexRefGetFormat hipTexRefGetFormat_fn; + t_hipTexRefGetMaxAnisotropy hipTexRefGetMaxAnisotropy_fn; + t_hipTexRefGetMipMappedArray hipTexRefGetMipMappedArray_fn; + t_hipTexRefGetMipmapFilterMode hipTexRefGetMipmapFilterMode_fn; + t_hipTexRefGetMipmapLevelBias hipTexRefGetMipmapLevelBias_fn; + t_hipTexRefGetMipmapLevelClamp hipTexRefGetMipmapLevelClamp_fn; + t_hipTexRefSetAddress hipTexRefSetAddress_fn; + t_hipTexRefSetAddress2D hipTexRefSetAddress2D_fn; + t_hipTexRefSetAddressMode hipTexRefSetAddressMode_fn; + t_hipTexRefSetArray hipTexRefSetArray_fn; + t_hipTexRefSetBorderColor hipTexRefSetBorderColor_fn; + t_hipTexRefSetFilterMode hipTexRefSetFilterMode_fn; + t_hipTexRefSetFlags hipTexRefSetFlags_fn; + t_hipTexRefSetFormat hipTexRefSetFormat_fn; + t_hipTexRefSetMaxAnisotropy hipTexRefSetMaxAnisotropy_fn; + t_hipTexRefSetMipmapFilterMode hipTexRefSetMipmapFilterMode_fn; + t_hipTexRefSetMipmapLevelBias hipTexRefSetMipmapLevelBias_fn; + t_hipTexRefSetMipmapLevelClamp hipTexRefSetMipmapLevelClamp_fn; + t_hipTexRefSetMipmappedArray hipTexRefSetMipmappedArray_fn; + t_hipThreadExchangeStreamCaptureMode hipThreadExchangeStreamCaptureMode_fn; + t_hipUnbindTexture hipUnbindTexture_fn; + t_hipUserObjectCreate hipUserObjectCreate_fn; + t_hipUserObjectRelease hipUserObjectRelease_fn; + t_hipUserObjectRetain hipUserObjectRetain_fn; + t_hipWaitExternalSemaphoresAsync hipWaitExternalSemaphoresAsync_fn; + t_hipCreateChannelDesc hipCreateChannelDesc_fn; + t_hipExtModuleLaunchKernel hipExtModuleLaunchKernel_fn; + t_hipHccModuleLaunchKernel hipHccModuleLaunchKernel_fn; + t_hipMemcpy_spt hipMemcpy_spt_fn; + t_hipMemcpyToSymbol_spt hipMemcpyToSymbol_spt_fn; + t_hipMemcpyFromSymbol_spt hipMemcpyFromSymbol_spt_fn; + t_hipMemcpy2D_spt hipMemcpy2D_spt_fn; + t_hipMemcpy2DFromArray_spt hipMemcpy2DFromArray_spt_fn; + t_hipMemcpy3D_spt hipMemcpy3D_spt_fn; + t_hipMemset_spt hipMemset_spt_fn; + t_hipMemsetAsync_spt hipMemsetAsync_spt_fn; + t_hipMemset2D_spt hipMemset2D_spt_fn; + 
t_hipMemset2DAsync_spt hipMemset2DAsync_spt_fn; + t_hipMemset3DAsync_spt hipMemset3DAsync_spt_fn; + t_hipMemset3D_spt hipMemset3D_spt_fn; + t_hipMemcpyAsync_spt hipMemcpyAsync_spt_fn; + t_hipMemcpy3DAsync_spt hipMemcpy3DAsync_spt_fn; + t_hipMemcpy2DAsync_spt hipMemcpy2DAsync_spt_fn; + t_hipMemcpyFromSymbolAsync_spt hipMemcpyFromSymbolAsync_spt_fn; + t_hipMemcpyToSymbolAsync_spt hipMemcpyToSymbolAsync_spt_fn; + t_hipMemcpyFromArray_spt hipMemcpyFromArray_spt_fn; + t_hipMemcpy2DToArray_spt hipMemcpy2DToArray_spt_fn; + t_hipMemcpy2DFromArrayAsync_spt hipMemcpy2DFromArrayAsync_spt_fn; + t_hipMemcpy2DToArrayAsync_spt hipMemcpy2DToArrayAsync_spt_fn; + t_hipStreamQuery_spt hipStreamQuery_spt_fn; + t_hipStreamSynchronize_spt hipStreamSynchronize_spt_fn; + t_hipStreamGetPriority_spt hipStreamGetPriority_spt_fn; + t_hipStreamWaitEvent_spt hipStreamWaitEvent_spt_fn; + t_hipStreamGetFlags_spt hipStreamGetFlags_spt_fn; + t_hipStreamAddCallback_spt hipStreamAddCallback_spt_fn; + t_hipEventRecord_spt hipEventRecord_spt_fn; + t_hipLaunchCooperativeKernel_spt hipLaunchCooperativeKernel_spt_fn; + t_hipLaunchKernel_spt hipLaunchKernel_spt_fn; + t_hipGraphLaunch_spt hipGraphLaunch_spt_fn; + t_hipStreamBeginCapture_spt hipStreamBeginCapture_spt_fn; + t_hipStreamEndCapture_spt hipStreamEndCapture_spt_fn; + t_hipStreamIsCapturing_spt hipStreamIsCapturing_spt_fn; + t_hipStreamGetCaptureInfo_spt hipStreamGetCaptureInfo_spt_fn; + t_hipStreamGetCaptureInfo_v2_spt hipStreamGetCaptureInfo_v2_spt_fn; + t_hipLaunchHostFunc_spt hipLaunchHostFunc_spt_fn; + t_hipGetStreamDeviceId hipGetStreamDeviceId_fn; + t_hipDrvGraphAddMemsetNode hipDrvGraphAddMemsetNode_fn; + t_hipGraphAddExternalSemaphoresWaitNode hipGraphAddExternalSemaphoresWaitNode_fn; + t_hipGraphAddExternalSemaphoresSignalNode hipGraphAddExternalSemaphoresSignalNode_fn; + t_hipGraphExternalSemaphoresSignalNodeSetParams hipGraphExternalSemaphoresSignalNodeSetParams_fn; + t_hipGraphExternalSemaphoresWaitNodeSetParams 
hipGraphExternalSemaphoresWaitNodeSetParams_fn; + t_hipGraphExternalSemaphoresSignalNodeGetParams hipGraphExternalSemaphoresSignalNodeGetParams_fn; + t_hipGraphExternalSemaphoresWaitNodeGetParams hipGraphExternalSemaphoresWaitNodeGetParams_fn; + t_hipGraphExecExternalSemaphoresSignalNodeSetParams + hipGraphExecExternalSemaphoresSignalNodeSetParams_fn; + t_hipGraphExecExternalSemaphoresWaitNodeSetParams + hipGraphExecExternalSemaphoresWaitNodeSetParams_fn; + t_hipGraphAddNode hipGraphAddNode_fn; + t_hipGraphInstantiateWithParams hipGraphInstantiateWithParams_fn; + t_hipExtGetLastError hipExtGetLastError_fn; + t_hipTexRefGetBorderColor hipTexRefGetBorderColor_fn; + t_hipTexRefGetArray hipTexRefGetArray_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 1 + t_hipGetProcAddress hipGetProcAddress_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 2 + t_hipStreamBeginCaptureToGraph hipStreamBeginCaptureToGraph_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 3 + t_hipGetFuncBySymbol hipGetFuncBySymbol_fn; + t_hipSetValidDevices hipSetValidDevices_fn; + t_hipMemcpyAtoD hipMemcpyAtoD_fn; + t_hipMemcpyDtoA hipMemcpyDtoA_fn; + t_hipMemcpyAtoA hipMemcpyAtoA_fn; + t_hipMemcpyAtoHAsync hipMemcpyAtoHAsync_fn; + t_hipMemcpyHtoAAsync hipMemcpyHtoAAsync_fn; + t_hipMemcpy2DArrayToArray hipMemcpy2DArrayToArray_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 4 + t_hipDrvGraphAddMemFreeNode hipDrvGraphAddMemFreeNode_fn; + t_hipDrvGraphExecMemcpyNodeSetParams hipDrvGraphExecMemcpyNodeSetParams_fn; + t_hipDrvGraphExecMemsetNodeSetParams hipDrvGraphExecMemsetNodeSetParams_fn; + t_hipGraphExecGetFlags hipGraphExecGetFlags_fn; + t_hipGraphNodeSetParams hipGraphNodeSetParams_fn; + t_hipGraphExecNodeSetParams hipGraphExecNodeSetParams_fn; + t_hipExternalMemoryGetMappedMipmappedArray hipExternalMemoryGetMappedMipmappedArray_fn; + t_hipDrvGraphMemcpyNodeGetParams hipDrvGraphMemcpyNodeGetParams_fn; + t_hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeSetParams_fn; + + // 
HIP_RUNTIME_API_TABLE_STEP_VERSION == 5 + t_hipExtHostAlloc hipExtHostAlloc_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 6 + t_hipDeviceGetTexture1DLinearMaxWidth hipDeviceGetTexture1DLinearMaxWidth_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 7 + t_hipStreamBatchMemOp hipStreamBatchMemOp_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 8 + t_hipGraphAddBatchMemOpNode hipGraphAddBatchMemOpNode_fn; + t_hipGraphBatchMemOpNodeGetParams hipGraphBatchMemOpNodeGetParams_fn; + t_hipGraphBatchMemOpNodeSetParams hipGraphBatchMemOpNodeSetParams_fn; + t_hipGraphExecBatchMemOpNodeSetParams hipGraphExecBatchMemOpNodeSetParams_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 9 + t_hipLinkAddData hipLinkAddData_fn; + t_hipLinkAddFile hipLinkAddFile_fn; + t_hipLinkComplete hipLinkComplete_fn; + t_hipLinkCreate hipLinkCreate_fn; + t_hipLinkDestroy hipLinkDestroy_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 10 + t_hipEventRecordWithFlags hipEventRecordWithFlags_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 11 + t_hipLaunchKernelExC hipLaunchKernelExC_fn; + t_hipDrvLaunchKernelEx hipDrvLaunchKernelEx_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 12 + t_hipMemGetHandleForAddressRange hipMemGetHandleForAddressRange_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 13 + // removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 14 + t_hipModuleGetFunctionCount hipModuleGetFunctionCount_fn; + t_hipMemsetD2D8 hipMemsetD2D8_fn; + t_hipMemsetD2D8Async hipMemsetD2D8Async_fn; + t_hipMemsetD2D16 hipMemsetD2D16_fn; + t_hipMemsetD2D16Async hipMemsetD2D16Async_fn; + t_hipMemsetD2D32 hipMemsetD2D32_fn; + t_hipMemsetD2D32Async hipMemsetD2D32Async_fn; + t_hipStreamGetAttribute hipStreamGetAttribute_fn; + t_hipStreamSetAttribute hipStreamSetAttribute_fn; + t_hipModuleLoadFatBinary hipModuleLoadFatBinary_fn; + t_hipMemcpyBatchAsync hipMemcpyBatchAsync_fn; + t_hipMemcpy3DBatchAsync hipMemcpy3DBatchAsync_fn; + t_hipMemcpy3DPeer 
hipMemcpy3DPeer_fn; + t_hipMemcpy3DPeerAsync hipMemcpy3DPeerAsync_fn; + t_hipGetDriverEntryPoint hipGetDriverEntryPoint_fn; + t_hipGetDriverEntryPoint_spt hipGetDriverEntryPoint_spt_fn; + t_hipMemPrefetchAsync_v2 hipMemPrefetchAsync_v2_fn; + t_hipMemAdvise_v2 hipMemAdvise_v2_fn; + t_hipStreamGetId hipStreamGetId_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 15 + t_hipLibraryLoadData hipLibraryLoadData_fn; + t_hipLibraryLoadFromFile hipLibraryLoadFromFile_fn; + t_hipLibraryUnload hipLibraryUnload_fn; + t_hipLibraryGetKernel hipLibraryGetKernel_fn; + t_hipLibraryGetKernelCount hipLibraryGetKernelCount_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 16 + t_hipStreamCopyAttributes hipStreamCopyAttributes_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION = 17 + t_hipLibraryEnumerateKernels hipLibraryEnumerateKernels_fn; + t_hipKernelGetLibrary hipKernelGetLibrary_fn; + t_hipKernelGetName hipKernelGetName_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 18 + t_hipOccupancyAvailableDynamicSMemPerBlock hipOccupancyAvailableDynamicSMemPerBlock_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 19 + t_hipGetProcAddress_spt hipGetProcAddress_spt_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 20 + t_hipKernelGetParamInfo hipKernelGetParamInfo_fn; + + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 21 + t_hipExtDisableLogging hipExtDisableLogging_fn; + t_hipExtEnableLogging hipExtEnableLogging_fn; + t_hipExtSetLoggingParams hipExtSetLoggingParams_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 22 + t_hipMemSetMemPool hipMemSetMemPool_fn; + t_hipMemGetMemPool hipMemGetMemPool_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 23 + t_hipMipmappedArrayGetMemoryRequirements hipMipmappedArrayGetMemoryRequirements_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 24 + t_hipKernelGetAttribute hipKernelGetAttribute_fn; + + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 25 + t_hipKernelSetAttribute hipKernelSetAttribute_fn; + t_hipKernelGetFunction hipKernelGetFunction_fn; + + // DO NOT EDIT ABOVE! 
+ // HIP_RUNTIME_API_TABLE_STEP_VERSION == 25 (NOTE(review): the section just above this marker is also labeled == 25, while HipToolsDispatchTable labels this spot with the next step; presumably this should read 26 - confirm against the HIP_RUNTIME_API_TABLE_STEP_VERSION define) + + // ******************************************************************************************* // + // + // READ BELOW + // + // ******************************************************************************************* // + // KEEP AT END OF STRUCT + // 1) DO NOT REORDER ANY EXISTING MEMBERS + // 2) INCREASE STEP VERSION DEFINE BEFORE ADDING NEW MEMBERS + // 3) INSERT NEW MEMBERS UNDER APPROPRIATE STEP VERSION COMMENT + // 4) GENERATE COMMENT FOR NEXT STEP VERSION + // 5) ADD "DO NOT EDIT ABOVE!" COMMENT + // ******************************************************************************************* // +}; + +// HIP Tools dispatch table: a `size` field first, then one `_fn` member per tools entry point (currently only __hipReportDevices). +struct HipToolsDispatchTable { + // HIP_TOOLS_API_TABLE_STEP_VERSION == 0 + size_t size; + t___hipReportDevices __hipReportDevices_fn; + + // DO NOT EDIT ABOVE! + // HIP_TOOLS_API_TABLE_STEP_VERSION == 1 + + // ******************************************************************************************* // + // + // READ BELOW + // + // ******************************************************************************************* // + // KEEP AT END OF STRUCT + // 1) DO NOT REORDER ANY EXISTING MEMBERS + // 2) INCREASE STEP VERSION DEFINE BEFORE ADDING NEW MEMBERS + // 3) INSERT NEW MEMBERS UNDER APPROPRIATE STEP VERSION COMMENT + // 4) GENERATE COMMENT FOR NEXT STEP VERSION + // 5) ADD "DO NOT EDIT ABOVE!" COMMENT + // ******************************************************************************************* // +}; diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_assert.h b/3rdparty/hip-headers/include/hip/amd_detail/hip_assert.h new file mode 100644 index 0000000000..00ed9efa38 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_assert.h @@ -0,0 +1,95 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if defined(__clang__) and defined(__HIP__) + +// abort: device-side definition that traps unconditionally via __builtin_trap(). +extern "C" __device__ inline __attribute__((weak)) void abort() { __builtin_trap(); } + +// The noinline attribute helps encapsulate the printf expansion, +// which otherwise has a performance impact just by increasing the +// size of the calling function. Additionally, the weak attribute +// allows the function to exist as a global although its definition is +// included in every compilation unit. +#if defined(_WIN32) || defined(_WIN64) +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) void _wassert( + const wchar_t* _msg, const wchar_t* _file, unsigned _line) { + // FIXME: Need `wchar_t` support to generate assertion message; until then the arguments are unused and this only traps.
+ __builtin_trap(); +} +#else /* defined(_WIN32) || defined(_WIN64) */ +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( + const char* assertion, const char* file, unsigned int line, const char* function) { + const char fmt[] = "%s:%u: %s: Device-side assertion `%s' failed.\n"; + + // strlen is not available as a built-in yet, so we create our own + // loop in a macro. With a string literal argument, the compiler + // usually manages to replace the loop with a constant. + // + // The macro does not check for null pointer, since all the string + // arguments are defined to be constant literals when called from + // the assert() macro. + // + // NOTE: The loop below includes the null terminator in the length + // as required by append_string_n(). +#define __hip_get_string_length(LEN, STR) \ + do { \ + const char* tmp = STR; \ + while (*tmp++); \ + LEN = tmp - STR; \ + } while (0) + + auto msg = __ockl_fprintf_stderr_begin(); + int len = 0; + __hip_get_string_length(len, fmt); + msg = __ockl_fprintf_append_string_n(msg, fmt, len, 0); + __hip_get_string_length(len, file); + msg = __ockl_fprintf_append_string_n(msg, file, len, 0); + msg = __ockl_fprintf_append_args(msg, 1, line, 0, 0, 0, 0, 0, 0, 0); + __hip_get_string_length(len, function); + msg = __ockl_fprintf_append_string_n(msg, function, len, 0); + __hip_get_string_length(len, assertion); + __ockl_fprintf_append_string_n(msg, assertion, len, /* is_last = */ 1); + +#undef __hip_get_string_length + + __builtin_trap(); +} + +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) void __assertfail() { + // Takes no arguments and prints no message for now; it only traps.
+ __builtin_trap(); +} +#endif /* defined(_WIN32) || defined(_WIN64) */ + +#if defined(NDEBUG) +#define __hip_assert(COND) +#else +#define __hip_assert(COND) \ + do { \ + if (!(COND)) __builtin_trap(); \ + } while (0) +#endif + +#endif // defined(__clang__) and defined(__HIP__) diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_fp16_math_fwd.h b/3rdparty/hip-headers/include/hip/amd_detail/hip_fp16_math_fwd.h new file mode 100644 index 0000000000..46759c8a5a --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_fp16_math_fwd.h @@ -0,0 +1,93 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#pragma once + +// /* +// Half Math Functions: declarations only (no definitions) of the __ocml_* and __ockl_* device routines for _Float16 and the 2-element __2f16 vector. +// */ +#if !defined(__HIPCC_RTC__) +#include "host_defines.h" +#endif +#ifndef __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +extern "C" { +__device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); +__device__ _Float16 __ocml_cos_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); +__device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); +__device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); +__device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); +__device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); +__device__ _Float16 __ocml_sin_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); +__device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); +__device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); + +typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); +typedef short __2i16 __attribute__((ext_vector_type(2))); + +#if defined(__clang__) && defined(__HIP__) +__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); +#endif + +__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
+__device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); +__device__ __2f16 __ocml_cos_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); +__device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); +__device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); +__device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); +__device__ __2f16 __ocml_sin_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); + +__device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); +__device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); +__device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); +} +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +// TODO: remove these after they get into the clang header __clang_hip_libdevice_declares.h; they are declared unconditionally here and repeat declarations from the guarded block above. +extern "C" { +__device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); +__device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); +__device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); +__device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); +__device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); +} diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_ldg.h
b/3rdparty/hip-headers/include/hip/amd_detail/hip_ldg.h new file mode 100644 index 0000000000..ce1fb51f46 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_ldg.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H + +#if __HIP_CLANG_ONLY__ +#include "amd_hip_vector_types.h" +#include "host_defines.h" + +__device__ inline static char __ldg(const char* ptr) { return *ptr; } + +__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; } + +__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; } + +__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; } + +__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } + + +__device__ inline static short __ldg(const short* ptr) { return ptr[0]; } + +__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; } + +__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; } + +__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } + + +__device__ inline static int __ldg(const int* ptr) { return ptr[0]; } + +__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; } + +__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; } + +__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } + + +__device__ inline static long __ldg(const long* ptr) { return ptr[0]; } + +__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } + + +__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; } + +__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } + +__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } + + +__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } + +__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } + + +__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } + + +__device__ inline static uint2 __ldg(const uint2* ptr) { return
ptr[0]; } + +__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; } + + +__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } + + +__device__ inline static float __ldg(const float* ptr) { return ptr[0]; } + +__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; } + +__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; } + + +__device__ inline static double __ldg(const double* ptr) { return ptr[0]; } + +__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; } + +#endif // __HIP_CLANG_ONLY__ (end of the __ldg overloads; each one just dereferences ptr and returns the element by value) + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_prof_str.h b/3rdparty/hip-headers/include/hip/amd_detail/hip_prof_str.h new file mode 100644 index 0000000000..13d85b3dd0 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_prof_str.h @@ -0,0 +1,11944 @@ +// Generated file. DO NOT EDIT. +// +// This file is automatically generated by the hip_prof_gen.py script. +// If changes are required, run the script and commit the updated file.
+ +#ifndef _HIP_PROF_STR_H +#define _HIP_PROF_STR_H +#define HIP_PROF_VER 1 + +#include +#include +#include "amd_hip_gl_interop.h" + +#define HIP_API_ID_CONCAT_HELPER(a,b) a##b +#define HIP_API_ID_CONCAT(a,b) HIP_API_ID_CONCAT_HELPER(a,b) + +// HIP API callbacks ID enumeration +enum hip_api_id_t { + HIP_API_ID_NONE = 0, + HIP_API_ID_FIRST = 1, + HIP_API_ID___hipPopCallConfiguration = 1, + HIP_API_ID___hipPushCallConfiguration = 2, + HIP_API_ID_hipArray3DCreate = 3, + HIP_API_ID_hipArrayCreate = 4, + HIP_API_ID_hipArrayDestroy = 5, + HIP_API_ID_hipChooseDeviceR0000 = 6, + HIP_API_ID_hipConfigureCall = 7, + HIP_API_ID_hipCtxCreate = 8, + HIP_API_ID_hipCtxDestroy = 9, + HIP_API_ID_hipCtxDisablePeerAccess = 10, + HIP_API_ID_hipCtxEnablePeerAccess = 11, + HIP_API_ID_hipCtxGetApiVersion = 12, + HIP_API_ID_hipCtxGetCacheConfig = 13, + HIP_API_ID_hipCtxGetCurrent = 14, + HIP_API_ID_hipCtxGetDevice = 15, + HIP_API_ID_hipCtxGetFlags = 16, + HIP_API_ID_hipCtxGetSharedMemConfig = 17, + HIP_API_ID_hipCtxPopCurrent = 18, + HIP_API_ID_hipCtxPushCurrent = 19, + HIP_API_ID_hipCtxSetCacheConfig = 20, + HIP_API_ID_hipCtxSetCurrent = 21, + HIP_API_ID_hipCtxSetSharedMemConfig = 22, + HIP_API_ID_hipCtxSynchronize = 23, + HIP_API_ID_hipDestroyExternalMemory = 24, + HIP_API_ID_hipDestroyExternalSemaphore = 25, + HIP_API_ID_hipDeviceCanAccessPeer = 26, + HIP_API_ID_hipDeviceComputeCapability = 27, + HIP_API_ID_hipDeviceDisablePeerAccess = 28, + HIP_API_ID_hipDeviceEnablePeerAccess = 29, + HIP_API_ID_hipDeviceGet = 30, + HIP_API_ID_hipDeviceGetAttribute = 31, + HIP_API_ID_hipDeviceGetByPCIBusId = 32, + HIP_API_ID_hipDeviceGetCacheConfig = 33, + HIP_API_ID_hipDeviceGetLimit = 34, + HIP_API_ID_hipDeviceGetName = 35, + HIP_API_ID_hipDeviceGetP2PAttribute = 36, + HIP_API_ID_hipDeviceGetPCIBusId = 37, + HIP_API_ID_hipDeviceGetSharedMemConfig = 38, + HIP_API_ID_hipDeviceGetStreamPriorityRange = 39, + HIP_API_ID_hipDevicePrimaryCtxGetState = 40, + HIP_API_ID_hipDevicePrimaryCtxRelease = 41, + 
HIP_API_ID_hipDevicePrimaryCtxReset = 42, + HIP_API_ID_hipDevicePrimaryCtxRetain = 43, + HIP_API_ID_hipDevicePrimaryCtxSetFlags = 44, + HIP_API_ID_hipDeviceReset = 45, + HIP_API_ID_hipDeviceSetCacheConfig = 46, + HIP_API_ID_hipDeviceSetSharedMemConfig = 47, + HIP_API_ID_hipDeviceSynchronize = 48, + HIP_API_ID_hipDeviceTotalMem = 49, + HIP_API_ID_RESERVED_50 = 50, + HIP_API_ID_hipDrvMemcpy2DUnaligned = 51, + HIP_API_ID_hipDrvMemcpy3D = 52, + HIP_API_ID_hipDrvMemcpy3DAsync = 53, + HIP_API_ID_hipEventCreate = 54, + HIP_API_ID_hipEventCreateWithFlags = 55, + HIP_API_ID_hipEventDestroy = 56, + HIP_API_ID_hipEventElapsedTime = 57, + HIP_API_ID_hipEventQuery = 58, + HIP_API_ID_hipEventRecord = 59, + HIP_API_ID_hipEventSynchronize = 60, + HIP_API_ID_hipExtGetLinkTypeAndHopCount = 61, + HIP_API_ID_hipExtLaunchKernel = 62, + HIP_API_ID_hipExtLaunchMultiKernelMultiDevice = 63, + HIP_API_ID_hipExtMallocWithFlags = 64, + HIP_API_ID_hipExtModuleLaunchKernel = 65, + HIP_API_ID_hipExtStreamCreateWithCUMask = 66, + HIP_API_ID_hipExtStreamGetCUMask = 67, + HIP_API_ID_hipExternalMemoryGetMappedBuffer = 68, + HIP_API_ID_hipFree = 69, + HIP_API_ID_hipFreeArray = 70, + HIP_API_ID_hipFreeHost = 71, + HIP_API_ID_hipFreeMipmappedArray = 72, + HIP_API_ID_hipFuncGetAttribute = 73, + HIP_API_ID_hipFuncGetAttributes = 74, + HIP_API_ID_hipFuncSetAttribute = 75, + HIP_API_ID_hipFuncSetCacheConfig = 76, + HIP_API_ID_hipFuncSetSharedMemConfig = 77, + HIP_API_ID_hipGetDevice = 78, + HIP_API_ID_hipGetDeviceCount = 79, + HIP_API_ID_hipGetDeviceFlags = 80, + HIP_API_ID_hipGetDevicePropertiesR0000 = 81, + HIP_API_ID_RESERVED_82 = 82, + HIP_API_ID_RESERVED_83 = 83, + HIP_API_ID_hipGetLastError = 84, + HIP_API_ID_hipGetMipmappedArrayLevel = 85, + HIP_API_ID_hipGetSymbolAddress = 86, + HIP_API_ID_hipGetSymbolSize = 87, + HIP_API_ID_hipHccModuleLaunchKernel = 88, + HIP_API_ID_hipHostAlloc = 89, + HIP_API_ID_hipHostFree = 90, + HIP_API_ID_hipHostGetDevicePointer = 91, + HIP_API_ID_hipHostGetFlags = 92, + 
HIP_API_ID_hipHostMalloc = 93, + HIP_API_ID_hipHostRegister = 94, + HIP_API_ID_hipHostUnregister = 95, + HIP_API_ID_hipImportExternalMemory = 96, + HIP_API_ID_hipImportExternalSemaphore = 97, + HIP_API_ID_hipInit = 98, + HIP_API_ID_hipIpcCloseMemHandle = 99, + HIP_API_ID_hipIpcGetEventHandle = 100, + HIP_API_ID_hipIpcGetMemHandle = 101, + HIP_API_ID_hipIpcOpenEventHandle = 102, + HIP_API_ID_hipIpcOpenMemHandle = 103, + HIP_API_ID_hipLaunchByPtr = 104, + HIP_API_ID_hipLaunchCooperativeKernel = 105, + HIP_API_ID_hipLaunchCooperativeKernelMultiDevice = 106, + HIP_API_ID_hipLaunchKernel = 107, + HIP_API_ID_hipMalloc = 108, + HIP_API_ID_hipMalloc3D = 109, + HIP_API_ID_hipMalloc3DArray = 110, + HIP_API_ID_hipMallocArray = 111, + HIP_API_ID_hipMallocHost = 112, + HIP_API_ID_hipMallocManaged = 113, + HIP_API_ID_hipMallocMipmappedArray = 114, + HIP_API_ID_hipMallocPitch = 115, + HIP_API_ID_hipMemAdvise = 116, + HIP_API_ID_hipMemAllocHost = 117, + HIP_API_ID_hipMemAllocPitch = 118, + HIP_API_ID_hipMemGetAddressRange = 119, + HIP_API_ID_hipMemGetInfo = 120, + HIP_API_ID_hipMemPrefetchAsync = 121, + HIP_API_ID_hipMemPtrGetInfo = 122, + HIP_API_ID_hipMemRangeGetAttribute = 123, + HIP_API_ID_hipMemRangeGetAttributes = 124, + HIP_API_ID_hipMemcpy = 125, + HIP_API_ID_hipMemcpy2D = 126, + HIP_API_ID_hipMemcpy2DAsync = 127, + HIP_API_ID_hipMemcpy2DFromArray = 128, + HIP_API_ID_hipMemcpy2DFromArrayAsync = 129, + HIP_API_ID_hipMemcpy2DToArray = 130, + HIP_API_ID_hipMemcpy2DToArrayAsync = 131, + HIP_API_ID_hipMemcpy3D = 132, + HIP_API_ID_hipMemcpy3DAsync = 133, + HIP_API_ID_hipMemcpyAsync = 134, + HIP_API_ID_hipMemcpyAtoH = 135, + HIP_API_ID_hipMemcpyDtoD = 136, + HIP_API_ID_hipMemcpyDtoDAsync = 137, + HIP_API_ID_hipMemcpyDtoH = 138, + HIP_API_ID_hipMemcpyDtoHAsync = 139, + HIP_API_ID_hipMemcpyFromArray = 140, + HIP_API_ID_hipMemcpyFromSymbol = 141, + HIP_API_ID_hipMemcpyFromSymbolAsync = 142, + HIP_API_ID_hipMemcpyHtoA = 143, + HIP_API_ID_hipMemcpyHtoD = 144, + 
HIP_API_ID_hipMemcpyHtoDAsync = 145, + HIP_API_ID_hipMemcpyParam2D = 146, + HIP_API_ID_hipMemcpyParam2DAsync = 147, + HIP_API_ID_hipMemcpyPeer = 148, + HIP_API_ID_hipMemcpyPeerAsync = 149, + HIP_API_ID_hipMemcpyToArray = 150, + HIP_API_ID_hipMemcpyToSymbol = 151, + HIP_API_ID_hipMemcpyToSymbolAsync = 152, + HIP_API_ID_hipMemcpyWithStream = 153, + HIP_API_ID_hipMemset = 154, + HIP_API_ID_hipMemset2D = 155, + HIP_API_ID_hipMemset2DAsync = 156, + HIP_API_ID_hipMemset3D = 157, + HIP_API_ID_hipMemset3DAsync = 158, + HIP_API_ID_hipMemsetAsync = 159, + HIP_API_ID_hipMemsetD16 = 160, + HIP_API_ID_hipMemsetD16Async = 161, + HIP_API_ID_hipMemsetD32 = 162, + HIP_API_ID_hipMemsetD32Async = 163, + HIP_API_ID_hipMemsetD8 = 164, + HIP_API_ID_hipMemsetD8Async = 165, + HIP_API_ID_hipModuleGetFunction = 166, + HIP_API_ID_hipModuleGetGlobal = 167, + HIP_API_ID_hipModuleGetTexRef = 168, + HIP_API_ID_hipModuleLaunchKernel = 169, + HIP_API_ID_hipModuleLoad = 170, + HIP_API_ID_hipModuleLoadData = 171, + HIP_API_ID_hipModuleLoadDataEx = 172, + HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor = 173, + HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 174, + HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize = 175, + HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags = 176, + HIP_API_ID_hipModuleUnload = 177, + HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor = 178, + HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 179, + HIP_API_ID_hipOccupancyMaxPotentialBlockSize = 180, + HIP_API_ID_hipPeekAtLastError = 181, + HIP_API_ID_hipPointerGetAttributes = 182, + HIP_API_ID_hipProfilerStart = 183, + HIP_API_ID_hipProfilerStop = 184, + HIP_API_ID_RESERVED_185 = 185, + HIP_API_ID_hipSetDevice = 186, + HIP_API_ID_hipSetDeviceFlags = 187, + HIP_API_ID_hipSetupArgument = 188, + HIP_API_ID_hipSignalExternalSemaphoresAsync = 189, + HIP_API_ID_hipStreamAddCallback = 190, + HIP_API_ID_hipStreamAttachMemAsync = 191, + 
HIP_API_ID_hipStreamCreate = 192, + HIP_API_ID_hipStreamCreateWithFlags = 193, + HIP_API_ID_hipStreamCreateWithPriority = 194, + HIP_API_ID_hipStreamDestroy = 195, + HIP_API_ID_hipStreamGetFlags = 196, + HIP_API_ID_hipStreamGetPriority = 197, + HIP_API_ID_hipStreamQuery = 198, + HIP_API_ID_hipStreamSynchronize = 199, + HIP_API_ID_hipStreamWaitEvent = 200, + HIP_API_ID_hipStreamWaitValue32 = 201, + HIP_API_ID_hipStreamWaitValue64 = 202, + HIP_API_ID_hipStreamWriteValue32 = 203, + HIP_API_ID_hipStreamWriteValue64 = 204, + HIP_API_ID_hipWaitExternalSemaphoresAsync = 205, + HIP_API_ID_hipCreateSurfaceObject = 206, + HIP_API_ID_hipDestroySurfaceObject = 207, + HIP_API_ID_hipGraphAddKernelNode = 208, + HIP_API_ID_hipGraphAddMemcpyNode = 209, + HIP_API_ID_hipGraphAddMemsetNode = 210, + HIP_API_ID_hipGraphCreate = 211, + HIP_API_ID_hipGraphDestroy = 212, + HIP_API_ID_hipGraphExecDestroy = 213, + HIP_API_ID_hipGraphInstantiate = 214, + HIP_API_ID_hipGraphLaunch = 215, + HIP_API_ID_hipMipmappedArrayCreate = 216, + HIP_API_ID_hipMipmappedArrayDestroy = 217, + HIP_API_ID_hipMipmappedArrayGetLevel = 218, + HIP_API_ID_hipStreamBeginCapture = 219, + HIP_API_ID_hipStreamEndCapture = 220, + HIP_API_ID_hipTexRefGetAddress = 221, + HIP_API_ID_hipTexRefGetFlags = 222, + HIP_API_ID_hipTexRefGetFormat = 223, + HIP_API_ID_hipTexRefGetMaxAnisotropy = 224, + HIP_API_ID_hipTexRefGetMipMappedArray = 225, + HIP_API_ID_hipTexRefGetMipmapLevelBias = 226, + HIP_API_ID_hipTexRefGetMipmapLevelClamp = 227, + HIP_API_ID_hipTexRefSetAddress = 228, + HIP_API_ID_hipTexRefSetAddress2D = 229, + HIP_API_ID_hipTexRefSetBorderColor = 230, + HIP_API_ID_hipTexRefSetFormat = 231, + HIP_API_ID_hipTexRefSetMaxAnisotropy = 232, + HIP_API_ID_hipTexRefSetMipmapLevelClamp = 233, + HIP_API_ID_hipTexRefSetMipmappedArray = 234, + HIP_API_ID_hipGLGetDevices = 235, + HIP_API_ID_hipGraphAddDependencies = 236, + HIP_API_ID_hipGraphAddEmptyNode = 237, + HIP_API_ID_hipGraphExecKernelNodeSetParams = 238, + 
HIP_API_ID_hipGraphGetNodes = 239, + HIP_API_ID_hipGraphGetRootNodes = 240, + HIP_API_ID_hipGraphKernelNodeGetParams = 241, + HIP_API_ID_hipGraphKernelNodeSetParams = 242, + HIP_API_ID_hipGraphMemcpyNodeGetParams = 243, + HIP_API_ID_hipGraphMemcpyNodeSetParams = 244, + HIP_API_ID_hipGraphMemsetNodeGetParams = 245, + HIP_API_ID_hipGraphMemsetNodeSetParams = 246, + HIP_API_ID_hipGraphicsGLRegisterBuffer = 247, + HIP_API_ID_hipGraphicsMapResources = 248, + HIP_API_ID_hipGraphicsResourceGetMappedPointer = 249, + HIP_API_ID_hipGraphicsUnmapResources = 250, + HIP_API_ID_hipGraphicsUnregisterResource = 251, + HIP_API_ID_hipGraphAddChildGraphNode = 252, + HIP_API_ID_hipGraphAddEventRecordNode = 253, + HIP_API_ID_hipGraphAddEventWaitNode = 254, + HIP_API_ID_hipGraphAddHostNode = 255, + HIP_API_ID_hipGraphAddMemcpyNode1D = 256, + HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol = 257, + HIP_API_ID_hipGraphAddMemcpyNodeToSymbol = 258, + HIP_API_ID_hipGraphChildGraphNodeGetGraph = 259, + HIP_API_ID_hipGraphClone = 260, + HIP_API_ID_hipGraphDestroyNode = 261, + HIP_API_ID_hipGraphEventRecordNodeGetEvent = 262, + HIP_API_ID_hipGraphEventRecordNodeSetEvent = 263, + HIP_API_ID_hipGraphEventWaitNodeGetEvent = 264, + HIP_API_ID_hipGraphEventWaitNodeSetEvent = 265, + HIP_API_ID_hipGraphExecChildGraphNodeSetParams = 266, + HIP_API_ID_hipGraphExecEventRecordNodeSetEvent = 267, + HIP_API_ID_hipGraphExecEventWaitNodeSetEvent = 268, + HIP_API_ID_hipGraphExecHostNodeSetParams = 269, + HIP_API_ID_hipGraphExecMemcpyNodeSetParams = 270, + HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D = 271, + HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol = 272, + HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol = 273, + HIP_API_ID_hipGraphExecMemsetNodeSetParams = 274, + HIP_API_ID_hipGraphExecUpdate = 275, + HIP_API_ID_hipGraphGetEdges = 276, + HIP_API_ID_hipGraphHostNodeGetParams = 277, + HIP_API_ID_hipGraphHostNodeSetParams = 278, + HIP_API_ID_hipGraphInstantiateWithFlags = 279, + 
HIP_API_ID_hipGraphMemcpyNodeSetParams1D = 280, + HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol = 281, + HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol = 282, + HIP_API_ID_hipGraphNodeFindInClone = 283, + HIP_API_ID_hipGraphNodeGetDependencies = 284, + HIP_API_ID_hipGraphNodeGetDependentNodes = 285, + HIP_API_ID_hipGraphNodeGetType = 286, + HIP_API_ID_hipGraphRemoveDependencies = 287, + HIP_API_ID_hipStreamGetCaptureInfo = 288, + HIP_API_ID_hipStreamGetCaptureInfo_v2 = 289, + HIP_API_ID_hipStreamIsCapturing = 290, + HIP_API_ID_hipStreamUpdateCaptureDependencies = 291, + HIP_API_ID_hipDrvPointerGetAttributes = 292, + HIP_API_ID_hipGraphicsGLRegisterImage = 293, + HIP_API_ID_hipGraphicsSubResourceGetMappedArray = 294, + HIP_API_ID_hipPointerGetAttribute = 295, + HIP_API_ID_RESERVED_296 = 296, + HIP_API_ID_hipThreadExchangeStreamCaptureMode = 297, + HIP_API_ID_hipDeviceGetUuid = 298, + HIP_API_ID_hipGetChannelDesc = 299, + HIP_API_ID_hipGraphKernelNodeGetAttribute = 300, + HIP_API_ID_hipGraphKernelNodeSetAttribute = 301, + HIP_API_ID_hipLaunchHostFunc = 302, + HIP_API_ID_hipDeviceGetDefaultMemPool = 303, + HIP_API_ID_hipDeviceGetMemPool = 304, + HIP_API_ID_hipDeviceSetMemPool = 305, + HIP_API_ID_hipFreeAsync = 306, + HIP_API_ID_hipMallocAsync = 307, + HIP_API_ID_hipMallocFromPoolAsync = 308, + HIP_API_ID_hipMemPoolCreate = 309, + HIP_API_ID_hipMemPoolDestroy = 310, + HIP_API_ID_hipMemPoolExportPointer = 311, + HIP_API_ID_hipMemPoolExportToShareableHandle = 312, + HIP_API_ID_hipMemPoolGetAccess = 313, + HIP_API_ID_hipMemPoolGetAttribute = 314, + HIP_API_ID_hipMemPoolImportFromShareableHandle = 315, + HIP_API_ID_hipMemPoolImportPointer = 316, + HIP_API_ID_hipMemPoolSetAccess = 317, + HIP_API_ID_hipMemPoolSetAttribute = 318, + HIP_API_ID_hipMemPoolTrimTo = 319, + HIP_API_ID_hipMemAddressFree = 320, + HIP_API_ID_hipMemAddressReserve = 321, + HIP_API_ID_hipMemCreate = 322, + HIP_API_ID_hipMemExportToShareableHandle = 323, + HIP_API_ID_hipMemGetAccess = 324, + 
HIP_API_ID_hipMemGetAllocationGranularity = 325, + HIP_API_ID_hipMemGetAllocationPropertiesFromHandle = 326, + HIP_API_ID_hipMemImportFromShareableHandle = 327, + HIP_API_ID_hipMemMap = 328, + HIP_API_ID_hipMemMapArrayAsync = 329, + HIP_API_ID_hipMemRelease = 330, + HIP_API_ID_hipMemRetainAllocationHandle = 331, + HIP_API_ID_hipMemSetAccess = 332, + HIP_API_ID_hipMemUnmap = 333, + HIP_API_ID_hipDeviceSetGraphMemAttribute = 334, + HIP_API_ID_hipDeviceGetGraphMemAttribute = 335, + HIP_API_ID_hipDeviceGraphMemTrim = 336, + HIP_API_ID_hipDeviceSetLimit = 337, + HIP_API_ID_hipTexRefSetArray = 338, + HIP_API_ID_hipTexRefSetFlags = 339, + HIP_API_ID_hipTexRefSetMipmapLevelBias = 340, + HIP_API_ID_hipDriverGetVersion = 341, + HIP_API_ID_hipGraphUpload = 342, + HIP_API_ID_hipRuntimeGetVersion = 343, + HIP_API_ID_hipUserObjectCreate = 344, + HIP_API_ID_hipUserObjectRelease = 345, + HIP_API_ID_hipUserObjectRetain = 346, + HIP_API_ID_hipGraphRetainUserObject = 347, + HIP_API_ID_hipGraphReleaseUserObject = 348, + HIP_API_ID_hipGraphDebugDotPrint = 349, + HIP_API_ID_hipGraphKernelNodeCopyAttributes = 350, + HIP_API_ID_hipGraphNodeGetEnabled = 351, + HIP_API_ID_hipGraphNodeSetEnabled = 352, + HIP_API_ID_hipPointerSetAttribute = 353, + HIP_API_ID_hipGraphAddMemAllocNode = 354, + HIP_API_ID_hipGraphAddMemFreeNode = 355, + HIP_API_ID_hipGraphMemAllocNodeGetParams = 356, + HIP_API_ID_hipGraphMemFreeNodeGetParams = 357, + HIP_API_ID_hipModuleLaunchCooperativeKernel = 358, + HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice = 359, + HIP_API_ID_hipArray3DGetDescriptor = 360, + HIP_API_ID_hipArrayGetDescriptor = 361, + HIP_API_ID_hipArrayGetInfo = 362, + HIP_API_ID_hipStreamGetDevice = 363, + HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray = 364, + HIP_API_ID_hipChooseDeviceR0600 = 365, + HIP_API_ID_hipDrvGraphAddMemcpyNode = 366, + HIP_API_ID_hipDrvGraphAddMemsetNode = 367, + HIP_API_ID_hipDrvGraphMemcpyNodeGetParams = 368, + HIP_API_ID_hipDrvGraphMemcpyNodeSetParams = 369, + 
HIP_API_ID_hipGetDevicePropertiesR0600 = 370, + HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode = 371, + HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode = 372, + HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams = 373, + HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams = 374, + HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams = 375, + HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams = 376, + HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams = 377, + HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams = 378, + HIP_API_ID_hipExtGetLastError = 379, + HIP_API_ID_hipGraphAddNode = 380, + HIP_API_ID_hipGetProcAddress = 381, + HIP_API_ID_hipGraphExecGetFlags = 382, + HIP_API_ID_hipGraphExecNodeSetParams = 383, + HIP_API_ID_hipGraphInstantiateWithParams = 384, + HIP_API_ID_hipGraphNodeSetParams = 385, + HIP_API_ID_hipDrvGraphAddMemFreeNode = 386, + HIP_API_ID_hipDrvGraphExecMemcpyNodeSetParams = 387, + HIP_API_ID_hipDrvGraphExecMemsetNodeSetParams = 388, + HIP_API_ID_hipTexRefGetArray = 389, + HIP_API_ID_hipTexRefGetBorderColor = 390, + HIP_API_ID_hipStreamBeginCaptureToGraph = 391, + HIP_API_ID_hipGetFuncBySymbol = 392, + HIP_API_ID_RESERVED_393 = 393, + HIP_API_ID_RESERVED_394 = 394, + HIP_API_ID_RESERVED_395 = 395, + HIP_API_ID_RESERVED_396 = 396, + HIP_API_ID_RESERVED_397 = 397, + HIP_API_ID_RESERVED_398 = 398, + HIP_API_ID_RESERVED_399 = 399, + HIP_API_ID_hipMemcpy2DArrayToArray = 400, + HIP_API_ID_hipMemcpyAtoA = 401, + HIP_API_ID_hipMemcpyAtoD = 402, + HIP_API_ID_hipMemcpyAtoHAsync = 403, + HIP_API_ID_hipMemcpyDtoA = 404, + HIP_API_ID_hipMemcpyHtoAAsync = 405, + HIP_API_ID_hipSetValidDevices = 406, + HIP_API_ID_RESERVED_407 = 407, + HIP_API_ID_hipStreamBatchMemOp = 408, + HIP_API_ID_hipGraphAddBatchMemOpNode = 409, + HIP_API_ID_hipGraphBatchMemOpNodeGetParams = 410, + HIP_API_ID_hipGraphBatchMemOpNodeSetParams = 411, + HIP_API_ID_hipGraphExecBatchMemOpNodeSetParams = 412, + HIP_API_ID_hipEventRecordWithFlags = 413, + 
HIP_API_ID_hipLinkAddData = 414, + HIP_API_ID_hipLinkAddFile = 415, + HIP_API_ID_hipLinkComplete = 416, + HIP_API_ID_hipLinkCreate = 417, + HIP_API_ID_hipLinkDestroy = 418, + HIP_API_ID_hipLaunchKernelExC = 419, + HIP_API_ID_hipDrvLaunchKernelEx = 420, + HIP_API_ID_hipModuleGetFunctionCount = 421, + HIP_API_ID_hipMemsetD2D16 = 422, + HIP_API_ID_hipMemsetD2D16Async = 423, + HIP_API_ID_hipMemsetD2D32 = 424, + HIP_API_ID_hipMemsetD2D32Async = 425, + HIP_API_ID_hipMemsetD2D8 = 426, + HIP_API_ID_hipMemsetD2D8Async = 427, + HIP_API_ID_hipStreamGetAttribute = 428, + HIP_API_ID_hipStreamSetAttribute = 429, + HIP_API_ID_hipModuleLoadFatBinary = 430, + HIP_API_ID_hipMemcpy3DBatchAsync = 431, + HIP_API_ID_hipMemcpy3DPeer = 432, + HIP_API_ID_hipMemcpy3DPeerAsync = 433, + HIP_API_ID_hipMemcpyBatchAsync = 434, + HIP_API_ID_hipGetDriverEntryPoint = 435, + HIP_API_ID_hipMemPrefetchAsync_v2 = 436, + HIP_API_ID_hipMemAdvise_v2 = 437, + HIP_API_ID_hipStreamGetId = 438, + HIP_API_ID_hipLibraryLoadData = 439, + HIP_API_ID_hipLibraryLoadFromFile = 440, + HIP_API_ID_hipLibraryUnload = 441, + HIP_API_ID_hipLibraryGetKernel = 442, + HIP_API_ID_hipLibraryGetKernelCount = 443, + HIP_API_ID_hipMemGetHandleForAddressRange = 444, + HIP_API_ID_LAST = 444, + + HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), + HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), + + HIP_API_ID_hipBindTexture = HIP_API_ID_NONE, + HIP_API_ID_hipBindTexture2D = HIP_API_ID_NONE, + HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NONE, + HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NONE, + HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NONE, + HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NONE, + HIP_API_ID_hipDeviceGetCount = HIP_API_ID_NONE, + HIP_API_ID_hipDeviceGetTexture1DLinearMaxWidth = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureAlignmentOffset = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NONE, + 
HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureReference = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetResourceDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetResourceViewDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetTextureDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetAddressMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetMipmapFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetMipmapFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipUnbindTexture = HIP_API_ID_NONE, +}; + +#undef HIP_API_ID_CONCAT_HELPER +#undef HIP_API_ID_CONCAT + +// Return the HIP API string for a given callback ID +static inline const char* hip_api_name(const uint32_t id) { + switch(id) { + case HIP_API_ID___hipPopCallConfiguration: return "__hipPopCallConfiguration"; + case HIP_API_ID___hipPushCallConfiguration: return "__hipPushCallConfiguration"; + case HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate"; + case HIP_API_ID_hipArray3DGetDescriptor: return "hipArray3DGetDescriptor"; + case HIP_API_ID_hipArrayCreate: return "hipArrayCreate"; + case HIP_API_ID_hipArrayDestroy: return "hipArrayDestroy"; + case HIP_API_ID_hipArrayGetDescriptor: return "hipArrayGetDescriptor"; + case HIP_API_ID_hipArrayGetInfo: return "hipArrayGetInfo"; + case HIP_API_ID_hipChooseDeviceR0000: return "hipChooseDeviceR0000"; + case HIP_API_ID_hipChooseDeviceR0600: return "hipChooseDeviceR0600"; + case HIP_API_ID_hipConfigureCall: return "hipConfigureCall"; + case HIP_API_ID_hipCreateSurfaceObject: return "hipCreateSurfaceObject"; + case HIP_API_ID_hipCtxCreate: return "hipCtxCreate"; + case HIP_API_ID_hipCtxDestroy: return 
"hipCtxDestroy"; + case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess"; + case HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess"; + case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion"; + case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig"; + case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent"; + case HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice"; + case HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags"; + case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig"; + case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent"; + case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent"; + case HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig"; + case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent"; + case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig"; + case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize"; + case HIP_API_ID_hipDestroyExternalMemory: return "hipDestroyExternalMemory"; + case HIP_API_ID_hipDestroyExternalSemaphore: return "hipDestroyExternalSemaphore"; + case HIP_API_ID_hipDestroySurfaceObject: return "hipDestroySurfaceObject"; + case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer"; + case HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability"; + case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess"; + case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess"; + case HIP_API_ID_hipDeviceGet: return "hipDeviceGet"; + case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute"; + case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId"; + case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig"; + case HIP_API_ID_hipDeviceGetDefaultMemPool: return "hipDeviceGetDefaultMemPool"; + case HIP_API_ID_hipDeviceGetGraphMemAttribute: return "hipDeviceGetGraphMemAttribute"; + case 
HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit"; + case HIP_API_ID_hipDeviceGetMemPool: return "hipDeviceGetMemPool"; + case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName"; + case HIP_API_ID_hipDeviceGetP2PAttribute: return "hipDeviceGetP2PAttribute"; + case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId"; + case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig"; + case HIP_API_ID_hipDeviceGetStreamPriorityRange: return "hipDeviceGetStreamPriorityRange"; + case HIP_API_ID_hipDeviceGetUuid: return "hipDeviceGetUuid"; + case HIP_API_ID_hipDeviceGraphMemTrim: return "hipDeviceGraphMemTrim"; + case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState"; + case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease"; + case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset"; + case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain"; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags"; + case HIP_API_ID_hipDeviceReset: return "hipDeviceReset"; + case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig"; + case HIP_API_ID_hipDeviceSetGraphMemAttribute: return "hipDeviceSetGraphMemAttribute"; + case HIP_API_ID_hipDeviceSetLimit: return "hipDeviceSetLimit"; + case HIP_API_ID_hipDeviceSetMemPool: return "hipDeviceSetMemPool"; + case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig"; + case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize"; + case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem"; + case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion"; + case HIP_API_ID_hipDrvGraphAddMemFreeNode: return "hipDrvGraphAddMemFreeNode"; + case HIP_API_ID_hipDrvGraphAddMemcpyNode: return "hipDrvGraphAddMemcpyNode"; + case HIP_API_ID_hipDrvGraphAddMemsetNode: return "hipDrvGraphAddMemsetNode"; + case 
HIP_API_ID_hipDrvGraphExecMemcpyNodeSetParams: return "hipDrvGraphExecMemcpyNodeSetParams"; + case HIP_API_ID_hipDrvGraphExecMemsetNodeSetParams: return "hipDrvGraphExecMemsetNodeSetParams"; + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: return "hipDrvGraphMemcpyNodeGetParams"; + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: return "hipDrvGraphMemcpyNodeSetParams"; + case HIP_API_ID_hipDrvLaunchKernelEx: return "hipDrvLaunchKernelEx"; + case HIP_API_ID_hipDrvMemcpy2DUnaligned: return "hipDrvMemcpy2DUnaligned"; + case HIP_API_ID_hipDrvMemcpy3D: return "hipDrvMemcpy3D"; + case HIP_API_ID_hipDrvMemcpy3DAsync: return "hipDrvMemcpy3DAsync"; + case HIP_API_ID_hipDrvPointerGetAttributes: return "hipDrvPointerGetAttributes"; + case HIP_API_ID_hipEventCreate: return "hipEventCreate"; + case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags"; + case HIP_API_ID_hipEventDestroy: return "hipEventDestroy"; + case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime"; + case HIP_API_ID_hipEventQuery: return "hipEventQuery"; + case HIP_API_ID_hipEventRecord: return "hipEventRecord"; + case HIP_API_ID_hipEventRecordWithFlags: return "hipEventRecordWithFlags"; + case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; + case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount"; + case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel"; + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice"; + case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags"; + case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel"; + case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask"; + case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask"; + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: return "hipExternalMemoryGetMappedBuffer"; + case 
HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: return "hipExternalMemoryGetMappedMipmappedArray"; + case HIP_API_ID_hipFree: return "hipFree"; + case HIP_API_ID_hipFreeArray: return "hipFreeArray"; + case HIP_API_ID_hipFreeAsync: return "hipFreeAsync"; + case HIP_API_ID_hipFreeHost: return "hipFreeHost"; + case HIP_API_ID_hipFreeMipmappedArray: return "hipFreeMipmappedArray"; + case HIP_API_ID_hipFuncGetAttribute: return "hipFuncGetAttribute"; + case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes"; + case HIP_API_ID_hipFuncSetAttribute: return "hipFuncSetAttribute"; + case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig"; + case HIP_API_ID_hipFuncSetSharedMemConfig: return "hipFuncSetSharedMemConfig"; + case HIP_API_ID_hipGLGetDevices: return "hipGLGetDevices"; + case HIP_API_ID_hipGetChannelDesc: return "hipGetChannelDesc"; + case HIP_API_ID_hipGetDevice: return "hipGetDevice"; + case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount"; + case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags"; + case HIP_API_ID_hipGetDevicePropertiesR0000: return "hipGetDevicePropertiesR0000"; + case HIP_API_ID_hipGetDevicePropertiesR0600: return "hipGetDevicePropertiesR0600"; + case HIP_API_ID_hipGetDriverEntryPoint: return "hipGetDriverEntryPoint"; + case HIP_API_ID_hipGetFuncBySymbol: return "hipGetFuncBySymbol"; + case HIP_API_ID_hipGetLastError: return "hipGetLastError"; + case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel"; + case HIP_API_ID_hipGetProcAddress: return "hipGetProcAddress"; + case HIP_API_ID_hipGetSymbolAddress: return "hipGetSymbolAddress"; + case HIP_API_ID_hipGetSymbolSize: return "hipGetSymbolSize"; + case HIP_API_ID_hipGraphAddBatchMemOpNode: return "hipGraphAddBatchMemOpNode"; + case HIP_API_ID_hipGraphAddChildGraphNode: return "hipGraphAddChildGraphNode"; + case HIP_API_ID_hipGraphAddDependencies: return "hipGraphAddDependencies"; + case HIP_API_ID_hipGraphAddEmptyNode: return 
"hipGraphAddEmptyNode"; + case HIP_API_ID_hipGraphAddEventRecordNode: return "hipGraphAddEventRecordNode"; + case HIP_API_ID_hipGraphAddEventWaitNode: return "hipGraphAddEventWaitNode"; + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: return "hipGraphAddExternalSemaphoresSignalNode"; + case HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: return "hipGraphAddExternalSemaphoresWaitNode"; + case HIP_API_ID_hipGraphAddHostNode: return "hipGraphAddHostNode"; + case HIP_API_ID_hipGraphAddKernelNode: return "hipGraphAddKernelNode"; + case HIP_API_ID_hipGraphAddMemAllocNode: return "hipGraphAddMemAllocNode"; + case HIP_API_ID_hipGraphAddMemFreeNode: return "hipGraphAddMemFreeNode"; + case HIP_API_ID_hipGraphAddMemcpyNode: return "hipGraphAddMemcpyNode"; + case HIP_API_ID_hipGraphAddMemcpyNode1D: return "hipGraphAddMemcpyNode1D"; + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: return "hipGraphAddMemcpyNodeFromSymbol"; + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: return "hipGraphAddMemcpyNodeToSymbol"; + case HIP_API_ID_hipGraphAddMemsetNode: return "hipGraphAddMemsetNode"; + case HIP_API_ID_hipGraphAddNode: return "hipGraphAddNode"; + case HIP_API_ID_hipGraphBatchMemOpNodeGetParams: return "hipGraphBatchMemOpNodeGetParams"; + case HIP_API_ID_hipGraphBatchMemOpNodeSetParams: return "hipGraphBatchMemOpNodeSetParams"; + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: return "hipGraphChildGraphNodeGetGraph"; + case HIP_API_ID_hipGraphClone: return "hipGraphClone"; + case HIP_API_ID_hipGraphCreate: return "hipGraphCreate"; + case HIP_API_ID_hipGraphDebugDotPrint: return "hipGraphDebugDotPrint"; + case HIP_API_ID_hipGraphDestroy: return "hipGraphDestroy"; + case HIP_API_ID_hipGraphDestroyNode: return "hipGraphDestroyNode"; + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: return "hipGraphEventRecordNodeGetEvent"; + case HIP_API_ID_hipGraphEventRecordNodeSetEvent: return "hipGraphEventRecordNodeSetEvent"; + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: return 
"hipGraphEventWaitNodeGetEvent"; + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: return "hipGraphEventWaitNodeSetEvent"; + case HIP_API_ID_hipGraphExecBatchMemOpNodeSetParams: return "hipGraphExecBatchMemOpNodeSetParams"; + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: return "hipGraphExecChildGraphNodeSetParams"; + case HIP_API_ID_hipGraphExecDestroy: return "hipGraphExecDestroy"; + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: return "hipGraphExecEventRecordNodeSetEvent"; + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: return "hipGraphExecEventWaitNodeSetEvent"; + case HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: return "hipGraphExecExternalSemaphoresSignalNodeSetParams"; + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: return "hipGraphExecExternalSemaphoresWaitNodeSetParams"; + case HIP_API_ID_hipGraphExecGetFlags: return "hipGraphExecGetFlags"; + case HIP_API_ID_hipGraphExecHostNodeSetParams: return "hipGraphExecHostNodeSetParams"; + case HIP_API_ID_hipGraphExecKernelNodeSetParams: return "hipGraphExecKernelNodeSetParams"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: return "hipGraphExecMemcpyNodeSetParams"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: return "hipGraphExecMemcpyNodeSetParams1D"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: return "hipGraphExecMemcpyNodeSetParamsFromSymbol"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: return "hipGraphExecMemcpyNodeSetParamsToSymbol"; + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: return "hipGraphExecMemsetNodeSetParams"; + case HIP_API_ID_hipGraphExecNodeSetParams: return "hipGraphExecNodeSetParams"; + case HIP_API_ID_hipGraphExecUpdate: return "hipGraphExecUpdate"; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: return "hipGraphExternalSemaphoresSignalNodeGetParams"; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: return "hipGraphExternalSemaphoresSignalNodeSetParams"; + case 
HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: return "hipGraphExternalSemaphoresWaitNodeGetParams"; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: return "hipGraphExternalSemaphoresWaitNodeSetParams"; + case HIP_API_ID_hipGraphGetEdges: return "hipGraphGetEdges"; + case HIP_API_ID_hipGraphGetNodes: return "hipGraphGetNodes"; + case HIP_API_ID_hipGraphGetRootNodes: return "hipGraphGetRootNodes"; + case HIP_API_ID_hipGraphHostNodeGetParams: return "hipGraphHostNodeGetParams"; + case HIP_API_ID_hipGraphHostNodeSetParams: return "hipGraphHostNodeSetParams"; + case HIP_API_ID_hipGraphInstantiate: return "hipGraphInstantiate"; + case HIP_API_ID_hipGraphInstantiateWithFlags: return "hipGraphInstantiateWithFlags"; + case HIP_API_ID_hipGraphInstantiateWithParams: return "hipGraphInstantiateWithParams"; + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: return "hipGraphKernelNodeCopyAttributes"; + case HIP_API_ID_hipGraphKernelNodeGetAttribute: return "hipGraphKernelNodeGetAttribute"; + case HIP_API_ID_hipGraphKernelNodeGetParams: return "hipGraphKernelNodeGetParams"; + case HIP_API_ID_hipGraphKernelNodeSetAttribute: return "hipGraphKernelNodeSetAttribute"; + case HIP_API_ID_hipGraphKernelNodeSetParams: return "hipGraphKernelNodeSetParams"; + case HIP_API_ID_hipGraphLaunch: return "hipGraphLaunch"; + case HIP_API_ID_hipGraphMemAllocNodeGetParams: return "hipGraphMemAllocNodeGetParams"; + case HIP_API_ID_hipGraphMemFreeNodeGetParams: return "hipGraphMemFreeNodeGetParams"; + case HIP_API_ID_hipGraphMemcpyNodeGetParams: return "hipGraphMemcpyNodeGetParams"; + case HIP_API_ID_hipGraphMemcpyNodeSetParams: return "hipGraphMemcpyNodeSetParams"; + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: return "hipGraphMemcpyNodeSetParams1D"; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: return "hipGraphMemcpyNodeSetParamsFromSymbol"; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: return "hipGraphMemcpyNodeSetParamsToSymbol"; + case 
HIP_API_ID_hipGraphMemsetNodeGetParams: return "hipGraphMemsetNodeGetParams"; + case HIP_API_ID_hipGraphMemsetNodeSetParams: return "hipGraphMemsetNodeSetParams"; + case HIP_API_ID_hipGraphNodeFindInClone: return "hipGraphNodeFindInClone"; + case HIP_API_ID_hipGraphNodeGetDependencies: return "hipGraphNodeGetDependencies"; + case HIP_API_ID_hipGraphNodeGetDependentNodes: return "hipGraphNodeGetDependentNodes"; + case HIP_API_ID_hipGraphNodeGetEnabled: return "hipGraphNodeGetEnabled"; + case HIP_API_ID_hipGraphNodeGetType: return "hipGraphNodeGetType"; + case HIP_API_ID_hipGraphNodeSetEnabled: return "hipGraphNodeSetEnabled"; + case HIP_API_ID_hipGraphNodeSetParams: return "hipGraphNodeSetParams"; + case HIP_API_ID_hipGraphReleaseUserObject: return "hipGraphReleaseUserObject"; + case HIP_API_ID_hipGraphRemoveDependencies: return "hipGraphRemoveDependencies"; + case HIP_API_ID_hipGraphRetainUserObject: return "hipGraphRetainUserObject"; + case HIP_API_ID_hipGraphUpload: return "hipGraphUpload"; + case HIP_API_ID_hipGraphicsGLRegisterBuffer: return "hipGraphicsGLRegisterBuffer"; + case HIP_API_ID_hipGraphicsGLRegisterImage: return "hipGraphicsGLRegisterImage"; + case HIP_API_ID_hipGraphicsMapResources: return "hipGraphicsMapResources"; + case HIP_API_ID_hipGraphicsResourceGetMappedPointer: return "hipGraphicsResourceGetMappedPointer"; + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: return "hipGraphicsSubResourceGetMappedArray"; + case HIP_API_ID_hipGraphicsUnmapResources: return "hipGraphicsUnmapResources"; + case HIP_API_ID_hipGraphicsUnregisterResource: return "hipGraphicsUnregisterResource"; + case HIP_API_ID_hipHccModuleLaunchKernel: return "hipHccModuleLaunchKernel"; + case HIP_API_ID_hipHostAlloc: return "hipHostAlloc"; + case HIP_API_ID_hipHostFree: return "hipHostFree"; + case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer"; + case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags"; + case HIP_API_ID_hipHostMalloc: return 
"hipHostMalloc"; + case HIP_API_ID_hipHostRegister: return "hipHostRegister"; + case HIP_API_ID_hipHostUnregister: return "hipHostUnregister"; + case HIP_API_ID_hipImportExternalMemory: return "hipImportExternalMemory"; + case HIP_API_ID_hipImportExternalSemaphore: return "hipImportExternalSemaphore"; + case HIP_API_ID_hipInit: return "hipInit"; + case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle"; + case HIP_API_ID_hipIpcGetEventHandle: return "hipIpcGetEventHandle"; + case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle"; + case HIP_API_ID_hipIpcOpenEventHandle: return "hipIpcOpenEventHandle"; + case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle"; + case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr"; + case HIP_API_ID_hipLaunchCooperativeKernel: return "hipLaunchCooperativeKernel"; + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: return "hipLaunchCooperativeKernelMultiDevice"; + case HIP_API_ID_hipLaunchHostFunc: return "hipLaunchHostFunc"; + case HIP_API_ID_hipLaunchKernel: return "hipLaunchKernel"; + case HIP_API_ID_hipLaunchKernelExC: return "hipLaunchKernelExC"; + case HIP_API_ID_hipLibraryGetKernel: return "hipLibraryGetKernel"; + case HIP_API_ID_hipLibraryGetKernelCount: return "hipLibraryGetKernelCount"; + case HIP_API_ID_hipLibraryLoadData: return "hipLibraryLoadData"; + case HIP_API_ID_hipLibraryLoadFromFile: return "hipLibraryLoadFromFile"; + case HIP_API_ID_hipLibraryUnload: return "hipLibraryUnload"; + case HIP_API_ID_hipLinkAddData: return "hipLinkAddData"; + case HIP_API_ID_hipLinkAddFile: return "hipLinkAddFile"; + case HIP_API_ID_hipLinkComplete: return "hipLinkComplete"; + case HIP_API_ID_hipLinkCreate: return "hipLinkCreate"; + case HIP_API_ID_hipLinkDestroy: return "hipLinkDestroy"; + case HIP_API_ID_hipMalloc: return "hipMalloc"; + case HIP_API_ID_hipMalloc3D: return "hipMalloc3D"; + case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray"; + case HIP_API_ID_hipMallocArray: return 
"hipMallocArray"; + case HIP_API_ID_hipMallocAsync: return "hipMallocAsync"; + case HIP_API_ID_hipMallocFromPoolAsync: return "hipMallocFromPoolAsync"; + case HIP_API_ID_hipMallocHost: return "hipMallocHost"; + case HIP_API_ID_hipMallocManaged: return "hipMallocManaged"; + case HIP_API_ID_hipMallocMipmappedArray: return "hipMallocMipmappedArray"; + case HIP_API_ID_hipMallocPitch: return "hipMallocPitch"; + case HIP_API_ID_hipMemAddressFree: return "hipMemAddressFree"; + case HIP_API_ID_hipMemAddressReserve: return "hipMemAddressReserve"; + case HIP_API_ID_hipMemAdvise: return "hipMemAdvise"; + case HIP_API_ID_hipMemAdvise_v2: return "hipMemAdvise_v2"; + case HIP_API_ID_hipMemAllocHost: return "hipMemAllocHost"; + case HIP_API_ID_hipMemAllocPitch: return "hipMemAllocPitch"; + case HIP_API_ID_hipMemCreate: return "hipMemCreate"; + case HIP_API_ID_hipMemExportToShareableHandle: return "hipMemExportToShareableHandle"; + case HIP_API_ID_hipMemGetAccess: return "hipMemGetAccess"; + case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange"; + case HIP_API_ID_hipMemGetAllocationGranularity: return "hipMemGetAllocationGranularity"; + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: return "hipMemGetAllocationPropertiesFromHandle"; + case HIP_API_ID_hipMemGetHandleForAddressRange: return "hipMemGetHandleForAddressRange"; + case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo"; + case HIP_API_ID_hipMemImportFromShareableHandle: return "hipMemImportFromShareableHandle"; + case HIP_API_ID_hipMemMap: return "hipMemMap"; + case HIP_API_ID_hipMemMapArrayAsync: return "hipMemMapArrayAsync"; + case HIP_API_ID_hipMemPoolCreate: return "hipMemPoolCreate"; + case HIP_API_ID_hipMemPoolDestroy: return "hipMemPoolDestroy"; + case HIP_API_ID_hipMemPoolExportPointer: return "hipMemPoolExportPointer"; + case HIP_API_ID_hipMemPoolExportToShareableHandle: return "hipMemPoolExportToShareableHandle"; + case HIP_API_ID_hipMemPoolGetAccess: return "hipMemPoolGetAccess"; + case 
HIP_API_ID_hipMemPoolGetAttribute: return "hipMemPoolGetAttribute"; + case HIP_API_ID_hipMemPoolImportFromShareableHandle: return "hipMemPoolImportFromShareableHandle"; + case HIP_API_ID_hipMemPoolImportPointer: return "hipMemPoolImportPointer"; + case HIP_API_ID_hipMemPoolSetAccess: return "hipMemPoolSetAccess"; + case HIP_API_ID_hipMemPoolSetAttribute: return "hipMemPoolSetAttribute"; + case HIP_API_ID_hipMemPoolTrimTo: return "hipMemPoolTrimTo"; + case HIP_API_ID_hipMemPrefetchAsync: return "hipMemPrefetchAsync"; + case HIP_API_ID_hipMemPrefetchAsync_v2: return "hipMemPrefetchAsync_v2"; + case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo"; + case HIP_API_ID_hipMemRangeGetAttribute: return "hipMemRangeGetAttribute"; + case HIP_API_ID_hipMemRangeGetAttributes: return "hipMemRangeGetAttributes"; + case HIP_API_ID_hipMemRelease: return "hipMemRelease"; + case HIP_API_ID_hipMemRetainAllocationHandle: return "hipMemRetainAllocationHandle"; + case HIP_API_ID_hipMemSetAccess: return "hipMemSetAccess"; + case HIP_API_ID_hipMemUnmap: return "hipMemUnmap"; + case HIP_API_ID_hipMemcpy: return "hipMemcpy"; + case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D"; + case HIP_API_ID_hipMemcpy2DArrayToArray: return "hipMemcpy2DArrayToArray"; + case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync"; + case HIP_API_ID_hipMemcpy2DFromArray: return "hipMemcpy2DFromArray"; + case HIP_API_ID_hipMemcpy2DFromArrayAsync: return "hipMemcpy2DFromArrayAsync"; + case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray"; + case HIP_API_ID_hipMemcpy2DToArrayAsync: return "hipMemcpy2DToArrayAsync"; + case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D"; + case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync"; + case HIP_API_ID_hipMemcpy3DBatchAsync: return "hipMemcpy3DBatchAsync"; + case HIP_API_ID_hipMemcpy3DPeer: return "hipMemcpy3DPeer"; + case HIP_API_ID_hipMemcpy3DPeerAsync: return "hipMemcpy3DPeerAsync"; + case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync"; + case 
HIP_API_ID_hipMemcpyAtoA: return "hipMemcpyAtoA"; + case HIP_API_ID_hipMemcpyAtoD: return "hipMemcpyAtoD"; + case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH"; + case HIP_API_ID_hipMemcpyAtoHAsync: return "hipMemcpyAtoHAsync"; + case HIP_API_ID_hipMemcpyBatchAsync: return "hipMemcpyBatchAsync"; + case HIP_API_ID_hipMemcpyDtoA: return "hipMemcpyDtoA"; + case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD"; + case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync"; + case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH"; + case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync"; + case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray"; + case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol"; + case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync"; + case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA"; + case HIP_API_ID_hipMemcpyHtoAAsync: return "hipMemcpyHtoAAsync"; + case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD"; + case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync"; + case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D"; + case HIP_API_ID_hipMemcpyParam2DAsync: return "hipMemcpyParam2DAsync"; + case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer"; + case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync"; + case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray"; + case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol"; + case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync"; + case HIP_API_ID_hipMemcpyWithStream: return "hipMemcpyWithStream"; + case HIP_API_ID_hipMemset: return "hipMemset"; + case HIP_API_ID_hipMemset2D: return "hipMemset2D"; + case HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync"; + case HIP_API_ID_hipMemset3D: return "hipMemset3D"; + case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync"; + case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync"; + case HIP_API_ID_hipMemsetD16: return "hipMemsetD16"; + 
case HIP_API_ID_hipMemsetD16Async: return "hipMemsetD16Async"; + case HIP_API_ID_hipMemsetD2D16: return "hipMemsetD2D16"; + case HIP_API_ID_hipMemsetD2D16Async: return "hipMemsetD2D16Async"; + case HIP_API_ID_hipMemsetD2D32: return "hipMemsetD2D32"; + case HIP_API_ID_hipMemsetD2D32Async: return "hipMemsetD2D32Async"; + case HIP_API_ID_hipMemsetD2D8: return "hipMemsetD2D8"; + case HIP_API_ID_hipMemsetD2D8Async: return "hipMemsetD2D8Async"; + case HIP_API_ID_hipMemsetD32: return "hipMemsetD32"; + case HIP_API_ID_hipMemsetD32Async: return "hipMemsetD32Async"; + case HIP_API_ID_hipMemsetD8: return "hipMemsetD8"; + case HIP_API_ID_hipMemsetD8Async: return "hipMemsetD8Async"; + case HIP_API_ID_hipMipmappedArrayCreate: return "hipMipmappedArrayCreate"; + case HIP_API_ID_hipMipmappedArrayDestroy: return "hipMipmappedArrayDestroy"; + case HIP_API_ID_hipMipmappedArrayGetLevel: return "hipMipmappedArrayGetLevel"; + case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction"; + case HIP_API_ID_hipModuleGetFunctionCount: return "hipModuleGetFunctionCount"; + case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal"; + case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef"; + case HIP_API_ID_hipModuleLaunchCooperativeKernel: return "hipModuleLaunchCooperativeKernel"; + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: return "hipModuleLaunchCooperativeKernelMultiDevice"; + case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel"; + case HIP_API_ID_hipModuleLoad: return "hipModuleLoad"; + case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData"; + case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx"; + case HIP_API_ID_hipModuleLoadFatBinary: return "hipModuleLoadFatBinary"; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor"; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return 
"hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: return "hipModuleOccupancyMaxPotentialBlockSize"; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: return "hipModuleOccupancyMaxPotentialBlockSizeWithFlags"; + case HIP_API_ID_hipModuleUnload: return "hipModuleUnload"; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: return "hipOccupancyMaxActiveBlocksPerMultiprocessor"; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"; + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: return "hipOccupancyMaxPotentialBlockSize"; + case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError"; + case HIP_API_ID_hipPointerGetAttribute: return "hipPointerGetAttribute"; + case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes"; + case HIP_API_ID_hipPointerSetAttribute: return "hipPointerSetAttribute"; + case HIP_API_ID_hipProfilerStart: return "hipProfilerStart"; + case HIP_API_ID_hipProfilerStop: return "hipProfilerStop"; + case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion"; + case HIP_API_ID_hipSetDevice: return "hipSetDevice"; + case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags"; + case HIP_API_ID_hipSetValidDevices: return "hipSetValidDevices"; + case HIP_API_ID_hipSetupArgument: return "hipSetupArgument"; + case HIP_API_ID_hipSignalExternalSemaphoresAsync: return "hipSignalExternalSemaphoresAsync"; + case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback"; + case HIP_API_ID_hipStreamAttachMemAsync: return "hipStreamAttachMemAsync"; + case HIP_API_ID_hipStreamBatchMemOp: return "hipStreamBatchMemOp"; + case HIP_API_ID_hipStreamBeginCapture: return "hipStreamBeginCapture"; + case HIP_API_ID_hipStreamBeginCaptureToGraph: return "hipStreamBeginCaptureToGraph"; + case HIP_API_ID_hipStreamCreate: return "hipStreamCreate"; + case 
HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags"; + case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority"; + case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy"; + case HIP_API_ID_hipStreamEndCapture: return "hipStreamEndCapture"; + case HIP_API_ID_hipStreamGetAttribute: return "hipStreamGetAttribute"; + case HIP_API_ID_hipStreamGetCaptureInfo: return "hipStreamGetCaptureInfo"; + case HIP_API_ID_hipStreamGetCaptureInfo_v2: return "hipStreamGetCaptureInfo_v2"; + case HIP_API_ID_hipStreamGetDevice: return "hipStreamGetDevice"; + case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags"; + case HIP_API_ID_hipStreamGetId: return "hipStreamGetId"; + case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority"; + case HIP_API_ID_hipStreamIsCapturing: return "hipStreamIsCapturing"; + case HIP_API_ID_hipStreamQuery: return "hipStreamQuery"; + case HIP_API_ID_hipStreamSetAttribute: return "hipStreamSetAttribute"; + case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize"; + case HIP_API_ID_hipStreamUpdateCaptureDependencies: return "hipStreamUpdateCaptureDependencies"; + case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent"; + case HIP_API_ID_hipStreamWaitValue32: return "hipStreamWaitValue32"; + case HIP_API_ID_hipStreamWaitValue64: return "hipStreamWaitValue64"; + case HIP_API_ID_hipStreamWriteValue32: return "hipStreamWriteValue32"; + case HIP_API_ID_hipStreamWriteValue64: return "hipStreamWriteValue64"; + case HIP_API_ID_hipTexRefGetAddress: return "hipTexRefGetAddress"; + case HIP_API_ID_hipTexRefGetArray: return "hipTexRefGetArray"; + case HIP_API_ID_hipTexRefGetBorderColor: return "hipTexRefGetBorderColor"; + case HIP_API_ID_hipTexRefGetFlags: return "hipTexRefGetFlags"; + case HIP_API_ID_hipTexRefGetFormat: return "hipTexRefGetFormat"; + case HIP_API_ID_hipTexRefGetMaxAnisotropy: return "hipTexRefGetMaxAnisotropy"; + case HIP_API_ID_hipTexRefGetMipMappedArray: return 
"hipTexRefGetMipMappedArray"; + case HIP_API_ID_hipTexRefGetMipmapLevelBias: return "hipTexRefGetMipmapLevelBias"; + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: return "hipTexRefGetMipmapLevelClamp"; + case HIP_API_ID_hipTexRefSetAddress: return "hipTexRefSetAddress"; + case HIP_API_ID_hipTexRefSetAddress2D: return "hipTexRefSetAddress2D"; + case HIP_API_ID_hipTexRefSetArray: return "hipTexRefSetArray"; + case HIP_API_ID_hipTexRefSetBorderColor: return "hipTexRefSetBorderColor"; + case HIP_API_ID_hipTexRefSetFlags: return "hipTexRefSetFlags"; + case HIP_API_ID_hipTexRefSetFormat: return "hipTexRefSetFormat"; + case HIP_API_ID_hipTexRefSetMaxAnisotropy: return "hipTexRefSetMaxAnisotropy"; + case HIP_API_ID_hipTexRefSetMipmapLevelBias: return "hipTexRefSetMipmapLevelBias"; + case HIP_API_ID_hipTexRefSetMipmapLevelClamp: return "hipTexRefSetMipmapLevelClamp"; + case HIP_API_ID_hipTexRefSetMipmappedArray: return "hipTexRefSetMipmappedArray"; + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: return "hipThreadExchangeStreamCaptureMode"; + case HIP_API_ID_hipUserObjectCreate: return "hipUserObjectCreate"; + case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease"; + case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; + case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync"; + }; + return "unknown"; +}; + +#include +// Return the HIP API callback ID for a given name +static inline uint32_t hipApiIdByName(const char* name) { + if (strcmp("__hipPopCallConfiguration", name) == 0) return HIP_API_ID___hipPopCallConfiguration; + if (strcmp("__hipPushCallConfiguration", name) == 0) return HIP_API_ID___hipPushCallConfiguration; + if (strcmp("hipArray3DCreate", name) == 0) return HIP_API_ID_hipArray3DCreate; + if (strcmp("hipArray3DGetDescriptor", name) == 0) return HIP_API_ID_hipArray3DGetDescriptor; + if (strcmp("hipArrayCreate", name) == 0) return HIP_API_ID_hipArrayCreate; + if (strcmp("hipArrayDestroy", name) == 
0) return HIP_API_ID_hipArrayDestroy; + if (strcmp("hipArrayGetDescriptor", name) == 0) return HIP_API_ID_hipArrayGetDescriptor; + if (strcmp("hipArrayGetInfo", name) == 0) return HIP_API_ID_hipArrayGetInfo; + if (strcmp("hipChooseDeviceR0000", name) == 0) return HIP_API_ID_hipChooseDeviceR0000; + if (strcmp("hipChooseDeviceR0600", name) == 0) return HIP_API_ID_hipChooseDeviceR0600; + if (strcmp("hipConfigureCall", name) == 0) return HIP_API_ID_hipConfigureCall; + if (strcmp("hipCreateSurfaceObject", name) == 0) return HIP_API_ID_hipCreateSurfaceObject; + if (strcmp("hipCtxCreate", name) == 0) return HIP_API_ID_hipCtxCreate; + if (strcmp("hipCtxDestroy", name) == 0) return HIP_API_ID_hipCtxDestroy; + if (strcmp("hipCtxDisablePeerAccess", name) == 0) return HIP_API_ID_hipCtxDisablePeerAccess; + if (strcmp("hipCtxEnablePeerAccess", name) == 0) return HIP_API_ID_hipCtxEnablePeerAccess; + if (strcmp("hipCtxGetApiVersion", name) == 0) return HIP_API_ID_hipCtxGetApiVersion; + if (strcmp("hipCtxGetCacheConfig", name) == 0) return HIP_API_ID_hipCtxGetCacheConfig; + if (strcmp("hipCtxGetCurrent", name) == 0) return HIP_API_ID_hipCtxGetCurrent; + if (strcmp("hipCtxGetDevice", name) == 0) return HIP_API_ID_hipCtxGetDevice; + if (strcmp("hipCtxGetFlags", name) == 0) return HIP_API_ID_hipCtxGetFlags; + if (strcmp("hipCtxGetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxGetSharedMemConfig; + if (strcmp("hipCtxPopCurrent", name) == 0) return HIP_API_ID_hipCtxPopCurrent; + if (strcmp("hipCtxPushCurrent", name) == 0) return HIP_API_ID_hipCtxPushCurrent; + if (strcmp("hipCtxSetCacheConfig", name) == 0) return HIP_API_ID_hipCtxSetCacheConfig; + if (strcmp("hipCtxSetCurrent", name) == 0) return HIP_API_ID_hipCtxSetCurrent; + if (strcmp("hipCtxSetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxSetSharedMemConfig; + if (strcmp("hipCtxSynchronize", name) == 0) return HIP_API_ID_hipCtxSynchronize; + if (strcmp("hipDestroyExternalMemory", name) == 0) return 
HIP_API_ID_hipDestroyExternalMemory; + if (strcmp("hipDestroyExternalSemaphore", name) == 0) return HIP_API_ID_hipDestroyExternalSemaphore; + if (strcmp("hipDestroySurfaceObject", name) == 0) return HIP_API_ID_hipDestroySurfaceObject; + if (strcmp("hipDeviceCanAccessPeer", name) == 0) return HIP_API_ID_hipDeviceCanAccessPeer; + if (strcmp("hipDeviceComputeCapability", name) == 0) return HIP_API_ID_hipDeviceComputeCapability; + if (strcmp("hipDeviceDisablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceDisablePeerAccess; + if (strcmp("hipDeviceEnablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceEnablePeerAccess; + if (strcmp("hipDeviceGet", name) == 0) return HIP_API_ID_hipDeviceGet; + if (strcmp("hipDeviceGetAttribute", name) == 0) return HIP_API_ID_hipDeviceGetAttribute; + if (strcmp("hipDeviceGetByPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetByPCIBusId; + if (strcmp("hipDeviceGetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceGetCacheConfig; + if (strcmp("hipDeviceGetDefaultMemPool", name) == 0) return HIP_API_ID_hipDeviceGetDefaultMemPool; + if (strcmp("hipDeviceGetGraphMemAttribute", name) == 0) return HIP_API_ID_hipDeviceGetGraphMemAttribute; + if (strcmp("hipDeviceGetLimit", name) == 0) return HIP_API_ID_hipDeviceGetLimit; + if (strcmp("hipDeviceGetMemPool", name) == 0) return HIP_API_ID_hipDeviceGetMemPool; + if (strcmp("hipDeviceGetName", name) == 0) return HIP_API_ID_hipDeviceGetName; + if (strcmp("hipDeviceGetP2PAttribute", name) == 0) return HIP_API_ID_hipDeviceGetP2PAttribute; + if (strcmp("hipDeviceGetPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetPCIBusId; + if (strcmp("hipDeviceGetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceGetSharedMemConfig; + if (strcmp("hipDeviceGetStreamPriorityRange", name) == 0) return HIP_API_ID_hipDeviceGetStreamPriorityRange; + if (strcmp("hipDeviceGetUuid", name) == 0) return HIP_API_ID_hipDeviceGetUuid; + if (strcmp("hipDeviceGraphMemTrim", name) == 0) return 
HIP_API_ID_hipDeviceGraphMemTrim; + if (strcmp("hipDevicePrimaryCtxGetState", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxGetState; + if (strcmp("hipDevicePrimaryCtxRelease", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRelease; + if (strcmp("hipDevicePrimaryCtxReset", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxReset; + if (strcmp("hipDevicePrimaryCtxRetain", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRetain; + if (strcmp("hipDevicePrimaryCtxSetFlags", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxSetFlags; + if (strcmp("hipDeviceReset", name) == 0) return HIP_API_ID_hipDeviceReset; + if (strcmp("hipDeviceSetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceSetCacheConfig; + if (strcmp("hipDeviceSetGraphMemAttribute", name) == 0) return HIP_API_ID_hipDeviceSetGraphMemAttribute; + if (strcmp("hipDeviceSetLimit", name) == 0) return HIP_API_ID_hipDeviceSetLimit; + if (strcmp("hipDeviceSetMemPool", name) == 0) return HIP_API_ID_hipDeviceSetMemPool; + if (strcmp("hipDeviceSetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceSetSharedMemConfig; + if (strcmp("hipDeviceSynchronize", name) == 0) return HIP_API_ID_hipDeviceSynchronize; + if (strcmp("hipDeviceTotalMem", name) == 0) return HIP_API_ID_hipDeviceTotalMem; + if (strcmp("hipDriverGetVersion", name) == 0) return HIP_API_ID_hipDriverGetVersion; + if (strcmp("hipDrvGraphAddMemFreeNode", name) == 0) return HIP_API_ID_hipDrvGraphAddMemFreeNode; + if (strcmp("hipDrvGraphAddMemcpyNode", name) == 0) return HIP_API_ID_hipDrvGraphAddMemcpyNode; + if (strcmp("hipDrvGraphAddMemsetNode", name) == 0) return HIP_API_ID_hipDrvGraphAddMemsetNode; + if (strcmp("hipDrvGraphExecMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipDrvGraphExecMemcpyNodeSetParams; + if (strcmp("hipDrvGraphExecMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipDrvGraphExecMemsetNodeSetParams; + if (strcmp("hipDrvGraphMemcpyNodeGetParams", name) == 0) return HIP_API_ID_hipDrvGraphMemcpyNodeGetParams; + if 
(strcmp("hipDrvGraphMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipDrvGraphMemcpyNodeSetParams; + if (strcmp("hipDrvLaunchKernelEx", name) == 0) return HIP_API_ID_hipDrvLaunchKernelEx; + if (strcmp("hipDrvMemcpy2DUnaligned", name) == 0) return HIP_API_ID_hipDrvMemcpy2DUnaligned; + if (strcmp("hipDrvMemcpy3D", name) == 0) return HIP_API_ID_hipDrvMemcpy3D; + if (strcmp("hipDrvMemcpy3DAsync", name) == 0) return HIP_API_ID_hipDrvMemcpy3DAsync; + if (strcmp("hipDrvPointerGetAttributes", name) == 0) return HIP_API_ID_hipDrvPointerGetAttributes; + if (strcmp("hipEventCreate", name) == 0) return HIP_API_ID_hipEventCreate; + if (strcmp("hipEventCreateWithFlags", name) == 0) return HIP_API_ID_hipEventCreateWithFlags; + if (strcmp("hipEventDestroy", name) == 0) return HIP_API_ID_hipEventDestroy; + if (strcmp("hipEventElapsedTime", name) == 0) return HIP_API_ID_hipEventElapsedTime; + if (strcmp("hipEventQuery", name) == 0) return HIP_API_ID_hipEventQuery; + if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord; + if (strcmp("hipEventRecordWithFlags", name) == 0) return HIP_API_ID_hipEventRecordWithFlags; + if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize; + if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError; + if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount; + if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel; + if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice; + if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags; + if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel; + if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask; + if (strcmp("hipExtStreamGetCUMask", name) == 0) return 
HIP_API_ID_hipExtStreamGetCUMask; + if (strcmp("hipExternalMemoryGetMappedBuffer", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedBuffer; + if (strcmp("hipExternalMemoryGetMappedMipmappedArray", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray; + if (strcmp("hipFree", name) == 0) return HIP_API_ID_hipFree; + if (strcmp("hipFreeArray", name) == 0) return HIP_API_ID_hipFreeArray; + if (strcmp("hipFreeAsync", name) == 0) return HIP_API_ID_hipFreeAsync; + if (strcmp("hipFreeHost", name) == 0) return HIP_API_ID_hipFreeHost; + if (strcmp("hipFreeMipmappedArray", name) == 0) return HIP_API_ID_hipFreeMipmappedArray; + if (strcmp("hipFuncGetAttribute", name) == 0) return HIP_API_ID_hipFuncGetAttribute; + if (strcmp("hipFuncGetAttributes", name) == 0) return HIP_API_ID_hipFuncGetAttributes; + if (strcmp("hipFuncSetAttribute", name) == 0) return HIP_API_ID_hipFuncSetAttribute; + if (strcmp("hipFuncSetCacheConfig", name) == 0) return HIP_API_ID_hipFuncSetCacheConfig; + if (strcmp("hipFuncSetSharedMemConfig", name) == 0) return HIP_API_ID_hipFuncSetSharedMemConfig; + if (strcmp("hipGLGetDevices", name) == 0) return HIP_API_ID_hipGLGetDevices; + if (strcmp("hipGetChannelDesc", name) == 0) return HIP_API_ID_hipGetChannelDesc; + if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice; + if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount; + if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags; + if (strcmp("hipGetDevicePropertiesR0000", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0000; + if (strcmp("hipGetDevicePropertiesR0600", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0600; + if (strcmp("hipGetDriverEntryPoint", name) == 0) return HIP_API_ID_hipGetDriverEntryPoint; + if (strcmp("hipGetFuncBySymbol", name) == 0) return HIP_API_ID_hipGetFuncBySymbol; + if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError; + if 
(strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel; + if (strcmp("hipGetProcAddress", name) == 0) return HIP_API_ID_hipGetProcAddress; + if (strcmp("hipGetSymbolAddress", name) == 0) return HIP_API_ID_hipGetSymbolAddress; + if (strcmp("hipGetSymbolSize", name) == 0) return HIP_API_ID_hipGetSymbolSize; + if (strcmp("hipGraphAddBatchMemOpNode", name) == 0) return HIP_API_ID_hipGraphAddBatchMemOpNode; + if (strcmp("hipGraphAddChildGraphNode", name) == 0) return HIP_API_ID_hipGraphAddChildGraphNode; + if (strcmp("hipGraphAddDependencies", name) == 0) return HIP_API_ID_hipGraphAddDependencies; + if (strcmp("hipGraphAddEmptyNode", name) == 0) return HIP_API_ID_hipGraphAddEmptyNode; + if (strcmp("hipGraphAddEventRecordNode", name) == 0) return HIP_API_ID_hipGraphAddEventRecordNode; + if (strcmp("hipGraphAddEventWaitNode", name) == 0) return HIP_API_ID_hipGraphAddEventWaitNode; + if (strcmp("hipGraphAddExternalSemaphoresSignalNode", name) == 0) return HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode; + if (strcmp("hipGraphAddExternalSemaphoresWaitNode", name) == 0) return HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode; + if (strcmp("hipGraphAddHostNode", name) == 0) return HIP_API_ID_hipGraphAddHostNode; + if (strcmp("hipGraphAddKernelNode", name) == 0) return HIP_API_ID_hipGraphAddKernelNode; + if (strcmp("hipGraphAddMemAllocNode", name) == 0) return HIP_API_ID_hipGraphAddMemAllocNode; + if (strcmp("hipGraphAddMemFreeNode", name) == 0) return HIP_API_ID_hipGraphAddMemFreeNode; + if (strcmp("hipGraphAddMemcpyNode", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNode; + if (strcmp("hipGraphAddMemcpyNode1D", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNode1D; + if (strcmp("hipGraphAddMemcpyNodeFromSymbol", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol; + if (strcmp("hipGraphAddMemcpyNodeToSymbol", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNodeToSymbol; + if (strcmp("hipGraphAddMemsetNode", name) == 0) 
return HIP_API_ID_hipGraphAddMemsetNode; + if (strcmp("hipGraphAddNode", name) == 0) return HIP_API_ID_hipGraphAddNode; + if (strcmp("hipGraphBatchMemOpNodeGetParams", name) == 0) return HIP_API_ID_hipGraphBatchMemOpNodeGetParams; + if (strcmp("hipGraphBatchMemOpNodeSetParams", name) == 0) return HIP_API_ID_hipGraphBatchMemOpNodeSetParams; + if (strcmp("hipGraphChildGraphNodeGetGraph", name) == 0) return HIP_API_ID_hipGraphChildGraphNodeGetGraph; + if (strcmp("hipGraphClone", name) == 0) return HIP_API_ID_hipGraphClone; + if (strcmp("hipGraphCreate", name) == 0) return HIP_API_ID_hipGraphCreate; + if (strcmp("hipGraphDebugDotPrint", name) == 0) return HIP_API_ID_hipGraphDebugDotPrint; + if (strcmp("hipGraphDestroy", name) == 0) return HIP_API_ID_hipGraphDestroy; + if (strcmp("hipGraphDestroyNode", name) == 0) return HIP_API_ID_hipGraphDestroyNode; + if (strcmp("hipGraphEventRecordNodeGetEvent", name) == 0) return HIP_API_ID_hipGraphEventRecordNodeGetEvent; + if (strcmp("hipGraphEventRecordNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphEventRecordNodeSetEvent; + if (strcmp("hipGraphEventWaitNodeGetEvent", name) == 0) return HIP_API_ID_hipGraphEventWaitNodeGetEvent; + if (strcmp("hipGraphEventWaitNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphEventWaitNodeSetEvent; + if (strcmp("hipGraphExecBatchMemOpNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecBatchMemOpNodeSetParams; + if (strcmp("hipGraphExecChildGraphNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecChildGraphNodeSetParams; + if (strcmp("hipGraphExecDestroy", name) == 0) return HIP_API_ID_hipGraphExecDestroy; + if (strcmp("hipGraphExecEventRecordNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventRecordNodeSetEvent; + if (strcmp("hipGraphExecEventWaitNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventWaitNodeSetEvent; + if (strcmp("hipGraphExecExternalSemaphoresSignalNodeSetParams", name) == 0) return 
HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams; + if (strcmp("hipGraphExecExternalSemaphoresWaitNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams; + if (strcmp("hipGraphExecGetFlags", name) == 0) return HIP_API_ID_hipGraphExecGetFlags; + if (strcmp("hipGraphExecHostNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecHostNodeSetParams; + if (strcmp("hipGraphExecKernelNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecKernelNodeSetParams; + if (strcmp("hipGraphExecMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParams; + if (strcmp("hipGraphExecMemcpyNodeSetParams1D", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D; + if (strcmp("hipGraphExecMemcpyNodeSetParamsFromSymbol", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol; + if (strcmp("hipGraphExecMemcpyNodeSetParamsToSymbol", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol; + if (strcmp("hipGraphExecMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemsetNodeSetParams; + if (strcmp("hipGraphExecNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecNodeSetParams; + if (strcmp("hipGraphExecUpdate", name) == 0) return HIP_API_ID_hipGraphExecUpdate; + if (strcmp("hipGraphExternalSemaphoresSignalNodeGetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams; + if (strcmp("hipGraphExternalSemaphoresSignalNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams; + if (strcmp("hipGraphExternalSemaphoresWaitNodeGetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams; + if (strcmp("hipGraphExternalSemaphoresWaitNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams; + if (strcmp("hipGraphGetEdges", name) == 0) return HIP_API_ID_hipGraphGetEdges; + if (strcmp("hipGraphGetNodes", name) == 0) return 
HIP_API_ID_hipGraphGetNodes; + if (strcmp("hipGraphGetRootNodes", name) == 0) return HIP_API_ID_hipGraphGetRootNodes; + if (strcmp("hipGraphHostNodeGetParams", name) == 0) return HIP_API_ID_hipGraphHostNodeGetParams; + if (strcmp("hipGraphHostNodeSetParams", name) == 0) return HIP_API_ID_hipGraphHostNodeSetParams; + if (strcmp("hipGraphInstantiate", name) == 0) return HIP_API_ID_hipGraphInstantiate; + if (strcmp("hipGraphInstantiateWithFlags", name) == 0) return HIP_API_ID_hipGraphInstantiateWithFlags; + if (strcmp("hipGraphInstantiateWithParams", name) == 0) return HIP_API_ID_hipGraphInstantiateWithParams; + if (strcmp("hipGraphKernelNodeCopyAttributes", name) == 0) return HIP_API_ID_hipGraphKernelNodeCopyAttributes; + if (strcmp("hipGraphKernelNodeGetAttribute", name) == 0) return HIP_API_ID_hipGraphKernelNodeGetAttribute; + if (strcmp("hipGraphKernelNodeGetParams", name) == 0) return HIP_API_ID_hipGraphKernelNodeGetParams; + if (strcmp("hipGraphKernelNodeSetAttribute", name) == 0) return HIP_API_ID_hipGraphKernelNodeSetAttribute; + if (strcmp("hipGraphKernelNodeSetParams", name) == 0) return HIP_API_ID_hipGraphKernelNodeSetParams; + if (strcmp("hipGraphLaunch", name) == 0) return HIP_API_ID_hipGraphLaunch; + if (strcmp("hipGraphMemAllocNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemAllocNodeGetParams; + if (strcmp("hipGraphMemFreeNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemFreeNodeGetParams; + if (strcmp("hipGraphMemcpyNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeGetParams; + if (strcmp("hipGraphMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParams; + if (strcmp("hipGraphMemcpyNodeSetParams1D", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParams1D; + if (strcmp("hipGraphMemcpyNodeSetParamsFromSymbol", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol; + if (strcmp("hipGraphMemcpyNodeSetParamsToSymbol", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol; + 
if (strcmp("hipGraphMemsetNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemsetNodeGetParams; + if (strcmp("hipGraphMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipGraphMemsetNodeSetParams; + if (strcmp("hipGraphNodeFindInClone", name) == 0) return HIP_API_ID_hipGraphNodeFindInClone; + if (strcmp("hipGraphNodeGetDependencies", name) == 0) return HIP_API_ID_hipGraphNodeGetDependencies; + if (strcmp("hipGraphNodeGetDependentNodes", name) == 0) return HIP_API_ID_hipGraphNodeGetDependentNodes; + if (strcmp("hipGraphNodeGetEnabled", name) == 0) return HIP_API_ID_hipGraphNodeGetEnabled; + if (strcmp("hipGraphNodeGetType", name) == 0) return HIP_API_ID_hipGraphNodeGetType; + if (strcmp("hipGraphNodeSetEnabled", name) == 0) return HIP_API_ID_hipGraphNodeSetEnabled; + if (strcmp("hipGraphNodeSetParams", name) == 0) return HIP_API_ID_hipGraphNodeSetParams; + if (strcmp("hipGraphReleaseUserObject", name) == 0) return HIP_API_ID_hipGraphReleaseUserObject; + if (strcmp("hipGraphRemoveDependencies", name) == 0) return HIP_API_ID_hipGraphRemoveDependencies; + if (strcmp("hipGraphRetainUserObject", name) == 0) return HIP_API_ID_hipGraphRetainUserObject; + if (strcmp("hipGraphUpload", name) == 0) return HIP_API_ID_hipGraphUpload; + if (strcmp("hipGraphicsGLRegisterBuffer", name) == 0) return HIP_API_ID_hipGraphicsGLRegisterBuffer; + if (strcmp("hipGraphicsGLRegisterImage", name) == 0) return HIP_API_ID_hipGraphicsGLRegisterImage; + if (strcmp("hipGraphicsMapResources", name) == 0) return HIP_API_ID_hipGraphicsMapResources; + if (strcmp("hipGraphicsResourceGetMappedPointer", name) == 0) return HIP_API_ID_hipGraphicsResourceGetMappedPointer; + if (strcmp("hipGraphicsSubResourceGetMappedArray", name) == 0) return HIP_API_ID_hipGraphicsSubResourceGetMappedArray; + if (strcmp("hipGraphicsUnmapResources", name) == 0) return HIP_API_ID_hipGraphicsUnmapResources; + if (strcmp("hipGraphicsUnregisterResource", name) == 0) return HIP_API_ID_hipGraphicsUnregisterResource; + if 
(strcmp("hipHccModuleLaunchKernel", name) == 0) return HIP_API_ID_hipHccModuleLaunchKernel; + if (strcmp("hipHostAlloc", name) == 0) return HIP_API_ID_hipHostAlloc; + if (strcmp("hipHostFree", name) == 0) return HIP_API_ID_hipHostFree; + if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer; + if (strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags; + if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc; + if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister; + if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister; + if (strcmp("hipImportExternalMemory", name) == 0) return HIP_API_ID_hipImportExternalMemory; + if (strcmp("hipImportExternalSemaphore", name) == 0) return HIP_API_ID_hipImportExternalSemaphore; + if (strcmp("hipInit", name) == 0) return HIP_API_ID_hipInit; + if (strcmp("hipIpcCloseMemHandle", name) == 0) return HIP_API_ID_hipIpcCloseMemHandle; + if (strcmp("hipIpcGetEventHandle", name) == 0) return HIP_API_ID_hipIpcGetEventHandle; + if (strcmp("hipIpcGetMemHandle", name) == 0) return HIP_API_ID_hipIpcGetMemHandle; + if (strcmp("hipIpcOpenEventHandle", name) == 0) return HIP_API_ID_hipIpcOpenEventHandle; + if (strcmp("hipIpcOpenMemHandle", name) == 0) return HIP_API_ID_hipIpcOpenMemHandle; + if (strcmp("hipLaunchByPtr", name) == 0) return HIP_API_ID_hipLaunchByPtr; + if (strcmp("hipLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernel; + if (strcmp("hipLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernelMultiDevice; + if (strcmp("hipLaunchHostFunc", name) == 0) return HIP_API_ID_hipLaunchHostFunc; + if (strcmp("hipLaunchKernel", name) == 0) return HIP_API_ID_hipLaunchKernel; + if (strcmp("hipLaunchKernelExC", name) == 0) return HIP_API_ID_hipLaunchKernelExC; + if (strcmp("hipLibraryGetKernel", name) == 0) return HIP_API_ID_hipLibraryGetKernel; + if 
(strcmp("hipLibraryGetKernelCount", name) == 0) return HIP_API_ID_hipLibraryGetKernelCount; + if (strcmp("hipLibraryLoadData", name) == 0) return HIP_API_ID_hipLibraryLoadData; + if (strcmp("hipLibraryLoadFromFile", name) == 0) return HIP_API_ID_hipLibraryLoadFromFile; + if (strcmp("hipLibraryUnload", name) == 0) return HIP_API_ID_hipLibraryUnload; + if (strcmp("hipLinkAddData", name) == 0) return HIP_API_ID_hipLinkAddData; + if (strcmp("hipLinkAddFile", name) == 0) return HIP_API_ID_hipLinkAddFile; + if (strcmp("hipLinkComplete", name) == 0) return HIP_API_ID_hipLinkComplete; + if (strcmp("hipLinkCreate", name) == 0) return HIP_API_ID_hipLinkCreate; + if (strcmp("hipLinkDestroy", name) == 0) return HIP_API_ID_hipLinkDestroy; + if (strcmp("hipMalloc", name) == 0) return HIP_API_ID_hipMalloc; + if (strcmp("hipMalloc3D", name) == 0) return HIP_API_ID_hipMalloc3D; + if (strcmp("hipMalloc3DArray", name) == 0) return HIP_API_ID_hipMalloc3DArray; + if (strcmp("hipMallocArray", name) == 0) return HIP_API_ID_hipMallocArray; + if (strcmp("hipMallocAsync", name) == 0) return HIP_API_ID_hipMallocAsync; + if (strcmp("hipMallocFromPoolAsync", name) == 0) return HIP_API_ID_hipMallocFromPoolAsync; + if (strcmp("hipMallocHost", name) == 0) return HIP_API_ID_hipMallocHost; + if (strcmp("hipMallocManaged", name) == 0) return HIP_API_ID_hipMallocManaged; + if (strcmp("hipMallocMipmappedArray", name) == 0) return HIP_API_ID_hipMallocMipmappedArray; + if (strcmp("hipMallocPitch", name) == 0) return HIP_API_ID_hipMallocPitch; + if (strcmp("hipMemAddressFree", name) == 0) return HIP_API_ID_hipMemAddressFree; + if (strcmp("hipMemAddressReserve", name) == 0) return HIP_API_ID_hipMemAddressReserve; + if (strcmp("hipMemAdvise", name) == 0) return HIP_API_ID_hipMemAdvise; + if (strcmp("hipMemAdvise_v2", name) == 0) return HIP_API_ID_hipMemAdvise_v2; + if (strcmp("hipMemAllocHost", name) == 0) return HIP_API_ID_hipMemAllocHost; + if (strcmp("hipMemAllocPitch", name) == 0) return 
HIP_API_ID_hipMemAllocPitch; + if (strcmp("hipMemCreate", name) == 0) return HIP_API_ID_hipMemCreate; + if (strcmp("hipMemExportToShareableHandle", name) == 0) return HIP_API_ID_hipMemExportToShareableHandle; + if (strcmp("hipMemGetAccess", name) == 0) return HIP_API_ID_hipMemGetAccess; + if (strcmp("hipMemGetAddressRange", name) == 0) return HIP_API_ID_hipMemGetAddressRange; + if (strcmp("hipMemGetAllocationGranularity", name) == 0) return HIP_API_ID_hipMemGetAllocationGranularity; + if (strcmp("hipMemGetAllocationPropertiesFromHandle", name) == 0) return HIP_API_ID_hipMemGetAllocationPropertiesFromHandle; + if (strcmp("hipMemGetHandleForAddressRange", name) == 0) return HIP_API_ID_hipMemGetHandleForAddressRange; + if (strcmp("hipMemGetInfo", name) == 0) return HIP_API_ID_hipMemGetInfo; + if (strcmp("hipMemImportFromShareableHandle", name) == 0) return HIP_API_ID_hipMemImportFromShareableHandle; + if (strcmp("hipMemMap", name) == 0) return HIP_API_ID_hipMemMap; + if (strcmp("hipMemMapArrayAsync", name) == 0) return HIP_API_ID_hipMemMapArrayAsync; + if (strcmp("hipMemPoolCreate", name) == 0) return HIP_API_ID_hipMemPoolCreate; + if (strcmp("hipMemPoolDestroy", name) == 0) return HIP_API_ID_hipMemPoolDestroy; + if (strcmp("hipMemPoolExportPointer", name) == 0) return HIP_API_ID_hipMemPoolExportPointer; + if (strcmp("hipMemPoolExportToShareableHandle", name) == 0) return HIP_API_ID_hipMemPoolExportToShareableHandle; + if (strcmp("hipMemPoolGetAccess", name) == 0) return HIP_API_ID_hipMemPoolGetAccess; + if (strcmp("hipMemPoolGetAttribute", name) == 0) return HIP_API_ID_hipMemPoolGetAttribute; + if (strcmp("hipMemPoolImportFromShareableHandle", name) == 0) return HIP_API_ID_hipMemPoolImportFromShareableHandle; + if (strcmp("hipMemPoolImportPointer", name) == 0) return HIP_API_ID_hipMemPoolImportPointer; + if (strcmp("hipMemPoolSetAccess", name) == 0) return HIP_API_ID_hipMemPoolSetAccess; + if (strcmp("hipMemPoolSetAttribute", name) == 0) return 
HIP_API_ID_hipMemPoolSetAttribute; + if (strcmp("hipMemPoolTrimTo", name) == 0) return HIP_API_ID_hipMemPoolTrimTo; + if (strcmp("hipMemPrefetchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchAsync; + if (strcmp("hipMemPrefetchAsync_v2", name) == 0) return HIP_API_ID_hipMemPrefetchAsync_v2; + if (strcmp("hipMemPtrGetInfo", name) == 0) return HIP_API_ID_hipMemPtrGetInfo; + if (strcmp("hipMemRangeGetAttribute", name) == 0) return HIP_API_ID_hipMemRangeGetAttribute; + if (strcmp("hipMemRangeGetAttributes", name) == 0) return HIP_API_ID_hipMemRangeGetAttributes; + if (strcmp("hipMemRelease", name) == 0) return HIP_API_ID_hipMemRelease; + if (strcmp("hipMemRetainAllocationHandle", name) == 0) return HIP_API_ID_hipMemRetainAllocationHandle; + if (strcmp("hipMemSetAccess", name) == 0) return HIP_API_ID_hipMemSetAccess; + if (strcmp("hipMemUnmap", name) == 0) return HIP_API_ID_hipMemUnmap; + if (strcmp("hipMemcpy", name) == 0) return HIP_API_ID_hipMemcpy; + if (strcmp("hipMemcpy2D", name) == 0) return HIP_API_ID_hipMemcpy2D; + if (strcmp("hipMemcpy2DArrayToArray", name) == 0) return HIP_API_ID_hipMemcpy2DArrayToArray; + if (strcmp("hipMemcpy2DAsync", name) == 0) return HIP_API_ID_hipMemcpy2DAsync; + if (strcmp("hipMemcpy2DFromArray", name) == 0) return HIP_API_ID_hipMemcpy2DFromArray; + if (strcmp("hipMemcpy2DFromArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DFromArrayAsync; + if (strcmp("hipMemcpy2DToArray", name) == 0) return HIP_API_ID_hipMemcpy2DToArray; + if (strcmp("hipMemcpy2DToArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DToArrayAsync; + if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D; + if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync; + if (strcmp("hipMemcpy3DBatchAsync", name) == 0) return HIP_API_ID_hipMemcpy3DBatchAsync; + if (strcmp("hipMemcpy3DPeer", name) == 0) return HIP_API_ID_hipMemcpy3DPeer; + if (strcmp("hipMemcpy3DPeerAsync", name) == 0) return HIP_API_ID_hipMemcpy3DPeerAsync; + if 
(strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync; + if (strcmp("hipMemcpyAtoA", name) == 0) return HIP_API_ID_hipMemcpyAtoA; + if (strcmp("hipMemcpyAtoD", name) == 0) return HIP_API_ID_hipMemcpyAtoD; + if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH; + if (strcmp("hipMemcpyAtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyAtoHAsync; + if (strcmp("hipMemcpyBatchAsync", name) == 0) return HIP_API_ID_hipMemcpyBatchAsync; + if (strcmp("hipMemcpyDtoA", name) == 0) return HIP_API_ID_hipMemcpyDtoA; + if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD; + if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync; + if (strcmp("hipMemcpyDtoH", name) == 0) return HIP_API_ID_hipMemcpyDtoH; + if (strcmp("hipMemcpyDtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoHAsync; + if (strcmp("hipMemcpyFromArray", name) == 0) return HIP_API_ID_hipMemcpyFromArray; + if (strcmp("hipMemcpyFromSymbol", name) == 0) return HIP_API_ID_hipMemcpyFromSymbol; + if (strcmp("hipMemcpyFromSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyFromSymbolAsync; + if (strcmp("hipMemcpyHtoA", name) == 0) return HIP_API_ID_hipMemcpyHtoA; + if (strcmp("hipMemcpyHtoAAsync", name) == 0) return HIP_API_ID_hipMemcpyHtoAAsync; + if (strcmp("hipMemcpyHtoD", name) == 0) return HIP_API_ID_hipMemcpyHtoD; + if (strcmp("hipMemcpyHtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyHtoDAsync; + if (strcmp("hipMemcpyParam2D", name) == 0) return HIP_API_ID_hipMemcpyParam2D; + if (strcmp("hipMemcpyParam2DAsync", name) == 0) return HIP_API_ID_hipMemcpyParam2DAsync; + if (strcmp("hipMemcpyPeer", name) == 0) return HIP_API_ID_hipMemcpyPeer; + if (strcmp("hipMemcpyPeerAsync", name) == 0) return HIP_API_ID_hipMemcpyPeerAsync; + if (strcmp("hipMemcpyToArray", name) == 0) return HIP_API_ID_hipMemcpyToArray; + if (strcmp("hipMemcpyToSymbol", name) == 0) return HIP_API_ID_hipMemcpyToSymbol; + if (strcmp("hipMemcpyToSymbolAsync", name) == 0) 
return HIP_API_ID_hipMemcpyToSymbolAsync; + if (strcmp("hipMemcpyWithStream", name) == 0) return HIP_API_ID_hipMemcpyWithStream; + if (strcmp("hipMemset", name) == 0) return HIP_API_ID_hipMemset; + if (strcmp("hipMemset2D", name) == 0) return HIP_API_ID_hipMemset2D; + if (strcmp("hipMemset2DAsync", name) == 0) return HIP_API_ID_hipMemset2DAsync; + if (strcmp("hipMemset3D", name) == 0) return HIP_API_ID_hipMemset3D; + if (strcmp("hipMemset3DAsync", name) == 0) return HIP_API_ID_hipMemset3DAsync; + if (strcmp("hipMemsetAsync", name) == 0) return HIP_API_ID_hipMemsetAsync; + if (strcmp("hipMemsetD16", name) == 0) return HIP_API_ID_hipMemsetD16; + if (strcmp("hipMemsetD16Async", name) == 0) return HIP_API_ID_hipMemsetD16Async; + if (strcmp("hipMemsetD2D16", name) == 0) return HIP_API_ID_hipMemsetD2D16; + if (strcmp("hipMemsetD2D16Async", name) == 0) return HIP_API_ID_hipMemsetD2D16Async; + if (strcmp("hipMemsetD2D32", name) == 0) return HIP_API_ID_hipMemsetD2D32; + if (strcmp("hipMemsetD2D32Async", name) == 0) return HIP_API_ID_hipMemsetD2D32Async; + if (strcmp("hipMemsetD2D8", name) == 0) return HIP_API_ID_hipMemsetD2D8; + if (strcmp("hipMemsetD2D8Async", name) == 0) return HIP_API_ID_hipMemsetD2D8Async; + if (strcmp("hipMemsetD32", name) == 0) return HIP_API_ID_hipMemsetD32; + if (strcmp("hipMemsetD32Async", name) == 0) return HIP_API_ID_hipMemsetD32Async; + if (strcmp("hipMemsetD8", name) == 0) return HIP_API_ID_hipMemsetD8; + if (strcmp("hipMemsetD8Async", name) == 0) return HIP_API_ID_hipMemsetD8Async; + if (strcmp("hipMipmappedArrayCreate", name) == 0) return HIP_API_ID_hipMipmappedArrayCreate; + if (strcmp("hipMipmappedArrayDestroy", name) == 0) return HIP_API_ID_hipMipmappedArrayDestroy; + if (strcmp("hipMipmappedArrayGetLevel", name) == 0) return HIP_API_ID_hipMipmappedArrayGetLevel; + if (strcmp("hipModuleGetFunction", name) == 0) return HIP_API_ID_hipModuleGetFunction; + if (strcmp("hipModuleGetFunctionCount", name) == 0) return 
HIP_API_ID_hipModuleGetFunctionCount; + if (strcmp("hipModuleGetGlobal", name) == 0) return HIP_API_ID_hipModuleGetGlobal; + if (strcmp("hipModuleGetTexRef", name) == 0) return HIP_API_ID_hipModuleGetTexRef; + if (strcmp("hipModuleLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipModuleLaunchCooperativeKernel; + if (strcmp("hipModuleLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice; + if (strcmp("hipModuleLaunchKernel", name) == 0) return HIP_API_ID_hipModuleLaunchKernel; + if (strcmp("hipModuleLoad", name) == 0) return HIP_API_ID_hipModuleLoad; + if (strcmp("hipModuleLoadData", name) == 0) return HIP_API_ID_hipModuleLoadData; + if (strcmp("hipModuleLoadDataEx", name) == 0) return HIP_API_ID_hipModuleLoadDataEx; + if (strcmp("hipModuleLoadFatBinary", name) == 0) return HIP_API_ID_hipModuleLoadFatBinary; + if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + if (strcmp("hipModuleOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize; + if (strcmp("hipModuleOccupancyMaxPotentialBlockSizeWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + if (strcmp("hipModuleUnload", name) == 0) return HIP_API_ID_hipModuleUnload; + if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor; + if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + if (strcmp("hipOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipOccupancyMaxPotentialBlockSize; + if (strcmp("hipPeekAtLastError", name) 
== 0) return HIP_API_ID_hipPeekAtLastError; + if (strcmp("hipPointerGetAttribute", name) == 0) return HIP_API_ID_hipPointerGetAttribute; + if (strcmp("hipPointerGetAttributes", name) == 0) return HIP_API_ID_hipPointerGetAttributes; + if (strcmp("hipPointerSetAttribute", name) == 0) return HIP_API_ID_hipPointerSetAttribute; + if (strcmp("hipProfilerStart", name) == 0) return HIP_API_ID_hipProfilerStart; + if (strcmp("hipProfilerStop", name) == 0) return HIP_API_ID_hipProfilerStop; + if (strcmp("hipRuntimeGetVersion", name) == 0) return HIP_API_ID_hipRuntimeGetVersion; + if (strcmp("hipSetDevice", name) == 0) return HIP_API_ID_hipSetDevice; + if (strcmp("hipSetDeviceFlags", name) == 0) return HIP_API_ID_hipSetDeviceFlags; + if (strcmp("hipSetValidDevices", name) == 0) return HIP_API_ID_hipSetValidDevices; + if (strcmp("hipSetupArgument", name) == 0) return HIP_API_ID_hipSetupArgument; + if (strcmp("hipSignalExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipSignalExternalSemaphoresAsync; + if (strcmp("hipStreamAddCallback", name) == 0) return HIP_API_ID_hipStreamAddCallback; + if (strcmp("hipStreamAttachMemAsync", name) == 0) return HIP_API_ID_hipStreamAttachMemAsync; + if (strcmp("hipStreamBatchMemOp", name) == 0) return HIP_API_ID_hipStreamBatchMemOp; + if (strcmp("hipStreamBeginCapture", name) == 0) return HIP_API_ID_hipStreamBeginCapture; + if (strcmp("hipStreamBeginCaptureToGraph", name) == 0) return HIP_API_ID_hipStreamBeginCaptureToGraph; + if (strcmp("hipStreamCreate", name) == 0) return HIP_API_ID_hipStreamCreate; + if (strcmp("hipStreamCreateWithFlags", name) == 0) return HIP_API_ID_hipStreamCreateWithFlags; + if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority; + if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy; + if (strcmp("hipStreamEndCapture", name) == 0) return HIP_API_ID_hipStreamEndCapture; + if (strcmp("hipStreamGetAttribute", name) == 0) return 
HIP_API_ID_hipStreamGetAttribute; + if (strcmp("hipStreamGetCaptureInfo", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo; + if (strcmp("hipStreamGetCaptureInfo_v2", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo_v2; + if (strcmp("hipStreamGetDevice", name) == 0) return HIP_API_ID_hipStreamGetDevice; + if (strcmp("hipStreamGetFlags", name) == 0) return HIP_API_ID_hipStreamGetFlags; + if (strcmp("hipStreamGetId", name) == 0) return HIP_API_ID_hipStreamGetId; + if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority; + if (strcmp("hipStreamIsCapturing", name) == 0) return HIP_API_ID_hipStreamIsCapturing; + if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery; + if (strcmp("hipStreamSetAttribute", name) == 0) return HIP_API_ID_hipStreamSetAttribute; + if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize; + if (strcmp("hipStreamUpdateCaptureDependencies", name) == 0) return HIP_API_ID_hipStreamUpdateCaptureDependencies; + if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent; + if (strcmp("hipStreamWaitValue32", name) == 0) return HIP_API_ID_hipStreamWaitValue32; + if (strcmp("hipStreamWaitValue64", name) == 0) return HIP_API_ID_hipStreamWaitValue64; + if (strcmp("hipStreamWriteValue32", name) == 0) return HIP_API_ID_hipStreamWriteValue32; + if (strcmp("hipStreamWriteValue64", name) == 0) return HIP_API_ID_hipStreamWriteValue64; + if (strcmp("hipTexRefGetAddress", name) == 0) return HIP_API_ID_hipTexRefGetAddress; + if (strcmp("hipTexRefGetArray", name) == 0) return HIP_API_ID_hipTexRefGetArray; + if (strcmp("hipTexRefGetBorderColor", name) == 0) return HIP_API_ID_hipTexRefGetBorderColor; + if (strcmp("hipTexRefGetFlags", name) == 0) return HIP_API_ID_hipTexRefGetFlags; + if (strcmp("hipTexRefGetFormat", name) == 0) return HIP_API_ID_hipTexRefGetFormat; + if (strcmp("hipTexRefGetMaxAnisotropy", name) == 0) return 
HIP_API_ID_hipTexRefGetMaxAnisotropy; + if (strcmp("hipTexRefGetMipMappedArray", name) == 0) return HIP_API_ID_hipTexRefGetMipMappedArray; + if (strcmp("hipTexRefGetMipmapLevelBias", name) == 0) return HIP_API_ID_hipTexRefGetMipmapLevelBias; + if (strcmp("hipTexRefGetMipmapLevelClamp", name) == 0) return HIP_API_ID_hipTexRefGetMipmapLevelClamp; + if (strcmp("hipTexRefSetAddress", name) == 0) return HIP_API_ID_hipTexRefSetAddress; + if (strcmp("hipTexRefSetAddress2D", name) == 0) return HIP_API_ID_hipTexRefSetAddress2D; + if (strcmp("hipTexRefSetArray", name) == 0) return HIP_API_ID_hipTexRefSetArray; + if (strcmp("hipTexRefSetBorderColor", name) == 0) return HIP_API_ID_hipTexRefSetBorderColor; + if (strcmp("hipTexRefSetFlags", name) == 0) return HIP_API_ID_hipTexRefSetFlags; + if (strcmp("hipTexRefSetFormat", name) == 0) return HIP_API_ID_hipTexRefSetFormat; + if (strcmp("hipTexRefSetMaxAnisotropy", name) == 0) return HIP_API_ID_hipTexRefSetMaxAnisotropy; + if (strcmp("hipTexRefSetMipmapLevelBias", name) == 0) return HIP_API_ID_hipTexRefSetMipmapLevelBias; + if (strcmp("hipTexRefSetMipmapLevelClamp", name) == 0) return HIP_API_ID_hipTexRefSetMipmapLevelClamp; + if (strcmp("hipTexRefSetMipmappedArray", name) == 0) return HIP_API_ID_hipTexRefSetMipmappedArray; + if (strcmp("hipThreadExchangeStreamCaptureMode", name) == 0) return HIP_API_ID_hipThreadExchangeStreamCaptureMode; + if (strcmp("hipUserObjectCreate", name) == 0) return HIP_API_ID_hipUserObjectCreate; + if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease; + if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain; + if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; + return HIP_API_ID_NONE; +} + +// HIP API callbacks data structures +typedef struct hip_api_data_s { + uint64_t correlation_id; + uint32_t phase; + union { + struct { + dim3* gridDim; + dim3 gridDim__val; + dim3* blockDim; + dim3 
blockDim__val; + size_t* sharedMem; + size_t sharedMem__val; + hipStream_t* stream; + hipStream_t stream__val; + } __hipPopCallConfiguration; + struct { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } __hipPushCallConfiguration; + struct { + hipArray_t* array; + hipArray_t array__val; + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray; + HIP_ARRAY3D_DESCRIPTOR pAllocateArray__val; + } hipArray3DCreate; + struct { + HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor; + HIP_ARRAY3D_DESCRIPTOR pArrayDescriptor__val; + hipArray_t array; + } hipArray3DGetDescriptor; + struct { + hipArray_t* pHandle; + hipArray_t pHandle__val; + const HIP_ARRAY_DESCRIPTOR* pAllocateArray; + HIP_ARRAY_DESCRIPTOR pAllocateArray__val; + } hipArrayCreate; + struct { + hipArray_t array; + } hipArrayDestroy; + struct { + HIP_ARRAY_DESCRIPTOR* pArrayDescriptor; + HIP_ARRAY_DESCRIPTOR pArrayDescriptor__val; + hipArray_t array; + } hipArrayGetDescriptor; + struct { + hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent* extent; + hipExtent extent__val; + unsigned int* flags; + unsigned int flags__val; + hipArray_t array; + } hipArrayGetInfo; + struct { + int* device; + int device__val; + const hipDeviceProp_tR0000* prop; + hipDeviceProp_tR0000 prop__val; + } hipChooseDeviceR0000; + struct { + int* device; + int device__val; + const hipDeviceProp_tR0600* prop; + hipDeviceProp_tR0600 prop__val; + } hipChooseDeviceR0600; + struct { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } hipConfigureCall; + struct { + hipSurfaceObject_t* pSurfObject; + hipSurfaceObject_t pSurfObject__val; + const hipResourceDesc* pResDesc; + hipResourceDesc pResDesc__val; + } hipCreateSurfaceObject; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + unsigned int flags; + hipDevice_t device; + } hipCtxCreate; + struct { + hipCtx_t ctx; + } hipCtxDestroy; + struct { + hipCtx_t peerCtx; + } hipCtxDisablePeerAccess; + struct { + hipCtx_t peerCtx; + unsigned int 
flags; + } hipCtxEnablePeerAccess; + struct { + hipCtx_t ctx; + unsigned int* apiVersion; + unsigned int apiVersion__val; + } hipCtxGetApiVersion; + struct { + hipFuncCache_t* cacheConfig; + hipFuncCache_t cacheConfig__val; + } hipCtxGetCacheConfig; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + } hipCtxGetCurrent; + struct { + hipDevice_t* device; + hipDevice_t device__val; + } hipCtxGetDevice; + struct { + unsigned int* flags; + unsigned int flags__val; + } hipCtxGetFlags; + struct { + hipSharedMemConfig* pConfig; + hipSharedMemConfig pConfig__val; + } hipCtxGetSharedMemConfig; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + } hipCtxPopCurrent; + struct { + hipCtx_t ctx; + } hipCtxPushCurrent; + struct { + hipFuncCache_t cacheConfig; + } hipCtxSetCacheConfig; + struct { + hipCtx_t ctx; + } hipCtxSetCurrent; + struct { + hipSharedMemConfig config; + } hipCtxSetSharedMemConfig; + struct { + hipExternalMemory_t extMem; + } hipDestroyExternalMemory; + struct { + hipExternalSemaphore_t extSem; + } hipDestroyExternalSemaphore; + struct { + hipSurfaceObject_t surfaceObject; + } hipDestroySurfaceObject; + struct { + int* canAccessPeer; + int canAccessPeer__val; + int deviceId; + int peerDeviceId; + } hipDeviceCanAccessPeer; + struct { + int* major; + int major__val; + int* minor; + int minor__val; + hipDevice_t device; + } hipDeviceComputeCapability; + struct { + int peerDeviceId; + } hipDeviceDisablePeerAccess; + struct { + int peerDeviceId; + unsigned int flags; + } hipDeviceEnablePeerAccess; + struct { + hipDevice_t* device; + hipDevice_t device__val; + int ordinal; + } hipDeviceGet; + struct { + int* pi; + int pi__val; + hipDeviceAttribute_t attr; + int deviceId; + } hipDeviceGetAttribute; + struct { + int* device; + int device__val; + const char* pciBusId; + char pciBusId__val; + } hipDeviceGetByPCIBusId; + struct { + hipFuncCache_t* cacheConfig; + hipFuncCache_t cacheConfig__val; + } hipDeviceGetCacheConfig; + struct { + hipMemPool_t* mem_pool; + 
hipMemPool_t mem_pool__val; + int device; + } hipDeviceGetDefaultMemPool; + struct { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceGetGraphMemAttribute; + struct { + size_t* pValue; + size_t pValue__val; + enum hipLimit_t limit; + } hipDeviceGetLimit; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + int device; + } hipDeviceGetMemPool; + struct { + char* name; + char name__val; + int len; + hipDevice_t device; + } hipDeviceGetName; + struct { + int* value; + int value__val; + hipDeviceP2PAttr attr; + int srcDevice; + int dstDevice; + } hipDeviceGetP2PAttribute; + struct { + char* pciBusId; + char pciBusId__val; + int len; + int device; + } hipDeviceGetPCIBusId; + struct { + hipSharedMemConfig* pConfig; + hipSharedMemConfig pConfig__val; + } hipDeviceGetSharedMemConfig; + struct { + int* leastPriority; + int leastPriority__val; + int* greatestPriority; + int greatestPriority__val; + } hipDeviceGetStreamPriorityRange; + struct { + hipUUID* uuid; + hipUUID uuid__val; + hipDevice_t device; + } hipDeviceGetUuid; + struct { + int device; + } hipDeviceGraphMemTrim; + struct { + hipDevice_t dev; + unsigned int* flags; + unsigned int flags__val; + int* active; + int active__val; + } hipDevicePrimaryCtxGetState; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxRelease; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxReset; + struct { + hipCtx_t* pctx; + hipCtx_t pctx__val; + hipDevice_t dev; + } hipDevicePrimaryCtxRetain; + struct { + hipDevice_t dev; + unsigned int flags; + } hipDevicePrimaryCtxSetFlags; + struct { + hipFuncCache_t cacheConfig; + } hipDeviceSetCacheConfig; + struct { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceSetGraphMemAttribute; + struct { + enum hipLimit_t limit; + size_t value; + } hipDeviceSetLimit; + struct { + int device; + hipMemPool_t mem_pool; + } hipDeviceSetMemPool; + struct { + hipSharedMemConfig config; + } hipDeviceSetSharedMemConfig; + struct { + 
size_t* bytes; + size_t bytes__val; + hipDevice_t device; + } hipDeviceTotalMem; + struct { + int* driverVersion; + int driverVersion__val; + } hipDriverGetVersion; + struct { + hipGraphNode_t* phGraphNode; + hipGraphNode_t phGraphNode__val; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + hipDeviceptr_t dptr; + } hipDrvGraphAddMemFreeNode; + struct { + hipGraphNode_t* phGraphNode; + hipGraphNode_t phGraphNode__val; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + const HIP_MEMCPY3D* copyParams; + HIP_MEMCPY3D copyParams__val; + hipCtx_t ctx; + } hipDrvGraphAddMemcpyNode; + struct { + hipGraphNode_t* phGraphNode; + hipGraphNode_t phGraphNode__val; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + const hipMemsetParams* memsetParams; + hipMemsetParams memsetParams__val; + hipCtx_t ctx; + } hipDrvGraphAddMemsetNode; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const HIP_MEMCPY3D* copyParams; + HIP_MEMCPY3D copyParams__val; + hipCtx_t ctx; + } hipDrvGraphExecMemcpyNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipMemsetParams* memsetParams; + hipMemsetParams memsetParams__val; + hipCtx_t ctx; + } hipDrvGraphExecMemsetNodeSetParams; + struct { + hipGraphNode_t hNode; + HIP_MEMCPY3D* nodeParams; + HIP_MEMCPY3D nodeParams__val; + } hipDrvGraphMemcpyNodeGetParams; + struct { + hipGraphNode_t hNode; + const HIP_MEMCPY3D* nodeParams; + HIP_MEMCPY3D nodeParams__val; + } hipDrvGraphMemcpyNodeSetParams; + struct { + const HIP_LAUNCH_CONFIG* config; + HIP_LAUNCH_CONFIG config__val; + hipFunction_t f; + void** params; + void* params__val; + void** extra; + void* extra__val; + } hipDrvLaunchKernelEx; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + } hipDrvMemcpy2DUnaligned; + struct 
{ + const HIP_MEMCPY3D* pCopy; + HIP_MEMCPY3D pCopy__val; + } hipDrvMemcpy3D; + struct { + const HIP_MEMCPY3D* pCopy; + HIP_MEMCPY3D pCopy__val; + hipStream_t stream; + } hipDrvMemcpy3DAsync; + struct { + unsigned int numAttributes; + hipPointer_attribute* attributes; + hipPointer_attribute attributes__val; + void** data; + void* data__val; + hipDeviceptr_t ptr; + } hipDrvPointerGetAttributes; + struct { + hipEvent_t* event; + hipEvent_t event__val; + } hipEventCreate; + struct { + hipEvent_t* event; + hipEvent_t event__val; + unsigned int flags; + } hipEventCreateWithFlags; + struct { + hipEvent_t event; + } hipEventDestroy; + struct { + float* ms; + float ms__val; + hipEvent_t start; + hipEvent_t stop; + } hipEventElapsedTime; + struct { + hipEvent_t event; + } hipEventQuery; + struct { + hipEvent_t event; + hipStream_t stream; + } hipEventRecord; + struct { + hipEvent_t event; + hipStream_t stream; + unsigned int flags; + } hipEventRecordWithFlags; + struct { + hipEvent_t event; + } hipEventSynchronize; + struct { + int device1; + int device2; + unsigned int* linktype; + unsigned int linktype__val; + unsigned int* hopcount; + unsigned int hopcount__val; + } hipExtGetLinkTypeAndHopCount; + struct { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + void* args__val; + size_t sharedMemBytes; + hipStream_t stream; + hipEvent_t startEvent; + hipEvent_t stopEvent; + int flags; + } hipExtLaunchKernel; + struct { + hipLaunchParams* launchParamsList; + hipLaunchParams launchParamsList__val; + int numDevices; + unsigned int flags; + } hipExtLaunchMultiKernelMultiDevice; + struct { + void** ptr; + void* ptr__val; + size_t sizeBytes; + unsigned int flags; + } hipExtMallocWithFlags; + struct { + hipFunction_t f; + unsigned int globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int localWorkSizeX; + unsigned int localWorkSizeY; + unsigned int localWorkSizeZ; + size_t sharedMemBytes; + hipStream_t 
hStream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + hipEvent_t startEvent; + hipEvent_t stopEvent; + unsigned int flags; + } hipExtModuleLaunchKernel; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int cuMaskSize; + const unsigned int* cuMask; + unsigned int cuMask__val; + } hipExtStreamCreateWithCUMask; + struct { + hipStream_t stream; + unsigned int cuMaskSize; + unsigned int* cuMask; + unsigned int cuMask__val; + } hipExtStreamGetCUMask; + struct { + void** devPtr; + void* devPtr__val; + hipExternalMemory_t extMem; + const hipExternalMemoryBufferDesc* bufferDesc; + hipExternalMemoryBufferDesc bufferDesc__val; + } hipExternalMemoryGetMappedBuffer; + struct { + hipMipmappedArray_t* mipmap; + hipMipmappedArray_t mipmap__val; + hipExternalMemory_t extMem; + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc; + hipExternalMemoryMipmappedArrayDesc mipmapDesc__val; + } hipExternalMemoryGetMappedMipmappedArray; + struct { + void* ptr; + } hipFree; + struct { + hipArray_t array; + } hipFreeArray; + struct { + void* dev_ptr; + hipStream_t stream; + } hipFreeAsync; + struct { + void* ptr; + } hipFreeHost; + struct { + hipMipmappedArray_t mipmappedArray; + } hipFreeMipmappedArray; + struct { + int* value; + int value__val; + hipFunction_attribute attrib; + hipFunction_t hfunc; + } hipFuncGetAttribute; + struct { + hipFuncAttributes* attr; + hipFuncAttributes attr__val; + const void* func; + } hipFuncGetAttributes; + struct { + const void* func; + hipFuncAttribute attr; + int value; + } hipFuncSetAttribute; + struct { + const void* func; + hipFuncCache_t config; + } hipFuncSetCacheConfig; + struct { + const void* func; + hipSharedMemConfig config; + } hipFuncSetSharedMemConfig; + struct { + unsigned int* pHipDeviceCount; + unsigned int pHipDeviceCount__val; + int* pHipDevices; + int pHipDevices__val; + unsigned int hipDeviceCount; + hipGLDeviceList deviceList; + } hipGLGetDevices; + struct { + 
hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipArray_const_t array; + } hipGetChannelDesc; + struct { + int* deviceId; + int deviceId__val; + } hipGetDevice; + struct { + int* count; + int count__val; + } hipGetDeviceCount; + struct { + unsigned int* flags; + unsigned int flags__val; + } hipGetDeviceFlags; + struct { + hipDeviceProp_tR0000* prop; + hipDeviceProp_tR0000 prop__val; + int device; + } hipGetDevicePropertiesR0000; + struct { + hipDeviceProp_tR0600* prop; + hipDeviceProp_tR0600 prop__val; + int deviceId; + } hipGetDevicePropertiesR0600; + struct { + const char* symbol; + char symbol__val; + void** funcPtr; + void* funcPtr__val; + unsigned long long flags; + hipDriverEntryPointQueryResult* driverStatus; + hipDriverEntryPointQueryResult driverStatus__val; + } hipGetDriverEntryPoint; + struct { + hipFunction_t* functionPtr; + hipFunction_t functionPtr__val; + const void* symbolPtr; + } hipGetFuncBySymbol; + struct { + hipArray_t* levelArray; + hipArray_t levelArray__val; + hipMipmappedArray_const_t mipmappedArray; + unsigned int level; + } hipGetMipmappedArrayLevel; + struct { + const char* symbol; + char symbol__val; + void** pfn; + void* pfn__val; + int hipVersion; + uint64_t flags; + hipDriverProcAddressQueryResult* symbolStatus; + hipDriverProcAddressQueryResult symbolStatus__val; + } hipGetProcAddress; + struct { + void** devPtr; + void* devPtr__val; + const void* symbol; + } hipGetSymbolAddress; + struct { + size_t* size; + size_t size__val; + const void* symbol; + } hipGetSymbolSize; + struct { + hipGraphNode_t* phGraphNode; + hipGraphNode_t phGraphNode__val; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + const hipBatchMemOpNodeParams* nodeParams; + hipBatchMemOpNodeParams nodeParams__val; + } hipGraphAddBatchMemOpNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + 
hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipGraph_t childGraph; + } hipGraphAddChildGraphNode; + struct { + hipGraph_t graph; + const hipGraphNode_t* from; + hipGraphNode_t from__val; + const hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t numDependencies; + } hipGraphAddDependencies; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + } hipGraphAddEmptyNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipEvent_t event; + } hipGraphAddEventRecordNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipEvent_t event; + } hipGraphAddEventWaitNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphAddExternalSemaphoresSignalNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphAddExternalSemaphoresWaitNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams 
pNodeParams__val; + } hipGraphAddHostNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphAddKernelNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipMemAllocNodeParams* pNodeParams; + hipMemAllocNodeParams pNodeParams__val; + } hipGraphAddMemAllocNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dev_ptr; + } hipGraphAddMemFreeNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipMemcpy3DParms* pCopyParams; + hipMemcpy3DParms pCopyParams__val; + } hipGraphAddMemcpyNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphAddMemcpyNode1D; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeFromSymbol; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t 
numDependencies; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeToSymbol; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipMemsetParams* pMemsetParams; + hipMemsetParams pMemsetParams__val; + } hipGraphAddMemsetNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipGraphNodeParams* nodeParams; + hipGraphNodeParams nodeParams__val; + } hipGraphAddNode; + struct { + hipGraphNode_t hNode; + hipBatchMemOpNodeParams* nodeParams_out; + hipBatchMemOpNodeParams nodeParams_out__val; + } hipGraphBatchMemOpNodeGetParams; + struct { + hipGraphNode_t hNode; + hipBatchMemOpNodeParams* nodeParams; + hipBatchMemOpNodeParams nodeParams__val; + } hipGraphBatchMemOpNodeSetParams; + struct { + hipGraphNode_t node; + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + } hipGraphChildGraphNodeGetGraph; + struct { + hipGraph_t* pGraphClone; + hipGraph_t pGraphClone__val; + hipGraph_t originalGraph; + } hipGraphClone; + struct { + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + unsigned int flags; + } hipGraphCreate; + struct { + hipGraph_t graph; + const char* path; + char path__val; + unsigned int flags; + } hipGraphDebugDotPrint; + struct { + hipGraph_t graph; + } hipGraphDestroy; + struct { + hipGraphNode_t node; + } hipGraphDestroyNode; + struct { + hipGraphNode_t node; + hipEvent_t* event_out; + hipEvent_t event_out__val; + } hipGraphEventRecordNodeGetEvent; + struct { + hipGraphNode_t node; + hipEvent_t event; + } hipGraphEventRecordNodeSetEvent; + struct { + hipGraphNode_t node; + hipEvent_t* event_out; + hipEvent_t event_out__val; + } hipGraphEventWaitNodeGetEvent; + struct { + hipGraphNode_t node; + 
hipEvent_t event; + } hipGraphEventWaitNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipBatchMemOpNodeParams* nodeParams; + hipBatchMemOpNodeParams nodeParams__val; + } hipGraphExecBatchMemOpNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipGraph_t childGraph; + } hipGraphExecChildGraphNodeSetParams; + struct { + hipGraphExec_t graphExec; + } hipGraphExecDestroy; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; + } hipGraphExecEventRecordNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; + } hipGraphExecEventWaitNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphExecExternalSemaphoresSignalNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphExecExternalSemaphoresWaitNodeSetParams; + struct { + hipGraphExec_t graphExec; + unsigned long long* flags; + unsigned long long flags__val; + } hipGraphExecGetFlags; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphExecHostNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphExecKernelNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphExecMemcpyNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParams1D; + struct { + 
hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsFromSymbol; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsToSymbol; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphExecMemsetNodeSetParams; + struct { + hipGraphExec_t graphExec; + hipGraphNode_t node; + hipGraphNodeParams* nodeParams; + hipGraphNodeParams nodeParams__val; + } hipGraphExecNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraph_t hGraph; + hipGraphNode_t* hErrorNode_out; + hipGraphNode_t hErrorNode_out__val; + hipGraphExecUpdateResult* updateResult_out; + hipGraphExecUpdateResult updateResult_out__val; + } hipGraphExecUpdate; + struct { + hipGraphNode_t hNode; + hipExternalSemaphoreSignalNodeParams* params_out; + hipExternalSemaphoreSignalNodeParams params_out__val; + } hipGraphExternalSemaphoresSignalNodeGetParams; + struct { + hipGraphNode_t hNode; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphExternalSemaphoresSignalNodeSetParams; + struct { + hipGraphNode_t hNode; + hipExternalSemaphoreWaitNodeParams* params_out; + hipExternalSemaphoreWaitNodeParams params_out__val; + } hipGraphExternalSemaphoresWaitNodeGetParams; + struct { + hipGraphNode_t hNode; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphExternalSemaphoresWaitNodeSetParams; + struct { + hipGraph_t graph; + hipGraphNode_t* from; + hipGraphNode_t from__val; + hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t* numEdges; + size_t numEdges__val; + } hipGraphGetEdges; + struct { + hipGraph_t graph; + 
hipGraphNode_t* nodes; + hipGraphNode_t nodes__val; + size_t* numNodes; + size_t numNodes__val; + } hipGraphGetNodes; + struct { + hipGraph_t graph; + hipGraphNode_t* pRootNodes; + hipGraphNode_t pRootNodes__val; + size_t* pNumRootNodes; + size_t pNumRootNodes__val; + } hipGraphGetRootNodes; + struct { + hipGraphNode_t node; + hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphHostNodeGetParams; + struct { + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphHostNodeSetParams; + struct { + hipGraphExec_t* pGraphExec; + hipGraphExec_t pGraphExec__val; + hipGraph_t graph; + hipGraphNode_t* pErrorNode; + hipGraphNode_t pErrorNode__val; + char* pLogBuffer; + char pLogBuffer__val; + size_t bufferSize; + } hipGraphInstantiate; + struct { + hipGraphExec_t* pGraphExec; + hipGraphExec_t pGraphExec__val; + hipGraph_t graph; + unsigned long long flags; + } hipGraphInstantiateWithFlags; + struct { + hipGraphExec_t* pGraphExec; + hipGraphExec_t pGraphExec__val; + hipGraph_t graph; + hipGraphInstantiateParams* instantiateParams; + hipGraphInstantiateParams instantiateParams__val; + } hipGraphInstantiateWithParams; + struct { + hipGraphNode_t hSrc; + hipGraphNode_t hDst; + } hipGraphKernelNodeCopyAttributes; + struct { + hipGraphNode_t hNode; + hipLaunchAttributeID attr; + hipLaunchAttributeValue* value; + hipLaunchAttributeValue value__val; + } hipGraphKernelNodeGetAttribute; + struct { + hipGraphNode_t node; + hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphKernelNodeGetParams; + struct { + hipGraphNode_t hNode; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value; + hipLaunchAttributeValue value__val; + } hipGraphKernelNodeSetAttribute; + struct { + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphKernelNodeSetParams; + struct { + hipGraphExec_t graphExec; + hipStream_t 
stream; + } hipGraphLaunch; + struct { + hipGraphNode_t node; + hipMemAllocNodeParams* pNodeParams; + hipMemAllocNodeParams pNodeParams__val; + } hipGraphMemAllocNodeGetParams; + struct { + hipGraphNode_t node; + void* dev_ptr; + } hipGraphMemFreeNodeGetParams; + struct { + hipGraphNode_t node; + hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphMemcpyNodeGetParams; + struct { + hipGraphNode_t node; + const hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphMemcpyNodeSetParams; + struct { + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParams1D; + struct { + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsFromSymbol; + struct { + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsToSymbol; + struct { + hipGraphNode_t node; + hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphMemsetNodeGetParams; + struct { + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphMemsetNodeSetParams; + struct { + hipGraphNode_t* pNode; + hipGraphNode_t pNode__val; + hipGraphNode_t originalNode; + hipGraph_t clonedGraph; + } hipGraphNodeFindInClone; + struct { + hipGraphNode_t node; + hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t* pNumDependencies; + size_t pNumDependencies__val; + } hipGraphNodeGetDependencies; + struct { + hipGraphNode_t node; + hipGraphNode_t* pDependentNodes; + hipGraphNode_t pDependentNodes__val; + size_t* pNumDependentNodes; + size_t pNumDependentNodes__val; + } hipGraphNodeGetDependentNodes; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int* isEnabled; + unsigned int isEnabled__val; + } hipGraphNodeGetEnabled; 
+ struct { + hipGraphNode_t node; + hipGraphNodeType* pType; + hipGraphNodeType pType__val; + } hipGraphNodeGetType; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int isEnabled; + } hipGraphNodeSetEnabled; + struct { + hipGraphNode_t node; + hipGraphNodeParams* nodeParams; + hipGraphNodeParams nodeParams__val; + } hipGraphNodeSetParams; + struct { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + } hipGraphReleaseUserObject; + struct { + hipGraph_t graph; + const hipGraphNode_t* from; + hipGraphNode_t from__val; + const hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t numDependencies; + } hipGraphRemoveDependencies; + struct { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + unsigned int flags; + } hipGraphRetainUserObject; + struct { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphUpload; + struct { + hipGraphicsResource** resource; + hipGraphicsResource* resource__val; + GLuint buffer; + unsigned int flags; + } hipGraphicsGLRegisterBuffer; + struct { + hipGraphicsResource** resource; + hipGraphicsResource* resource__val; + GLuint image; + GLenum target; + unsigned int flags; + } hipGraphicsGLRegisterImage; + struct { + int count; + hipGraphicsResource_t* resources; + hipGraphicsResource_t resources__val; + hipStream_t stream; + } hipGraphicsMapResources; + struct { + void** devPtr; + void* devPtr__val; + size_t* size; + size_t size__val; + hipGraphicsResource_t resource; + } hipGraphicsResourceGetMappedPointer; + struct { + hipArray_t* array; + hipArray_t array__val; + hipGraphicsResource_t resource; + unsigned int arrayIndex; + unsigned int mipLevel; + } hipGraphicsSubResourceGetMappedArray; + struct { + int count; + hipGraphicsResource_t* resources; + hipGraphicsResource_t resources__val; + hipStream_t stream; + } hipGraphicsUnmapResources; + struct { + hipGraphicsResource_t resource; + } hipGraphicsUnregisterResource; + struct { + hipFunction_t f; + unsigned int 
globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + hipEvent_t startEvent; + hipEvent_t stopEvent; + } hipHccModuleLaunchKernel; + struct { + void** ptr; + void* ptr__val; + size_t size; + unsigned int flags; + } hipHostAlloc; + struct { + void* ptr; + } hipHostFree; + struct { + void** devPtr; + void* devPtr__val; + void* hstPtr; + unsigned int flags; + } hipHostGetDevicePointer; + struct { + unsigned int* flagsPtr; + unsigned int flagsPtr__val; + void* hostPtr; + } hipHostGetFlags; + struct { + void** ptr; + void* ptr__val; + size_t size; + unsigned int flags; + } hipHostMalloc; + struct { + void* hostPtr; + size_t sizeBytes; + unsigned int flags; + } hipHostRegister; + struct { + void* hostPtr; + } hipHostUnregister; + struct { + hipExternalMemory_t* extMem_out; + hipExternalMemory_t extMem_out__val; + const hipExternalMemoryHandleDesc* memHandleDesc; + hipExternalMemoryHandleDesc memHandleDesc__val; + } hipImportExternalMemory; + struct { + hipExternalSemaphore_t* extSem_out; + hipExternalSemaphore_t extSem_out__val; + const hipExternalSemaphoreHandleDesc* semHandleDesc; + hipExternalSemaphoreHandleDesc semHandleDesc__val; + } hipImportExternalSemaphore; + struct { + unsigned int flags; + } hipInit; + struct { + void* devPtr; + } hipIpcCloseMemHandle; + struct { + hipIpcEventHandle_t* handle; + hipIpcEventHandle_t handle__val; + hipEvent_t event; + } hipIpcGetEventHandle; + struct { + hipIpcMemHandle_t* handle; + hipIpcMemHandle_t handle__val; + void* devPtr; + } hipIpcGetMemHandle; + struct { + hipEvent_t* event; + hipEvent_t event__val; + hipIpcEventHandle_t handle; + } hipIpcOpenEventHandle; + struct { + void** devPtr; + void* devPtr__val; + hipIpcMemHandle_t handle; + unsigned int flags; + } hipIpcOpenMemHandle; + 
struct { + const void* hostFunction; + } hipLaunchByPtr; + struct { + const void* f; + dim3 gridDim; + dim3 blockDimX; + void** kernelParams; + void* kernelParams__val; + unsigned int sharedMemBytes; + hipStream_t stream; + } hipLaunchCooperativeKernel; + struct { + hipLaunchParams* launchParamsList; + hipLaunchParams launchParamsList__val; + int numDevices; + unsigned int flags; + } hipLaunchCooperativeKernelMultiDevice; + struct { + hipStream_t stream; + hipHostFn_t fn; + void* userData; + } hipLaunchHostFunc; + struct { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + void* args__val; + size_t sharedMemBytes; + hipStream_t stream; + } hipLaunchKernel; + struct { + const hipLaunchConfig_t* config; + hipLaunchConfig_t config__val; + const void* fPtr; + void** args; + void* args__val; + } hipLaunchKernelExC; + struct { + hipKernel_t* pKernel; + hipKernel_t pKernel__val; + hipLibrary_t library; + const char* name; + char name__val; + } hipLibraryGetKernel; + struct { + unsigned int* count; + unsigned int count__val; + hipLibrary_t library; + } hipLibraryGetKernelCount; + struct { + hipLibrary_t* library; + hipLibrary_t library__val; + const void* code; + hipJitOption** jitOptions; + hipJitOption* jitOptions__val; + void** jitOptionsValues; + void* jitOptionsValues__val; + unsigned int numJitOptions; + hipLibraryOption** libraryOptions; + hipLibraryOption* libraryOptions__val; + void** libraryOptionValues; + void* libraryOptionValues__val; + unsigned int numLibraryOptions; + } hipLibraryLoadData; + struct { + hipLibrary_t* library; + hipLibrary_t library__val; + const char* fileName; + char fileName__val; + hipJitOption** jitOptions; + hipJitOption* jitOptions__val; + void** jitOptionsValues; + void* jitOptionsValues__val; + unsigned int numJitOptions; + hipLibraryOption** libraryOptions; + hipLibraryOption* libraryOptions__val; + void** libraryOptionValues; + void* libraryOptionValues__val; + unsigned int numLibraryOptions; + } 
hipLibraryLoadFromFile; + struct { + hipLibrary_t library; + } hipLibraryUnload; + struct { + hipLinkState_t state; + hipJitInputType type; + void* data; + size_t size; + const char* name; + char name__val; + unsigned int numOptions; + hipJitOption* options; + hipJitOption options__val; + void** optionValues; + void* optionValues__val; + } hipLinkAddData; + struct { + hipLinkState_t state; + hipJitInputType type; + const char* path; + char path__val; + unsigned int numOptions; + hipJitOption* options; + hipJitOption options__val; + void** optionValues; + void* optionValues__val; + } hipLinkAddFile; + struct { + hipLinkState_t state; + void** hipBinOut; + void* hipBinOut__val; + size_t* sizeOut; + size_t sizeOut__val; + } hipLinkComplete; + struct { + unsigned int numOptions; + hipJitOption* options; + hipJitOption options__val; + void** optionValues; + void* optionValues__val; + hipLinkState_t* stateOut; + hipLinkState_t stateOut__val; + } hipLinkCreate; + struct { + hipLinkState_t state; + } hipLinkDestroy; + struct { + void** ptr; + void* ptr__val; + size_t size; + } hipMalloc; + struct { + hipPitchedPtr* pitchedDevPtr; + hipPitchedPtr pitchedDevPtr__val; + hipExtent extent; + } hipMalloc3D; + struct { + hipArray_t* array; + hipArray_t array__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent extent; + unsigned int flags; + } hipMalloc3DArray; + struct { + hipArray_t* array; + hipArray_t array__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + size_t width; + size_t height; + unsigned int flags; + } hipMallocArray; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + hipStream_t stream; + } hipMallocAsync; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + hipMemPool_t mem_pool; + hipStream_t stream; + } hipMallocFromPoolAsync; + struct { + void** ptr; + void* ptr__val; + size_t size; + } hipMallocHost; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + 
unsigned int flags; + } hipMallocManaged; + struct { + hipMipmappedArray_t* mipmappedArray; + hipMipmappedArray_t mipmappedArray__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent extent; + unsigned int numLevels; + unsigned int flags; + } hipMallocMipmappedArray; + struct { + void** ptr; + void* ptr__val; + size_t* pitch; + size_t pitch__val; + size_t width; + size_t height; + } hipMallocPitch; + struct { + void* devPtr; + size_t size; + } hipMemAddressFree; + struct { + void** ptr; + void* ptr__val; + size_t size; + size_t alignment; + void* addr; + unsigned long long flags; + } hipMemAddressReserve; + struct { + const void* dev_ptr; + size_t count; + hipMemoryAdvise advice; + int device; + } hipMemAdvise; + struct { + const void* dev_ptr; + size_t count; + hipMemoryAdvise advice; + hipMemLocation location; + } hipMemAdvise_v2; + struct { + void** ptr; + void* ptr__val; + size_t size; + } hipMemAllocHost; + struct { + hipDeviceptr_t* dptr; + hipDeviceptr_t dptr__val; + size_t* pitch; + size_t pitch__val; + size_t widthInBytes; + size_t height; + unsigned int elementSizeBytes; + } hipMemAllocPitch; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + size_t size; + const hipMemAllocationProp* prop; + hipMemAllocationProp prop__val; + unsigned long long flags; + } hipMemCreate; + struct { + void* shareableHandle; + hipMemGenericAllocationHandle_t handle; + hipMemAllocationHandleType handleType; + unsigned long long flags; + } hipMemExportToShareableHandle; + struct { + unsigned long long* flags; + unsigned long long flags__val; + const hipMemLocation* location; + hipMemLocation location__val; + void* ptr; + } hipMemGetAccess; + struct { + hipDeviceptr_t* pbase; + hipDeviceptr_t pbase__val; + size_t* psize; + size_t psize__val; + hipDeviceptr_t dptr; + } hipMemGetAddressRange; + struct { + size_t* granularity; + size_t granularity__val; + const hipMemAllocationProp* prop; + 
hipMemAllocationProp prop__val; + hipMemAllocationGranularity_flags option; + } hipMemGetAllocationGranularity; + struct { + hipMemAllocationProp* prop; + hipMemAllocationProp prop__val; + hipMemGenericAllocationHandle_t handle; + } hipMemGetAllocationPropertiesFromHandle; + struct { + void* handle; + hipDeviceptr_t dptr; + size_t size; + hipMemRangeHandleType handleType; + unsigned long long flags; + } hipMemGetHandleForAddressRange; + struct { + size_t* free; + size_t free__val; + size_t* total; + size_t total__val; + } hipMemGetInfo; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + void* osHandle; + hipMemAllocationHandleType shHandleType; + } hipMemImportFromShareableHandle; + struct { + void* ptr; + size_t size; + size_t offset; + hipMemGenericAllocationHandle_t handle; + unsigned long long flags; + } hipMemMap; + struct { + hipArrayMapInfo* mapInfoList; + hipArrayMapInfo mapInfoList__val; + unsigned int count; + hipStream_t stream; + } hipMemMapArrayAsync; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + const hipMemPoolProps* pool_props; + hipMemPoolProps pool_props__val; + } hipMemPoolCreate; + struct { + hipMemPool_t mem_pool; + } hipMemPoolDestroy; + struct { + hipMemPoolPtrExportData* export_data; + hipMemPoolPtrExportData export_data__val; + void* dev_ptr; + } hipMemPoolExportPointer; + struct { + void* shared_handle; + hipMemPool_t mem_pool; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } hipMemPoolExportToShareableHandle; + struct { + hipMemAccessFlags* flags; + hipMemAccessFlags flags__val; + hipMemPool_t mem_pool; + hipMemLocation* location; + hipMemLocation location__val; + } hipMemPoolGetAccess; + struct { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolGetAttribute; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + void* shared_handle; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } 
hipMemPoolImportFromShareableHandle; + struct { + void** dev_ptr; + void* dev_ptr__val; + hipMemPool_t mem_pool; + hipMemPoolPtrExportData* export_data; + hipMemPoolPtrExportData export_data__val; + } hipMemPoolImportPointer; + struct { + hipMemPool_t mem_pool; + const hipMemAccessDesc* desc_list; + hipMemAccessDesc desc_list__val; + size_t count; + } hipMemPoolSetAccess; + struct { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolSetAttribute; + struct { + hipMemPool_t mem_pool; + size_t min_bytes_to_hold; + } hipMemPoolTrimTo; + struct { + const void* dev_ptr; + size_t count; + int device; + hipStream_t stream; + } hipMemPrefetchAsync; + struct { + const void* dev_ptr; + size_t count; + hipMemLocation location; + unsigned int flags; + hipStream_t stream; + } hipMemPrefetchAsync_v2; + struct { + void* ptr; + size_t* size; + size_t size__val; + } hipMemPtrGetInfo; + struct { + void* data; + size_t data_size; + hipMemRangeAttribute attribute; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttribute; + struct { + void** data; + void* data__val; + size_t* data_sizes; + size_t data_sizes__val; + hipMemRangeAttribute* attributes; + hipMemRangeAttribute attributes__val; + size_t num_attributes; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttributes; + struct { + hipMemGenericAllocationHandle_t handle; + } hipMemRelease; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + void* addr; + } hipMemRetainAllocationHandle; + struct { + void* ptr; + size_t size; + const hipMemAccessDesc* desc; + hipMemAccessDesc desc__val; + size_t count; + } hipMemSetAccess; + struct { + void* ptr; + size_t size; + } hipMemUnmap; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + } hipMemcpy; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2D; + struct { + 
hipArray_t dst; + size_t wOffsetDst; + size_t hOffsetDst; + hipArray_const_t src; + size_t wOffsetSrc; + size_t hOffsetSrc; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DArrayToArray; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DAsync; + struct { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DFromArray; + struct { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DFromArrayAsync; + struct { + hipArray_t dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DToArray; + struct { + hipArray_t dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DToArrayAsync; + struct { + const hipMemcpy3DParms* p; + hipMemcpy3DParms p__val; + } hipMemcpy3D; + struct { + const hipMemcpy3DParms* p; + hipMemcpy3DParms p__val; + hipStream_t stream; + } hipMemcpy3DAsync; + struct { + size_t numOps; + hipMemcpy3DBatchOp* opList; + hipMemcpy3DBatchOp opList__val; + size_t* failIdx; + size_t failIdx__val; + unsigned long long flags; + hipStream_t stream; + } hipMemcpy3DBatchAsync; + struct { + hipMemcpy3DPeerParms* p; + hipMemcpy3DPeerParms p__val; + } hipMemcpy3DPeer; + struct { + hipMemcpy3DPeerParms* p; + hipMemcpy3DPeerParms p__val; + hipStream_t stream; + } hipMemcpy3DPeerAsync; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyAsync; + struct { + hipArray_t dstArray; + size_t dstOffset; + hipArray_t srcArray; + size_t srcOffset; 
+ size_t ByteCount; + } hipMemcpyAtoA; + struct { + hipDeviceptr_t dstDevice; + hipArray_t srcArray; + size_t srcOffset; + size_t ByteCount; + } hipMemcpyAtoD; + struct { + void* dst; + hipArray_t srcArray; + size_t srcOffset; + size_t count; + } hipMemcpyAtoH; + struct { + void* dstHost; + hipArray_t srcArray; + size_t srcOffset; + size_t ByteCount; + hipStream_t stream; + } hipMemcpyAtoHAsync; + struct { + void** dsts; + void* dsts__val; + void** srcs; + void* srcs__val; + size_t* sizes; + size_t sizes__val; + size_t count; + hipMemcpyAttributes* attrs; + hipMemcpyAttributes attrs__val; + size_t* attrsIdxs; + size_t attrsIdxs__val; + size_t numAttrs; + size_t* failIdx; + size_t failIdx__val; + hipStream_t stream; + } hipMemcpyBatchAsync; + struct { + hipArray_t dstArray; + size_t dstOffset; + hipDeviceptr_t srcDevice; + size_t ByteCount; + } hipMemcpyDtoA; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoD; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoDAsync; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoH; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoHAsync; + struct { + void* dst; + hipArray_const_t srcArray; + size_t wOffset; + size_t hOffset; + size_t count; + hipMemcpyKind kind; + } hipMemcpyFromArray; + struct { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyFromSymbol; + struct { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyFromSymbolAsync; + struct { + hipArray_t dstArray; + size_t dstOffset; + const void* srcHost; + size_t count; + } hipMemcpyHtoA; + struct { + hipArray_t dstArray; + size_t dstOffset; + const void* srcHost; + size_t ByteCount; + hipStream_t stream; + } hipMemcpyHtoAAsync; + struct { + 
hipDeviceptr_t dst; + const void* src; + size_t sizeBytes; + } hipMemcpyHtoD; + struct { + hipDeviceptr_t dst; + const void* src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyHtoDAsync; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + } hipMemcpyParam2D; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + hipStream_t stream; + } hipMemcpyParam2DAsync; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDeviceId; + size_t sizeBytes; + } hipMemcpyPeer; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDevice; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyPeerAsync; + struct { + hipArray_t dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipMemcpyToArray; + struct { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyToSymbol; + struct { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyToSymbolAsync; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyWithStream; + struct { + void* dst; + int value; + size_t sizeBytes; + } hipMemset; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + } hipMemset2D; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemset2DAsync; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + } hipMemset3D; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + hipStream_t stream; + } hipMemset3DAsync; + struct { + void* dst; + int value; + size_t sizeBytes; + hipStream_t stream; + } hipMemsetAsync; + struct { + hipDeviceptr_t dest; + unsigned short value; + size_t count; + } hipMemsetD16; + struct { + hipDeviceptr_t dest; + unsigned short 
value; + size_t count; + hipStream_t stream; + } hipMemsetD16Async; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned short value; + size_t width; + size_t height; + } hipMemsetD2D16; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned short value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D16Async; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned int value; + size_t width; + size_t height; + } hipMemsetD2D32; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D32Async; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned char value; + size_t width; + size_t height; + } hipMemsetD2D8; + struct { + hipDeviceptr_t dst; + size_t dstPitch; + unsigned char value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemsetD2D8Async; + struct { + hipDeviceptr_t dest; + int value; + size_t count; + } hipMemsetD32; + struct { + hipDeviceptr_t dst; + int value; + size_t count; + hipStream_t stream; + } hipMemsetD32Async; + struct { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + } hipMemsetD8; + struct { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + hipStream_t stream; + } hipMemsetD8Async; + struct { + hipMipmappedArray_t* pHandle; + hipMipmappedArray_t pHandle__val; + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc; + HIP_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc__val; + unsigned int numMipmapLevels; + } hipMipmappedArrayCreate; + struct { + hipMipmappedArray_t hMipmappedArray; + } hipMipmappedArrayDestroy; + struct { + hipArray_t* pLevelArray; + hipArray_t pLevelArray__val; + hipMipmappedArray_t hMipMappedArray; + unsigned int level; + } hipMipmappedArrayGetLevel; + struct { + hipFunction_t* function; + hipFunction_t function__val; + hipModule_t module; + const char* kname; + char kname__val; + } hipModuleGetFunction; + struct { + unsigned int* count; + unsigned int 
count__val; + hipModule_t mod; + } hipModuleGetFunctionCount; + struct { + hipDeviceptr_t* dptr; + hipDeviceptr_t dptr__val; + size_t* bytes; + size_t bytes__val; + hipModule_t hmod; + const char* name; + char name__val; + } hipModuleGetGlobal; + struct { + textureReference** texRef; + textureReference* texRef__val; + hipModule_t hmod; + const char* name; + char name__val; + } hipModuleGetTexRef; + struct { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void* kernelParams__val; + } hipModuleLaunchCooperativeKernel; + struct { + hipFunctionLaunchParams* launchParamsList; + hipFunctionLaunchParams launchParamsList__val; + unsigned int numDevices; + unsigned int flags; + } hipModuleLaunchCooperativeKernelMultiDevice; + struct { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + } hipModuleLaunchKernel; + struct { + hipModule_t* module; + hipModule_t module__val; + const char* fname; + char fname__val; + } hipModuleLoad; + struct { + hipModule_t* module; + hipModule_t module__val; + const void* image; + } hipModuleLoadData; + struct { + hipModule_t* module; + hipModule_t module__val; + const void* image; + unsigned int numOptions; + hipJitOption* options; + hipJitOption options__val; + void** optionsValues; + void* optionsValues__val; + } hipModuleLoadDataEx; + struct { + hipModule_t* module; + hipModule_t module__val; + const void* fatbin; + } hipModuleLoadFatBinary; + struct { + int* numBlocks; + int numBlocks__val; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + } 
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + struct { + int* numBlocks; + int numBlocks__val; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + unsigned int flags; + } hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipModuleOccupancyMaxPotentialBlockSize; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + unsigned int flags; + } hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + struct { + hipModule_t module; + } hipModuleUnload; + struct { + int* numBlocks; + int numBlocks__val; + const void* f; + int blockSize; + size_t dynamicSMemSize; + } hipOccupancyMaxActiveBlocksPerMultiprocessor; + struct { + int* numBlocks; + int numBlocks__val; + const void* f; + int blockSize; + size_t dynamicSMemSize; + unsigned int flags; + } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + const void* f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipOccupancyMaxPotentialBlockSize; + struct { + void* data; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerGetAttribute; + struct { + hipPointerAttribute_t* attributes; + hipPointerAttribute_t attributes__val; + const void* ptr; + } hipPointerGetAttributes; + struct { + const void* value; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerSetAttribute; + struct { + int* runtimeVersion; + int runtimeVersion__val; + } hipRuntimeGetVersion; + struct { + int deviceId; + } hipSetDevice; + struct { + unsigned int flags; + } hipSetDeviceFlags; + struct { + int* device_arr; + int device_arr__val; + int len; + } hipSetValidDevices; + struct { + const void* arg; + size_t size; + size_t offset; + 
} hipSetupArgument; + struct { + const hipExternalSemaphore_t* extSemArray; + hipExternalSemaphore_t extSemArray__val; + const hipExternalSemaphoreSignalParams* paramsArray; + hipExternalSemaphoreSignalParams paramsArray__val; + unsigned int numExtSems; + hipStream_t stream; + } hipSignalExternalSemaphoresAsync; + struct { + hipStream_t stream; + hipStreamCallback_t callback; + void* userData; + unsigned int flags; + } hipStreamAddCallback; + struct { + hipStream_t stream; + void* dev_ptr; + size_t length; + unsigned int flags; + } hipStreamAttachMemAsync; + struct { + hipStream_t stream; + unsigned int count; + hipStreamBatchMemOpParams* paramArray; + hipStreamBatchMemOpParams paramArray__val; + unsigned int flags; + } hipStreamBatchMemOp; + struct { + hipStream_t stream; + hipStreamCaptureMode mode; + } hipStreamBeginCapture; + struct { + hipStream_t stream; + hipGraph_t graph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + const hipGraphEdgeData* dependencyData; + hipGraphEdgeData dependencyData__val; + size_t numDependencies; + hipStreamCaptureMode mode; + } hipStreamBeginCaptureToGraph; + struct { + hipStream_t* stream; + hipStream_t stream__val; + } hipStreamCreate; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int flags; + } hipStreamCreateWithFlags; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int flags; + int priority; + } hipStreamCreateWithPriority; + struct { + hipStream_t stream; + } hipStreamDestroy; + struct { + hipStream_t stream; + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + } hipStreamEndCapture; + struct { + hipStream_t stream; + hipLaunchAttributeID attr; + hipLaunchAttributeValue* value_out; + hipLaunchAttributeValue value_out__val; + } hipStreamGetAttribute; + struct { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + hipStreamCaptureStatus pCaptureStatus__val; + unsigned long long* pId; + unsigned long long pId__val; + } 
hipStreamGetCaptureInfo; + struct { + hipStream_t stream; + hipStreamCaptureStatus* captureStatus_out; + hipStreamCaptureStatus captureStatus_out__val; + unsigned long long* id_out; + unsigned long long id_out__val; + hipGraph_t* graph_out; + hipGraph_t graph_out__val; + const hipGraphNode_t** dependencies_out; + const hipGraphNode_t* dependencies_out__val; + size_t* numDependencies_out; + size_t numDependencies_out__val; + } hipStreamGetCaptureInfo_v2; + struct { + hipStream_t stream; + hipDevice_t* device; + hipDevice_t device__val; + } hipStreamGetDevice; + struct { + hipStream_t stream; + unsigned int* flags; + unsigned int flags__val; + } hipStreamGetFlags; + struct { + hipStream_t stream; + unsigned long long* streamId; + unsigned long long streamId__val; + } hipStreamGetId; + struct { + hipStream_t stream; + int* priority; + int priority__val; + } hipStreamGetPriority; + struct { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + hipStreamCaptureStatus pCaptureStatus__val; + } hipStreamIsCapturing; + struct { + hipStream_t stream; + } hipStreamQuery; + struct { + hipStream_t stream; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value; + hipLaunchAttributeValue value__val; + } hipStreamSetAttribute; + struct { + hipStream_t stream; + } hipStreamSynchronize; + struct { + hipStream_t stream; + hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + unsigned int flags; + } hipStreamUpdateCaptureDependencies; + struct { + hipStream_t stream; + hipEvent_t event; + unsigned int flags; + } hipStreamWaitEvent; + struct { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + unsigned int mask; + } hipStreamWaitValue32; + struct { + hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + uint64_t mask; + } hipStreamWaitValue64; + struct { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + } hipStreamWriteValue32; + struct { + 
hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + } hipStreamWriteValue64; + struct { + hipDeviceptr_t* dev_ptr; + hipDeviceptr_t dev_ptr__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetAddress; + struct { + hipArray_t* pArray; + hipArray_t pArray__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetArray; + struct { + float* pBorderColor; + float pBorderColor__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetBorderColor; + struct { + unsigned int* pFlags; + unsigned int pFlags__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetFlags; + struct { + hipArray_Format* pFormat; + hipArray_Format pFormat__val; + int* pNumChannels; + int pNumChannels__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetFormat; + struct { + int* pmaxAnsio; + int pmaxAnsio__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMaxAnisotropy; + struct { + hipMipmappedArray_t* pArray; + hipMipmappedArray_t pArray__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMipMappedArray; + struct { + float* pbias; + float pbias__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMipmapLevelBias; + struct { + float* pminMipmapLevelClamp; + float pminMipmapLevelClamp__val; + float* pmaxMipmapLevelClamp; + float pmaxMipmapLevelClamp__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMipmapLevelClamp; + struct { + size_t* ByteOffset; + size_t ByteOffset__val; + textureReference* texRef; + textureReference texRef__val; + hipDeviceptr_t dptr; + size_t bytes; + } hipTexRefSetAddress; + struct { + textureReference* texRef; + textureReference texRef__val; + const HIP_ARRAY_DESCRIPTOR* desc; + HIP_ARRAY_DESCRIPTOR desc__val; + hipDeviceptr_t dptr; + size_t Pitch; + 
} hipTexRefSetAddress2D; + struct { + textureReference* tex; + textureReference tex__val; + hipArray_const_t array; + unsigned int flags; + } hipTexRefSetArray; + struct { + textureReference* texRef; + textureReference texRef__val; + float* pBorderColor; + float pBorderColor__val; + } hipTexRefSetBorderColor; + struct { + textureReference* texRef; + textureReference texRef__val; + unsigned int Flags; + } hipTexRefSetFlags; + struct { + textureReference* texRef; + textureReference texRef__val; + hipArray_Format fmt; + int NumPackedComponents; + } hipTexRefSetFormat; + struct { + textureReference* texRef; + textureReference texRef__val; + unsigned int maxAniso; + } hipTexRefSetMaxAnisotropy; + struct { + textureReference* texRef; + textureReference texRef__val; + float bias; + } hipTexRefSetMipmapLevelBias; + struct { + textureReference* texRef; + textureReference texRef__val; + float minMipMapLevelClamp; + float maxMipMapLevelClamp; + } hipTexRefSetMipmapLevelClamp; + struct { + textureReference* texRef; + textureReference texRef__val; + hipMipmappedArray* mipmappedArray; + hipMipmappedArray mipmappedArray__val; + unsigned int Flags; + } hipTexRefSetMipmappedArray; + struct { + hipStreamCaptureMode* mode; + hipStreamCaptureMode mode__val; + } hipThreadExchangeStreamCaptureMode; + struct { + hipUserObject_t* object_out; + hipUserObject_t object_out__val; + void* ptr; + hipHostFn_t destroy; + unsigned int initialRefcount; + unsigned int flags; + } hipUserObjectCreate; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRelease; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRetain; + struct { + const hipExternalSemaphore_t* extSemArray; + hipExternalSemaphore_t extSemArray__val; + const hipExternalSemaphoreWaitParams* paramsArray; + hipExternalSemaphoreWaitParams paramsArray__val; + unsigned int numExtSems; + hipStream_t stream; + } hipWaitExternalSemaphoresAsync; + } args; + uint64_t *phase_data; +} hip_api_data_t; 
// HIP API callbacks args data filling macros
//
// Each INIT_<api>_CB_ARGS_DATA(cb_data) macro fills cb_data.args.<api> by
// assigning one member per recorded argument. The bracketed list in the
// comment above each macro gives the (type, member name) pairs being filled.
// The right-hand sides are NOT macro parameters: they are identifiers that
// must already be in scope where the macro is expanded, and their spelling
// does not always match the member name (e.g. hipArrayCreate stores `array`
// into `pHandle`).
// The cb_data parameter is parenthesized in every expansion so that a
// non-trivial argument such as `*ptr` is evaluated as a whole before `.args`
// is applied to it.
// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')]
#define INIT___hipPopCallConfiguration_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.__hipPopCallConfiguration.gridDim = (dim3*)gridDim; \
  (cb_data).args.__hipPopCallConfiguration.blockDim = (dim3*)blockDim; \
  (cb_data).args.__hipPopCallConfiguration.sharedMem = (size_t*)sharedMem; \
  (cb_data).args.__hipPopCallConfiguration.stream = (hipStream_t*)stream; \
};
// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
#define INIT___hipPushCallConfiguration_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.__hipPushCallConfiguration.gridDim = (dim3)gridDim; \
  (cb_data).args.__hipPushCallConfiguration.blockDim = (dim3)blockDim; \
  (cb_data).args.__hipPushCallConfiguration.sharedMem = (size_t)sharedMem; \
  (cb_data).args.__hipPushCallConfiguration.stream = (hipStream_t)stream; \
};
// hipArray3DCreate[('hipArray_t*', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')]
#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArray3DCreate.array = (hipArray_t*)array; \
  (cb_data).args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY3D_DESCRIPTOR*)pAllocateArray; \
};
// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')]
#define INIT_hipArray3DGetDescriptor_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArray3DGetDescriptor.pArrayDescriptor = (HIP_ARRAY3D_DESCRIPTOR*)pArrayDescriptor; \
  (cb_data).args.hipArray3DGetDescriptor.array = (hipArray_t)array; \
};
// hipArrayCreate[('hipArray_t*', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')]
#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArrayCreate.pHandle = (hipArray_t*)array; \
  (cb_data).args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \
};
// INIT_<api>_CB_ARGS_DATA(cb_data) macros for hipArrayDestroy .. hipDeviceGetAttribute.
// Each one assigns identifiers that must be in scope at the expansion site
// into the members of cb_data.args.<api>; the bracketed list in each comment
// gives the (type, member name) pairs. The source identifier is not always
// spelled like the member (e.g. `properties` -> `prop`, `canAccess` ->
// `canAccessPeer`, `deviceId` -> `ordinal`).
// cb_data is parenthesized so arguments like `*ptr` expand correctly.
// hipArrayDestroy[('hipArray_t', 'array')]
#define INIT_hipArrayDestroy_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArrayDestroy.array = (hipArray_t)array; \
};
// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')]
#define INIT_hipArrayGetDescriptor_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArrayGetDescriptor.pArrayDescriptor = (HIP_ARRAY_DESCRIPTOR*)pArrayDescriptor; \
  (cb_data).args.hipArrayGetDescriptor.array = (hipArray_t)array; \
};
// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray_t', 'array')]
#define INIT_hipArrayGetInfo_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipArrayGetInfo.desc = (hipChannelFormatDesc*)desc; \
  (cb_data).args.hipArrayGetInfo.extent = (hipExtent*)extent; \
  (cb_data).args.hipArrayGetInfo.flags = (unsigned int*)flags; \
  (cb_data).args.hipArrayGetInfo.array = (hipArray_t)array; \
};
// hipChooseDeviceR0000[('int*', 'device'), ('const hipDeviceProp_tR0000*', 'prop')]
#define INIT_hipChooseDeviceR0000_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipChooseDeviceR0000.device = (int*)device; \
  (cb_data).args.hipChooseDeviceR0000.prop = (const hipDeviceProp_tR0000*)properties; \
};
// hipChooseDeviceR0600[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')]
#define INIT_hipChooseDeviceR0600_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipChooseDeviceR0600.device = (int*)device; \
  (cb_data).args.hipChooseDeviceR0600.prop = (const hipDeviceProp_tR0600*)properties; \
};
// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipConfigureCall.gridDim = (dim3)gridDim; \
  (cb_data).args.hipConfigureCall.blockDim = (dim3)blockDim; \
  (cb_data).args.hipConfigureCall.sharedMem = (size_t)sharedMem; \
  (cb_data).args.hipConfigureCall.stream = (hipStream_t)stream; \
};
// hipCreateSurfaceObject[('hipSurfaceObject_t*', 'pSurfObject'), ('const hipResourceDesc*', 'pResDesc')]
#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCreateSurfaceObject.pSurfObject = (hipSurfaceObject_t*)pSurfObject; \
  (cb_data).args.hipCreateSurfaceObject.pResDesc = (const hipResourceDesc*)pResDesc; \
};
// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')]
#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \
  (cb_data).args.hipCtxCreate.flags = (unsigned int)flags; \
  (cb_data).args.hipCtxCreate.device = (hipDevice_t)device; \
};
// hipCtxDestroy[('hipCtx_t', 'ctx')]
#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \
};
// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')]
#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
};
// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')]
#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
  (cb_data).args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \
};
// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('unsigned int*', 'apiVersion')]
#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \
  (cb_data).args.hipCtxGetApiVersion.apiVersion = (unsigned int*)apiVersion; \
};
// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
};
// hipCtxGetCurrent[('hipCtx_t*', 'ctx')]
#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \
};
// hipCtxGetDevice[('hipDevice_t*', 'device')]
#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetDevice.device = (hipDevice_t*)device; \
};
// hipCtxGetFlags[('unsigned int*', 'flags')]
#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetFlags.flags = (unsigned int*)flags; \
};
// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
};
// hipCtxPopCurrent[('hipCtx_t*', 'ctx')]
#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \
};
// hipCtxPushCurrent[('hipCtx_t', 'ctx')]
#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \
};
// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
};
// hipCtxSetCurrent[('hipCtx_t', 'ctx')]
#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \
};
// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')]
#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \
};
// hipCtxSynchronize[]
#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \
};
// hipDestroyExternalMemory[('hipExternalMemory_t', 'extMem')]
#define INIT_hipDestroyExternalMemory_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDestroyExternalMemory.extMem = (hipExternalMemory_t)extMem; \
};
// hipDestroyExternalSemaphore[('hipExternalSemaphore_t', 'extSem')]
#define INIT_hipDestroyExternalSemaphore_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDestroyExternalSemaphore.extSem = (hipExternalSemaphore_t)extSem; \
};
// hipDestroySurfaceObject[('hipSurfaceObject_t', 'surfaceObject')]
#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDestroySurfaceObject.surfaceObject = (hipSurfaceObject_t)surfaceObject; \
};
// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')]
#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccess; \
  (cb_data).args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \
  (cb_data).args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \
};
// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')]
#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceComputeCapability.major = (int*)major; \
  (cb_data).args.hipDeviceComputeCapability.minor = (int*)minor; \
  (cb_data).args.hipDeviceComputeCapability.device = (hipDevice_t)device; \
};
// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')]
#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \
};
// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')]
#define INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \
  (cb_data).args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \
};
// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')]
#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGet.device = (hipDevice_t*)device; \
  (cb_data).args.hipDeviceGet.ordinal = (int)deviceId; \
};
// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')]
#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetAttribute.pi = (int*)pi; \
  (cb_data).args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \
  (cb_data).args.hipDeviceGetAttribute.deviceId = (int)device; \
};
// INIT_<api>_CB_ARGS_DATA(cb_data) macros for hipDeviceGetByPCIBusId .. hipFreeMipmappedArray.
// Each one assigns identifiers that must be in scope at the expansion site
// into the members of cb_data.args.<api>; the bracketed list in each comment
// gives the (type, member name) pairs. The source identifier is not always
// spelled like the member (e.g. `hostFunction` -> `function_address`,
// `kernelParams` -> `params`).
// cb_data is parenthesized so arguments like `*ptr` expand correctly.
// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')]
// NOTE(review): pciBusId receives a strdup() copy of pciBusIdstr (or NULL when
// pciBusIdstr is null). Nothing visible here frees that copy -- confirm that
// the consumer of cb_data releases it.
#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetByPCIBusId.device = (int*)device; \
  (cb_data).args.hipDeviceGetByPCIBusId.pciBusId = (pciBusIdstr) ? strdup(pciBusIdstr) : NULL; \
};
// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
};
// hipDeviceGetDefaultMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')]
#define INIT_hipDeviceGetDefaultMemPool_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetDefaultMemPool.mem_pool = (hipMemPool_t*)mem_pool; \
  (cb_data).args.hipDeviceGetDefaultMemPool.device = (int)device; \
};
// hipDeviceGetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')]
#define INIT_hipDeviceGetGraphMemAttribute_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetGraphMemAttribute.device = (int)device; \
  (cb_data).args.hipDeviceGetGraphMemAttribute.attr = (hipGraphMemAttributeType)attr; \
  (cb_data).args.hipDeviceGetGraphMemAttribute.value = (void*)value; \
};
// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')]
#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetLimit.pValue = (size_t*)pValue; \
  (cb_data).args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \
};
// hipDeviceGetMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')]
#define INIT_hipDeviceGetMemPool_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetMemPool.mem_pool = (hipMemPool_t*)mem_pool; \
  (cb_data).args.hipDeviceGetMemPool.device = (int)device; \
};
// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')]
#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetName.name = (char*)name; \
  (cb_data).args.hipDeviceGetName.len = (int)len; \
  (cb_data).args.hipDeviceGetName.device = (hipDevice_t)device; \
};
// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')]
#define INIT_hipDeviceGetP2PAttribute_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetP2PAttribute.value = (int*)value; \
  (cb_data).args.hipDeviceGetP2PAttribute.attr = (hipDeviceP2PAttr)attr; \
  (cb_data).args.hipDeviceGetP2PAttribute.srcDevice = (int)srcDevice; \
  (cb_data).args.hipDeviceGetP2PAttribute.dstDevice = (int)dstDevice; \
};
// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')]
#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \
  (cb_data).args.hipDeviceGetPCIBusId.len = (int)len; \
  (cb_data).args.hipDeviceGetPCIBusId.device = (int)device; \
};
// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
};
// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')]
#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetStreamPriorityRange.leastPriority = (int*)leastPriority; \
  (cb_data).args.hipDeviceGetStreamPriorityRange.greatestPriority = (int*)greatestPriority; \
};
// hipDeviceGetUuid[('hipUUID*', 'uuid'), ('hipDevice_t', 'device')]
#define INIT_hipDeviceGetUuid_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGetUuid.uuid = (hipUUID*)uuid; \
  (cb_data).args.hipDeviceGetUuid.device = (hipDevice_t)device; \
};
// hipDeviceGraphMemTrim[('int', 'device')]
#define INIT_hipDeviceGraphMemTrim_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceGraphMemTrim.device = (int)device; \
};
// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')]
#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \
  (cb_data).args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \
  (cb_data).args.hipDevicePrimaryCtxGetState.active = (int*)active; \
};
// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')]
#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \
};
// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')]
#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \
};
// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')]
#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \
  (cb_data).args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \
};
// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')]
#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \
  (cb_data).args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \
};
// hipDeviceReset[]
#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \
};
// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
};
// hipDeviceSetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')]
#define INIT_hipDeviceSetGraphMemAttribute_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceSetGraphMemAttribute.device = (int)device; \
  (cb_data).args.hipDeviceSetGraphMemAttribute.attr = (hipGraphMemAttributeType)attr; \
  (cb_data).args.hipDeviceSetGraphMemAttribute.value = (void*)value; \
};
// hipDeviceSetLimit[('hipLimit_t', 'limit'), ('size_t', 'value')]
#define INIT_hipDeviceSetLimit_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceSetLimit.limit = (hipLimit_t)limit; \
  (cb_data).args.hipDeviceSetLimit.value = (size_t)value; \
};
// hipDeviceSetMemPool[('int', 'device'), ('hipMemPool_t', 'mem_pool')]
#define INIT_hipDeviceSetMemPool_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceSetMemPool.device = (int)device; \
  (cb_data).args.hipDeviceSetMemPool.mem_pool = (hipMemPool_t)mem_pool; \
};
// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')]
#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \
};
// hipDeviceSynchronize[]
#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \
};
// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')]
#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDeviceTotalMem.bytes = (size_t*)bytes; \
  (cb_data).args.hipDeviceTotalMem.device = (hipDevice_t)device; \
};
// hipDriverGetVersion[('int*', 'driverVersion')]
#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDriverGetVersion.driverVersion = (int*)driverVersion; \
};
// hipDrvGraphAddMemFreeNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('hipDeviceptr_t', 'dptr')]
#define INIT_hipDrvGraphAddMemFreeNode_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphAddMemFreeNode.phGraphNode = (hipGraphNode_t*)phGraphNode; \
  (cb_data).args.hipDrvGraphAddMemFreeNode.hGraph = (hipGraph_t)hGraph; \
  (cb_data).args.hipDrvGraphAddMemFreeNode.dependencies = (const hipGraphNode_t*)dependencies; \
  (cb_data).args.hipDrvGraphAddMemFreeNode.numDependencies = (size_t)numDependencies; \
  (cb_data).args.hipDrvGraphAddMemFreeNode.dptr = (hipDeviceptr_t)dptr; \
};
// hipDrvGraphAddMemcpyNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')]
#define INIT_hipDrvGraphAddMemcpyNode_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphAddMemcpyNode.phGraphNode = (hipGraphNode_t*)phGraphNode; \
  (cb_data).args.hipDrvGraphAddMemcpyNode.hGraph = (hipGraph_t)hGraph; \
  (cb_data).args.hipDrvGraphAddMemcpyNode.dependencies = (const hipGraphNode_t*)dependencies; \
  (cb_data).args.hipDrvGraphAddMemcpyNode.numDependencies = (size_t)numDependencies; \
  (cb_data).args.hipDrvGraphAddMemcpyNode.copyParams = (const HIP_MEMCPY3D*)copyParams; \
  (cb_data).args.hipDrvGraphAddMemcpyNode.ctx = (hipCtx_t)ctx; \
};
// hipDrvGraphAddMemsetNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'memsetParams'), ('hipCtx_t', 'ctx')]
#define INIT_hipDrvGraphAddMemsetNode_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphAddMemsetNode.phGraphNode = (hipGraphNode_t*)phGraphNode; \
  (cb_data).args.hipDrvGraphAddMemsetNode.hGraph = (hipGraph_t)hGraph; \
  (cb_data).args.hipDrvGraphAddMemsetNode.dependencies = (const hipGraphNode_t*)dependencies; \
  (cb_data).args.hipDrvGraphAddMemsetNode.numDependencies = (size_t)numDependencies; \
  (cb_data).args.hipDrvGraphAddMemsetNode.memsetParams = (const hipMemsetParams*)memsetParams; \
  (cb_data).args.hipDrvGraphAddMemsetNode.ctx = (hipCtx_t)ctx; \
};
// hipDrvGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')]
#define INIT_hipDrvGraphExecMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphExecMemcpyNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \
  (cb_data).args.hipDrvGraphExecMemcpyNodeSetParams.hNode = (hipGraphNode_t)hNode; \
  (cb_data).args.hipDrvGraphExecMemcpyNodeSetParams.copyParams = (const HIP_MEMCPY3D*)copyParams; \
  (cb_data).args.hipDrvGraphExecMemcpyNodeSetParams.ctx = (hipCtx_t)ctx; \
};
// hipDrvGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipMemsetParams*', 'memsetParams'), ('hipCtx_t', 'ctx')]
#define INIT_hipDrvGraphExecMemsetNodeSetParams_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphExecMemsetNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \
  (cb_data).args.hipDrvGraphExecMemsetNodeSetParams.hNode = (hipGraphNode_t)hNode; \
  (cb_data).args.hipDrvGraphExecMemsetNodeSetParams.memsetParams = (const hipMemsetParams*)memsetParams; \
  (cb_data).args.hipDrvGraphExecMemsetNodeSetParams.ctx = (hipCtx_t)ctx; \
};
// hipDrvGraphMemcpyNodeGetParams[('hipGraphNode_t', 'hNode'), ('HIP_MEMCPY3D*', 'nodeParams')]
#define INIT_hipDrvGraphMemcpyNodeGetParams_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphMemcpyNodeGetParams.hNode = (hipGraphNode_t)hNode; \
  (cb_data).args.hipDrvGraphMemcpyNodeGetParams.nodeParams = (HIP_MEMCPY3D*)nodeParams; \
};
// hipDrvGraphMemcpyNodeSetParams[('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'nodeParams')]
#define INIT_hipDrvGraphMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvGraphMemcpyNodeSetParams.hNode = (hipGraphNode_t)hNode; \
  (cb_data).args.hipDrvGraphMemcpyNodeSetParams.nodeParams = (const HIP_MEMCPY3D*)nodeParams; \
};
// hipDrvLaunchKernelEx[('const HIP_LAUNCH_CONFIG*', 'config'), ('hipFunction_t', 'f'), ('void**', 'params'), ('void**', 'extra')]
#define INIT_hipDrvLaunchKernelEx_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvLaunchKernelEx.config = (const HIP_LAUNCH_CONFIG*)config; \
  (cb_data).args.hipDrvLaunchKernelEx.f = (hipFunction_t)f; \
  (cb_data).args.hipDrvLaunchKernelEx.params = (void**)kernelParams; \
  (cb_data).args.hipDrvLaunchKernelEx.extra = (void**)extra; \
};
// hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')]
#define INIT_hipDrvMemcpy2DUnaligned_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvMemcpy2DUnaligned.pCopy = (const hip_Memcpy2D*)pCopy; \
};
// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')]
#define INIT_hipDrvMemcpy3D_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvMemcpy3D.pCopy = (const HIP_MEMCPY3D*)pCopy; \
};
// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')]
#define INIT_hipDrvMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvMemcpy3DAsync.pCopy = (const HIP_MEMCPY3D*)pCopy; \
  (cb_data).args.hipDrvMemcpy3DAsync.stream = (hipStream_t)stream; \
};
// hipDrvPointerGetAttributes[('unsigned int', 'numAttributes'), ('hipPointer_attribute*', 'attributes'), ('void**', 'data'), ('hipDeviceptr_t', 'ptr')]
#define INIT_hipDrvPointerGetAttributes_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipDrvPointerGetAttributes.numAttributes = (unsigned int)numAttributes; \
  (cb_data).args.hipDrvPointerGetAttributes.attributes = (hipPointer_attribute*)attributes; \
  (cb_data).args.hipDrvPointerGetAttributes.data = (void**)data; \
  (cb_data).args.hipDrvPointerGetAttributes.ptr = (hipDeviceptr_t)ptr; \
};
// hipEventCreate[('hipEvent_t*', 'event')]
#define INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventCreate.event = (hipEvent_t*)event; \
};
// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')]
#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \
  (cb_data).args.hipEventCreateWithFlags.flags = (unsigned int)flags; \
};
// hipEventDestroy[('hipEvent_t', 'event')]
#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventDestroy.event = (hipEvent_t)event; \
};
// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')]
#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventElapsedTime.ms = (float*)ms; \
  (cb_data).args.hipEventElapsedTime.start = (hipEvent_t)start; \
  (cb_data).args.hipEventElapsedTime.stop = (hipEvent_t)stop; \
};
// hipEventQuery[('hipEvent_t', 'event')]
#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventQuery.event = (hipEvent_t)event; \
};
// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')]
#define INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventRecord.event = (hipEvent_t)event; \
  (cb_data).args.hipEventRecord.stream = (hipStream_t)stream; \
};
// hipEventRecordWithFlags[('hipEvent_t', 'event'), ('hipStream_t', 'stream'), ('unsigned int', 'flags')]
#define INIT_hipEventRecordWithFlags_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventRecordWithFlags.event = (hipEvent_t)event; \
  (cb_data).args.hipEventRecordWithFlags.stream = (hipStream_t)stream; \
  (cb_data).args.hipEventRecordWithFlags.flags = (unsigned int)flags; \
};
// hipEventSynchronize[('hipEvent_t', 'event')]
#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipEventSynchronize.event = (hipEvent_t)event; \
};
// hipExtGetLastError[]
#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \
};
// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')]
#define INIT_hipExtGetLinkTypeAndHopCount_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtGetLinkTypeAndHopCount.device1 = (int)device1; \
  (cb_data).args.hipExtGetLinkTypeAndHopCount.device2 = (int)device2; \
  (cb_data).args.hipExtGetLinkTypeAndHopCount.linktype = (unsigned int*)linktype; \
  (cb_data).args.hipExtGetLinkTypeAndHopCount.hopcount = (unsigned int*)hopcount; \
};
// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')]
#define INIT_hipExtLaunchKernel_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtLaunchKernel.function_address = (const void*)hostFunction; \
  (cb_data).args.hipExtLaunchKernel.numBlocks = (dim3)gridDim; \
  (cb_data).args.hipExtLaunchKernel.dimBlocks = (dim3)blockDim; \
  (cb_data).args.hipExtLaunchKernel.args = (void**)args; \
  (cb_data).args.hipExtLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
  (cb_data).args.hipExtLaunchKernel.stream = (hipStream_t)stream; \
  (cb_data).args.hipExtLaunchKernel.startEvent = (hipEvent_t)startEvent; \
  (cb_data).args.hipExtLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
  (cb_data).args.hipExtLaunchKernel.flags = (int)flags; \
};
// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
#define INIT_hipExtLaunchMultiKernelMultiDevice_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtLaunchMultiKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \
  (cb_data).args.hipExtLaunchMultiKernelMultiDevice.numDevices = (int)numDevices; \
  (cb_data).args.hipExtLaunchMultiKernelMultiDevice.flags = (unsigned int)flags; \
};
// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
#define INIT_hipExtMallocWithFlags_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtMallocWithFlags.ptr = (void**)ptr; \
  (cb_data).args.hipExtMallocWithFlags.sizeBytes = (size_t)sizeBytes; \
  (cb_data).args.hipExtMallocWithFlags.flags = (unsigned int)flags; \
};
// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')]
#define INIT_hipExtModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtModuleLaunchKernel.f = (hipFunction_t)f; \
  (cb_data).args.hipExtModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \
  (cb_data).args.hipExtModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \
  (cb_data).args.hipExtModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \
  (cb_data).args.hipExtModuleLaunchKernel.localWorkSizeX = (unsigned int)localWorkSizeX; \
  (cb_data).args.hipExtModuleLaunchKernel.localWorkSizeY = (unsigned int)localWorkSizeY; \
  (cb_data).args.hipExtModuleLaunchKernel.localWorkSizeZ = (unsigned int)localWorkSizeZ; \
  (cb_data).args.hipExtModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
  (cb_data).args.hipExtModuleLaunchKernel.hStream = (hipStream_t)hStream; \
  (cb_data).args.hipExtModuleLaunchKernel.kernelParams = (void**)kernelParams; \
  (cb_data).args.hipExtModuleLaunchKernel.extra = (void**)extra; \
  (cb_data).args.hipExtModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \
  (cb_data).args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
  (cb_data).args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \
};
// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \
  (cb_data).args.hipExtStreamCreateWithCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
  (cb_data).args.hipExtStreamCreateWithCUMask.cuMask = (const unsigned int*)cuMask; \
};
// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')]
#define INIT_hipExtStreamGetCUMask_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExtStreamGetCUMask.stream = (hipStream_t)stream; \
  (cb_data).args.hipExtStreamGetCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
  (cb_data).args.hipExtStreamGetCUMask.cuMask = (unsigned int*)cuMask; \
};
// hipExternalMemoryGetMappedBuffer[('void**', 'devPtr'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryBufferDesc*', 'bufferDesc')]
#define INIT_hipExternalMemoryGetMappedBuffer_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExternalMemoryGetMappedBuffer.devPtr = (void**)devPtr; \
  (cb_data).args.hipExternalMemoryGetMappedBuffer.extMem = (hipExternalMemory_t)extMem; \
  (cb_data).args.hipExternalMemoryGetMappedBuffer.bufferDesc = (const hipExternalMemoryBufferDesc*)bufferDesc; \
};
// hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')]
#define INIT_hipExternalMemoryGetMappedMipmappedArray_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipExternalMemoryGetMappedMipmappedArray.mipmap = (hipMipmappedArray_t*)mipmap; \
  (cb_data).args.hipExternalMemoryGetMappedMipmappedArray.extMem = (hipExternalMemory_t)extMem; \
  (cb_data).args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc = (const hipExternalMemoryMipmappedArrayDesc*)mipmapDesc; \
};
// hipFree[('void*', 'ptr')]
#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipFree.ptr = (void*)ptr; \
};
// hipFreeArray[('hipArray_t', 'array')]
#define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipFreeArray.array = (hipArray_t)array; \
};
// hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')]
#define INIT_hipFreeAsync_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipFreeAsync.dev_ptr = (void*)dev_ptr; \
  (cb_data).args.hipFreeAsync.stream = (hipStream_t)stream; \
};
// hipFreeHost[('void*', 'ptr')]
#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipFreeHost.ptr = (void*)ptr; \
};
// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')]
#define INIT_hipFreeMipmappedArray_CB_ARGS_DATA(cb_data) { \
  (cb_data).args.hipFreeMipmappedArray.mipmappedArray = (hipMipmappedArray_t)mipmappedArray; \
};
// hipFuncGetAttribute[('int*', 
'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')] +#define INIT_hipFuncGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncGetAttribute.value = (int*)value; \ + cb_data.args.hipFuncGetAttribute.attrib = (hipFunction_attribute)attrib; \ + cb_data.args.hipFuncGetAttribute.hfunc = (hipFunction_t)hfunc; \ +}; +// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')] +#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \ + cb_data.args.hipFuncGetAttributes.func = (const void*)func; \ +}; +// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')] +#define INIT_hipFuncSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetAttribute.func = (const void*)func; \ + cb_data.args.hipFuncSetAttribute.attr = (hipFuncAttribute)attr; \ + cb_data.args.hipFuncSetAttribute.value = (int)value; \ +}; +// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')] +#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \ + cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \ +}; +// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')] +#define INIT_hipFuncSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetSharedMemConfig.func = (const void*)func; \ + cb_data.args.hipFuncSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +// hipGLGetDevices[('unsigned int*', 'pHipDeviceCount'), ('int*', 'pHipDevices'), ('unsigned int', 'hipDeviceCount'), ('hipGLDeviceList', 'deviceList')] +#define INIT_hipGLGetDevices_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGLGetDevices.pHipDeviceCount = (unsigned int*)pHipDeviceCount; \ + cb_data.args.hipGLGetDevices.pHipDevices = (int*)pHipDevices; \ + cb_data.args.hipGLGetDevices.hipDeviceCount = (unsigned 
int)hipDeviceCount; \ + cb_data.args.hipGLGetDevices.deviceList = (hipGLDeviceList)deviceList; \ +}; +// hipGetChannelDesc[('hipChannelFormatDesc*', 'desc'), ('hipArray_const_t', 'array')] +#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetChannelDesc.desc = (hipChannelFormatDesc*)desc; \ + cb_data.args.hipGetChannelDesc.array = (hipArray_const_t)array; \ +}; +// hipGetDevice[('int*', 'deviceId')] +#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \ +}; +// hipGetDeviceCount[('int*', 'count')] +#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceCount.count = (int*)count; \ +}; +// hipGetDeviceFlags[('unsigned int*', 'flags')] +#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \ +}; +// hipGetDevicePropertiesR0000[('hipDeviceProp_tR0000*', 'prop'), ('int', 'device')] +#define INIT_hipGetDevicePropertiesR0000_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevicePropertiesR0000.prop = (hipDeviceProp_tR0000*)prop; \ + cb_data.args.hipGetDevicePropertiesR0000.device = (int)device; \ +}; +// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'prop'), ('int', 'deviceId')] +#define INIT_hipGetDevicePropertiesR0600_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevicePropertiesR0600.prop = (hipDeviceProp_tR0600*)prop; \ + cb_data.args.hipGetDevicePropertiesR0600.deviceId = (int)device; \ +}; +// hipGetDriverEntryPoint[('const char*', 'symbol'), ('void**', 'funcPtr'), ('unsigned long long', 'flags'), ('hipDriverEntryPointQueryResult*', 'driverStatus')] +#define INIT_hipGetDriverEntryPoint_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDriverEntryPoint.symbol = (symbol) ? 
strdup(symbol) : NULL; \ + cb_data.args.hipGetDriverEntryPoint.funcPtr = (void**)funcPtr; \ + cb_data.args.hipGetDriverEntryPoint.flags = (unsigned long long)flags; \ + cb_data.args.hipGetDriverEntryPoint.driverStatus = (hipDriverEntryPointQueryResult*)status; \ +}; +// hipGetFuncBySymbol[('hipFunction_t*', 'functionPtr'), ('const void*', 'symbolPtr')] +#define INIT_hipGetFuncBySymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetFuncBySymbol.functionPtr = (hipFunction_t*)functionPtr; \ + cb_data.args.hipGetFuncBySymbol.symbolPtr = (const void*)symbolPtr; \ +}; +// hipGetLastError[] +#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \ +}; +// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] +#define INIT_hipGetMipmappedArrayLevel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetMipmappedArrayLevel.levelArray = (hipArray_t*)levelArray; \ + cb_data.args.hipGetMipmappedArrayLevel.mipmappedArray = (hipMipmappedArray_const_t)mipmappedArray; \ + cb_data.args.hipGetMipmappedArrayLevel.level = (unsigned int)level; \ +}; +// hipGetProcAddress[('const char*', 'symbol'), ('void**', 'pfn'), ('int', 'hipVersion'), ('uint64_t', 'flags'), ('hipDriverProcAddressQueryResult*', 'symbolStatus')] +#define INIT_hipGetProcAddress_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetProcAddress.symbol = (symbol) ? 
strdup(symbol) : NULL; \ + cb_data.args.hipGetProcAddress.pfn = (void**)pfn; \ + cb_data.args.hipGetProcAddress.hipVersion = (int)hipVersion; \ + cb_data.args.hipGetProcAddress.flags = (uint64_t)flags; \ + cb_data.args.hipGetProcAddress.symbolStatus = (hipDriverProcAddressQueryResult*)symbolStatus; \ +}; +// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')] +#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetSymbolAddress.devPtr = (void**)devPtr; \ + cb_data.args.hipGetSymbolAddress.symbol = (const void*)symbol; \ +}; +// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')] +#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetSymbolSize.size = (size_t*)sizePtr; \ + cb_data.args.hipGetSymbolSize.symbol = (const void*)symbol; \ +}; +// hipGraphAddBatchMemOpNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const hipBatchMemOpNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddBatchMemOpNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddBatchMemOpNode.phGraphNode = (hipGraphNode_t*)phGraphNode; \ + cb_data.args.hipGraphAddBatchMemOpNode.hGraph = (hipGraph_t)hGraph; \ + cb_data.args.hipGraphAddBatchMemOpNode.dependencies = (const hipGraphNode_t*)dependencies; \ + cb_data.args.hipGraphAddBatchMemOpNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddBatchMemOpNode.nodeParams = (const hipBatchMemOpNodeParams*)nodeParams; \ +}; +// hipGraphAddChildGraphNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraph_t', 'childGraph')] +#define INIT_hipGraphAddChildGraphNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddChildGraphNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddChildGraphNode.graph = (hipGraph_t)graph; \ + 
cb_data.args.hipGraphAddChildGraphNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddChildGraphNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddChildGraphNode.childGraph = (hipGraph_t)childGraph; \ +}; +// hipGraphAddDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] +#define INIT_hipGraphAddDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddDependencies.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddDependencies.from = (const hipGraphNode_t*)from; \ + cb_data.args.hipGraphAddDependencies.to = (const hipGraphNode_t*)to; \ + cb_data.args.hipGraphAddDependencies.numDependencies = (size_t)numDependencies; \ +}; +// hipGraphAddEmptyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies')] +#define INIT_hipGraphAddEmptyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEmptyNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEmptyNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEmptyNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEmptyNode.numDependencies = (size_t)numDependencies; \ +}; +// hipGraphAddEventRecordNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] +#define INIT_hipGraphAddEventRecordNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEventRecordNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEventRecordNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEventRecordNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEventRecordNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddEventRecordNode.event = (hipEvent_t)event; 
\ +}; +// hipGraphAddEventWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] +#define INIT_hipGraphAddEventWaitNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEventWaitNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEventWaitNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEventWaitNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEventWaitNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddEventWaitNode.event = (hipEvent_t)event; \ +}; +// hipGraphAddExternalSemaphoresSignalNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddExternalSemaphoresSignalNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddExternalSemaphoresSignalNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddExternalSemaphoresSignalNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddExternalSemaphoresSignalNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddExternalSemaphoresSignalNode.nodeParams = (const hipExternalSemaphoreSignalNodeParams*)nodeParams; \ +}; +// hipGraphAddExternalSemaphoresWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddExternalSemaphoresWaitNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddExternalSemaphoresWaitNode.graph = 
(hipGraph_t)graph; \ + cb_data.args.hipGraphAddExternalSemaphoresWaitNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddExternalSemaphoresWaitNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddExternalSemaphoresWaitNode.nodeParams = (const hipExternalSemaphoreWaitNodeParams*)nodeParams; \ +}; +// hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddHostNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddHostNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddHostNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddHostNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddHostNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddHostNode.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphAddKernelNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddKernelNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddKernelNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddKernelNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddKernelNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddKernelNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddKernelNode.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphAddMemAllocNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipMemAllocNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddMemAllocNode_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipGraphAddMemAllocNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemAllocNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemAllocNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemAllocNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemAllocNode.pNodeParams = (hipMemAllocNodeParams*)pNodeParams; \ +}; +// hipGraphAddMemFreeNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dev_ptr')] +#define INIT_hipGraphAddMemFreeNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemFreeNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemFreeNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemFreeNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemFreeNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemFreeNode.dev_ptr = (void*)dev_ptr; \ +}; +// hipGraphAddMemcpyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemcpy3DParms*', 'pCopyParams')] +#define INIT_hipGraphAddMemcpyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNode.pCopyParams = (const hipMemcpy3DParms*)pCopyParams; \ +}; +// hipGraphAddMemcpyNode1D[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 
'kind')] +#define INIT_hipGraphAddMemcpyNode1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNode1D.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNode1D.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNode1D.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNode1D.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNode1D.dst = (void*)dst; \ + cb_data.args.hipGraphAddMemcpyNode1D.src = (const void*)src; \ + cb_data.args.hipGraphAddMemcpyNode1D.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNode1D.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemcpyNodeFromSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphAddMemcpyNodeFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemcpyNodeToSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), 
('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphAddMemcpyNodeToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemsetNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'pMemsetParams')] +#define INIT_hipGraphAddMemsetNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemsetNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemsetNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemsetNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemsetNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemsetNode.pMemsetParams = (const hipMemsetParams*)pMemsetParams; \ +}; +// hipGraphAddNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraphNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddNode.pDependencies = (const hipGraphNode_t*)pDependencies; 
\ + cb_data.args.hipGraphAddNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddNode.nodeParams = (hipGraphNodeParams*)nodeParams; \ +}; +// hipGraphBatchMemOpNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipBatchMemOpNodeParams*', 'nodeParams_out')] +#define INIT_hipGraphBatchMemOpNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphBatchMemOpNodeGetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphBatchMemOpNodeGetParams.nodeParams_out = (hipBatchMemOpNodeParams*)nodeParams_out; \ +}; +// hipGraphBatchMemOpNodeSetParams[('hipGraphNode_t', 'hNode'), ('hipBatchMemOpNodeParams*', 'nodeParams')] +#define INIT_hipGraphBatchMemOpNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphBatchMemOpNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphBatchMemOpNodeSetParams.nodeParams = (hipBatchMemOpNodeParams*)nodeParams; \ +}; +// hipGraphChildGraphNodeGetGraph[('hipGraphNode_t', 'node'), ('hipGraph_t*', 'pGraph')] +#define INIT_hipGraphChildGraphNodeGetGraph_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphChildGraphNodeGetGraph.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphChildGraphNodeGetGraph.pGraph = (hipGraph_t*)pGraph; \ +}; +// hipGraphClone[('hipGraph_t*', 'pGraphClone'), ('hipGraph_t', 'originalGraph')] +#define INIT_hipGraphClone_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphClone.pGraphClone = (hipGraph_t*)pGraphClone; \ + cb_data.args.hipGraphClone.originalGraph = (hipGraph_t)originalGraph; \ +}; +// hipGraphCreate[('hipGraph_t*', 'pGraph'), ('unsigned int', 'flags')] +#define INIT_hipGraphCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphCreate.pGraph = (hipGraph_t*)pGraph; \ + cb_data.args.hipGraphCreate.flags = (unsigned int)flags; \ +}; +// hipGraphDebugDotPrint[('hipGraph_t', 'graph'), ('const char*', 'path'), ('unsigned int', 'flags')] +#define INIT_hipGraphDebugDotPrint_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDebugDotPrint.graph = (hipGraph_t)graph; \ + 
cb_data.args.hipGraphDebugDotPrint.path = (path) ? strdup(path) : NULL; \ + cb_data.args.hipGraphDebugDotPrint.flags = (unsigned int)flags; \ +}; +// hipGraphDestroy[('hipGraph_t', 'graph')] +#define INIT_hipGraphDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDestroy.graph = (hipGraph_t)graph; \ +}; +// hipGraphDestroyNode[('hipGraphNode_t', 'node')] +#define INIT_hipGraphDestroyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDestroyNode.node = (hipGraphNode_t)node; \ +}; +// hipGraphEventRecordNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] +#define INIT_hipGraphEventRecordNodeGetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventRecordNodeGetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventRecordNodeGetEvent.event_out = (hipEvent_t*)event_out; \ +}; +// hipGraphEventRecordNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] +#define INIT_hipGraphEventRecordNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventRecordNodeSetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventRecordNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphEventWaitNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] +#define INIT_hipGraphEventWaitNodeGetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventWaitNodeGetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventWaitNodeGetEvent.event_out = (hipEvent_t*)event_out; \ +}; +// hipGraphEventWaitNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] +#define INIT_hipGraphEventWaitNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventWaitNodeSetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventWaitNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphExecBatchMemOpNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipBatchMemOpNodeParams*', 'nodeParams')] +#define 
INIT_hipGraphExecBatchMemOpNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecBatchMemOpNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecBatchMemOpNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecBatchMemOpNodeSetParams.nodeParams = (const hipBatchMemOpNodeParams*)nodeParams; \ +}; +// hipGraphExecChildGraphNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipGraph_t', 'childGraph')] +#define INIT_hipGraphExecChildGraphNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.childGraph = (hipGraph_t)childGraph; \ +}; +// hipGraphExecDestroy[('hipGraphExec_t', 'graphExec')] +#define INIT_hipGraphExecDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecDestroy.graphExec = (hipGraphExec_t)pGraphExec; \ +}; +// hipGraphExecEventRecordNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] +#define INIT_hipGraphExecEventRecordNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphExecEventWaitNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] +#define INIT_hipGraphExecEventWaitNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.event = (hipEvent_t)event; \ +}; +// 
hipGraphExecExternalSemaphoresSignalNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphExecExternalSemaphoresSignalNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams = (const hipExternalSemaphoreSignalNodeParams*)nodeParams; \ +}; +// hipGraphExecExternalSemaphoresWaitNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define INIT_hipGraphExecExternalSemaphoresWaitNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams = (const hipExternalSemaphoreWaitNodeParams*)nodeParams; \ +}; +// hipGraphExecGetFlags[('hipGraphExec_t', 'graphExec'), ('unsigned long long*', 'flags')] +#define INIT_hipGraphExecGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecGetFlags.graphExec = (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphExecGetFlags.flags = (unsigned long long*)flags; \ +}; +// hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphExecHostNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecHostNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecHostNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecHostNodeSetParams.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ 
+}; +// hipGraphExecKernelNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphExecKernelNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecKernelNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecKernelNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecKernelNodeSetParams.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphExecMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.pNodeParams = (hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphExecMemcpyNodeSetParams1D[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphExecMemcpyNodeSetParams1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.dst = (void*)dst; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.src = (const void*)src; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.count = (size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemcpyNodeSetParamsFromSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define 
INIT_hipGraphExecMemcpyNodeSetParamsFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemcpyNodeSetParamsToSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphExecMemcpyNodeSetParamsToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] +#define INIT_hipGraphExecMemsetNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemsetNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemsetNodeSetParams.node = (hipGraphNode_t)node; \ + 
cb_data.args.hipGraphExecMemsetNodeSetParams.pNodeParams = (const hipMemsetParams*)pNodeParams; \ +}; +// hipGraphExecNodeSetParams[('hipGraphExec_t', 'graphExec'), ('hipGraphNode_t', 'node'), ('hipGraphNodeParams*', 'nodeParams')] +#define INIT_hipGraphExecNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecNodeSetParams.graphExec = (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphExecNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecNodeSetParams.nodeParams = (hipGraphNodeParams*)nodeParams; \ +}; +// hipGraphExecUpdate[('hipGraphExec_t', 'hGraphExec'), ('hipGraph_t', 'hGraph'), ('hipGraphNode_t*', 'hErrorNode_out'), ('hipGraphExecUpdateResult*', 'updateResult_out')] +#define INIT_hipGraphExecUpdate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecUpdate.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecUpdate.hGraph = (hipGraph_t)hGraph; \ + cb_data.args.hipGraphExecUpdate.hErrorNode_out = (hipGraphNode_t*)hErrorNode_out; \ + cb_data.args.hipGraphExecUpdate.updateResult_out = (hipGraphExecUpdateResult*)updateResult_out; \ +}; +// hipGraphExternalSemaphoresSignalNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreSignalNodeParams*', 'params_out')] +#define INIT_hipGraphExternalSemaphoresSignalNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExternalSemaphoresSignalNodeGetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out = (hipExternalSemaphoreSignalNodeParams*)params_out; \ +}; +// hipGraphExternalSemaphoresSignalNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphExternalSemaphoresSignalNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExternalSemaphoresSignalNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams = (const 
hipExternalSemaphoreSignalNodeParams*)nodeParams; \ +}; +// hipGraphExternalSemaphoresWaitNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreWaitNodeParams*', 'params_out')] +#define INIT_hipGraphExternalSemaphoresWaitNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExternalSemaphoresWaitNodeGetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out = (hipExternalSemaphoreWaitNodeParams*)params_out; \ +}; +// hipGraphExternalSemaphoresWaitNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define INIT_hipGraphExternalSemaphoresWaitNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExternalSemaphoresWaitNodeSetParams.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams = (const hipExternalSemaphoreWaitNodeParams*)nodeParams; \ +}; +// hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] +#define INIT_hipGraphGetEdges_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetEdges.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphGetEdges.from = (hipGraphNode_t*)from; \ + cb_data.args.hipGraphGetEdges.to = (hipGraphNode_t*)to; \ + cb_data.args.hipGraphGetEdges.numEdges = (size_t*)numEdges; \ +}; +// hipGraphGetNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'nodes'), ('size_t*', 'numNodes')] +#define INIT_hipGraphGetNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetNodes.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphGetNodes.nodes = (hipGraphNode_t*)nodes; \ + cb_data.args.hipGraphGetNodes.numNodes = (size_t*)numNodes; \ +}; +// hipGraphGetRootNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pRootNodes'), ('size_t*', 'pNumRootNodes')] +#define INIT_hipGraphGetRootNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetRootNodes.graph = (hipGraph_t)graph; \ + 
cb_data.args.hipGraphGetRootNodes.pRootNodes = (hipGraphNode_t*)pRootNodes; \ + cb_data.args.hipGraphGetRootNodes.pNumRootNodes = (size_t*)pNumRootNodes; \ +}; +// hipGraphHostNodeGetParams[('hipGraphNode_t', 'node'), ('hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphHostNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphHostNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphHostNodeGetParams.pNodeParams = (hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphHostNodeSetParams[('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphHostNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphHostNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphHostNodeSetParams.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphInstantiate[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pErrorNode'), ('char*', 'pLogBuffer'), ('size_t', 'bufferSize')] +#define INIT_hipGraphInstantiate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphInstantiate.pGraphExec = (hipGraphExec_t*)pGraphExec; \ + cb_data.args.hipGraphInstantiate.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphInstantiate.pErrorNode = (hipGraphNode_t*)pErrorNode; \ + cb_data.args.hipGraphInstantiate.pLogBuffer = (char*)pLogBuffer; \ + cb_data.args.hipGraphInstantiate.bufferSize = (size_t)bufferSize; \ +}; +// hipGraphInstantiateWithFlags[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('unsigned long long', 'flags')] +#define INIT_hipGraphInstantiateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphInstantiateWithFlags.pGraphExec = (hipGraphExec_t*)pGraphExec; \ + cb_data.args.hipGraphInstantiateWithFlags.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphInstantiateWithFlags.flags = (unsigned long long)flags; \ +}; +// hipGraphInstantiateWithParams[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphInstantiateParams*', 
'instantiateParams')] +#define INIT_hipGraphInstantiateWithParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphInstantiateWithParams.pGraphExec = (hipGraphExec_t*)pGraphExec; \ + cb_data.args.hipGraphInstantiateWithParams.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphInstantiateWithParams.instantiateParams = (hipGraphInstantiateParams*)instantiateParams; \ +}; +// hipGraphKernelNodeCopyAttributes[('hipGraphNode_t', 'hSrc'), ('hipGraphNode_t', 'hDst')] +#define INIT_hipGraphKernelNodeCopyAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeCopyAttributes.hSrc = (hipGraphNode_t)hSrc; \ + cb_data.args.hipGraphKernelNodeCopyAttributes.hDst = (hipGraphNode_t)hDst; \ +}; +// hipGraphKernelNodeGetAttribute[('hipGraphNode_t', 'hNode'), ('hipLaunchAttributeID', 'attr'), ('hipLaunchAttributeValue*', 'value')] +#define INIT_hipGraphKernelNodeGetAttribute_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphKernelNodeGetParams[('hipGraphNode_t', 'node'), ('hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphKernelNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphKernelNodeGetParams.pNodeParams = (hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphKernelNodeSetAttribute[('hipGraphNode_t', 'hNode'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')] +#define INIT_hipGraphKernelNodeSetAttribute_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphKernelNodeSetParams[('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphKernelNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphKernelNodeSetParams.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphLaunch[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] +#define INIT_hipGraphLaunch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphLaunch.graphExec 
= (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphLaunch.stream = (hipStream_t)stream; \ +}; +// hipGraphMemAllocNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemAllocNodeParams*', 'pNodeParams')] +#define INIT_hipGraphMemAllocNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemAllocNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemAllocNodeGetParams.pNodeParams = (hipMemAllocNodeParams*)pNodeParams; \ +}; +// hipGraphMemFreeNodeGetParams[('hipGraphNode_t', 'node'), ('void*', 'dev_ptr')] +#define INIT_hipGraphMemFreeNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemFreeNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemFreeNodeGetParams.dev_ptr = (void*)dev_ptr; \ +}; +// hipGraphMemcpyNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphMemcpyNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeGetParams.pNodeParams = (hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphMemcpyNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParams.pNodeParams = (const hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphMemcpyNodeSetParams1D[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParams1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.dst = (void*)dst; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.src = (const void*)src; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.count = (size_t)count; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.kind = 
(hipMemcpyKind)kind; \ +}; +// hipGraphMemcpyNodeSetParamsFromSymbol[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParamsFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphMemcpyNodeSetParamsToSymbol[('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParamsToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphMemsetNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemsetParams*', 'pNodeParams')] +#define INIT_hipGraphMemsetNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemsetNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemsetNodeGetParams.pNodeParams = (hipMemsetParams*)pNodeParams; \ +}; +// hipGraphMemsetNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] +#define 
INIT_hipGraphMemsetNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemsetNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemsetNodeSetParams.pNodeParams = (const hipMemsetParams*)pNodeParams; \ +}; +// hipGraphNodeFindInClone[('hipGraphNode_t*', 'pNode'), ('hipGraphNode_t', 'originalNode'), ('hipGraph_t', 'clonedGraph')] +#define INIT_hipGraphNodeFindInClone_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeFindInClone.pNode = (hipGraphNode_t*)pNode; \ + cb_data.args.hipGraphNodeFindInClone.originalNode = (hipGraphNode_t)originalNode; \ + cb_data.args.hipGraphNodeFindInClone.clonedGraph = (hipGraph_t)clonedGraph; \ +}; +// hipGraphNodeGetDependencies[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependencies'), ('size_t*', 'pNumDependencies')] +#define INIT_hipGraphNodeGetDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetDependencies.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetDependencies.pDependencies = (hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphNodeGetDependencies.pNumDependencies = (size_t*)pNumDependencies; \ +}; +// hipGraphNodeGetDependentNodes[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependentNodes'), ('size_t*', 'pNumDependentNodes')] +#define INIT_hipGraphNodeGetDependentNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetDependentNodes.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetDependentNodes.pDependentNodes = (hipGraphNode_t*)pDependentNodes; \ + cb_data.args.hipGraphNodeGetDependentNodes.pNumDependentNodes = (size_t*)pNumDependentNodes; \ +}; +// hipGraphNodeGetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int*', 'isEnabled')] +#define INIT_hipGraphNodeGetEnabled_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetEnabled.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphNodeGetEnabled.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphNodeGetEnabled.isEnabled = 
(unsigned int*)isEnabled; \ +}; +// hipGraphNodeGetType[('hipGraphNode_t', 'node'), ('hipGraphNodeType*', 'pType')] +#define INIT_hipGraphNodeGetType_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetType.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetType.pType = (hipGraphNodeType*)pType; \ +}; +// hipGraphNodeSetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int', 'isEnabled')] +#define INIT_hipGraphNodeSetEnabled_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeSetEnabled.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphNodeSetEnabled.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphNodeSetEnabled.isEnabled = (unsigned int)isEnabled; \ +}; +// hipGraphNodeSetParams[('hipGraphNode_t', 'node'), ('hipGraphNodeParams*', 'nodeParams')] +#define INIT_hipGraphNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeSetParams.nodeParams = (hipGraphNodeParams*)nodeParams; \ +}; +// hipGraphReleaseUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipGraphReleaseUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphReleaseUserObject.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphReleaseUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphReleaseUserObject.count = (unsigned int)count; \ +}; +// hipGraphRemoveDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] +#define INIT_hipGraphRemoveDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphRemoveDependencies.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphRemoveDependencies.from = (const hipGraphNode_t*)from; \ + cb_data.args.hipGraphRemoveDependencies.to = (const hipGraphNode_t*)to; \ + cb_data.args.hipGraphRemoveDependencies.numDependencies = (size_t)numDependencies; \ +}; +// 
hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] +#define INIT_hipGraphRetainUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphRetainUserObject.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphRetainUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphRetainUserObject.count = (unsigned int)count; \ + cb_data.args.hipGraphRetainUserObject.flags = (unsigned int)flags; \ +}; +// hipGraphUpload[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] +#define INIT_hipGraphUpload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphUpload.graphExec = (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphUpload.stream = (hipStream_t)stream; \ +}; +// hipGraphicsGLRegisterBuffer[('hipGraphicsResource**', 'resource'), ('GLuint', 'buffer'), ('unsigned int', 'flags')] +#define INIT_hipGraphicsGLRegisterBuffer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsGLRegisterBuffer.resource = (hipGraphicsResource**)resource; \ + cb_data.args.hipGraphicsGLRegisterBuffer.buffer = (GLuint)buffer; \ + cb_data.args.hipGraphicsGLRegisterBuffer.flags = (unsigned int)flags; \ +}; +// hipGraphicsGLRegisterImage[('hipGraphicsResource**', 'resource'), ('GLuint', 'image'), ('GLenum', 'target'), ('unsigned int', 'flags')] +#define INIT_hipGraphicsGLRegisterImage_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsGLRegisterImage.resource = (hipGraphicsResource**)resource; \ + cb_data.args.hipGraphicsGLRegisterImage.image = (GLuint)image; \ + cb_data.args.hipGraphicsGLRegisterImage.target = (GLenum)target; \ + cb_data.args.hipGraphicsGLRegisterImage.flags = (unsigned int)flags; \ +}; +// hipGraphicsMapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] +#define INIT_hipGraphicsMapResources_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsMapResources.count = (int)count; \ + cb_data.args.hipGraphicsMapResources.resources = 
(hipGraphicsResource_t*)resources; \ + cb_data.args.hipGraphicsMapResources.stream = (hipStream_t)stream; \ +}; +// hipGraphicsResourceGetMappedPointer[('void**', 'devPtr'), ('size_t*', 'size'), ('hipGraphicsResource_t', 'resource')] +#define INIT_hipGraphicsResourceGetMappedPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsResourceGetMappedPointer.devPtr = (void**)devPtr; \ + cb_data.args.hipGraphicsResourceGetMappedPointer.size = (size_t*)size; \ + cb_data.args.hipGraphicsResourceGetMappedPointer.resource = (hipGraphicsResource_t)resource; \ +}; +// hipGraphicsSubResourceGetMappedArray[('hipArray_t*', 'array'), ('hipGraphicsResource_t', 'resource'), ('unsigned int', 'arrayIndex'), ('unsigned int', 'mipLevel')] +#define INIT_hipGraphicsSubResourceGetMappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.array = (hipArray_t*)array; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.resource = (hipGraphicsResource_t)resource; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.arrayIndex = (unsigned int)arrayIndex; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.mipLevel = (unsigned int)mipLevel; \ +}; +// hipGraphicsUnmapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] +#define INIT_hipGraphicsUnmapResources_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsUnmapResources.count = (int)count; \ + cb_data.args.hipGraphicsUnmapResources.resources = (hipGraphicsResource_t*)resources; \ + cb_data.args.hipGraphicsUnmapResources.stream = (hipStream_t)stream; \ +}; +// hipGraphicsUnregisterResource[('hipGraphicsResource_t', 'resource')] +#define INIT_hipGraphicsUnregisterResource_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsUnregisterResource.resource = (hipGraphicsResource_t)resource; \ +}; +// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned 
int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')] +#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHccModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipHccModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + cb_data.args.hipHccModuleLaunchKernel.hStream = (hipStream_t)hStream; \ + cb_data.args.hipHccModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipHccModuleLaunchKernel.extra = (void**)extra; \ + cb_data.args.hipHccModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \ + cb_data.args.hipHccModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \ +}; +// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostAlloc.ptr = (void**)ptr; \ + cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \ +}; +// hipHostFree[('void*', 'ptr')] +#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostFree.ptr = (void*)ptr; \ +}; +// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')] +#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetDevicePointer.devPtr = 
(void**)devicePointer; \ + cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \ + cb_data.args.hipHostGetDevicePointer.flags = (unsigned int)flags; \ +}; +// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')] +#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \ + cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \ +}; +// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostMalloc.ptr = (void**)ptr; \ + cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \ +}; +// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] +#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \ + cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipHostRegister.flags = (unsigned int)flags; \ +}; +// hipHostUnregister[('void*', 'hostPtr')] +#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \ +}; +// hipImportExternalMemory[('hipExternalMemory_t*', 'extMem_out'), ('const hipExternalMemoryHandleDesc*', 'memHandleDesc')] +#define INIT_hipImportExternalMemory_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipImportExternalMemory.extMem_out = (hipExternalMemory_t*)extMem_out; \ + cb_data.args.hipImportExternalMemory.memHandleDesc = (const hipExternalMemoryHandleDesc*)memHandleDesc; \ +}; +// hipImportExternalSemaphore[('hipExternalSemaphore_t*', 'extSem_out'), ('const hipExternalSemaphoreHandleDesc*', 'semHandleDesc')] +#define INIT_hipImportExternalSemaphore_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipImportExternalSemaphore.extSem_out = (hipExternalSemaphore_t*)extSem_out; \ + cb_data.args.hipImportExternalSemaphore.semHandleDesc = (const 
hipExternalSemaphoreHandleDesc*)semHandleDesc; \ +}; +// hipInit[('unsigned int', 'flags')] +#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipInit.flags = (unsigned int)flags; \ +}; +// hipIpcCloseMemHandle[('void*', 'devPtr')] +#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)dev_ptr; \ +}; +// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')] +#define INIT_hipIpcGetEventHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcGetEventHandle.handle = (hipIpcEventHandle_t*)handle; \ + cb_data.args.hipIpcGetEventHandle.event = (hipEvent_t)event; \ +}; +// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')] +#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \ + cb_data.args.hipIpcGetMemHandle.devPtr = (void*)dev_ptr; \ +}; +// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')] +#define INIT_hipIpcOpenEventHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcOpenEventHandle.event = (hipEvent_t*)event; \ + cb_data.args.hipIpcOpenEventHandle.handle = (hipIpcEventHandle_t)handle; \ +}; +// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')] +#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)dev_ptr; \ + cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \ + cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \ +}; +// hipLaunchByPtr[('const void*', 'hostFunction')] +#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchByPtr.hostFunction = (const void*)hostFunction; \ +}; +// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')] +#define 
INIT_hipLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchCooperativeKernel.f = (const void*)f; \ + cb_data.args.hipLaunchCooperativeKernel.gridDim = (dim3)gridDim; \ + cb_data.args.hipLaunchCooperativeKernel.blockDimX = (dim3)blockDim; \ + cb_data.args.hipLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipLaunchCooperativeKernel.stream = (hipStream_t)hStream; \ +}; +// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] +#define INIT_hipLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.numDevices = (int)numDevices; \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \ +}; +// hipLaunchHostFunc[('hipStream_t', 'stream'), ('hipHostFn_t', 'fn'), ('void*', 'userData')] +#define INIT_hipLaunchHostFunc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchHostFunc.stream = (hipStream_t)stream; \ + cb_data.args.hipLaunchHostFunc.fn = (hipHostFn_t)fn; \ + cb_data.args.hipLaunchHostFunc.userData = (void*)userData; \ +}; +// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')] +#define INIT_hipLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchKernel.function_address = (const void*)hostFunction; \ + cb_data.args.hipLaunchKernel.numBlocks = (dim3)gridDim; \ + cb_data.args.hipLaunchKernel.dimBlocks = (dim3)blockDim; \ + cb_data.args.hipLaunchKernel.args = (void**)args; \ + cb_data.args.hipLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + cb_data.args.hipLaunchKernel.stream = (hipStream_t)stream; \ +}; +// 
hipLaunchKernelExC[('const hipLaunchConfig_t*', 'config'), ('const void*', 'fPtr'), ('void**', 'args')] +#define INIT_hipLaunchKernelExC_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchKernelExC.config = (const hipLaunchConfig_t*)config; \ + cb_data.args.hipLaunchKernelExC.fPtr = (const void*)fPtr; \ + cb_data.args.hipLaunchKernelExC.args = (void**)args; \ +}; +// hipLibraryGetKernel[('hipKernel_t*', 'pKernel'), ('hipLibrary_t', 'library'), ('const char*', 'name')] +#define INIT_hipLibraryGetKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLibraryGetKernel.pKernel = (hipKernel_t*)kernel; \ + cb_data.args.hipLibraryGetKernel.library = (hipLibrary_t)library; \ + cb_data.args.hipLibraryGetKernel.name = (kname) ? strdup(kname) : NULL; \ +}; +// hipLibraryGetKernelCount[('unsigned int*', 'count'), ('hipLibrary_t', 'library')] +#define INIT_hipLibraryGetKernelCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLibraryGetKernelCount.count = (unsigned int*)count; \ + cb_data.args.hipLibraryGetKernelCount.library = (hipLibrary_t)library; \ +}; +// hipLibraryLoadData[('hipLibrary_t*', 'library'), ('const void*', 'code'), ('hipJitOption**', 'jitOptions'), ('void**', 'jitOptionsValues'), ('unsigned int', 'numJitOptions'), ('hipLibraryOption**', 'libraryOptions'), ('void**', 'libraryOptionValues'), ('unsigned int', 'numLibraryOptions')] +#define INIT_hipLibraryLoadData_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLibraryLoadData.library = (hipLibrary_t*)library; \ + cb_data.args.hipLibraryLoadData.code = (const void*)image; \ + cb_data.args.hipLibraryLoadData.jitOptions = (hipJitOption**)jitOptions; \ + cb_data.args.hipLibraryLoadData.jitOptionsValues = (void**)jitOptionsValues; \ + cb_data.args.hipLibraryLoadData.numJitOptions = (unsigned int)numJitOptions; \ + cb_data.args.hipLibraryLoadData.libraryOptions = (hipLibraryOption**)libraryOptions; \ + cb_data.args.hipLibraryLoadData.libraryOptionValues = (void**)libraryOptionValues; \ + 
cb_data.args.hipLibraryLoadData.numLibraryOptions = (unsigned int)numLibraryOptions; \ +}; +// hipLibraryLoadFromFile[('hipLibrary_t*', 'library'), ('const char*', 'fileName'), ('hipJitOption**', 'jitOptions'), ('void**', 'jitOptionsValues'), ('unsigned int', 'numJitOptions'), ('hipLibraryOption**', 'libraryOptions'), ('void**', 'libraryOptionValues'), ('unsigned int', 'numLibraryOptions')] +#define INIT_hipLibraryLoadFromFile_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLibraryLoadFromFile.library = (hipLibrary_t*)library; \ + cb_data.args.hipLibraryLoadFromFile.fileName = (fname) ? strdup(fname) : NULL; \ + cb_data.args.hipLibraryLoadFromFile.jitOptions = (hipJitOption**)jitOptions; \ + cb_data.args.hipLibraryLoadFromFile.jitOptionsValues = (void**)jitOptionsValues; \ + cb_data.args.hipLibraryLoadFromFile.numJitOptions = (unsigned int)numJitOptions; \ + cb_data.args.hipLibraryLoadFromFile.libraryOptions = (hipLibraryOption**)libraryOptions; \ + cb_data.args.hipLibraryLoadFromFile.libraryOptionValues = (void**)libraryOptionValues; \ + cb_data.args.hipLibraryLoadFromFile.numLibraryOptions = (unsigned int)numLibraryOptions; \ +}; +// hipLibraryUnload[('hipLibrary_t', 'library')] +#define INIT_hipLibraryUnload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLibraryUnload.library = (hipLibrary_t)library; \ +}; +// hipLinkAddData[('hipLinkState_t', 'state'), ('hipJitInputType', 'type'), ('void*', 'data'), ('size_t', 'size'), ('const char*', 'name'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues')] +#define INIT_hipLinkAddData_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLinkAddData.state = (hipLinkState_t)hip_link_state; \ + cb_data.args.hipLinkAddData.type = (hipJitInputType)input_type; \ + cb_data.args.hipLinkAddData.data = (void*)image; \ + cb_data.args.hipLinkAddData.size = (size_t)image_size; \ + cb_data.args.hipLinkAddData.name = (name) ? 
strdup(name) : NULL; \ + cb_data.args.hipLinkAddData.numOptions = (unsigned int)num_options; \ + cb_data.args.hipLinkAddData.options = (hipJitOption*)options_ptr; \ + cb_data.args.hipLinkAddData.optionValues = (void**)option_values; \ +}; +// hipLinkAddFile[('hipLinkState_t', 'state'), ('hipJitInputType', 'type'), ('const char*', 'path'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues')] +#define INIT_hipLinkAddFile_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLinkAddFile.state = (hipLinkState_t)hip_link_state; \ + cb_data.args.hipLinkAddFile.type = (hipJitInputType)input_type; \ + cb_data.args.hipLinkAddFile.path = (file_path) ? strdup(file_path) : NULL; \ + cb_data.args.hipLinkAddFile.numOptions = (unsigned int)num_options; \ + cb_data.args.hipLinkAddFile.options = (hipJitOption*)options_ptr; \ + cb_data.args.hipLinkAddFile.optionValues = (void**)option_values; \ +}; +// hipLinkComplete[('hipLinkState_t', 'state'), ('void**', 'hipBinOut'), ('size_t*', 'sizeOut')] +#define INIT_hipLinkComplete_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLinkComplete.state = (hipLinkState_t)hip_link_state; \ + cb_data.args.hipLinkComplete.hipBinOut = (void**)bin_out; \ + cb_data.args.hipLinkComplete.sizeOut = (size_t*)size_out; \ +}; +// hipLinkCreate[('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues'), ('hipLinkState_t*', 'stateOut')] +#define INIT_hipLinkCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLinkCreate.numOptions = (unsigned int)num_options; \ + cb_data.args.hipLinkCreate.options = (hipJitOption*)options_ptr; \ + cb_data.args.hipLinkCreate.optionValues = (void**)options_vals_pptr; \ + cb_data.args.hipLinkCreate.stateOut = (hipLinkState_t*)hip_link_state_ptr; \ +}; +// hipLinkDestroy[('hipLinkState_t', 'state')] +#define INIT_hipLinkDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLinkDestroy.state = (hipLinkState_t)hip_link_state; \ +}; +// hipMalloc[('void**', 'ptr'), ('size_t', 'size')] 
+#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc.ptr = (void**)ptr; \ + cb_data.args.hipMalloc.size = (size_t)sizeBytes; \ +}; +// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')] +#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \ + cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \ +}; +// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')] +#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3DArray.array = (hipArray_t*)array; \ + cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \ + cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \ +}; +// hipMallocArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] +#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocArray.array = (hipArray_t*)array; \ + cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMallocArray.width = (size_t)width; \ + cb_data.args.hipMallocArray.height = (size_t)height; \ + cb_data.args.hipMallocArray.flags = (unsigned int)flags; \ +}; +// hipMallocAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipStream_t', 'stream')] +#define INIT_hipMallocAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocAsync.dev_ptr = (void**)dev_ptr; \ + cb_data.args.hipMallocAsync.size = (size_t)size; \ + cb_data.args.hipMallocAsync.stream = (hipStream_t)stream; \ +}; +// hipMallocFromPoolAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipMemPool_t', 'mem_pool'), ('hipStream_t', 'stream')] +#define INIT_hipMallocFromPoolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocFromPoolAsync.dev_ptr = (void**)dev_ptr; \ + 
cb_data.args.hipMallocFromPoolAsync.size = (size_t)size; \ + cb_data.args.hipMallocFromPoolAsync.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMallocFromPoolAsync.stream = (hipStream_t)stream; \ +}; +// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')] +#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocHost.ptr = (void**)ptr; \ + cb_data.args.hipMallocHost.size = (size_t)size; \ +}; +// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipMallocManaged_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocManaged.dev_ptr = (void**)dev_ptr; \ + cb_data.args.hipMallocManaged.size = (size_t)size; \ + cb_data.args.hipMallocManaged.flags = (unsigned int)flags; \ +}; +// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')] +#define INIT_hipMallocMipmappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocMipmappedArray.mipmappedArray = (hipMipmappedArray_t*)mipmappedArray; \ + cb_data.args.hipMallocMipmappedArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMallocMipmappedArray.extent = (hipExtent)extent; \ + cb_data.args.hipMallocMipmappedArray.numLevels = (unsigned int)numLevels; \ + cb_data.args.hipMallocMipmappedArray.flags = (unsigned int)flags; \ +}; +// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocPitch.ptr = (void**)ptr; \ + cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \ + cb_data.args.hipMallocPitch.width = (size_t)width; \ + cb_data.args.hipMallocPitch.height = (size_t)height; \ +}; +// hipMemAddressFree[('void*', 'devPtr'), ('size_t', 'size')] +#define INIT_hipMemAddressFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAddressFree.devPtr = (void*)devPtr; \ + 
cb_data.args.hipMemAddressFree.size = (size_t)size; \ +}; +// hipMemAddressReserve[('void**', 'ptr'), ('size_t', 'size'), ('size_t', 'alignment'), ('void*', 'addr'), ('unsigned long long', 'flags')] +#define INIT_hipMemAddressReserve_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAddressReserve.ptr = (void**)ptr; \ + cb_data.args.hipMemAddressReserve.size = (size_t)size; \ + cb_data.args.hipMemAddressReserve.alignment = (size_t)alignment; \ + cb_data.args.hipMemAddressReserve.addr = (void*)addr; \ + cb_data.args.hipMemAddressReserve.flags = (unsigned long long)flags; \ +}; +// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')] +#define INIT_hipMemAdvise_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAdvise.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemAdvise.count = (size_t)count; \ + cb_data.args.hipMemAdvise.advice = (hipMemoryAdvise)advice; \ + cb_data.args.hipMemAdvise.device = (int)device; \ +}; +// hipMemAdvise_v2[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('hipMemLocation', 'location')] +#define INIT_hipMemAdvise_v2_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAdvise_v2.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemAdvise_v2.count = (size_t)count; \ + cb_data.args.hipMemAdvise_v2.advice = (hipMemoryAdvise)advice; \ + cb_data.args.hipMemAdvise_v2.location = (hipMemLocation)location; \ +}; +// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')] +#define INIT_hipMemAllocHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAllocHost.ptr = (void**)ptr; \ + cb_data.args.hipMemAllocHost.size = (size_t)size; \ +}; +// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')] +#define INIT_hipMemAllocPitch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAllocPitch.dptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipMemAllocPitch.pitch = (size_t*)pitch; \ + 
cb_data.args.hipMemAllocPitch.widthInBytes = (size_t)widthInBytes; \ + cb_data.args.hipMemAllocPitch.height = (size_t)height; \ + cb_data.args.hipMemAllocPitch.elementSizeBytes = (unsigned int)elementSizeBytes; \ +}; +// hipMemCreate[('hipMemGenericAllocationHandle_t*', 'handle'), ('size_t', 'size'), ('const hipMemAllocationProp*', 'prop'), ('unsigned long long', 'flags')] +#define INIT_hipMemCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemCreate.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemCreate.size = (size_t)size; \ + cb_data.args.hipMemCreate.prop = (const hipMemAllocationProp*)prop; \ + cb_data.args.hipMemCreate.flags = (unsigned long long)flags; \ +}; +// hipMemExportToShareableHandle[('void*', 'shareableHandle'), ('hipMemGenericAllocationHandle_t', 'handle'), ('hipMemAllocationHandleType', 'handleType'), ('unsigned long long', 'flags')] +#define INIT_hipMemExportToShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemExportToShareableHandle.shareableHandle = (void*)shareableHandle; \ + cb_data.args.hipMemExportToShareableHandle.handle = (hipMemGenericAllocationHandle_t)handle; \ + cb_data.args.hipMemExportToShareableHandle.handleType = (hipMemAllocationHandleType)handleType; \ + cb_data.args.hipMemExportToShareableHandle.flags = (unsigned long long)flags; \ +}; +// hipMemGetAccess[('unsigned long long*', 'flags'), ('const hipMemLocation*', 'location'), ('void*', 'ptr')] +#define INIT_hipMemGetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAccess.flags = (unsigned long long*)flags; \ + cb_data.args.hipMemGetAccess.location = (const hipMemLocation*)location; \ + cb_data.args.hipMemGetAccess.ptr = (void*)ptr; \ +}; +// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')] +#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \ + cb_data.args.hipMemGetAddressRange.psize = 
(size_t*)psize; \ + cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \ +}; +// hipMemGetAllocationGranularity[('size_t*', 'granularity'), ('const hipMemAllocationProp*', 'prop'), ('hipMemAllocationGranularity_flags', 'option')] +#define INIT_hipMemGetAllocationGranularity_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAllocationGranularity.granularity = (size_t*)granularity; \ + cb_data.args.hipMemGetAllocationGranularity.prop = (const hipMemAllocationProp*)prop; \ + cb_data.args.hipMemGetAllocationGranularity.option = (hipMemAllocationGranularity_flags)option; \ +}; +// hipMemGetAllocationPropertiesFromHandle[('hipMemAllocationProp*', 'prop'), ('hipMemGenericAllocationHandle_t', 'handle')] +#define INIT_hipMemGetAllocationPropertiesFromHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAllocationPropertiesFromHandle.prop = (hipMemAllocationProp*)prop; \ + cb_data.args.hipMemGetAllocationPropertiesFromHandle.handle = (hipMemGenericAllocationHandle_t)handle; \ +}; +// hipMemGetHandleForAddressRange[('void*', 'handle'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'size'), ('hipMemRangeHandleType', 'handleType'), ('unsigned long long', 'flags')] +#define INIT_hipMemGetHandleForAddressRange_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetHandleForAddressRange.handle = (void*)handle; \ + cb_data.args.hipMemGetHandleForAddressRange.dptr = (hipDeviceptr_t)dptr; \ + cb_data.args.hipMemGetHandleForAddressRange.size = (size_t)size; \ + cb_data.args.hipMemGetHandleForAddressRange.handleType = (hipMemRangeHandleType)handleType; \ + cb_data.args.hipMemGetHandleForAddressRange.flags = (unsigned long long)flags; \ +}; +// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')] +#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetInfo.free = (size_t*)free; \ + cb_data.args.hipMemGetInfo.total = (size_t*)total; \ +}; +// hipMemImportFromShareableHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'osHandle'), 
('hipMemAllocationHandleType', 'shHandleType')] +#define INIT_hipMemImportFromShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemImportFromShareableHandle.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemImportFromShareableHandle.osHandle = (void*)osHandle; \ + cb_data.args.hipMemImportFromShareableHandle.shHandleType = (hipMemAllocationHandleType)shHandleType; \ +}; +// hipMemMap[('void*', 'ptr'), ('size_t', 'size'), ('size_t', 'offset'), ('hipMemGenericAllocationHandle_t', 'handle'), ('unsigned long long', 'flags')] +#define INIT_hipMemMap_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemMap.ptr = (void*)ptr; \ + cb_data.args.hipMemMap.size = (size_t)size; \ + cb_data.args.hipMemMap.offset = (size_t)offset; \ + cb_data.args.hipMemMap.handle = (hipMemGenericAllocationHandle_t)handle; \ + cb_data.args.hipMemMap.flags = (unsigned long long)flags; \ +}; +// hipMemMapArrayAsync[('hipArrayMapInfo*', 'mapInfoList'), ('unsigned int', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemMapArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemMapArrayAsync.mapInfoList = (hipArrayMapInfo*)mapInfoList; \ + cb_data.args.hipMemMapArrayAsync.count = (unsigned int)count; \ + cb_data.args.hipMemMapArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemPoolCreate[('hipMemPool_t*', 'mem_pool'), ('const hipMemPoolProps*', 'pool_props')] +#define INIT_hipMemPoolCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolCreate.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipMemPoolCreate.pool_props = (const hipMemPoolProps*)pool_props; \ +}; +// hipMemPoolDestroy[('hipMemPool_t', 'mem_pool')] +#define INIT_hipMemPoolDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolDestroy.mem_pool = (hipMemPool_t)mem_pool; \ +}; +// hipMemPoolExportPointer[('hipMemPoolPtrExportData*', 'export_data'), ('void*', 'dev_ptr')] +#define INIT_hipMemPoolExportPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolExportPointer.export_data = 
(hipMemPoolPtrExportData*)export_data; \ + cb_data.args.hipMemPoolExportPointer.dev_ptr = (void*)ptr; \ +}; +// hipMemPoolExportToShareableHandle[('void*', 'shared_handle'), ('hipMemPool_t', 'mem_pool'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] +#define INIT_hipMemPoolExportToShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolExportToShareableHandle.shared_handle = (void*)shared_handle; \ + cb_data.args.hipMemPoolExportToShareableHandle.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolExportToShareableHandle.handle_type = (hipMemAllocationHandleType)handle_type; \ + cb_data.args.hipMemPoolExportToShareableHandle.flags = (unsigned int)flags; \ +}; +// hipMemPoolGetAccess[('hipMemAccessFlags*', 'flags'), ('hipMemPool_t', 'mem_pool'), ('hipMemLocation*', 'location')] +#define INIT_hipMemPoolGetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolGetAccess.flags = (hipMemAccessFlags*)flags; \ + cb_data.args.hipMemPoolGetAccess.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolGetAccess.location = (hipMemLocation*)location; \ +}; +// hipMemPoolGetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] +#define INIT_hipMemPoolGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolGetAttribute.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolGetAttribute.attr = (hipMemPoolAttr)attr; \ + cb_data.args.hipMemPoolGetAttribute.value = (void*)value; \ +}; +// hipMemPoolImportFromShareableHandle[('hipMemPool_t*', 'mem_pool'), ('void*', 'shared_handle'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] +#define INIT_hipMemPoolImportFromShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolImportFromShareableHandle.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipMemPoolImportFromShareableHandle.shared_handle = (void*)shared_handle; \ + cb_data.args.hipMemPoolImportFromShareableHandle.handle_type = 
(hipMemAllocationHandleType)handle_type; \ + cb_data.args.hipMemPoolImportFromShareableHandle.flags = (unsigned int)flags; \ +}; +// hipMemPoolImportPointer[('void**', 'dev_ptr'), ('hipMemPool_t', 'mem_pool'), ('hipMemPoolPtrExportData*', 'export_data')] +#define INIT_hipMemPoolImportPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolImportPointer.dev_ptr = (void**)ptr; \ + cb_data.args.hipMemPoolImportPointer.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolImportPointer.export_data = (hipMemPoolPtrExportData*)export_data; \ +}; +// hipMemPoolSetAccess[('hipMemPool_t', 'mem_pool'), ('const hipMemAccessDesc*', 'desc_list'), ('size_t', 'count')] +#define INIT_hipMemPoolSetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolSetAccess.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolSetAccess.desc_list = (const hipMemAccessDesc*)desc_list; \ + cb_data.args.hipMemPoolSetAccess.count = (size_t)count; \ +}; +// hipMemPoolSetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] +#define INIT_hipMemPoolSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolSetAttribute.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolSetAttribute.attr = (hipMemPoolAttr)attr; \ + cb_data.args.hipMemPoolSetAttribute.value = (void*)value; \ +}; +// hipMemPoolTrimTo[('hipMemPool_t', 'mem_pool'), ('size_t', 'min_bytes_to_hold')] +#define INIT_hipMemPoolTrimTo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolTrimTo.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolTrimTo.min_bytes_to_hold = (size_t)min_bytes_to_hold; \ +}; +// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')] +#define INIT_hipMemPrefetchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPrefetchAsync.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemPrefetchAsync.count = (size_t)count; \ + cb_data.args.hipMemPrefetchAsync.device = (int)device; \ + 
cb_data.args.hipMemPrefetchAsync.stream = (hipStream_t)stream; \ +}; +// hipMemPrefetchAsync_v2[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemLocation', 'location'), ('unsigned int', 'flags'), ('hipStream_t', 'stream')] +#define INIT_hipMemPrefetchAsync_v2_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPrefetchAsync_v2.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemPrefetchAsync_v2.count = (size_t)count; \ + cb_data.args.hipMemPrefetchAsync_v2.location = (hipMemLocation)location; \ + cb_data.args.hipMemPrefetchAsync_v2.flags = (unsigned int)flags; \ + cb_data.args.hipMemPrefetchAsync_v2.stream = (hipStream_t)stream; \ +}; +// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')] +#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \ + cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \ +}; +// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')] +#define INIT_hipMemRangeGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRangeGetAttribute.data = (void*)data; \ + cb_data.args.hipMemRangeGetAttribute.data_size = (size_t)data_size; \ + cb_data.args.hipMemRangeGetAttribute.attribute = (hipMemRangeAttribute)attribute; \ + cb_data.args.hipMemRangeGetAttribute.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemRangeGetAttribute.count = (size_t)count; \ +}; +// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')] +#define INIT_hipMemRangeGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRangeGetAttributes.data = (void**)data; \ + cb_data.args.hipMemRangeGetAttributes.data_sizes = (size_t*)data_sizes; \ + cb_data.args.hipMemRangeGetAttributes.attributes = (hipMemRangeAttribute*)attributes; \ + 
cb_data.args.hipMemRangeGetAttributes.num_attributes = (size_t)num_attributes; \ + cb_data.args.hipMemRangeGetAttributes.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemRangeGetAttributes.count = (size_t)count; \ +}; +// hipMemRelease[('hipMemGenericAllocationHandle_t', 'handle')] +#define INIT_hipMemRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRelease.handle = (hipMemGenericAllocationHandle_t)handle; \ +}; +// hipMemRetainAllocationHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'addr')] +#define INIT_hipMemRetainAllocationHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRetainAllocationHandle.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemRetainAllocationHandle.addr = (void*)addr; \ +}; +// hipMemSetAccess[('void*', 'ptr'), ('size_t', 'size'), ('const hipMemAccessDesc*', 'desc'), ('size_t', 'count')] +#define INIT_hipMemSetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemSetAccess.ptr = (void*)ptr; \ + cb_data.args.hipMemSetAccess.size = (size_t)size; \ + cb_data.args.hipMemSetAccess.desc = (const hipMemAccessDesc*)desc; \ + cb_data.args.hipMemSetAccess.count = (size_t)count; \ +}; +// hipMemUnmap[('void*', 'ptr'), ('size_t', 'size')] +#define INIT_hipMemUnmap_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemUnmap.ptr = (void*)ptr; \ + cb_data.args.hipMemUnmap.size = (size_t)size; \ +}; +// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy.dst = (void*)dst; \ + cb_data.args.hipMemcpy.src = (const void*)src; \ + cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipMemcpy2D.dst = (void*)dst; \ + cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2D.src = (const void*)src; \ + cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2D.width = (size_t)width; \ + cb_data.args.hipMemcpy2D.height = (size_t)height; \ + cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DArrayToArray[('hipArray_t', 'dst'), ('size_t', 'wOffsetDst'), ('size_t', 'hOffsetDst'), ('hipArray_const_t', 'src'), ('size_t', 'wOffsetSrc'), ('size_t', 'hOffsetSrc'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2DArrayToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DArrayToArray.dst = (hipArray_t)dst; \ + cb_data.args.hipMemcpy2DArrayToArray.wOffsetDst = (size_t)wOffsetDst; \ + cb_data.args.hipMemcpy2DArrayToArray.hOffsetDst = (size_t)hOffsetDst; \ + cb_data.args.hipMemcpy2DArrayToArray.src = (hipArray_const_t)src; \ + cb_data.args.hipMemcpy2DArrayToArray.wOffsetSrc = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpy2DArrayToArray.hOffsetSrc = (size_t)hOffsetSrc; \ + cb_data.args.hipMemcpy2DArrayToArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DArrayToArray.height = (size_t)height; \ + cb_data.args.hipMemcpy2DArrayToArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \ + cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \ + 
cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2DFromArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DFromArray.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DFromArray.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DFromArray.src = (hipArray_const_t)src; \ + cb_data.args.hipMemcpy2DFromArray.wOffset = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DFromArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DFromArray.height = (size_t)height; \ + cb_data.args.hipMemcpy2DFromArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy2DFromArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DFromArrayAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DFromArrayAsync.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DFromArrayAsync.src = (hipArray_const_t)src; \ + cb_data.args.hipMemcpy2DFromArrayAsync.wOffset = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArrayAsync.hOffset = (size_t)hOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArrayAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DFromArrayAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DFromArrayAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DFromArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy2DToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] 
+#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DToArray.dst = (hipArray_t)dst; \ + cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \ + cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \ + cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DToArrayAsync[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy2DToArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DToArrayAsync.dst = (hipArray_t)dst; \ + cb_data.args.hipMemcpy2DToArrayAsync.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpy2DToArrayAsync.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DToArrayAsync.src = (const void*)src; \ + cb_data.args.hipMemcpy2DToArrayAsync.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DToArrayAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DToArrayAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DToArrayAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DToArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')] +#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \ +}; +// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \ + cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 
'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DBatchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DBatchAsync.numOps = (size_t)numOps; \ + cb_data.args.hipMemcpy3DBatchAsync.opList = (hipMemcpy3DBatchOp*)opList; \ + cb_data.args.hipMemcpy3DBatchAsync.failIdx = (size_t*)failIdx; \ + cb_data.args.hipMemcpy3DBatchAsync.flags = (unsigned long long)flags; \ + cb_data.args.hipMemcpy3DBatchAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')] +#define INIT_hipMemcpy3DPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DPeer.p = (hipMemcpy3DPeerParms*)p; \ +}; +// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DPeerAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DPeerAsync.p = (hipMemcpy3DPeerParms*)p; \ + cb_data.args.hipMemcpy3DPeerAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyAtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount')] +#define INIT_hipMemcpyAtoA_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoA.dstArray = (hipArray_t)dstArray; \ + cb_data.args.hipMemcpyAtoA.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyAtoA.srcArray = (hipArray_t)srcArray; \ + cb_data.args.hipMemcpyAtoA.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoA.ByteCount = (size_t)ByteCount; \ +}; +// hipMemcpyAtoD[('hipDeviceptr_t', 
'dstDevice'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount')] +#define INIT_hipMemcpyAtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoD.dstDevice = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyAtoD.srcArray = (hipArray_t)srcArray; \ + cb_data.args.hipMemcpyAtoD.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoD.ByteCount = (size_t)ByteCount; \ +}; +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] +#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoH.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyAtoH.srcArray = (hipArray_t)srcArray; \ + cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoH.count = (size_t)ByteCount; \ +}; +// hipMemcpyAtoHAsync[('void*', 'dstHost'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyAtoHAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoHAsync.dstHost = (void*)dstHost; \ + cb_data.args.hipMemcpyAtoHAsync.srcArray = (hipArray_t)srcArray; \ + cb_data.args.hipMemcpyAtoHAsync.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoHAsync.ByteCount = (size_t)ByteCount; \ + cb_data.args.hipMemcpyAtoHAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyBatchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyBatchAsync.dsts = (void**)dsts; \ + cb_data.args.hipMemcpyBatchAsync.srcs = (void**)srcs; \ + cb_data.args.hipMemcpyBatchAsync.sizes = (size_t*)sizes; \ + cb_data.args.hipMemcpyBatchAsync.count = (size_t)count; \ + cb_data.args.hipMemcpyBatchAsync.attrs = (hipMemcpyAttributes*)attrs; \ + 
cb_data.args.hipMemcpyBatchAsync.attrsIdxs = (size_t*)attrsIdxs; \ + cb_data.args.hipMemcpyBatchAsync.numAttrs = (size_t)numAttrs; \ + cb_data.args.hipMemcpyBatchAsync.failIdx = (size_t*)failIdx; \ + cb_data.args.hipMemcpyBatchAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')] +#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoA.dstArray = (hipArray_t)dstArray; \ + cb_data.args.hipMemcpyDtoA.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyDtoA.srcDevice = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoA.ByteCount = (size_t)ByteCount; \ +}; +// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoH.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define 
INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)src; \ + cb_data.args.hipMemcpyFromArray.wOffset = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyFromArray.count = (size_t)count; \ + cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbolAsync.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \ + 
cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyHtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] +#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoA.dstArray = (hipArray_t)dstArray; \ + cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoA.count = (size_t)ByteCount; \ +}; +// hipMemcpyHtoAAsync[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyHtoAAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoAAsync.dstArray = (hipArray_t)dstArray; \ + cb_data.args.hipMemcpyHtoAAsync.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyHtoAAsync.srcHost = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoAAsync.ByteCount = (size_t)ByteCount; \ + cb_data.args.hipMemcpyHtoAAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoD.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyHtoD.src = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyHtoDAsync.src = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')] +#define 
INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \ +}; +// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyParam2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyParam2DAsync.pCopy = (const hip_Memcpy2D*)pCopy; \ + cb_data.args.hipMemcpyParam2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeer.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \ + cb_data.args.hipMemcpyPeer.src = (const void*)src; \ + cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \ + cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \ +}; +// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \ + cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyPeerAsync.srcDevice = (int)srcDevice; \ + cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToArray.dst = (hipArray_t)dst; \ + cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyToArray.src = (const void*)src; \ + cb_data.args.hipMemcpyToArray.count = 
(size_t)count; \ + cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbolAsync.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyWithStream_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyWithStream.dst = (void*)dst; \ + cb_data.args.hipMemcpyWithStream.src = (const void*)src; \ + cb_data.args.hipMemcpyWithStream.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyWithStream.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyWithStream.stream = (hipStream_t)stream; \ +}; +// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')] +#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset.dst = (void*)dst; \ + cb_data.args.hipMemset.value = (int)value; 
\ + cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \ +}; +// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2D.dst = (void*)dst; \ + cb_data.args.hipMemset2D.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2D.value = (int)value; \ + cb_data.args.hipMemset2D.width = (size_t)width; \ + cb_data.args.hipMemset2D.height = (size_t)height; \ +}; +// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] +#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2DAsync.value = (int)value; \ + cb_data.args.hipMemset2DAsync.width = (size_t)width; \ + cb_data.args.hipMemset2DAsync.height = (size_t)height; \ + cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')] +#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + cb_data.args.hipMemset3D.value = (int)value; \ + cb_data.args.hipMemset3D.extent = (hipExtent)extent; \ +}; +// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')] +#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + cb_data.args.hipMemset3DAsync.value = (int)value; \ + cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \ + cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipMemsetAsync.dst = (void*)dst; \ + cb_data.args.hipMemsetAsync.value = (int)value; \ + cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \ +}; +// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD16_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD16.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD16.value = (unsigned short)value; \ + cb_data.args.hipMemsetD16.count = (size_t)count; \ +}; +// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD16Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD16Async.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD16Async.value = (unsigned short)value; \ + cb_data.args.hipMemsetD16Async.count = (size_t)count; \ + cb_data.args.hipMemsetD16Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD2D16[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned short', 'value'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMemsetD2D16_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D16.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D16.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D16.value = (unsigned short)value; \ + cb_data.args.hipMemsetD2D16.width = (size_t)width; \ + cb_data.args.hipMemsetD2D16.height = (size_t)height; \ +}; +// hipMemsetD2D16Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned short', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD2D16Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D16Async.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D16Async.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D16Async.value = (unsigned short)value; \ + cb_data.args.hipMemsetD2D16Async.width = (size_t)width; \ + 
cb_data.args.hipMemsetD2D16Async.height = (size_t)height; \ + cb_data.args.hipMemsetD2D16Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD2D32[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned int', 'value'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMemsetD2D32_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D32.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D32.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D32.value = (unsigned int)value; \ + cb_data.args.hipMemsetD2D32.width = (size_t)width; \ + cb_data.args.hipMemsetD2D32.height = (size_t)height; \ +}; +// hipMemsetD2D32Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD2D32Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D32Async.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D32Async.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D32Async.value = (unsigned int)value; \ + cb_data.args.hipMemsetD2D32Async.width = (size_t)width; \ + cb_data.args.hipMemsetD2D32Async.height = (size_t)height; \ + cb_data.args.hipMemsetD2D32Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD2D8[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned char', 'value'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMemsetD2D8_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D8.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D8.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D8.value = (unsigned char)value; \ + cb_data.args.hipMemsetD2D8.width = (size_t)width; \ + cb_data.args.hipMemsetD2D8.height = (size_t)height; \ +}; +// hipMemsetD2D8Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned char', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD2D8Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD2D8Async.dst 
= (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD2D8Async.dstPitch = (size_t)dstPitch; \ + cb_data.args.hipMemsetD2D8Async.value = (unsigned char)value; \ + cb_data.args.hipMemsetD2D8Async.width = (size_t)width; \ + cb_data.args.hipMemsetD2D8Async.height = (size_t)height; \ + cb_data.args.hipMemsetD2D8Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD32_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD32.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD32.value = (int)value; \ + cb_data.args.hipMemsetD32.count = (size_t)count; \ +}; +// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD32Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD32Async.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD32Async.value = (int)value; \ + cb_data.args.hipMemsetD32Async.count = (size_t)count; \ + cb_data.args.hipMemsetD32Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD8.value = (unsigned char)value; \ + cb_data.args.hipMemsetD8.count = (size_t)count; \ +}; +// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD8Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD8Async.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD8Async.value = (unsigned char)value; \ + cb_data.args.hipMemsetD8Async.count = (size_t)count; \ + cb_data.args.hipMemsetD8Async.stream = (hipStream_t)stream; \ +}; +// hipMipmappedArrayCreate[('hipMipmappedArray_t*', 'pHandle'), ('HIP_ARRAY3D_DESCRIPTOR*', 'pMipmappedArrayDesc'), ('unsigned int', 'numMipmapLevels')] +#define 
INIT_hipMipmappedArrayCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayCreate.pHandle = (hipMipmappedArray_t*)mipmapped_array_pptr; \ + cb_data.args.hipMipmappedArrayCreate.pMipmappedArrayDesc = (HIP_ARRAY3D_DESCRIPTOR*)mipmapped_array_desc_ptr; \ + cb_data.args.hipMipmappedArrayCreate.numMipmapLevels = (unsigned int)num_mipmap_levels; \ +}; +// hipMipmappedArrayDestroy[('hipMipmappedArray_t', 'hMipmappedArray')] +#define INIT_hipMipmappedArrayDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayDestroy.hMipmappedArray = (hipMipmappedArray_t)mipmapped_array_ptr; \ +}; +// hipMipmappedArrayGetLevel[('hipArray_t*', 'pLevelArray'), ('hipMipmappedArray_t', 'hMipMappedArray'), ('unsigned int', 'level')] +#define INIT_hipMipmappedArrayGetLevel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayGetLevel.pLevelArray = (hipArray_t*)level_array_pptr; \ + cb_data.args.hipMipmappedArrayGetLevel.hMipMappedArray = (hipMipmappedArray_t)mipmapped_array_ptr; \ + cb_data.args.hipMipmappedArrayGetLevel.level = (unsigned int)mip_level; \ +}; +// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')] +#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \ + cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetFunction.kname = (name) ? 
strdup(name) : NULL; \ +}; +// hipModuleGetFunctionCount[('unsigned int*', 'count'), ('hipModule_t', 'mod')] +#define INIT_hipModuleGetFunctionCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetFunctionCount.count = (unsigned int*)count; \ + cb_data.args.hipModuleGetFunctionCount.mod = (hipModule_t)mod; \ +}; +// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')] +#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \ + cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetGlobal.name = (name) ? strdup(name) : NULL; \ +}; +// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')] +#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \ + cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetTexRef.name = (name) ? 
strdup(name) : NULL; \ +}; +// hipModuleLaunchCooperativeKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams')] +#define INIT_hipModuleLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchCooperativeKernel.f = (hipFunction_t)f; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimX = (unsigned int)gridDimX; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimY = (unsigned int)gridDimY; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimZ = (unsigned int)gridDimZ; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipModuleLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipModuleLaunchCooperativeKernel.stream = (hipStream_t)stream; \ + cb_data.args.hipModuleLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \ +}; +// hipModuleLaunchCooperativeKernelMultiDevice[('hipFunctionLaunchParams*', 'launchParamsList'), ('unsigned int', 'numDevices'), ('unsigned int', 'flags')] +#define INIT_hipModuleLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList = (hipFunctionLaunchParams*)launchParamsList; \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.numDevices = (unsigned int)numDevices; \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \ +}; +// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), 
('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')] +#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \ + cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \ + cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \ + cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \ + cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipModuleLaunchKernel.extra = (void**)extra; \ +}; +// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')] +#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoad.fname = (fname) ? 
strdup(fname) : NULL; \ +}; +// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')] +#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadData.image = (const void*)image; \ +}; +// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')] +#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \ + cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \ + cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \ + cb_data.args.hipModuleLoadDataEx.optionsValues = (void**)optionsValues; \ +}; +// hipModuleLoadFatBinary[('hipModule_t*', 'module'), ('const void*', 'fatbin')] +#define INIT_hipModuleLoadFatBinary_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadFatBinary.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadFatBinary.fatbin = (const void*)fatbin; \ +}; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')] +#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ +}; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), 
('unsigned int', 'flags')] +#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \ +}; +// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] +#define INIT_hipModuleOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \ +}; +// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')] +#define INIT_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize = (int*)gridSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize = (int*)blockSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f = (hipFunction_t)f; \ + 
cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit = (int)blockSizeLimit; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags = (unsigned int)flags; \ +}; +// hipModuleUnload[('hipModule_t', 'module')] +#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \ +}; +// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')] +#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize = (size_t)dynamicSMemSize; \ +}; +// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')] +#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize = (size_t)dynamicSMemSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \ +}; +// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 
'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] +#define INIT_hipOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \ +}; +// hipPeekAtLastError[] +#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \ +}; +// hipPointerGetAttribute[('void*', 'data'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] +#define INIT_hipPointerGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerGetAttribute.data = (void*)data; \ + cb_data.args.hipPointerGetAttribute.attribute = (hipPointer_attribute)attribute; \ + cb_data.args.hipPointerGetAttribute.ptr = (hipDeviceptr_t)ptr; \ +}; +// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')] +#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \ + cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \ +}; +// hipPointerSetAttribute[('const void*', 'value'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] +#define INIT_hipPointerSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerSetAttribute.value = (const void*)value; \ + cb_data.args.hipPointerSetAttribute.attribute = (hipPointer_attribute)attribute; \ + cb_data.args.hipPointerSetAttribute.ptr = (hipDeviceptr_t)ptr; \ +}; +// hipProfilerStart[] +#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \ +}; +// hipProfilerStop[] +#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \ +}; +// hipRuntimeGetVersion[('int*', 'runtimeVersion')] +#define 
INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \ +}; +// hipSetDevice[('int', 'deviceId')] +#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetDevice.deviceId = (int)device; \ +}; +// hipSetDeviceFlags[('unsigned int', 'flags')] +#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetDeviceFlags.flags = (unsigned int)flags; \ +}; +// hipSetValidDevices[('int*', 'device_arr'), ('int', 'len')] +#define INIT_hipSetValidDevices_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetValidDevices.device_arr = (int*)device_arr; \ + cb_data.args.hipSetValidDevices.len = (int)len; \ +}; +// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')] +#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetupArgument.arg = (const void*)arg; \ + cb_data.args.hipSetupArgument.size = (size_t)size; \ + cb_data.args.hipSetupArgument.offset = (size_t)offset; \ +}; +// hipSignalExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreSignalParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] +#define INIT_hipSignalExternalSemaphoresAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSignalExternalSemaphoresAsync.extSemArray = (const hipExternalSemaphore_t*)extSemArray; \ + cb_data.args.hipSignalExternalSemaphoresAsync.paramsArray = (const hipExternalSemaphoreSignalParams*)paramsArray; \ + cb_data.args.hipSignalExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ + cb_data.args.hipSignalExternalSemaphoresAsync.stream = (hipStream_t)stream; \ +}; +// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')] +#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \ + 
cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \
+  cb_data.args.hipStreamAddCallback.userData = (void*)userData; \
+  cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \
+};
+// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('void*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')]
+#define INIT_hipStreamAttachMemAsync_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamAttachMemAsync.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamAttachMemAsync.dev_ptr = (void*)dev_ptr; \
+  cb_data.args.hipStreamAttachMemAsync.length = (size_t)length; \
+  cb_data.args.hipStreamAttachMemAsync.flags = (unsigned int)flags; \
+};
+// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'), ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
+#define INIT_hipStreamBatchMemOp_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamBatchMemOp.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamBatchMemOp.count = (unsigned int)count; \
+  cb_data.args.hipStreamBatchMemOp.paramArray = (hipStreamBatchMemOpParams*)paramArray; \
+  cb_data.args.hipStreamBatchMemOp.flags = (unsigned int)flags; \
+};
+// hipStreamBeginCapture[('hipStream_t', 'stream'), ('hipStreamCaptureMode', 'mode')]
+#define INIT_hipStreamBeginCapture_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamBeginCapture.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamBeginCapture.mode = (hipStreamCaptureMode)mode; \
+};
+// hipStreamBeginCaptureToGraph[('hipStream_t', 'stream'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'dependencies'), ('const hipGraphEdgeData*', 'dependencyData'), ('size_t', 'numDependencies'), ('hipStreamCaptureMode', 'mode')]
+#define INIT_hipStreamBeginCaptureToGraph_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamBeginCaptureToGraph.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamBeginCaptureToGraph.graph = (hipGraph_t)graph; \
+  cb_data.args.hipStreamBeginCaptureToGraph.dependencies = (const hipGraphNode_t*)dependencies; \
+  cb_data.args.hipStreamBeginCaptureToGraph.dependencyData = (const hipGraphEdgeData*)dependencyData; \
+  cb_data.args.hipStreamBeginCaptureToGraph.numDependencies = (size_t)numDependencies; \
+  cb_data.args.hipStreamBeginCaptureToGraph.mode = (hipStreamCaptureMode)mode; \
+};
+// hipStreamCreate[('hipStream_t*', 'stream')]
+#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \
+};
+// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')]
+#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \
+  cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \
+};
+// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')]
+#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamCreateWithPriority.stream = (hipStream_t*)stream; \
+  cb_data.args.hipStreamCreateWithPriority.flags = (unsigned int)flags; \
+  cb_data.args.hipStreamCreateWithPriority.priority = (int)priority; \
+};
+// hipStreamDestroy[('hipStream_t', 'stream')]
+#define INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \
+};
+// hipStreamEndCapture[('hipStream_t', 'stream'), ('hipGraph_t*', 'pGraph')]
+#define INIT_hipStreamEndCapture_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamEndCapture.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamEndCapture.pGraph = (hipGraph_t*)pGraph; \
+};
+// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('hipLaunchAttributeValue*', 'value_out')]
+#define INIT_hipStreamGetAttribute_CB_ARGS_DATA(cb_data) { \
+};
+// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')]
+#define INIT_hipStreamGetCaptureInfo_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetCaptureInfo.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetCaptureInfo.pCaptureStatus = (hipStreamCaptureStatus*)pCaptureStatus; \
+  cb_data.args.hipStreamGetCaptureInfo.pId = (unsigned long long*)pId; \
+};
+// hipStreamGetCaptureInfo_v2[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'captureStatus_out'), ('unsigned long long*', 'id_out'), ('hipGraph_t*', 'graph_out'), ('const hipGraphNode_t**', 'dependencies_out'), ('size_t*', 'numDependencies_out')]
+#define INIT_hipStreamGetCaptureInfo_v2_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetCaptureInfo_v2.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetCaptureInfo_v2.captureStatus_out = (hipStreamCaptureStatus*)captureStatus_out; \
+  cb_data.args.hipStreamGetCaptureInfo_v2.id_out = (unsigned long long*)id_out; \
+  cb_data.args.hipStreamGetCaptureInfo_v2.graph_out = (hipGraph_t*)graph_out; \
+  cb_data.args.hipStreamGetCaptureInfo_v2.dependencies_out = (const hipGraphNode_t**)dependencies_out; \
+  cb_data.args.hipStreamGetCaptureInfo_v2.numDependencies_out = (size_t*)numDependencies_out; \
+};
+// hipStreamGetDevice[('hipStream_t', 'stream'), ('hipDevice_t*', 'device')]
+#define INIT_hipStreamGetDevice_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetDevice.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetDevice.device = (hipDevice_t*)device; \
+};
+// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')]
+#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \
+};
+// hipStreamGetId[('hipStream_t', 'stream'), ('unsigned long long*', 'streamId')]
+#define INIT_hipStreamGetId_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetId.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetId.streamId = (unsigned long long*)streamId; \
+};
+//
hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')]
+#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamGetPriority.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamGetPriority.priority = (int*)priority; \
+};
+// hipStreamIsCapturing[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus')]
+#define INIT_hipStreamIsCapturing_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamIsCapturing.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamIsCapturing.pCaptureStatus = (hipStreamCaptureStatus*)pCaptureStatus; \
+};
+// hipStreamQuery[('hipStream_t', 'stream')]
+#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \
+};
+// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')]
+#define INIT_hipStreamSetAttribute_CB_ARGS_DATA(cb_data) { \
+};
+// hipStreamSynchronize[('hipStream_t', 'stream')]
+#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \
+};
+// hipStreamUpdateCaptureDependencies[('hipStream_t', 'stream'), ('hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('unsigned int', 'flags')]
+#define INIT_hipStreamUpdateCaptureDependencies_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamUpdateCaptureDependencies.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamUpdateCaptureDependencies.dependencies = (hipGraphNode_t*)dependencies; \
+  cb_data.args.hipStreamUpdateCaptureDependencies.numDependencies = (size_t)numDependencies; \
+  cb_data.args.hipStreamUpdateCaptureDependencies.flags = (unsigned int)flags; \
+};
+// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')]
+#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \
+  cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \
+};
+// hipStreamWaitValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags'), ('unsigned int', 'mask')]
+#define INIT_hipStreamWaitValue32_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamWaitValue32.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamWaitValue32.ptr = (void*)ptr; \
+  cb_data.args.hipStreamWaitValue32.value = (unsigned int)value; \
+  cb_data.args.hipStreamWaitValue32.flags = (unsigned int)flags; \
+  cb_data.args.hipStreamWaitValue32.mask = (unsigned int)mask; \
+};
+// hipStreamWaitValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags'), ('uint64_t', 'mask')]
+#define INIT_hipStreamWaitValue64_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamWaitValue64.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamWaitValue64.ptr = (void*)ptr; \
+  cb_data.args.hipStreamWaitValue64.value = (uint64_t)value; \
+  cb_data.args.hipStreamWaitValue64.flags = (unsigned int)flags; \
+  cb_data.args.hipStreamWaitValue64.mask = (uint64_t)mask; \
+};
+// hipStreamWriteValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags')]
+#define INIT_hipStreamWriteValue32_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamWriteValue32.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamWriteValue32.ptr = (void*)ptr; \
+  cb_data.args.hipStreamWriteValue32.value = (unsigned int)value; \
+  cb_data.args.hipStreamWriteValue32.flags = (unsigned int)flags; \
+};
+// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')]
+#define INIT_hipStreamWriteValue64_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipStreamWriteValue64.stream = (hipStream_t)stream; \
+  cb_data.args.hipStreamWriteValue64.ptr = (void*)ptr; \
+  cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \
+  cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \
+};
+// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \
+  cb_data.args.hipTexRefGetAddress.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetArray[('hipArray_t*', 'pArray'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetArray_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetArray.pArray = (hipArray_t*)pArray; \
+  cb_data.args.hipTexRefGetArray.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetBorderColor[('float*', 'pBorderColor'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetBorderColor_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetBorderColor.pBorderColor = (float*)pBorderColor; \
+  cb_data.args.hipTexRefGetBorderColor.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetFlags[('unsigned int*', 'pFlags'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetFlags_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetFlags.pFlags = (unsigned int*)pFlags; \
+  cb_data.args.hipTexRefGetFlags.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetFormat[('hipArray_Format*', 'pFormat'), ('int*', 'pNumChannels'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetFormat_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetFormat.pFormat = (hipArray_Format*)pFormat; \
+  cb_data.args.hipTexRefGetFormat.pNumChannels = (int*)pNumChannels; \
+  cb_data.args.hipTexRefGetFormat.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetMaxAnisotropy[('int*', 'pmaxAnsio'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetMaxAnisotropy_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetMaxAnisotropy.pmaxAnsio = (int*)pmaxAnsio; \
+  cb_data.args.hipTexRefGetMaxAnisotropy.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetMipMappedArray[('hipMipmappedArray_t*', 'pArray'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetMipMappedArray_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetMipMappedArray.pArray = (hipMipmappedArray_t*)pArray; \
+  cb_data.args.hipTexRefGetMipMappedArray.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetMipmapLevelBias[('float*', 'pbias'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetMipmapLevelBias_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetMipmapLevelBias.pbias = (float*)pbias; \
+  cb_data.args.hipTexRefGetMipmapLevelBias.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefGetMipmapLevelClamp[('float*', 'pminMipmapLevelClamp'), ('float*', 'pmaxMipmapLevelClamp'), ('const textureReference*', 'texRef')]
+#define INIT_hipTexRefGetMipmapLevelClamp_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp = (float*)pminMipmapLevelClamp; \
+  cb_data.args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp = (float*)pmaxMipmapLevelClamp; \
+  cb_data.args.hipTexRefGetMipmapLevelClamp.texRef = (const textureReference*)texRef; \
+};
+// hipTexRefSetAddress[('size_t*', 'ByteOffset'), ('textureReference*', 'texRef'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'bytes')]
+#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetAddress.ByteOffset = (size_t*)ByteOffset; \
+  cb_data.args.hipTexRefSetAddress.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetAddress.dptr = (hipDeviceptr_t)dptr; \
+  cb_data.args.hipTexRefSetAddress.bytes = (size_t)bytes; \
+};
+// hipTexRefSetAddress2D[('textureReference*', 'texRef'), ('const HIP_ARRAY_DESCRIPTOR*', 'desc'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'Pitch')]
+#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetAddress2D.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetAddress2D.desc = (const HIP_ARRAY_DESCRIPTOR*)desc; \
+  cb_data.args.hipTexRefSetAddress2D.dptr = (hipDeviceptr_t)dptr; \
+  cb_data.args.hipTexRefSetAddress2D.Pitch = (size_t)Pitch; \
+};
+// hipTexRefSetArray[('textureReference*', 'tex'), ('hipArray_const_t', 'array'), ('unsigned int', 'flags')]
+#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetArray.tex = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetArray.array = (hipArray_const_t)array; \
+  cb_data.args.hipTexRefSetArray.flags = (unsigned int)flags; \
+};
+// hipTexRefSetBorderColor[('textureReference*', 'texRef'), ('float*', 'pBorderColor')]
+#define INIT_hipTexRefSetBorderColor_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetBorderColor.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetBorderColor.pBorderColor = (float*)pBorderColor; \
+};
+// hipTexRefSetFlags[('textureReference*', 'texRef'), ('unsigned int', 'Flags')]
+#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetFlags.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetFlags.Flags = (unsigned int)Flags; \
+};
+// hipTexRefSetFormat[('textureReference*', 'texRef'), ('hipArray_Format', 'fmt'), ('int', 'NumPackedComponents')]
+#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetFormat.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetFormat.fmt = (hipArray_Format)fmt; \
+  cb_data.args.hipTexRefSetFormat.NumPackedComponents = (int)NumPackedComponents; \
+};
+// hipTexRefSetMaxAnisotropy[('textureReference*', 'texRef'), ('unsigned int', 'maxAniso')]
+#define INIT_hipTexRefSetMaxAnisotropy_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetMaxAnisotropy.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetMaxAnisotropy.maxAniso = (unsigned int)maxAniso; \
+};
+// hipTexRefSetMipmapLevelBias[('textureReference*', 'texRef'), ('float', 'bias')]
+#define INIT_hipTexRefSetMipmapLevelBias_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetMipmapLevelBias.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetMipmapLevelBias.bias = (float)bias; \
+};
+// hipTexRefSetMipmapLevelClamp[('textureReference*', 'texRef'), ('float', 'minMipMapLevelClamp'), ('float', 'maxMipMapLevelClamp')]
+#define INIT_hipTexRefSetMipmapLevelClamp_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetMipmapLevelClamp.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetMipmapLevelClamp.minMipMapLevelClamp = (float)minMipMapLevelClamp; \
+  cb_data.args.hipTexRefSetMipmapLevelClamp.maxMipMapLevelClamp = (float)maxMipMapLevelClamp; \
+};
+// hipTexRefSetMipmappedArray[('textureReference*', 'texRef'), ('hipMipmappedArray*', 'mipmappedArray'), ('unsigned int', 'Flags')]
+#define INIT_hipTexRefSetMipmappedArray_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipTexRefSetMipmappedArray.texRef = (textureReference*)texRef; \
+  cb_data.args.hipTexRefSetMipmappedArray.mipmappedArray = (hipMipmappedArray*)mipmappedArray; \
+  cb_data.args.hipTexRefSetMipmappedArray.Flags = (unsigned int)Flags; \
+};
+// hipThreadExchangeStreamCaptureMode[('hipStreamCaptureMode*', 'mode')]
+#define INIT_hipThreadExchangeStreamCaptureMode_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipThreadExchangeStreamCaptureMode.mode = (hipStreamCaptureMode*)mode; \
+};
+// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr'), ('hipHostFn_t', 'destroy'), ('unsigned int', 'initialRefcount'), ('unsigned int', 'flags')]
+#define INIT_hipUserObjectCreate_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipUserObjectCreate.object_out = (hipUserObject_t*)object_out; \
+  cb_data.args.hipUserObjectCreate.ptr = (void*)ptr; \
+  cb_data.args.hipUserObjectCreate.destroy = (hipHostFn_t)destroy; \
+  cb_data.args.hipUserObjectCreate.initialRefcount = (unsigned int)initialRefcount; \
+  cb_data.args.hipUserObjectCreate.flags = (unsigned int)flags; \
+};
+//
hipUserObjectRelease[('hipUserObject_t', 'object'), ('unsigned int', 'count')]
+#define INIT_hipUserObjectRelease_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipUserObjectRelease.object = (hipUserObject_t)object; \
+  cb_data.args.hipUserObjectRelease.count = (unsigned int)count; \
+};
+// hipUserObjectRetain[('hipUserObject_t', 'object'), ('unsigned int', 'count')]
+#define INIT_hipUserObjectRetain_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipUserObjectRetain.object = (hipUserObject_t)object; \
+  cb_data.args.hipUserObjectRetain.count = (unsigned int)count; \
+};
+// hipWaitExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreWaitParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')]
+#define INIT_hipWaitExternalSemaphoresAsync_CB_ARGS_DATA(cb_data) { \
+  cb_data.args.hipWaitExternalSemaphoresAsync.extSemArray = (const hipExternalSemaphore_t*)extSemArray; \
+  cb_data.args.hipWaitExternalSemaphoresAsync.paramsArray = (const hipExternalSemaphoreWaitParams*)paramsArray; \
+  cb_data.args.hipWaitExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \
+  cb_data.args.hipWaitExternalSemaphoresAsync.stream = (hipStream_t)stream; \
+};
+#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)
+
+// Macros for non-public API primitives
+// hipBindTexture()
+#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {};
+// hipBindTexture2D()
+#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {};
+// hipBindTextureToArray()
+#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {};
+// hipBindTextureToMipmappedArray()
+#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {};
+// hipCreateTextureObject()
+#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {};
+// hipDestroyTextureObject()
+#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {};
+// hipDeviceGetCount()
+#define INIT_hipDeviceGetCount_CB_ARGS_DATA(cb_data) {};
+//
hipDeviceGetTexture1DLinearMaxWidth()
+#define INIT_hipDeviceGetTexture1DLinearMaxWidth_CB_ARGS_DATA(cb_data) {};
+// hipGetTextureAlignmentOffset()
+#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {};
+// hipGetTextureObjectResourceDesc()
+#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {};
+// hipGetTextureObjectResourceViewDesc()
+#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {};
+// hipGetTextureObjectTextureDesc()
+#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {};
+// hipGetTextureReference()
+#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {};
+// hipTexObjectCreate()
+#define INIT_hipTexObjectCreate_CB_ARGS_DATA(cb_data) {};
+// hipTexObjectDestroy()
+#define INIT_hipTexObjectDestroy_CB_ARGS_DATA(cb_data) {};
+// hipTexObjectGetResourceDesc()
+#define INIT_hipTexObjectGetResourceDesc_CB_ARGS_DATA(cb_data) {};
+// hipTexObjectGetResourceViewDesc()
+#define INIT_hipTexObjectGetResourceViewDesc_CB_ARGS_DATA(cb_data) {};
+// hipTexObjectGetTextureDesc()
+#define INIT_hipTexObjectGetTextureDesc_CB_ARGS_DATA(cb_data) {};
+// hipTexRefGetAddressMode()
+#define INIT_hipTexRefGetAddressMode_CB_ARGS_DATA(cb_data) {};
+// hipTexRefGetFilterMode()
+#define INIT_hipTexRefGetFilterMode_CB_ARGS_DATA(cb_data) {};
+// hipTexRefGetMipmapFilterMode()
+#define INIT_hipTexRefGetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
+// hipTexRefSetAddressMode()
+#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {};
+// hipTexRefSetFilterMode()
+#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {};
+// hipTexRefSetMipmapFilterMode()
+#define INIT_hipTexRefSetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
+// hipUnbindTexture()
+#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {};
+
+#define INIT_NONE_CB_ARGS_DATA(cb_data) {};
+
+#if HIP_PROF_HIP_API_STRING
+// HIP API args filling helper
+static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
+  switch (id) {
+//
__hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')] + case HIP_API_ID___hipPopCallConfiguration: + if (data->args.__hipPopCallConfiguration.gridDim) data->args.__hipPopCallConfiguration.gridDim__val = *(data->args.__hipPopCallConfiguration.gridDim); + if (data->args.__hipPopCallConfiguration.blockDim) data->args.__hipPopCallConfiguration.blockDim__val = *(data->args.__hipPopCallConfiguration.blockDim); + if (data->args.__hipPopCallConfiguration.sharedMem) data->args.__hipPopCallConfiguration.sharedMem__val = *(data->args.__hipPopCallConfiguration.sharedMem); + if (data->args.__hipPopCallConfiguration.stream) data->args.__hipPopCallConfiguration.stream__val = *(data->args.__hipPopCallConfiguration.stream); + break; +// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] + case HIP_API_ID___hipPushCallConfiguration: + break; +// hipArray3DCreate[('hipArray_t*', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] + case HIP_API_ID_hipArray3DCreate: + if (data->args.hipArray3DCreate.array) data->args.hipArray3DCreate.array__val = *(data->args.hipArray3DCreate.array); + if (data->args.hipArray3DCreate.pAllocateArray) data->args.hipArray3DCreate.pAllocateArray__val = *(data->args.hipArray3DCreate.pAllocateArray); + break; +// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] + case HIP_API_ID_hipArray3DGetDescriptor: + if (data->args.hipArray3DGetDescriptor.pArrayDescriptor) data->args.hipArray3DGetDescriptor.pArrayDescriptor__val = *(data->args.hipArray3DGetDescriptor.pArrayDescriptor); + break; +// hipArrayCreate[('hipArray_t*', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] + case HIP_API_ID_hipArrayCreate: + if (data->args.hipArrayCreate.pHandle) data->args.hipArrayCreate.pHandle__val = *(data->args.hipArrayCreate.pHandle); + if 
(data->args.hipArrayCreate.pAllocateArray) data->args.hipArrayCreate.pAllocateArray__val = *(data->args.hipArrayCreate.pAllocateArray); + break; +// hipArrayDestroy[('hipArray_t', 'array')] + case HIP_API_ID_hipArrayDestroy: + break; +// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] + case HIP_API_ID_hipArrayGetDescriptor: + if (data->args.hipArrayGetDescriptor.pArrayDescriptor) data->args.hipArrayGetDescriptor.pArrayDescriptor__val = *(data->args.hipArrayGetDescriptor.pArrayDescriptor); + break; +// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray_t', 'array')] + case HIP_API_ID_hipArrayGetInfo: + if (data->args.hipArrayGetInfo.desc) data->args.hipArrayGetInfo.desc__val = *(data->args.hipArrayGetInfo.desc); + if (data->args.hipArrayGetInfo.extent) data->args.hipArrayGetInfo.extent__val = *(data->args.hipArrayGetInfo.extent); + if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = *(data->args.hipArrayGetInfo.flags); + break; +// hipChooseDeviceR0000[('int*', 'device'), ('const hipDeviceProp_tR0000*', 'prop')] + case HIP_API_ID_hipChooseDeviceR0000: + if (data->args.hipChooseDeviceR0000.device) data->args.hipChooseDeviceR0000.device__val = *(data->args.hipChooseDeviceR0000.device); + if (data->args.hipChooseDeviceR0000.prop) data->args.hipChooseDeviceR0000.prop__val = *(data->args.hipChooseDeviceR0000.prop); + break; +// hipChooseDeviceR0600[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')] + case HIP_API_ID_hipChooseDeviceR0600: + if (data->args.hipChooseDeviceR0600.device) data->args.hipChooseDeviceR0600.device__val = *(data->args.hipChooseDeviceR0600.device); + if (data->args.hipChooseDeviceR0600.prop) data->args.hipChooseDeviceR0600.prop__val = *(data->args.hipChooseDeviceR0600.prop); + break; +// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] + case 
HIP_API_ID_hipConfigureCall: + break; +// hipCreateSurfaceObject[('hipSurfaceObject_t*', 'pSurfObject'), ('const hipResourceDesc*', 'pResDesc')] + case HIP_API_ID_hipCreateSurfaceObject: + if (data->args.hipCreateSurfaceObject.pSurfObject) data->args.hipCreateSurfaceObject.pSurfObject__val = *(data->args.hipCreateSurfaceObject.pSurfObject); + if (data->args.hipCreateSurfaceObject.pResDesc) data->args.hipCreateSurfaceObject.pResDesc__val = *(data->args.hipCreateSurfaceObject.pResDesc); + break; +// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipCtxCreate: + if (data->args.hipCtxCreate.ctx) data->args.hipCtxCreate.ctx__val = *(data->args.hipCtxCreate.ctx); + break; +// hipCtxDestroy[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxDestroy: + break; +// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')] + case HIP_API_ID_hipCtxDisablePeerAccess: + break; +// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')] + case HIP_API_ID_hipCtxEnablePeerAccess: + break; +// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('unsigned int*', 'apiVersion')] + case HIP_API_ID_hipCtxGetApiVersion: + if (data->args.hipCtxGetApiVersion.apiVersion) data->args.hipCtxGetApiVersion.apiVersion__val = *(data->args.hipCtxGetApiVersion.apiVersion); + break; +// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] + case HIP_API_ID_hipCtxGetCacheConfig: + if (data->args.hipCtxGetCacheConfig.cacheConfig) data->args.hipCtxGetCacheConfig.cacheConfig__val = *(data->args.hipCtxGetCacheConfig.cacheConfig); + break; +// hipCtxGetCurrent[('hipCtx_t*', 'ctx')] + case HIP_API_ID_hipCtxGetCurrent: + if (data->args.hipCtxGetCurrent.ctx) data->args.hipCtxGetCurrent.ctx__val = *(data->args.hipCtxGetCurrent.ctx); + break; +// hipCtxGetDevice[('hipDevice_t*', 'device')] + case HIP_API_ID_hipCtxGetDevice: + if (data->args.hipCtxGetDevice.device) data->args.hipCtxGetDevice.device__val = *(data->args.hipCtxGetDevice.device); + break; +// 
hipCtxGetFlags[('unsigned int*', 'flags')] + case HIP_API_ID_hipCtxGetFlags: + if (data->args.hipCtxGetFlags.flags) data->args.hipCtxGetFlags.flags__val = *(data->args.hipCtxGetFlags.flags); + break; +// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] + case HIP_API_ID_hipCtxGetSharedMemConfig: + if (data->args.hipCtxGetSharedMemConfig.pConfig) data->args.hipCtxGetSharedMemConfig.pConfig__val = *(data->args.hipCtxGetSharedMemConfig.pConfig); + break; +// hipCtxPopCurrent[('hipCtx_t*', 'ctx')] + case HIP_API_ID_hipCtxPopCurrent: + if (data->args.hipCtxPopCurrent.ctx) data->args.hipCtxPopCurrent.ctx__val = *(data->args.hipCtxPopCurrent.ctx); + break; +// hipCtxPushCurrent[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxPushCurrent: + break; +// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] + case HIP_API_ID_hipCtxSetCacheConfig: + break; +// hipCtxSetCurrent[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxSetCurrent: + break; +// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipCtxSetSharedMemConfig: + break; +// hipCtxSynchronize[] + case HIP_API_ID_hipCtxSynchronize: + break; +// hipDestroyExternalMemory[('hipExternalMemory_t', 'extMem')] + case HIP_API_ID_hipDestroyExternalMemory: + break; +// hipDestroyExternalSemaphore[('hipExternalSemaphore_t', 'extSem')] + case HIP_API_ID_hipDestroyExternalSemaphore: + break; +// hipDestroySurfaceObject[('hipSurfaceObject_t', 'surfaceObject')] + case HIP_API_ID_hipDestroySurfaceObject: + break; +// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')] + case HIP_API_ID_hipDeviceCanAccessPeer: + if (data->args.hipDeviceCanAccessPeer.canAccessPeer) data->args.hipDeviceCanAccessPeer.canAccessPeer__val = *(data->args.hipDeviceCanAccessPeer.canAccessPeer); + break; +// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceComputeCapability: + if 
(data->args.hipDeviceComputeCapability.major) data->args.hipDeviceComputeCapability.major__val = *(data->args.hipDeviceComputeCapability.major); + if (data->args.hipDeviceComputeCapability.minor) data->args.hipDeviceComputeCapability.minor__val = *(data->args.hipDeviceComputeCapability.minor); + break; +// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')] + case HIP_API_ID_hipDeviceDisablePeerAccess: + break; +// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')] + case HIP_API_ID_hipDeviceEnablePeerAccess: + break; +// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')] + case HIP_API_ID_hipDeviceGet: + if (data->args.hipDeviceGet.device) data->args.hipDeviceGet.device__val = *(data->args.hipDeviceGet.device); + break; +// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')] + case HIP_API_ID_hipDeviceGetAttribute: + if (data->args.hipDeviceGetAttribute.pi) data->args.hipDeviceGetAttribute.pi__val = *(data->args.hipDeviceGetAttribute.pi); + break; +// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')] + case HIP_API_ID_hipDeviceGetByPCIBusId: + if (data->args.hipDeviceGetByPCIBusId.device) data->args.hipDeviceGetByPCIBusId.device__val = *(data->args.hipDeviceGetByPCIBusId.device); + if (data->args.hipDeviceGetByPCIBusId.pciBusId) data->args.hipDeviceGetByPCIBusId.pciBusId__val = *(data->args.hipDeviceGetByPCIBusId.pciBusId); + break; +// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] + case HIP_API_ID_hipDeviceGetCacheConfig: + if (data->args.hipDeviceGetCacheConfig.cacheConfig) data->args.hipDeviceGetCacheConfig.cacheConfig__val = *(data->args.hipDeviceGetCacheConfig.cacheConfig); + break; +// hipDeviceGetDefaultMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetDefaultMemPool: + if (data->args.hipDeviceGetDefaultMemPool.mem_pool) data->args.hipDeviceGetDefaultMemPool.mem_pool__val = 
*(data->args.hipDeviceGetDefaultMemPool.mem_pool); + break; +// hipDeviceGetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipDeviceGetGraphMemAttribute: + break; +// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')] + case HIP_API_ID_hipDeviceGetLimit: + if (data->args.hipDeviceGetLimit.pValue) data->args.hipDeviceGetLimit.pValue__val = *(data->args.hipDeviceGetLimit.pValue); + break; +// hipDeviceGetMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetMemPool: + if (data->args.hipDeviceGetMemPool.mem_pool) data->args.hipDeviceGetMemPool.mem_pool__val = *(data->args.hipDeviceGetMemPool.mem_pool); + break; +// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceGetName: + data->args.hipDeviceGetName.name = (data->args.hipDeviceGetName.name) ? strdup(data->args.hipDeviceGetName.name) : NULL; + break; +// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')] + case HIP_API_ID_hipDeviceGetP2PAttribute: + if (data->args.hipDeviceGetP2PAttribute.value) data->args.hipDeviceGetP2PAttribute.value__val = *(data->args.hipDeviceGetP2PAttribute.value); + break; +// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetPCIBusId: + data->args.hipDeviceGetPCIBusId.pciBusId = (data->args.hipDeviceGetPCIBusId.pciBusId) ? 
strdup(data->args.hipDeviceGetPCIBusId.pciBusId) : NULL; + break; +// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] + case HIP_API_ID_hipDeviceGetSharedMemConfig: + if (data->args.hipDeviceGetSharedMemConfig.pConfig) data->args.hipDeviceGetSharedMemConfig.pConfig__val = *(data->args.hipDeviceGetSharedMemConfig.pConfig); + break; +// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')] + case HIP_API_ID_hipDeviceGetStreamPriorityRange: + if (data->args.hipDeviceGetStreamPriorityRange.leastPriority) data->args.hipDeviceGetStreamPriorityRange.leastPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.leastPriority); + if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority) data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.greatestPriority); + break; +// hipDeviceGetUuid[('hipUUID*', 'uuid'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceGetUuid: + if (data->args.hipDeviceGetUuid.uuid) data->args.hipDeviceGetUuid.uuid__val = *(data->args.hipDeviceGetUuid.uuid); + break; +// hipDeviceGraphMemTrim[('int', 'device')] + case HIP_API_ID_hipDeviceGraphMemTrim: + break; +// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')] + case HIP_API_ID_hipDevicePrimaryCtxGetState: + if (data->args.hipDevicePrimaryCtxGetState.flags) data->args.hipDevicePrimaryCtxGetState.flags__val = *(data->args.hipDevicePrimaryCtxGetState.flags); + if (data->args.hipDevicePrimaryCtxGetState.active) data->args.hipDevicePrimaryCtxGetState.active__val = *(data->args.hipDevicePrimaryCtxGetState.active); + break; +// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')] + case HIP_API_ID_hipDevicePrimaryCtxRelease: + break; +// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')] + case HIP_API_ID_hipDevicePrimaryCtxReset: + break; +// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')] + case 
HIP_API_ID_hipDevicePrimaryCtxRetain: + if (data->args.hipDevicePrimaryCtxRetain.pctx) data->args.hipDevicePrimaryCtxRetain.pctx__val = *(data->args.hipDevicePrimaryCtxRetain.pctx); + break; +// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')] + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: + break; +// hipDeviceReset[] + case HIP_API_ID_hipDeviceReset: + break; +// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] + case HIP_API_ID_hipDeviceSetCacheConfig: + break; +// hipDeviceSetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipDeviceSetGraphMemAttribute: + break; +// hipDeviceSetLimit[('hipLimit_t', 'limit'), ('size_t', 'value')] + case HIP_API_ID_hipDeviceSetLimit: + break; +// hipDeviceSetMemPool[('int', 'device'), ('hipMemPool_t', 'mem_pool')] + case HIP_API_ID_hipDeviceSetMemPool: + break; +// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipDeviceSetSharedMemConfig: + break; +// hipDeviceSynchronize[] + case HIP_API_ID_hipDeviceSynchronize: + break; +// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceTotalMem: + if (data->args.hipDeviceTotalMem.bytes) data->args.hipDeviceTotalMem.bytes__val = *(data->args.hipDeviceTotalMem.bytes); + break; +// hipDriverGetVersion[('int*', 'driverVersion')] + case HIP_API_ID_hipDriverGetVersion: + if (data->args.hipDriverGetVersion.driverVersion) data->args.hipDriverGetVersion.driverVersion__val = *(data->args.hipDriverGetVersion.driverVersion); + break; +// hipDrvGraphAddMemFreeNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('hipDeviceptr_t', 'dptr')] + case HIP_API_ID_hipDrvGraphAddMemFreeNode: + if (data->args.hipDrvGraphAddMemFreeNode.phGraphNode) data->args.hipDrvGraphAddMemFreeNode.phGraphNode__val = 
*(data->args.hipDrvGraphAddMemFreeNode.phGraphNode); + if (data->args.hipDrvGraphAddMemFreeNode.dependencies) data->args.hipDrvGraphAddMemFreeNode.dependencies__val = *(data->args.hipDrvGraphAddMemFreeNode.dependencies); + break; +// hipDrvGraphAddMemcpyNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')] + case HIP_API_ID_hipDrvGraphAddMemcpyNode: + if (data->args.hipDrvGraphAddMemcpyNode.phGraphNode) data->args.hipDrvGraphAddMemcpyNode.phGraphNode__val = *(data->args.hipDrvGraphAddMemcpyNode.phGraphNode); + if (data->args.hipDrvGraphAddMemcpyNode.dependencies) data->args.hipDrvGraphAddMemcpyNode.dependencies__val = *(data->args.hipDrvGraphAddMemcpyNode.dependencies); + if (data->args.hipDrvGraphAddMemcpyNode.copyParams) data->args.hipDrvGraphAddMemcpyNode.copyParams__val = *(data->args.hipDrvGraphAddMemcpyNode.copyParams); + break; +// hipDrvGraphAddMemsetNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'memsetParams'), ('hipCtx_t', 'ctx')] + case HIP_API_ID_hipDrvGraphAddMemsetNode: + if (data->args.hipDrvGraphAddMemsetNode.phGraphNode) data->args.hipDrvGraphAddMemsetNode.phGraphNode__val = *(data->args.hipDrvGraphAddMemsetNode.phGraphNode); + if (data->args.hipDrvGraphAddMemsetNode.dependencies) data->args.hipDrvGraphAddMemsetNode.dependencies__val = *(data->args.hipDrvGraphAddMemsetNode.dependencies); + if (data->args.hipDrvGraphAddMemsetNode.memsetParams) data->args.hipDrvGraphAddMemsetNode.memsetParams__val = *(data->args.hipDrvGraphAddMemsetNode.memsetParams); + break; +// hipDrvGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')] + case HIP_API_ID_hipDrvGraphExecMemcpyNodeSetParams: + if 
(data->args.hipDrvGraphExecMemcpyNodeSetParams.copyParams) data->args.hipDrvGraphExecMemcpyNodeSetParams.copyParams__val = *(data->args.hipDrvGraphExecMemcpyNodeSetParams.copyParams); + break; +// hipDrvGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipMemsetParams*', 'memsetParams'), ('hipCtx_t', 'ctx')] + case HIP_API_ID_hipDrvGraphExecMemsetNodeSetParams: + if (data->args.hipDrvGraphExecMemsetNodeSetParams.memsetParams) data->args.hipDrvGraphExecMemsetNodeSetParams.memsetParams__val = *(data->args.hipDrvGraphExecMemsetNodeSetParams.memsetParams); + break; +// hipDrvGraphMemcpyNodeGetParams[('hipGraphNode_t', 'hNode'), ('HIP_MEMCPY3D*', 'nodeParams')] + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: + if (data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams) data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams__val = *(data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams); + break; +// hipDrvGraphMemcpyNodeSetParams[('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'nodeParams')] + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: + if (data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams) data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams__val = *(data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams); + break; +// hipDrvLaunchKernelEx[('const HIP_LAUNCH_CONFIG*', 'config'), ('hipFunction_t', 'f'), ('void**', 'params'), ('void**', 'extra')] + case HIP_API_ID_hipDrvLaunchKernelEx: + if (data->args.hipDrvLaunchKernelEx.config) data->args.hipDrvLaunchKernelEx.config__val = *(data->args.hipDrvLaunchKernelEx.config); + if (data->args.hipDrvLaunchKernelEx.params) data->args.hipDrvLaunchKernelEx.params__val = *(data->args.hipDrvLaunchKernelEx.params); + if (data->args.hipDrvLaunchKernelEx.extra) data->args.hipDrvLaunchKernelEx.extra__val = *(data->args.hipDrvLaunchKernelEx.extra); + break; +// hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')] + case HIP_API_ID_hipDrvMemcpy2DUnaligned: + if 
(data->args.hipDrvMemcpy2DUnaligned.pCopy) data->args.hipDrvMemcpy2DUnaligned.pCopy__val = *(data->args.hipDrvMemcpy2DUnaligned.pCopy); + break; +// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')] + case HIP_API_ID_hipDrvMemcpy3D: + if (data->args.hipDrvMemcpy3D.pCopy) data->args.hipDrvMemcpy3D.pCopy__val = *(data->args.hipDrvMemcpy3D.pCopy); + break; +// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipDrvMemcpy3DAsync: + if (data->args.hipDrvMemcpy3DAsync.pCopy) data->args.hipDrvMemcpy3DAsync.pCopy__val = *(data->args.hipDrvMemcpy3DAsync.pCopy); + break; +// hipDrvPointerGetAttributes[('unsigned int', 'numAttributes'), ('hipPointer_attribute*', 'attributes'), ('void**', 'data'), ('hipDeviceptr_t', 'ptr')] + case HIP_API_ID_hipDrvPointerGetAttributes: + if (data->args.hipDrvPointerGetAttributes.attributes) data->args.hipDrvPointerGetAttributes.attributes__val = *(data->args.hipDrvPointerGetAttributes.attributes); + if (data->args.hipDrvPointerGetAttributes.data) data->args.hipDrvPointerGetAttributes.data__val = *(data->args.hipDrvPointerGetAttributes.data); + break; +// hipEventCreate[('hipEvent_t*', 'event')] + case HIP_API_ID_hipEventCreate: + if (data->args.hipEventCreate.event) data->args.hipEventCreate.event__val = *(data->args.hipEventCreate.event); + break; +// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')] + case HIP_API_ID_hipEventCreateWithFlags: + if (data->args.hipEventCreateWithFlags.event) data->args.hipEventCreateWithFlags.event__val = *(data->args.hipEventCreateWithFlags.event); + break; +// hipEventDestroy[('hipEvent_t', 'event')] + case HIP_API_ID_hipEventDestroy: + break; +// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')] + case HIP_API_ID_hipEventElapsedTime: + if (data->args.hipEventElapsedTime.ms) data->args.hipEventElapsedTime.ms__val = *(data->args.hipEventElapsedTime.ms); + break; +// hipEventQuery[('hipEvent_t', 
'event')] + case HIP_API_ID_hipEventQuery: + break; +// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipEventRecord: + break; +// hipEventRecordWithFlags[('hipEvent_t', 'event'), ('hipStream_t', 'stream'), ('unsigned int', 'flags')] + case HIP_API_ID_hipEventRecordWithFlags: + break; +// hipEventSynchronize[('hipEvent_t', 'event')] + case HIP_API_ID_hipEventSynchronize: + break; +// hipExtGetLastError[] + case HIP_API_ID_hipExtGetLastError: + break; +// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')] + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: + if (data->args.hipExtGetLinkTypeAndHopCount.linktype) data->args.hipExtGetLinkTypeAndHopCount.linktype__val = *(data->args.hipExtGetLinkTypeAndHopCount.linktype); + if (data->args.hipExtGetLinkTypeAndHopCount.hopcount) data->args.hipExtGetLinkTypeAndHopCount.hopcount__val = *(data->args.hipExtGetLinkTypeAndHopCount.hopcount); + break; +// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')] + case HIP_API_ID_hipExtLaunchKernel: + if (data->args.hipExtLaunchKernel.args) data->args.hipExtLaunchKernel.args__val = *(data->args.hipExtLaunchKernel.args); + break; +// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: + if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList) data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val = *(data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList); + break; +// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtMallocWithFlags: + if 
(data->args.hipExtMallocWithFlags.ptr) data->args.hipExtMallocWithFlags.ptr__val = *(data->args.hipExtMallocWithFlags.ptr); + break; +// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtModuleLaunchKernel: + if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams); + if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra); + break; +// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')] + case HIP_API_ID_hipExtStreamCreateWithCUMask: + if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream); + if (data->args.hipExtStreamCreateWithCUMask.cuMask) data->args.hipExtStreamCreateWithCUMask.cuMask__val = *(data->args.hipExtStreamCreateWithCUMask.cuMask); + break; +// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')] + case HIP_API_ID_hipExtStreamGetCUMask: + if (data->args.hipExtStreamGetCUMask.cuMask) data->args.hipExtStreamGetCUMask.cuMask__val = *(data->args.hipExtStreamGetCUMask.cuMask); + break; +// hipExternalMemoryGetMappedBuffer[('void**', 'devPtr'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryBufferDesc*', 'bufferDesc')] + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: + if 
(data->args.hipExternalMemoryGetMappedBuffer.devPtr) data->args.hipExternalMemoryGetMappedBuffer.devPtr__val = *(data->args.hipExternalMemoryGetMappedBuffer.devPtr); + if (data->args.hipExternalMemoryGetMappedBuffer.bufferDesc) data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val = *(data->args.hipExternalMemoryGetMappedBuffer.bufferDesc); + break; +// hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')] + case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap) data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap__val = *(data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap); + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc) data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val = *(data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc); + break; +// hipFree[('void*', 'ptr')] + case HIP_API_ID_hipFree: + break; +// hipFreeArray[('hipArray_t', 'array')] + case HIP_API_ID_hipFreeArray: + break; +// hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipFreeAsync: + break; +// hipFreeHost[('void*', 'ptr')] + case HIP_API_ID_hipFreeHost: + break; +// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')] + case HIP_API_ID_hipFreeMipmappedArray: + break; +// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')] + case HIP_API_ID_hipFuncGetAttribute: + if (data->args.hipFuncGetAttribute.value) data->args.hipFuncGetAttribute.value__val = *(data->args.hipFuncGetAttribute.value); + break; +// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')] + case HIP_API_ID_hipFuncGetAttributes: + if (data->args.hipFuncGetAttributes.attr) data->args.hipFuncGetAttributes.attr__val = *(data->args.hipFuncGetAttributes.attr); + 
break; +// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')] + case HIP_API_ID_hipFuncSetAttribute: + break; +// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')] + case HIP_API_ID_hipFuncSetCacheConfig: + break; +// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipFuncSetSharedMemConfig: + break; +// hipGLGetDevices[('unsigned int*', 'pHipDeviceCount'), ('int*', 'pHipDevices'), ('unsigned int', 'hipDeviceCount'), ('hipGLDeviceList', 'deviceList')] + case HIP_API_ID_hipGLGetDevices: + if (data->args.hipGLGetDevices.pHipDeviceCount) data->args.hipGLGetDevices.pHipDeviceCount__val = *(data->args.hipGLGetDevices.pHipDeviceCount); + if (data->args.hipGLGetDevices.pHipDevices) data->args.hipGLGetDevices.pHipDevices__val = *(data->args.hipGLGetDevices.pHipDevices); + break; +// hipGetChannelDesc[('hipChannelFormatDesc*', 'desc'), ('hipArray_const_t', 'array')] + case HIP_API_ID_hipGetChannelDesc: + if (data->args.hipGetChannelDesc.desc) data->args.hipGetChannelDesc.desc__val = *(data->args.hipGetChannelDesc.desc); + break; +// hipGetDevice[('int*', 'deviceId')] + case HIP_API_ID_hipGetDevice: + if (data->args.hipGetDevice.deviceId) data->args.hipGetDevice.deviceId__val = *(data->args.hipGetDevice.deviceId); + break; +// hipGetDeviceCount[('int*', 'count')] + case HIP_API_ID_hipGetDeviceCount: + if (data->args.hipGetDeviceCount.count) data->args.hipGetDeviceCount.count__val = *(data->args.hipGetDeviceCount.count); + break; +// hipGetDeviceFlags[('unsigned int*', 'flags')] + case HIP_API_ID_hipGetDeviceFlags: + if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags); + break; +// hipGetDevicePropertiesR0000[('hipDeviceProp_tR0000*', 'prop'), ('int', 'device')] + case HIP_API_ID_hipGetDevicePropertiesR0000: + if (data->args.hipGetDevicePropertiesR0000.prop) 
data->args.hipGetDevicePropertiesR0000.prop__val = *(data->args.hipGetDevicePropertiesR0000.prop); + break; +// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'prop'), ('int', 'deviceId')] + case HIP_API_ID_hipGetDevicePropertiesR0600: + if (data->args.hipGetDevicePropertiesR0600.prop) data->args.hipGetDevicePropertiesR0600.prop__val = *(data->args.hipGetDevicePropertiesR0600.prop); + break; +// hipGetDriverEntryPoint[('const char*', 'symbol'), ('void**', 'funcPtr'), ('unsigned long long', 'flags'), ('hipDriverEntryPointQueryResult*', 'driverStatus')] + case HIP_API_ID_hipGetDriverEntryPoint: + if (data->args.hipGetDriverEntryPoint.symbol) data->args.hipGetDriverEntryPoint.symbol__val = *(data->args.hipGetDriverEntryPoint.symbol); + if (data->args.hipGetDriverEntryPoint.funcPtr) data->args.hipGetDriverEntryPoint.funcPtr__val = *(data->args.hipGetDriverEntryPoint.funcPtr); + if (data->args.hipGetDriverEntryPoint.driverStatus) data->args.hipGetDriverEntryPoint.driverStatus__val = *(data->args.hipGetDriverEntryPoint.driverStatus); + break; +// hipGetFuncBySymbol[('hipFunction_t*', 'functionPtr'), ('const void*', 'symbolPtr')] + case HIP_API_ID_hipGetFuncBySymbol: + if (data->args.hipGetFuncBySymbol.functionPtr) data->args.hipGetFuncBySymbol.functionPtr__val = *(data->args.hipGetFuncBySymbol.functionPtr); + break; +// hipGetLastError[] + case HIP_API_ID_hipGetLastError: + break; +// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] + case HIP_API_ID_hipGetMipmappedArrayLevel: + if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray); + break; +// hipGetProcAddress[('const char*', 'symbol'), ('void**', 'pfn'), ('int', 'hipVersion'), ('uint64_t', 'flags'), ('hipDriverProcAddressQueryResult*', 'symbolStatus')] + case HIP_API_ID_hipGetProcAddress: + if 
(data->args.hipGetProcAddress.symbol) data->args.hipGetProcAddress.symbol__val = *(data->args.hipGetProcAddress.symbol); + if (data->args.hipGetProcAddress.pfn) data->args.hipGetProcAddress.pfn__val = *(data->args.hipGetProcAddress.pfn); + if (data->args.hipGetProcAddress.symbolStatus) data->args.hipGetProcAddress.symbolStatus__val = *(data->args.hipGetProcAddress.symbolStatus); + break; +// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')] + case HIP_API_ID_hipGetSymbolAddress: + if (data->args.hipGetSymbolAddress.devPtr) data->args.hipGetSymbolAddress.devPtr__val = *(data->args.hipGetSymbolAddress.devPtr); + break; +// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')] + case HIP_API_ID_hipGetSymbolSize: + if (data->args.hipGetSymbolSize.size) data->args.hipGetSymbolSize.size__val = *(data->args.hipGetSymbolSize.size); + break; +// hipGraphAddBatchMemOpNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const hipBatchMemOpNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddBatchMemOpNode: + if (data->args.hipGraphAddBatchMemOpNode.phGraphNode) data->args.hipGraphAddBatchMemOpNode.phGraphNode__val = *(data->args.hipGraphAddBatchMemOpNode.phGraphNode); + if (data->args.hipGraphAddBatchMemOpNode.dependencies) data->args.hipGraphAddBatchMemOpNode.dependencies__val = *(data->args.hipGraphAddBatchMemOpNode.dependencies); + if (data->args.hipGraphAddBatchMemOpNode.nodeParams) data->args.hipGraphAddBatchMemOpNode.nodeParams__val = *(data->args.hipGraphAddBatchMemOpNode.nodeParams); + break; +// hipGraphAddChildGraphNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraph_t', 'childGraph')] + case HIP_API_ID_hipGraphAddChildGraphNode: + if (data->args.hipGraphAddChildGraphNode.pGraphNode) data->args.hipGraphAddChildGraphNode.pGraphNode__val = 
*(data->args.hipGraphAddChildGraphNode.pGraphNode); + if (data->args.hipGraphAddChildGraphNode.pDependencies) data->args.hipGraphAddChildGraphNode.pDependencies__val = *(data->args.hipGraphAddChildGraphNode.pDependencies); + break; +// hipGraphAddDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphAddDependencies: + if (data->args.hipGraphAddDependencies.from) data->args.hipGraphAddDependencies.from__val = *(data->args.hipGraphAddDependencies.from); + if (data->args.hipGraphAddDependencies.to) data->args.hipGraphAddDependencies.to__val = *(data->args.hipGraphAddDependencies.to); + break; +// hipGraphAddEmptyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphAddEmptyNode: + if (data->args.hipGraphAddEmptyNode.pGraphNode) data->args.hipGraphAddEmptyNode.pGraphNode__val = *(data->args.hipGraphAddEmptyNode.pGraphNode); + if (data->args.hipGraphAddEmptyNode.pDependencies) data->args.hipGraphAddEmptyNode.pDependencies__val = *(data->args.hipGraphAddEmptyNode.pDependencies); + break; +// hipGraphAddEventRecordNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphAddEventRecordNode: + if (data->args.hipGraphAddEventRecordNode.pGraphNode) data->args.hipGraphAddEventRecordNode.pGraphNode__val = *(data->args.hipGraphAddEventRecordNode.pGraphNode); + if (data->args.hipGraphAddEventRecordNode.pDependencies) data->args.hipGraphAddEventRecordNode.pDependencies__val = *(data->args.hipGraphAddEventRecordNode.pDependencies); + break; +// hipGraphAddEventWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] + case 
HIP_API_ID_hipGraphAddEventWaitNode: + if (data->args.hipGraphAddEventWaitNode.pGraphNode) data->args.hipGraphAddEventWaitNode.pGraphNode__val = *(data->args.hipGraphAddEventWaitNode.pGraphNode); + if (data->args.hipGraphAddEventWaitNode.pDependencies) data->args.hipGraphAddEventWaitNode.pDependencies__val = *(data->args.hipGraphAddEventWaitNode.pDependencies); + break; +// hipGraphAddExternalSemaphoresSignalNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode) data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies) data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams) data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams); + break; +// hipGraphAddExternalSemaphoresWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode) data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode__val = *(data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies) data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies__val = 
*(data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams) data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams__val = *(data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams); + break; +// hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddHostNode: + if (data->args.hipGraphAddHostNode.pGraphNode) data->args.hipGraphAddHostNode.pGraphNode__val = *(data->args.hipGraphAddHostNode.pGraphNode); + if (data->args.hipGraphAddHostNode.pDependencies) data->args.hipGraphAddHostNode.pDependencies__val = *(data->args.hipGraphAddHostNode.pDependencies); + if (data->args.hipGraphAddHostNode.pNodeParams) data->args.hipGraphAddHostNode.pNodeParams__val = *(data->args.hipGraphAddHostNode.pNodeParams); + break; +// hipGraphAddKernelNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddKernelNode: + if (data->args.hipGraphAddKernelNode.pGraphNode) data->args.hipGraphAddKernelNode.pGraphNode__val = *(data->args.hipGraphAddKernelNode.pGraphNode); + if (data->args.hipGraphAddKernelNode.pDependencies) data->args.hipGraphAddKernelNode.pDependencies__val = *(data->args.hipGraphAddKernelNode.pDependencies); + if (data->args.hipGraphAddKernelNode.pNodeParams) data->args.hipGraphAddKernelNode.pNodeParams__val = *(data->args.hipGraphAddKernelNode.pNodeParams); + break; +// hipGraphAddMemAllocNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipMemAllocNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddMemAllocNode: + if (data->args.hipGraphAddMemAllocNode.pGraphNode) 
data->args.hipGraphAddMemAllocNode.pGraphNode__val = *(data->args.hipGraphAddMemAllocNode.pGraphNode); + if (data->args.hipGraphAddMemAllocNode.pDependencies) data->args.hipGraphAddMemAllocNode.pDependencies__val = *(data->args.hipGraphAddMemAllocNode.pDependencies); + if (data->args.hipGraphAddMemAllocNode.pNodeParams) data->args.hipGraphAddMemAllocNode.pNodeParams__val = *(data->args.hipGraphAddMemAllocNode.pNodeParams); + break; +// hipGraphAddMemFreeNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipGraphAddMemFreeNode: + if (data->args.hipGraphAddMemFreeNode.pGraphNode) data->args.hipGraphAddMemFreeNode.pGraphNode__val = *(data->args.hipGraphAddMemFreeNode.pGraphNode); + if (data->args.hipGraphAddMemFreeNode.pDependencies) data->args.hipGraphAddMemFreeNode.pDependencies__val = *(data->args.hipGraphAddMemFreeNode.pDependencies); + break; +// hipGraphAddMemcpyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemcpy3DParms*', 'pCopyParams')] + case HIP_API_ID_hipGraphAddMemcpyNode: + if (data->args.hipGraphAddMemcpyNode.pGraphNode) data->args.hipGraphAddMemcpyNode.pGraphNode__val = *(data->args.hipGraphAddMemcpyNode.pGraphNode); + if (data->args.hipGraphAddMemcpyNode.pDependencies) data->args.hipGraphAddMemcpyNode.pDependencies__val = *(data->args.hipGraphAddMemcpyNode.pDependencies); + if (data->args.hipGraphAddMemcpyNode.pCopyParams) data->args.hipGraphAddMemcpyNode.pCopyParams__val = *(data->args.hipGraphAddMemcpyNode.pCopyParams); + break; +// hipGraphAddMemcpyNode1D[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNode1D: 
+ if (data->args.hipGraphAddMemcpyNode1D.pGraphNode) data->args.hipGraphAddMemcpyNode1D.pGraphNode__val = *(data->args.hipGraphAddMemcpyNode1D.pGraphNode); + if (data->args.hipGraphAddMemcpyNode1D.pDependencies) data->args.hipGraphAddMemcpyNode1D.pDependencies__val = *(data->args.hipGraphAddMemcpyNode1D.pDependencies); + break; +// hipGraphAddMemcpyNodeFromSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode) data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode__val = *(data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode); + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies) data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies__val = *(data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies); + break; +// hipGraphAddMemcpyNodeToSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: + if (data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode) data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode__val = *(data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode); + if (data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies) data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies__val = *(data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies); + break; +// hipGraphAddMemsetNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'pMemsetParams')] + case HIP_API_ID_hipGraphAddMemsetNode: + 
if (data->args.hipGraphAddMemsetNode.pGraphNode) data->args.hipGraphAddMemsetNode.pGraphNode__val = *(data->args.hipGraphAddMemsetNode.pGraphNode); + if (data->args.hipGraphAddMemsetNode.pDependencies) data->args.hipGraphAddMemsetNode.pDependencies__val = *(data->args.hipGraphAddMemsetNode.pDependencies); + if (data->args.hipGraphAddMemsetNode.pMemsetParams) data->args.hipGraphAddMemsetNode.pMemsetParams__val = *(data->args.hipGraphAddMemsetNode.pMemsetParams); + break; +// hipGraphAddNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraphNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddNode: + if (data->args.hipGraphAddNode.pGraphNode) data->args.hipGraphAddNode.pGraphNode__val = *(data->args.hipGraphAddNode.pGraphNode); + if (data->args.hipGraphAddNode.pDependencies) data->args.hipGraphAddNode.pDependencies__val = *(data->args.hipGraphAddNode.pDependencies); + if (data->args.hipGraphAddNode.nodeParams) data->args.hipGraphAddNode.nodeParams__val = *(data->args.hipGraphAddNode.nodeParams); + break; +// hipGraphBatchMemOpNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipBatchMemOpNodeParams*', 'nodeParams_out')] + case HIP_API_ID_hipGraphBatchMemOpNodeGetParams: + if (data->args.hipGraphBatchMemOpNodeGetParams.nodeParams_out) data->args.hipGraphBatchMemOpNodeGetParams.nodeParams_out__val = *(data->args.hipGraphBatchMemOpNodeGetParams.nodeParams_out); + break; +// hipGraphBatchMemOpNodeSetParams[('hipGraphNode_t', 'hNode'), ('hipBatchMemOpNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphBatchMemOpNodeSetParams: + if (data->args.hipGraphBatchMemOpNodeSetParams.nodeParams) data->args.hipGraphBatchMemOpNodeSetParams.nodeParams__val = *(data->args.hipGraphBatchMemOpNodeSetParams.nodeParams); + break; +// hipGraphChildGraphNodeGetGraph[('hipGraphNode_t', 'node'), ('hipGraph_t*', 'pGraph')] + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: + if 
(data->args.hipGraphChildGraphNodeGetGraph.pGraph) data->args.hipGraphChildGraphNodeGetGraph.pGraph__val = *(data->args.hipGraphChildGraphNodeGetGraph.pGraph); + break; +// hipGraphClone[('hipGraph_t*', 'pGraphClone'), ('hipGraph_t', 'originalGraph')] + case HIP_API_ID_hipGraphClone: + if (data->args.hipGraphClone.pGraphClone) data->args.hipGraphClone.pGraphClone__val = *(data->args.hipGraphClone.pGraphClone); + break; +// hipGraphCreate[('hipGraph_t*', 'pGraph'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphCreate: + if (data->args.hipGraphCreate.pGraph) data->args.hipGraphCreate.pGraph__val = *(data->args.hipGraphCreate.pGraph); + break; +// hipGraphDebugDotPrint[('hipGraph_t', 'graph'), ('const char*', 'path'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphDebugDotPrint: + if (data->args.hipGraphDebugDotPrint.path) data->args.hipGraphDebugDotPrint.path__val = *(data->args.hipGraphDebugDotPrint.path); + break; +// hipGraphDestroy[('hipGraph_t', 'graph')] + case HIP_API_ID_hipGraphDestroy: + break; +// hipGraphDestroyNode[('hipGraphNode_t', 'node')] + case HIP_API_ID_hipGraphDestroyNode: + break; +// hipGraphEventRecordNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: + if (data->args.hipGraphEventRecordNodeGetEvent.event_out) data->args.hipGraphEventRecordNodeGetEvent.event_out__val = *(data->args.hipGraphEventRecordNodeGetEvent.event_out); + break; +// hipGraphEventRecordNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphEventRecordNodeSetEvent: + break; +// hipGraphEventWaitNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: + if (data->args.hipGraphEventWaitNodeGetEvent.event_out) data->args.hipGraphEventWaitNodeGetEvent.event_out__val = *(data->args.hipGraphEventWaitNodeGetEvent.event_out); + break; +// hipGraphEventWaitNodeSetEvent[('hipGraphNode_t', 'node'), 
('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: + break; +// hipGraphExecBatchMemOpNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipBatchMemOpNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecBatchMemOpNodeSetParams: + if (data->args.hipGraphExecBatchMemOpNodeSetParams.nodeParams) data->args.hipGraphExecBatchMemOpNodeSetParams.nodeParams__val = *(data->args.hipGraphExecBatchMemOpNodeSetParams.nodeParams); + break; +// hipGraphExecChildGraphNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipGraph_t', 'childGraph')] + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: + break; +// hipGraphExecDestroy[('hipGraphExec_t', 'graphExec')] + case HIP_API_ID_hipGraphExecDestroy: + break; +// hipGraphExecEventRecordNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: + break; +// hipGraphExecEventWaitNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: + break; +// hipGraphExecExternalSemaphoresSignalNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: + if (data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams) data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams__val = *(data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams); + break; +// hipGraphExecExternalSemaphoresWaitNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: + if (data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams) 
data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams__val = *(data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams); + break; +// hipGraphExecGetFlags[('hipGraphExec_t', 'graphExec'), ('unsigned long long*', 'flags')] + case HIP_API_ID_hipGraphExecGetFlags: + if (data->args.hipGraphExecGetFlags.flags) data->args.hipGraphExecGetFlags.flags__val = *(data->args.hipGraphExecGetFlags.flags); + break; +// hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecHostNodeSetParams: + if (data->args.hipGraphExecHostNodeSetParams.pNodeParams) data->args.hipGraphExecHostNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecHostNodeSetParams.pNodeParams); + break; +// hipGraphExecKernelNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecKernelNodeSetParams: + if (data->args.hipGraphExecKernelNodeSetParams.pNodeParams) data->args.hipGraphExecKernelNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecKernelNodeSetParams.pNodeParams); + break; +// hipGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: + if (data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams) data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams); + break; +// hipGraphExecMemcpyNodeSetParams1D[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: + break; +// hipGraphExecMemcpyNodeSetParamsFromSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 
'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: + break; +// hipGraphExecMemcpyNodeSetParamsToSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: + break; +// hipGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: + if (data->args.hipGraphExecMemsetNodeSetParams.pNodeParams) data->args.hipGraphExecMemsetNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecMemsetNodeSetParams.pNodeParams); + break; +// hipGraphExecNodeSetParams[('hipGraphExec_t', 'graphExec'), ('hipGraphNode_t', 'node'), ('hipGraphNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecNodeSetParams: + if (data->args.hipGraphExecNodeSetParams.nodeParams) data->args.hipGraphExecNodeSetParams.nodeParams__val = *(data->args.hipGraphExecNodeSetParams.nodeParams); + break; +// hipGraphExecUpdate[('hipGraphExec_t', 'hGraphExec'), ('hipGraph_t', 'hGraph'), ('hipGraphNode_t*', 'hErrorNode_out'), ('hipGraphExecUpdateResult*', 'updateResult_out')] + case HIP_API_ID_hipGraphExecUpdate: + if (data->args.hipGraphExecUpdate.hErrorNode_out) data->args.hipGraphExecUpdate.hErrorNode_out__val = *(data->args.hipGraphExecUpdate.hErrorNode_out); + if (data->args.hipGraphExecUpdate.updateResult_out) data->args.hipGraphExecUpdate.updateResult_out__val = *(data->args.hipGraphExecUpdate.updateResult_out); + break; +// hipGraphExternalSemaphoresSignalNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreSignalNodeParams*', 'params_out')] + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: + if (data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out) 
data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out__val = *(data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out); + break; +// hipGraphExternalSemaphoresSignalNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: + if (data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams) data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams__val = *(data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams); + break; +// hipGraphExternalSemaphoresWaitNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreWaitNodeParams*', 'params_out')] + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: + if (data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out) data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out__val = *(data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out); + break; +// hipGraphExternalSemaphoresWaitNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: + if (data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams) data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams__val = *(data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams); + break; +// hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] + case HIP_API_ID_hipGraphGetEdges: + if (data->args.hipGraphGetEdges.from) data->args.hipGraphGetEdges.from__val = *(data->args.hipGraphGetEdges.from); + if (data->args.hipGraphGetEdges.to) data->args.hipGraphGetEdges.to__val = *(data->args.hipGraphGetEdges.to); + if (data->args.hipGraphGetEdges.numEdges) data->args.hipGraphGetEdges.numEdges__val = *(data->args.hipGraphGetEdges.numEdges); + break; +// 
hipGraphGetNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'nodes'), ('size_t*', 'numNodes')] + case HIP_API_ID_hipGraphGetNodes: + if (data->args.hipGraphGetNodes.nodes) data->args.hipGraphGetNodes.nodes__val = *(data->args.hipGraphGetNodes.nodes); + if (data->args.hipGraphGetNodes.numNodes) data->args.hipGraphGetNodes.numNodes__val = *(data->args.hipGraphGetNodes.numNodes); + break; +// hipGraphGetRootNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pRootNodes'), ('size_t*', 'pNumRootNodes')] + case HIP_API_ID_hipGraphGetRootNodes: + if (data->args.hipGraphGetRootNodes.pRootNodes) data->args.hipGraphGetRootNodes.pRootNodes__val = *(data->args.hipGraphGetRootNodes.pRootNodes); + if (data->args.hipGraphGetRootNodes.pNumRootNodes) data->args.hipGraphGetRootNodes.pNumRootNodes__val = *(data->args.hipGraphGetRootNodes.pNumRootNodes); + break; +// hipGraphHostNodeGetParams[('hipGraphNode_t', 'node'), ('hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphHostNodeGetParams: + if (data->args.hipGraphHostNodeGetParams.pNodeParams) data->args.hipGraphHostNodeGetParams.pNodeParams__val = *(data->args.hipGraphHostNodeGetParams.pNodeParams); + break; +// hipGraphHostNodeSetParams[('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphHostNodeSetParams: + if (data->args.hipGraphHostNodeSetParams.pNodeParams) data->args.hipGraphHostNodeSetParams.pNodeParams__val = *(data->args.hipGraphHostNodeSetParams.pNodeParams); + break; +// hipGraphInstantiate[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pErrorNode'), ('char*', 'pLogBuffer'), ('size_t', 'bufferSize')] + case HIP_API_ID_hipGraphInstantiate: + if (data->args.hipGraphInstantiate.pGraphExec) data->args.hipGraphInstantiate.pGraphExec__val = *(data->args.hipGraphInstantiate.pGraphExec); + if (data->args.hipGraphInstantiate.pErrorNode) data->args.hipGraphInstantiate.pErrorNode__val = *(data->args.hipGraphInstantiate.pErrorNode); + 
data->args.hipGraphInstantiate.pLogBuffer = (data->args.hipGraphInstantiate.pLogBuffer) ? strdup(data->args.hipGraphInstantiate.pLogBuffer) : NULL; + break; +// hipGraphInstantiateWithFlags[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipGraphInstantiateWithFlags: + if (data->args.hipGraphInstantiateWithFlags.pGraphExec) data->args.hipGraphInstantiateWithFlags.pGraphExec__val = *(data->args.hipGraphInstantiateWithFlags.pGraphExec); + break; +// hipGraphInstantiateWithParams[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphInstantiateParams*', 'instantiateParams')] + case HIP_API_ID_hipGraphInstantiateWithParams: + if (data->args.hipGraphInstantiateWithParams.pGraphExec) data->args.hipGraphInstantiateWithParams.pGraphExec__val = *(data->args.hipGraphInstantiateWithParams.pGraphExec); + if (data->args.hipGraphInstantiateWithParams.instantiateParams) data->args.hipGraphInstantiateWithParams.instantiateParams__val = *(data->args.hipGraphInstantiateWithParams.instantiateParams); + break; +// hipGraphKernelNodeCopyAttributes[('hipGraphNode_t', 'hSrc'), ('hipGraphNode_t', 'hDst')] + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: + break; +// hipGraphKernelNodeGetAttribute[('hipGraphNode_t', 'hNode'), ('hipLaunchAttributeID', 'attr'), ('hipLaunchAttributeValue*', 'value')] + case HIP_API_ID_hipGraphKernelNodeGetAttribute: + if (data->args.hipGraphKernelNodeGetAttribute.value) data->args.hipGraphKernelNodeGetAttribute.value__val = *(data->args.hipGraphKernelNodeGetAttribute.value); + break; +// hipGraphKernelNodeGetParams[('hipGraphNode_t', 'node'), ('hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphKernelNodeGetParams: + if (data->args.hipGraphKernelNodeGetParams.pNodeParams) data->args.hipGraphKernelNodeGetParams.pNodeParams__val = *(data->args.hipGraphKernelNodeGetParams.pNodeParams); + break; +// hipGraphKernelNodeSetAttribute[('hipGraphNode_t', 'hNode'), 
('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')] + case HIP_API_ID_hipGraphKernelNodeSetAttribute: + if (data->args.hipGraphKernelNodeSetAttribute.value) data->args.hipGraphKernelNodeSetAttribute.value__val = *(data->args.hipGraphKernelNodeSetAttribute.value); + break; +// hipGraphKernelNodeSetParams[('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphKernelNodeSetParams: + if (data->args.hipGraphKernelNodeSetParams.pNodeParams) data->args.hipGraphKernelNodeSetParams.pNodeParams__val = *(data->args.hipGraphKernelNodeSetParams.pNodeParams); + break; +// hipGraphLaunch[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphLaunch: + break; +// hipGraphMemAllocNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemAllocNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemAllocNodeGetParams: + if (data->args.hipGraphMemAllocNodeGetParams.pNodeParams) data->args.hipGraphMemAllocNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemAllocNodeGetParams.pNodeParams); + break; +// hipGraphMemFreeNodeGetParams[('hipGraphNode_t', 'node'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipGraphMemFreeNodeGetParams: + break; +// hipGraphMemcpyNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemcpyNodeGetParams: + if (data->args.hipGraphMemcpyNodeGetParams.pNodeParams) data->args.hipGraphMemcpyNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemcpyNodeGetParams.pNodeParams); + break; +// hipGraphMemcpyNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemcpyNodeSetParams: + if (data->args.hipGraphMemcpyNodeSetParams.pNodeParams) data->args.hipGraphMemcpyNodeSetParams.pNodeParams__val = *(data->args.hipGraphMemcpyNodeSetParams.pNodeParams); + break; +// hipGraphMemcpyNodeSetParams1D[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), 
('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: + break; +// hipGraphMemcpyNodeSetParamsFromSymbol[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: + break; +// hipGraphMemcpyNodeSetParamsToSymbol[('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: + break; +// hipGraphMemsetNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemsetNodeGetParams: + if (data->args.hipGraphMemsetNodeGetParams.pNodeParams) data->args.hipGraphMemsetNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemsetNodeGetParams.pNodeParams); + break; +// hipGraphMemsetNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemsetNodeSetParams: + if (data->args.hipGraphMemsetNodeSetParams.pNodeParams) data->args.hipGraphMemsetNodeSetParams.pNodeParams__val = *(data->args.hipGraphMemsetNodeSetParams.pNodeParams); + break; +// hipGraphNodeFindInClone[('hipGraphNode_t*', 'pNode'), ('hipGraphNode_t', 'originalNode'), ('hipGraph_t', 'clonedGraph')] + case HIP_API_ID_hipGraphNodeFindInClone: + if (data->args.hipGraphNodeFindInClone.pNode) data->args.hipGraphNodeFindInClone.pNode__val = *(data->args.hipGraphNodeFindInClone.pNode); + break; +// hipGraphNodeGetDependencies[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependencies'), ('size_t*', 'pNumDependencies')] + case HIP_API_ID_hipGraphNodeGetDependencies: + if (data->args.hipGraphNodeGetDependencies.pDependencies) data->args.hipGraphNodeGetDependencies.pDependencies__val = *(data->args.hipGraphNodeGetDependencies.pDependencies); + if (data->args.hipGraphNodeGetDependencies.pNumDependencies) 
data->args.hipGraphNodeGetDependencies.pNumDependencies__val = *(data->args.hipGraphNodeGetDependencies.pNumDependencies); + break; +// hipGraphNodeGetDependentNodes[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependentNodes'), ('size_t*', 'pNumDependentNodes')] + case HIP_API_ID_hipGraphNodeGetDependentNodes: + if (data->args.hipGraphNodeGetDependentNodes.pDependentNodes) data->args.hipGraphNodeGetDependentNodes.pDependentNodes__val = *(data->args.hipGraphNodeGetDependentNodes.pDependentNodes); + if (data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes) data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes__val = *(data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes); + break; +// hipGraphNodeGetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int*', 'isEnabled')] + case HIP_API_ID_hipGraphNodeGetEnabled: + if (data->args.hipGraphNodeGetEnabled.isEnabled) data->args.hipGraphNodeGetEnabled.isEnabled__val = *(data->args.hipGraphNodeGetEnabled.isEnabled); + break; +// hipGraphNodeGetType[('hipGraphNode_t', 'node'), ('hipGraphNodeType*', 'pType')] + case HIP_API_ID_hipGraphNodeGetType: + if (data->args.hipGraphNodeGetType.pType) data->args.hipGraphNodeGetType.pType__val = *(data->args.hipGraphNodeGetType.pType); + break; +// hipGraphNodeSetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int', 'isEnabled')] + case HIP_API_ID_hipGraphNodeSetEnabled: + break; +// hipGraphNodeSetParams[('hipGraphNode_t', 'node'), ('hipGraphNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphNodeSetParams: + if (data->args.hipGraphNodeSetParams.nodeParams) data->args.hipGraphNodeSetParams.nodeParams__val = *(data->args.hipGraphNodeSetParams.nodeParams); + break; +// hipGraphReleaseUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipGraphReleaseUserObject: + break; +// hipGraphRemoveDependencies[('hipGraph_t', 'graph'), ('const 
hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphRemoveDependencies: + if (data->args.hipGraphRemoveDependencies.from) data->args.hipGraphRemoveDependencies.from__val = *(data->args.hipGraphRemoveDependencies.from); + if (data->args.hipGraphRemoveDependencies.to) data->args.hipGraphRemoveDependencies.to__val = *(data->args.hipGraphRemoveDependencies.to); + break; +// hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphRetainUserObject: + break; +// hipGraphUpload[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphUpload: + break; +// hipGraphicsGLRegisterBuffer[('hipGraphicsResource**', 'resource'), ('GLuint', 'buffer'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphicsGLRegisterBuffer: + if (data->args.hipGraphicsGLRegisterBuffer.resource) data->args.hipGraphicsGLRegisterBuffer.resource__val = *(data->args.hipGraphicsGLRegisterBuffer.resource); + break; +// hipGraphicsGLRegisterImage[('hipGraphicsResource**', 'resource'), ('GLuint', 'image'), ('GLenum', 'target'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphicsGLRegisterImage: + if (data->args.hipGraphicsGLRegisterImage.resource) data->args.hipGraphicsGLRegisterImage.resource__val = *(data->args.hipGraphicsGLRegisterImage.resource); + break; +// hipGraphicsMapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphicsMapResources: + if (data->args.hipGraphicsMapResources.resources) data->args.hipGraphicsMapResources.resources__val = *(data->args.hipGraphicsMapResources.resources); + break; +// hipGraphicsResourceGetMappedPointer[('void**', 'devPtr'), ('size_t*', 'size'), ('hipGraphicsResource_t', 'resource')] + case HIP_API_ID_hipGraphicsResourceGetMappedPointer: + if (data->args.hipGraphicsResourceGetMappedPointer.devPtr) 
data->args.hipGraphicsResourceGetMappedPointer.devPtr__val = *(data->args.hipGraphicsResourceGetMappedPointer.devPtr); + if (data->args.hipGraphicsResourceGetMappedPointer.size) data->args.hipGraphicsResourceGetMappedPointer.size__val = *(data->args.hipGraphicsResourceGetMappedPointer.size); + break; +// hipGraphicsSubResourceGetMappedArray[('hipArray_t*', 'array'), ('hipGraphicsResource_t', 'resource'), ('unsigned int', 'arrayIndex'), ('unsigned int', 'mipLevel')] + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: + if (data->args.hipGraphicsSubResourceGetMappedArray.array) data->args.hipGraphicsSubResourceGetMappedArray.array__val = *(data->args.hipGraphicsSubResourceGetMappedArray.array); + break; +// hipGraphicsUnmapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphicsUnmapResources: + if (data->args.hipGraphicsUnmapResources.resources) data->args.hipGraphicsUnmapResources.resources__val = *(data->args.hipGraphicsUnmapResources.resources); + break; +// hipGraphicsUnregisterResource[('hipGraphicsResource_t', 'resource')] + case HIP_API_ID_hipGraphicsUnregisterResource: + break; +// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')] + case HIP_API_ID_hipHccModuleLaunchKernel: + if (data->args.hipHccModuleLaunchKernel.kernelParams) data->args.hipHccModuleLaunchKernel.kernelParams__val = *(data->args.hipHccModuleLaunchKernel.kernelParams); + if (data->args.hipHccModuleLaunchKernel.extra) data->args.hipHccModuleLaunchKernel.extra__val = *(data->args.hipHccModuleLaunchKernel.extra); + break; +// hipHostAlloc[('void**', 'ptr'), 
('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostAlloc: + if (data->args.hipHostAlloc.ptr) data->args.hipHostAlloc.ptr__val = *(data->args.hipHostAlloc.ptr); + break; +// hipHostFree[('void*', 'ptr')] + case HIP_API_ID_hipHostFree: + break; +// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostGetDevicePointer: + if (data->args.hipHostGetDevicePointer.devPtr) data->args.hipHostGetDevicePointer.devPtr__val = *(data->args.hipHostGetDevicePointer.devPtr); + break; +// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')] + case HIP_API_ID_hipHostGetFlags: + if (data->args.hipHostGetFlags.flagsPtr) data->args.hipHostGetFlags.flagsPtr__val = *(data->args.hipHostGetFlags.flagsPtr); + break; +// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostMalloc: + if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr); + break; +// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostRegister: + break; +// hipHostUnregister[('void*', 'hostPtr')] + case HIP_API_ID_hipHostUnregister: + break; +// hipImportExternalMemory[('hipExternalMemory_t*', 'extMem_out'), ('const hipExternalMemoryHandleDesc*', 'memHandleDesc')] + case HIP_API_ID_hipImportExternalMemory: + if (data->args.hipImportExternalMemory.extMem_out) data->args.hipImportExternalMemory.extMem_out__val = *(data->args.hipImportExternalMemory.extMem_out); + if (data->args.hipImportExternalMemory.memHandleDesc) data->args.hipImportExternalMemory.memHandleDesc__val = *(data->args.hipImportExternalMemory.memHandleDesc); + break; +// hipImportExternalSemaphore[('hipExternalSemaphore_t*', 'extSem_out'), ('const hipExternalSemaphoreHandleDesc*', 'semHandleDesc')] + case HIP_API_ID_hipImportExternalSemaphore: + if (data->args.hipImportExternalSemaphore.extSem_out) 
data->args.hipImportExternalSemaphore.extSem_out__val = *(data->args.hipImportExternalSemaphore.extSem_out); + if (data->args.hipImportExternalSemaphore.semHandleDesc) data->args.hipImportExternalSemaphore.semHandleDesc__val = *(data->args.hipImportExternalSemaphore.semHandleDesc); + break; +// hipInit[('unsigned int', 'flags')] + case HIP_API_ID_hipInit: + break; +// hipIpcCloseMemHandle[('void*', 'devPtr')] + case HIP_API_ID_hipIpcCloseMemHandle: + break; +// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipIpcGetEventHandle: + if (data->args.hipIpcGetEventHandle.handle) data->args.hipIpcGetEventHandle.handle__val = *(data->args.hipIpcGetEventHandle.handle); + break; +// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')] + case HIP_API_ID_hipIpcGetMemHandle: + if (data->args.hipIpcGetMemHandle.handle) data->args.hipIpcGetMemHandle.handle__val = *(data->args.hipIpcGetMemHandle.handle); + break; +// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')] + case HIP_API_ID_hipIpcOpenEventHandle: + if (data->args.hipIpcOpenEventHandle.event) data->args.hipIpcOpenEventHandle.event__val = *(data->args.hipIpcOpenEventHandle.event); + break; +// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')] + case HIP_API_ID_hipIpcOpenMemHandle: + if (data->args.hipIpcOpenMemHandle.devPtr) data->args.hipIpcOpenMemHandle.devPtr__val = *(data->args.hipIpcOpenMemHandle.devPtr); + break; +// hipLaunchByPtr[('const void*', 'hostFunction')] + case HIP_API_ID_hipLaunchByPtr: + break; +// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipLaunchCooperativeKernel: + if (data->args.hipLaunchCooperativeKernel.kernelParams) data->args.hipLaunchCooperativeKernel.kernelParams__val = 
*(data->args.hipLaunchCooperativeKernel.kernelParams); + break; +// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: + if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList); + break; +// hipLaunchHostFunc[('hipStream_t', 'stream'), ('hipHostFn_t', 'fn'), ('void*', 'userData')] + case HIP_API_ID_hipLaunchHostFunc: + break; +// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipLaunchKernel: + if (data->args.hipLaunchKernel.args) data->args.hipLaunchKernel.args__val = *(data->args.hipLaunchKernel.args); + break; +// hipLaunchKernelExC[('const hipLaunchConfig_t*', 'config'), ('const void*', 'fPtr'), ('void**', 'args')] + case HIP_API_ID_hipLaunchKernelExC: + if (data->args.hipLaunchKernelExC.config) data->args.hipLaunchKernelExC.config__val = *(data->args.hipLaunchKernelExC.config); + if (data->args.hipLaunchKernelExC.args) data->args.hipLaunchKernelExC.args__val = *(data->args.hipLaunchKernelExC.args); + break; +// hipLibraryGetKernel[('hipKernel_t*', 'pKernel'), ('hipLibrary_t', 'library'), ('const char*', 'name')] + case HIP_API_ID_hipLibraryGetKernel: + if (data->args.hipLibraryGetKernel.pKernel) data->args.hipLibraryGetKernel.pKernel__val = *(data->args.hipLibraryGetKernel.pKernel); + if (data->args.hipLibraryGetKernel.name) data->args.hipLibraryGetKernel.name__val = *(data->args.hipLibraryGetKernel.name); + break; +// hipLibraryGetKernelCount[('unsigned int*', 'count'), ('hipLibrary_t', 'library')] + case HIP_API_ID_hipLibraryGetKernelCount: + if (data->args.hipLibraryGetKernelCount.count) 
data->args.hipLibraryGetKernelCount.count__val = *(data->args.hipLibraryGetKernelCount.count); + break; +// hipLibraryLoadData[('hipLibrary_t*', 'library'), ('const void*', 'code'), ('hipJitOption**', 'jitOptions'), ('void**', 'jitOptionsValues'), ('unsigned int', 'numJitOptions'), ('hipLibraryOption**', 'libraryOptions'), ('void**', 'libraryOptionValues'), ('unsigned int', 'numLibraryOptions')] + case HIP_API_ID_hipLibraryLoadData: + if (data->args.hipLibraryLoadData.library) data->args.hipLibraryLoadData.library__val = *(data->args.hipLibraryLoadData.library); + if (data->args.hipLibraryLoadData.jitOptions) data->args.hipLibraryLoadData.jitOptions__val = *(data->args.hipLibraryLoadData.jitOptions); + if (data->args.hipLibraryLoadData.jitOptionsValues) data->args.hipLibraryLoadData.jitOptionsValues__val = *(data->args.hipLibraryLoadData.jitOptionsValues); + if (data->args.hipLibraryLoadData.libraryOptions) data->args.hipLibraryLoadData.libraryOptions__val = *(data->args.hipLibraryLoadData.libraryOptions); + if (data->args.hipLibraryLoadData.libraryOptionValues) data->args.hipLibraryLoadData.libraryOptionValues__val = *(data->args.hipLibraryLoadData.libraryOptionValues); + break; +// hipLibraryLoadFromFile[('hipLibrary_t*', 'library'), ('const char*', 'fileName'), ('hipJitOption**', 'jitOptions'), ('void**', 'jitOptionsValues'), ('unsigned int', 'numJitOptions'), ('hipLibraryOption**', 'libraryOptions'), ('void**', 'libraryOptionValues'), ('unsigned int', 'numLibraryOptions')] + case HIP_API_ID_hipLibraryLoadFromFile: + if (data->args.hipLibraryLoadFromFile.library) data->args.hipLibraryLoadFromFile.library__val = *(data->args.hipLibraryLoadFromFile.library); + if (data->args.hipLibraryLoadFromFile.fileName) data->args.hipLibraryLoadFromFile.fileName__val = *(data->args.hipLibraryLoadFromFile.fileName); + if (data->args.hipLibraryLoadFromFile.jitOptions) data->args.hipLibraryLoadFromFile.jitOptions__val = *(data->args.hipLibraryLoadFromFile.jitOptions); + if 
(data->args.hipLibraryLoadFromFile.jitOptionsValues) data->args.hipLibraryLoadFromFile.jitOptionsValues__val = *(data->args.hipLibraryLoadFromFile.jitOptionsValues); + if (data->args.hipLibraryLoadFromFile.libraryOptions) data->args.hipLibraryLoadFromFile.libraryOptions__val = *(data->args.hipLibraryLoadFromFile.libraryOptions); + if (data->args.hipLibraryLoadFromFile.libraryOptionValues) data->args.hipLibraryLoadFromFile.libraryOptionValues__val = *(data->args.hipLibraryLoadFromFile.libraryOptionValues); + break; +// hipLibraryUnload[('hipLibrary_t', 'library')] + case HIP_API_ID_hipLibraryUnload: + break; +// hipLinkAddData[('hipLinkState_t', 'state'), ('hipJitInputType', 'type'), ('void*', 'data'), ('size_t', 'size'), ('const char*', 'name'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues')] + case HIP_API_ID_hipLinkAddData: + if (data->args.hipLinkAddData.name) data->args.hipLinkAddData.name__val = *(data->args.hipLinkAddData.name); + if (data->args.hipLinkAddData.options) data->args.hipLinkAddData.options__val = *(data->args.hipLinkAddData.options); + if (data->args.hipLinkAddData.optionValues) data->args.hipLinkAddData.optionValues__val = *(data->args.hipLinkAddData.optionValues); + break; +// hipLinkAddFile[('hipLinkState_t', 'state'), ('hipJitInputType', 'type'), ('const char*', 'path'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues')] + case HIP_API_ID_hipLinkAddFile: + if (data->args.hipLinkAddFile.path) data->args.hipLinkAddFile.path__val = *(data->args.hipLinkAddFile.path); + if (data->args.hipLinkAddFile.options) data->args.hipLinkAddFile.options__val = *(data->args.hipLinkAddFile.options); + if (data->args.hipLinkAddFile.optionValues) data->args.hipLinkAddFile.optionValues__val = *(data->args.hipLinkAddFile.optionValues); + break; +// hipLinkComplete[('hipLinkState_t', 'state'), ('void**', 'hipBinOut'), ('size_t*', 'sizeOut')] + case HIP_API_ID_hipLinkComplete: + if 
(data->args.hipLinkComplete.hipBinOut) data->args.hipLinkComplete.hipBinOut__val = *(data->args.hipLinkComplete.hipBinOut); + if (data->args.hipLinkComplete.sizeOut) data->args.hipLinkComplete.sizeOut__val = *(data->args.hipLinkComplete.sizeOut); + break; +// hipLinkCreate[('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionValues'), ('hipLinkState_t*', 'stateOut')] + case HIP_API_ID_hipLinkCreate: + if (data->args.hipLinkCreate.options) data->args.hipLinkCreate.options__val = *(data->args.hipLinkCreate.options); + if (data->args.hipLinkCreate.optionValues) data->args.hipLinkCreate.optionValues__val = *(data->args.hipLinkCreate.optionValues); + if (data->args.hipLinkCreate.stateOut) data->args.hipLinkCreate.stateOut__val = *(data->args.hipLinkCreate.stateOut); + break; +// hipLinkDestroy[('hipLinkState_t', 'state')] + case HIP_API_ID_hipLinkDestroy: + break; +// hipMalloc[('void**', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMalloc: + if (data->args.hipMalloc.ptr) data->args.hipMalloc.ptr__val = *(data->args.hipMalloc.ptr); + break; +// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')] + case HIP_API_ID_hipMalloc3D: + if (data->args.hipMalloc3D.pitchedDevPtr) data->args.hipMalloc3D.pitchedDevPtr__val = *(data->args.hipMalloc3D.pitchedDevPtr); + break; +// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMalloc3DArray: + if (data->args.hipMalloc3DArray.array) data->args.hipMalloc3DArray.array__val = *(data->args.hipMalloc3DArray.array); + if (data->args.hipMalloc3DArray.desc) data->args.hipMalloc3DArray.desc__val = *(data->args.hipMalloc3DArray.desc); + break; +// hipMallocArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocArray: + if (data->args.hipMallocArray.array) 
data->args.hipMallocArray.array__val = *(data->args.hipMallocArray.array); + if (data->args.hipMallocArray.desc) data->args.hipMallocArray.desc__val = *(data->args.hipMallocArray.desc); + break; +// hipMallocAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMallocAsync: + if (data->args.hipMallocAsync.dev_ptr) data->args.hipMallocAsync.dev_ptr__val = *(data->args.hipMallocAsync.dev_ptr); + break; +// hipMallocFromPoolAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipMemPool_t', 'mem_pool'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMallocFromPoolAsync: + if (data->args.hipMallocFromPoolAsync.dev_ptr) data->args.hipMallocFromPoolAsync.dev_ptr__val = *(data->args.hipMallocFromPoolAsync.dev_ptr); + break; +// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMallocHost: + if (data->args.hipMallocHost.ptr) data->args.hipMallocHost.ptr__val = *(data->args.hipMallocHost.ptr); + break; +// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocManaged: + if (data->args.hipMallocManaged.dev_ptr) data->args.hipMallocManaged.dev_ptr__val = *(data->args.hipMallocManaged.dev_ptr); + break; +// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocMipmappedArray: + if (data->args.hipMallocMipmappedArray.mipmappedArray) data->args.hipMallocMipmappedArray.mipmappedArray__val = *(data->args.hipMallocMipmappedArray.mipmappedArray); + if (data->args.hipMallocMipmappedArray.desc) data->args.hipMallocMipmappedArray.desc__val = *(data->args.hipMallocMipmappedArray.desc); + break; +// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMallocPitch: + if (data->args.hipMallocPitch.ptr) data->args.hipMallocPitch.ptr__val = 
*(data->args.hipMallocPitch.ptr); + if (data->args.hipMallocPitch.pitch) data->args.hipMallocPitch.pitch__val = *(data->args.hipMallocPitch.pitch); + break; +// hipMemAddressFree[('void*', 'devPtr'), ('size_t', 'size')] + case HIP_API_ID_hipMemAddressFree: + break; +// hipMemAddressReserve[('void**', 'ptr'), ('size_t', 'size'), ('size_t', 'alignment'), ('void*', 'addr'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemAddressReserve: + if (data->args.hipMemAddressReserve.ptr) data->args.hipMemAddressReserve.ptr__val = *(data->args.hipMemAddressReserve.ptr); + break; +// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')] + case HIP_API_ID_hipMemAdvise: + break; +// hipMemAdvise_v2[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('hipMemLocation', 'location')] + case HIP_API_ID_hipMemAdvise_v2: + break; +// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMemAllocHost: + if (data->args.hipMemAllocHost.ptr) data->args.hipMemAllocHost.ptr__val = *(data->args.hipMemAllocHost.ptr); + break; +// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')] + case HIP_API_ID_hipMemAllocPitch: + if (data->args.hipMemAllocPitch.dptr) data->args.hipMemAllocPitch.dptr__val = *(data->args.hipMemAllocPitch.dptr); + if (data->args.hipMemAllocPitch.pitch) data->args.hipMemAllocPitch.pitch__val = *(data->args.hipMemAllocPitch.pitch); + break; +// hipMemCreate[('hipMemGenericAllocationHandle_t*', 'handle'), ('size_t', 'size'), ('const hipMemAllocationProp*', 'prop'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemCreate: + if (data->args.hipMemCreate.handle) data->args.hipMemCreate.handle__val = *(data->args.hipMemCreate.handle); + if (data->args.hipMemCreate.prop) data->args.hipMemCreate.prop__val = *(data->args.hipMemCreate.prop); + break; +// 
hipMemExportToShareableHandle[('void*', 'shareableHandle'), ('hipMemGenericAllocationHandle_t', 'handle'), ('hipMemAllocationHandleType', 'handleType'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemExportToShareableHandle: + break; +// hipMemGetAccess[('unsigned long long*', 'flags'), ('const hipMemLocation*', 'location'), ('void*', 'ptr')] + case HIP_API_ID_hipMemGetAccess: + if (data->args.hipMemGetAccess.flags) data->args.hipMemGetAccess.flags__val = *(data->args.hipMemGetAccess.flags); + if (data->args.hipMemGetAccess.location) data->args.hipMemGetAccess.location__val = *(data->args.hipMemGetAccess.location); + break; +// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')] + case HIP_API_ID_hipMemGetAddressRange: + if (data->args.hipMemGetAddressRange.pbase) data->args.hipMemGetAddressRange.pbase__val = *(data->args.hipMemGetAddressRange.pbase); + if (data->args.hipMemGetAddressRange.psize) data->args.hipMemGetAddressRange.psize__val = *(data->args.hipMemGetAddressRange.psize); + break; +// hipMemGetAllocationGranularity[('size_t*', 'granularity'), ('const hipMemAllocationProp*', 'prop'), ('hipMemAllocationGranularity_flags', 'option')] + case HIP_API_ID_hipMemGetAllocationGranularity: + if (data->args.hipMemGetAllocationGranularity.granularity) data->args.hipMemGetAllocationGranularity.granularity__val = *(data->args.hipMemGetAllocationGranularity.granularity); + if (data->args.hipMemGetAllocationGranularity.prop) data->args.hipMemGetAllocationGranularity.prop__val = *(data->args.hipMemGetAllocationGranularity.prop); + break; +// hipMemGetAllocationPropertiesFromHandle[('hipMemAllocationProp*', 'prop'), ('hipMemGenericAllocationHandle_t', 'handle')] + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: + if (data->args.hipMemGetAllocationPropertiesFromHandle.prop) data->args.hipMemGetAllocationPropertiesFromHandle.prop__val = *(data->args.hipMemGetAllocationPropertiesFromHandle.prop); + break; 
+// hipMemGetHandleForAddressRange[('void*', 'handle'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'size'), ('hipMemRangeHandleType', 'handleType'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemGetHandleForAddressRange: + break; +// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')] + case HIP_API_ID_hipMemGetInfo: + if (data->args.hipMemGetInfo.free) data->args.hipMemGetInfo.free__val = *(data->args.hipMemGetInfo.free); + if (data->args.hipMemGetInfo.total) data->args.hipMemGetInfo.total__val = *(data->args.hipMemGetInfo.total); + break; +// hipMemImportFromShareableHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'osHandle'), ('hipMemAllocationHandleType', 'shHandleType')] + case HIP_API_ID_hipMemImportFromShareableHandle: + if (data->args.hipMemImportFromShareableHandle.handle) data->args.hipMemImportFromShareableHandle.handle__val = *(data->args.hipMemImportFromShareableHandle.handle); + break; +// hipMemMap[('void*', 'ptr'), ('size_t', 'size'), ('size_t', 'offset'), ('hipMemGenericAllocationHandle_t', 'handle'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemMap: + break; +// hipMemMapArrayAsync[('hipArrayMapInfo*', 'mapInfoList'), ('unsigned int', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemMapArrayAsync: + if (data->args.hipMemMapArrayAsync.mapInfoList) data->args.hipMemMapArrayAsync.mapInfoList__val = *(data->args.hipMemMapArrayAsync.mapInfoList); + break; +// hipMemPoolCreate[('hipMemPool_t*', 'mem_pool'), ('const hipMemPoolProps*', 'pool_props')] + case HIP_API_ID_hipMemPoolCreate: + if (data->args.hipMemPoolCreate.mem_pool) data->args.hipMemPoolCreate.mem_pool__val = *(data->args.hipMemPoolCreate.mem_pool); + if (data->args.hipMemPoolCreate.pool_props) data->args.hipMemPoolCreate.pool_props__val = *(data->args.hipMemPoolCreate.pool_props); + break; +// hipMemPoolDestroy[('hipMemPool_t', 'mem_pool')] + case HIP_API_ID_hipMemPoolDestroy: + break; +// 
hipMemPoolExportPointer[('hipMemPoolPtrExportData*', 'export_data'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipMemPoolExportPointer: + if (data->args.hipMemPoolExportPointer.export_data) data->args.hipMemPoolExportPointer.export_data__val = *(data->args.hipMemPoolExportPointer.export_data); + break; +// hipMemPoolExportToShareableHandle[('void*', 'shared_handle'), ('hipMemPool_t', 'mem_pool'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMemPoolExportToShareableHandle: + break; +// hipMemPoolGetAccess[('hipMemAccessFlags*', 'flags'), ('hipMemPool_t', 'mem_pool'), ('hipMemLocation*', 'location')] + case HIP_API_ID_hipMemPoolGetAccess: + if (data->args.hipMemPoolGetAccess.flags) data->args.hipMemPoolGetAccess.flags__val = *(data->args.hipMemPoolGetAccess.flags); + if (data->args.hipMemPoolGetAccess.location) data->args.hipMemPoolGetAccess.location__val = *(data->args.hipMemPoolGetAccess.location); + break; +// hipMemPoolGetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipMemPoolGetAttribute: + break; +// hipMemPoolImportFromShareableHandle[('hipMemPool_t*', 'mem_pool'), ('void*', 'shared_handle'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMemPoolImportFromShareableHandle: + if (data->args.hipMemPoolImportFromShareableHandle.mem_pool) data->args.hipMemPoolImportFromShareableHandle.mem_pool__val = *(data->args.hipMemPoolImportFromShareableHandle.mem_pool); + break; +// hipMemPoolImportPointer[('void**', 'dev_ptr'), ('hipMemPool_t', 'mem_pool'), ('hipMemPoolPtrExportData*', 'export_data')] + case HIP_API_ID_hipMemPoolImportPointer: + if (data->args.hipMemPoolImportPointer.dev_ptr) data->args.hipMemPoolImportPointer.dev_ptr__val = *(data->args.hipMemPoolImportPointer.dev_ptr); + if (data->args.hipMemPoolImportPointer.export_data) data->args.hipMemPoolImportPointer.export_data__val = 
*(data->args.hipMemPoolImportPointer.export_data); + break; +// hipMemPoolSetAccess[('hipMemPool_t', 'mem_pool'), ('const hipMemAccessDesc*', 'desc_list'), ('size_t', 'count')] + case HIP_API_ID_hipMemPoolSetAccess: + if (data->args.hipMemPoolSetAccess.desc_list) data->args.hipMemPoolSetAccess.desc_list__val = *(data->args.hipMemPoolSetAccess.desc_list); + break; +// hipMemPoolSetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipMemPoolSetAttribute: + break; +// hipMemPoolTrimTo[('hipMemPool_t', 'mem_pool'), ('size_t', 'min_bytes_to_hold')] + case HIP_API_ID_hipMemPoolTrimTo: + break; +// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemPrefetchAsync: + break; +// hipMemPrefetchAsync_v2[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemLocation', 'location'), ('unsigned int', 'flags'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemPrefetchAsync_v2: + break; +// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')] + case HIP_API_ID_hipMemPtrGetInfo: + if (data->args.hipMemPtrGetInfo.size) data->args.hipMemPtrGetInfo.size__val = *(data->args.hipMemPtrGetInfo.size); + break; +// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')] + case HIP_API_ID_hipMemRangeGetAttribute: + break; +// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')] + case HIP_API_ID_hipMemRangeGetAttributes: + if (data->args.hipMemRangeGetAttributes.data) data->args.hipMemRangeGetAttributes.data__val = *(data->args.hipMemRangeGetAttributes.data); + if (data->args.hipMemRangeGetAttributes.data_sizes) data->args.hipMemRangeGetAttributes.data_sizes__val = 
*(data->args.hipMemRangeGetAttributes.data_sizes); + if (data->args.hipMemRangeGetAttributes.attributes) data->args.hipMemRangeGetAttributes.attributes__val = *(data->args.hipMemRangeGetAttributes.attributes); + break; +// hipMemRelease[('hipMemGenericAllocationHandle_t', 'handle')] + case HIP_API_ID_hipMemRelease: + break; +// hipMemRetainAllocationHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'addr')] + case HIP_API_ID_hipMemRetainAllocationHandle: + if (data->args.hipMemRetainAllocationHandle.handle) data->args.hipMemRetainAllocationHandle.handle__val = *(data->args.hipMemRetainAllocationHandle.handle); + break; +// hipMemSetAccess[('void*', 'ptr'), ('size_t', 'size'), ('const hipMemAccessDesc*', 'desc'), ('size_t', 'count')] + case HIP_API_ID_hipMemSetAccess: + if (data->args.hipMemSetAccess.desc) data->args.hipMemSetAccess.desc__val = *(data->args.hipMemSetAccess.desc); + break; +// hipMemUnmap[('void*', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMemUnmap: + break; +// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy: + break; +// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2D: + break; +// hipMemcpy2DArrayToArray[('hipArray_t', 'dst'), ('size_t', 'wOffsetDst'), ('size_t', 'hOffsetDst'), ('hipArray_const_t', 'src'), ('size_t', 'wOffsetSrc'), ('size_t', 'hOffsetSrc'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2DArrayToArray: + break; +// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DAsync: + break; +// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), 
('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2DFromArray: + break; +// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DFromArrayAsync: + break; +// hipMemcpy2DToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2DToArray: + break; +// hipMemcpy2DToArrayAsync[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DToArrayAsync: + break; +// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')] + case HIP_API_ID_hipMemcpy3D: + if (data->args.hipMemcpy3D.p) data->args.hipMemcpy3D.p__val = *(data->args.hipMemcpy3D.p); + break; +// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DAsync: + if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p); + break; +// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DBatchAsync: + if (data->args.hipMemcpy3DBatchAsync.opList) data->args.hipMemcpy3DBatchAsync.opList__val = *(data->args.hipMemcpy3DBatchAsync.opList); + if (data->args.hipMemcpy3DBatchAsync.failIdx) data->args.hipMemcpy3DBatchAsync.failIdx__val = *(data->args.hipMemcpy3DBatchAsync.failIdx); + break; +// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 
'p')] + case HIP_API_ID_hipMemcpy3DPeer: + if (data->args.hipMemcpy3DPeer.p) data->args.hipMemcpy3DPeer.p__val = *(data->args.hipMemcpy3DPeer.p); + break; +// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DPeerAsync: + if (data->args.hipMemcpy3DPeerAsync.p) data->args.hipMemcpy3DPeerAsync.p__val = *(data->args.hipMemcpy3DPeerAsync.p); + break; +// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyAsync: + break; +// hipMemcpyAtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount')] + case HIP_API_ID_hipMemcpyAtoA: + break; +// hipMemcpyAtoD[('hipDeviceptr_t', 'dstDevice'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount')] + case HIP_API_ID_hipMemcpyAtoD: + break; +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] + case HIP_API_ID_hipMemcpyAtoH: + break; +// hipMemcpyAtoHAsync[('void*', 'dstHost'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyAtoHAsync: + break; +// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyBatchAsync: + if (data->args.hipMemcpyBatchAsync.dsts) data->args.hipMemcpyBatchAsync.dsts__val = *(data->args.hipMemcpyBatchAsync.dsts); + if (data->args.hipMemcpyBatchAsync.srcs) data->args.hipMemcpyBatchAsync.srcs__val = *(data->args.hipMemcpyBatchAsync.srcs); + if (data->args.hipMemcpyBatchAsync.sizes) data->args.hipMemcpyBatchAsync.sizes__val = *(data->args.hipMemcpyBatchAsync.sizes); + if 
(data->args.hipMemcpyBatchAsync.attrs) data->args.hipMemcpyBatchAsync.attrs__val = *(data->args.hipMemcpyBatchAsync.attrs); + if (data->args.hipMemcpyBatchAsync.attrsIdxs) data->args.hipMemcpyBatchAsync.attrsIdxs__val = *(data->args.hipMemcpyBatchAsync.attrsIdxs); + if (data->args.hipMemcpyBatchAsync.failIdx) data->args.hipMemcpyBatchAsync.failIdx__val = *(data->args.hipMemcpyBatchAsync.failIdx); + break; +// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')] + case HIP_API_ID_hipMemcpyDtoA: + break; +// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyDtoD: + break; +// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyDtoDAsync: + break; +// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyDtoH: + break; +// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyDtoHAsync: + break; +// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyFromArray: + break; +// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyFromSymbol: + break; +// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyFromSymbolAsync: + break; +// hipMemcpyHtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] + case HIP_API_ID_hipMemcpyHtoA: + break; +// 
hipMemcpyHtoAAsync[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyHtoAAsync: + break; +// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyHtoD: + break; +// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyHtoDAsync: + break; +// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')] + case HIP_API_ID_hipMemcpyParam2D: + if (data->args.hipMemcpyParam2D.pCopy) data->args.hipMemcpyParam2D.pCopy__val = *(data->args.hipMemcpyParam2D.pCopy); + break; +// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyParam2DAsync: + if (data->args.hipMemcpyParam2DAsync.pCopy) data->args.hipMemcpyParam2DAsync.pCopy__val = *(data->args.hipMemcpyParam2DAsync.pCopy); + break; +// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyPeer: + break; +// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyPeerAsync: + break; +// hipMemcpyToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyToArray: + break; +// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyToSymbol: + break; +// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case 
HIP_API_ID_hipMemcpyToSymbolAsync: + break; +// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyWithStream: + break; +// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemset: + break; +// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMemset2D: + break; +// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemset2DAsync: + break; +// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')] + case HIP_API_ID_hipMemset3D: + break; +// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemset3DAsync: + break; +// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetAsync: + break; +// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD16: + break; +// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD16Async: + break; +// hipMemsetD2D16[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned short', 'value'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMemsetD2D16: + break; +// hipMemsetD2D16Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned short', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD2D16Async: + break; +// hipMemsetD2D32[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned int', 'value'), ('size_t', 'width'), 
('size_t', 'height')] + case HIP_API_ID_hipMemsetD2D32: + break; +// hipMemsetD2D32Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD2D32Async: + break; +// hipMemsetD2D8[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned char', 'value'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMemsetD2D8: + break; +// hipMemsetD2D8Async[('hipDeviceptr_t', 'dst'), ('size_t', 'dstPitch'), ('unsigned char', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD2D8Async: + break; +// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD32: + break; +// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD32Async: + break; +// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD8: + break; +// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD8Async: + break; +// hipMipmappedArrayCreate[('hipMipmappedArray_t*', 'pHandle'), ('HIP_ARRAY3D_DESCRIPTOR*', 'pMipmappedArrayDesc'), ('unsigned int', 'numMipmapLevels')] + case HIP_API_ID_hipMipmappedArrayCreate: + if (data->args.hipMipmappedArrayCreate.pHandle) data->args.hipMipmappedArrayCreate.pHandle__val = *(data->args.hipMipmappedArrayCreate.pHandle); + if (data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc) data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc__val = *(data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc); + break; +// hipMipmappedArrayDestroy[('hipMipmappedArray_t', 'hMipmappedArray')] + case HIP_API_ID_hipMipmappedArrayDestroy: + break; +// hipMipmappedArrayGetLevel[('hipArray_t*', 'pLevelArray'), 
('hipMipmappedArray_t', 'hMipMappedArray'), ('unsigned int', 'level')] + case HIP_API_ID_hipMipmappedArrayGetLevel: + if (data->args.hipMipmappedArrayGetLevel.pLevelArray) data->args.hipMipmappedArrayGetLevel.pLevelArray__val = *(data->args.hipMipmappedArrayGetLevel.pLevelArray); + break; +// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')] + case HIP_API_ID_hipModuleGetFunction: + if (data->args.hipModuleGetFunction.function) data->args.hipModuleGetFunction.function__val = *(data->args.hipModuleGetFunction.function); + if (data->args.hipModuleGetFunction.kname) data->args.hipModuleGetFunction.kname__val = *(data->args.hipModuleGetFunction.kname); + break; +// hipModuleGetFunctionCount[('unsigned int*', 'count'), ('hipModule_t', 'mod')] + case HIP_API_ID_hipModuleGetFunctionCount: + if (data->args.hipModuleGetFunctionCount.count) data->args.hipModuleGetFunctionCount.count__val = *(data->args.hipModuleGetFunctionCount.count); + break; +// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')] + case HIP_API_ID_hipModuleGetGlobal: + if (data->args.hipModuleGetGlobal.dptr) data->args.hipModuleGetGlobal.dptr__val = *(data->args.hipModuleGetGlobal.dptr); + if (data->args.hipModuleGetGlobal.bytes) data->args.hipModuleGetGlobal.bytes__val = *(data->args.hipModuleGetGlobal.bytes); + if (data->args.hipModuleGetGlobal.name) data->args.hipModuleGetGlobal.name__val = *(data->args.hipModuleGetGlobal.name); + break; +// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')] + case HIP_API_ID_hipModuleGetTexRef: + if (data->args.hipModuleGetTexRef.texRef) data->args.hipModuleGetTexRef.texRef__val = *(data->args.hipModuleGetTexRef.texRef); + if (data->args.hipModuleGetTexRef.name) data->args.hipModuleGetTexRef.name__val = *(data->args.hipModuleGetTexRef.name); + break; +// 
hipModuleLaunchCooperativeKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams')] + case HIP_API_ID_hipModuleLaunchCooperativeKernel: + if (data->args.hipModuleLaunchCooperativeKernel.kernelParams) data->args.hipModuleLaunchCooperativeKernel.kernelParams__val = *(data->args.hipModuleLaunchCooperativeKernel.kernelParams); + break; +// hipModuleLaunchCooperativeKernelMultiDevice[('hipFunctionLaunchParams*', 'launchParamsList'), ('unsigned int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: + if (data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList); + break; +// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')] + case HIP_API_ID_hipModuleLaunchKernel: + if (data->args.hipModuleLaunchKernel.kernelParams) data->args.hipModuleLaunchKernel.kernelParams__val = *(data->args.hipModuleLaunchKernel.kernelParams); + if (data->args.hipModuleLaunchKernel.extra) data->args.hipModuleLaunchKernel.extra__val = *(data->args.hipModuleLaunchKernel.extra); + break; +// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')] + case HIP_API_ID_hipModuleLoad: + if (data->args.hipModuleLoad.module) data->args.hipModuleLoad.module__val = *(data->args.hipModuleLoad.module); + if (data->args.hipModuleLoad.fname) 
data->args.hipModuleLoad.fname__val = *(data->args.hipModuleLoad.fname); + break; +// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')] + case HIP_API_ID_hipModuleLoadData: + if (data->args.hipModuleLoadData.module) data->args.hipModuleLoadData.module__val = *(data->args.hipModuleLoadData.module); + break; +// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')] + case HIP_API_ID_hipModuleLoadDataEx: + if (data->args.hipModuleLoadDataEx.module) data->args.hipModuleLoadDataEx.module__val = *(data->args.hipModuleLoadDataEx.module); + if (data->args.hipModuleLoadDataEx.options) data->args.hipModuleLoadDataEx.options__val = *(data->args.hipModuleLoadDataEx.options); + if (data->args.hipModuleLoadDataEx.optionsValues) data->args.hipModuleLoadDataEx.optionsValues__val = *(data->args.hipModuleLoadDataEx.optionsValues); + break; +// hipModuleLoadFatBinary[('hipModule_t*', 'module'), ('const void*', 'fatbin')] + case HIP_API_ID_hipModuleLoadFatBinary: + if (data->args.hipModuleLoadFatBinary.module) data->args.hipModuleLoadFatBinary.module__val = *(data->args.hipModuleLoadFatBinary.module); + break; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')] + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks); + break; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + if 
(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks); + break; +// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize); + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize); + break; +// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize); + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize); + break; +// hipModuleUnload[('hipModule_t', 'module')] + case HIP_API_ID_hipModuleUnload: + break; +// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')] + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: + if 
(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks); + break; +// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')] + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks); + break; +// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: + if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize) data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.gridSize); + if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize) data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.blockSize); + break; +// hipPeekAtLastError[] + case HIP_API_ID_hipPeekAtLastError: + break; +// hipPointerGetAttribute[('void*', 'data'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] + case HIP_API_ID_hipPointerGetAttribute: + break; +// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')] + case HIP_API_ID_hipPointerGetAttributes: + if (data->args.hipPointerGetAttributes.attributes) data->args.hipPointerGetAttributes.attributes__val = *(data->args.hipPointerGetAttributes.attributes); + break; +// hipPointerSetAttribute[('const void*', 'value'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] + case 
HIP_API_ID_hipPointerSetAttribute: + break; +// hipProfilerStart[] + case HIP_API_ID_hipProfilerStart: + break; +// hipProfilerStop[] + case HIP_API_ID_hipProfilerStop: + break; +// hipRuntimeGetVersion[('int*', 'runtimeVersion')] + case HIP_API_ID_hipRuntimeGetVersion: + if (data->args.hipRuntimeGetVersion.runtimeVersion) data->args.hipRuntimeGetVersion.runtimeVersion__val = *(data->args.hipRuntimeGetVersion.runtimeVersion); + break; +// hipSetDevice[('int', 'deviceId')] + case HIP_API_ID_hipSetDevice: + break; +// hipSetDeviceFlags[('unsigned int', 'flags')] + case HIP_API_ID_hipSetDeviceFlags: + break; +// hipSetValidDevices[('int*', 'device_arr'), ('int', 'len')] + case HIP_API_ID_hipSetValidDevices: + if (data->args.hipSetValidDevices.device_arr) data->args.hipSetValidDevices.device_arr__val = *(data->args.hipSetValidDevices.device_arr); + break; +// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')] + case HIP_API_ID_hipSetupArgument: + break; +// hipSignalExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreSignalParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipSignalExternalSemaphoresAsync: + if (data->args.hipSignalExternalSemaphoresAsync.extSemArray) data->args.hipSignalExternalSemaphoresAsync.extSemArray__val = *(data->args.hipSignalExternalSemaphoresAsync.extSemArray); + if (data->args.hipSignalExternalSemaphoresAsync.paramsArray) data->args.hipSignalExternalSemaphoresAsync.paramsArray__val = *(data->args.hipSignalExternalSemaphoresAsync.paramsArray); + break; +// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamAddCallback: + break; +// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('void*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamAttachMemAsync: + 
break; +// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'), ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamBatchMemOp: + if (data->args.hipStreamBatchMemOp.paramArray) data->args.hipStreamBatchMemOp.paramArray__val = *(data->args.hipStreamBatchMemOp.paramArray); + break; +// hipStreamBeginCapture[('hipStream_t', 'stream'), ('hipStreamCaptureMode', 'mode')] + case HIP_API_ID_hipStreamBeginCapture: + break; +// hipStreamBeginCaptureToGraph[('hipStream_t', 'stream'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'dependencies'), ('const hipGraphEdgeData*', 'dependencyData'), ('size_t', 'numDependencies'), ('hipStreamCaptureMode', 'mode')] + case HIP_API_ID_hipStreamBeginCaptureToGraph: + if (data->args.hipStreamBeginCaptureToGraph.dependencies) data->args.hipStreamBeginCaptureToGraph.dependencies__val = *(data->args.hipStreamBeginCaptureToGraph.dependencies); + if (data->args.hipStreamBeginCaptureToGraph.dependencyData) data->args.hipStreamBeginCaptureToGraph.dependencyData__val = *(data->args.hipStreamBeginCaptureToGraph.dependencyData); + break; +// hipStreamCreate[('hipStream_t*', 'stream')] + case HIP_API_ID_hipStreamCreate: + if (data->args.hipStreamCreate.stream) data->args.hipStreamCreate.stream__val = *(data->args.hipStreamCreate.stream); + break; +// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamCreateWithFlags: + if (data->args.hipStreamCreateWithFlags.stream) data->args.hipStreamCreateWithFlags.stream__val = *(data->args.hipStreamCreateWithFlags.stream); + break; +// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')] + case HIP_API_ID_hipStreamCreateWithPriority: + if (data->args.hipStreamCreateWithPriority.stream) data->args.hipStreamCreateWithPriority.stream__val = *(data->args.hipStreamCreateWithPriority.stream); + break; +// hipStreamDestroy[('hipStream_t', 
'stream')] + case HIP_API_ID_hipStreamDestroy: + break; +// hipStreamEndCapture[('hipStream_t', 'stream'), ('hipGraph_t*', 'pGraph')] + case HIP_API_ID_hipStreamEndCapture: + if (data->args.hipStreamEndCapture.pGraph) data->args.hipStreamEndCapture.pGraph__val = *(data->args.hipStreamEndCapture.pGraph); + break; +// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('hipLaunchAttributeValue*', 'value_out')] + case HIP_API_ID_hipStreamGetAttribute: + if (data->args.hipStreamGetAttribute.value_out) data->args.hipStreamGetAttribute.value_out__val = *(data->args.hipStreamGetAttribute.value_out); + break; +// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')] + case HIP_API_ID_hipStreamGetCaptureInfo: + if (data->args.hipStreamGetCaptureInfo.pCaptureStatus) data->args.hipStreamGetCaptureInfo.pCaptureStatus__val = *(data->args.hipStreamGetCaptureInfo.pCaptureStatus); + if (data->args.hipStreamGetCaptureInfo.pId) data->args.hipStreamGetCaptureInfo.pId__val = *(data->args.hipStreamGetCaptureInfo.pId); + break; +// hipStreamGetCaptureInfo_v2[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'captureStatus_out'), ('unsigned long long*', 'id_out'), ('hipGraph_t*', 'graph_out'), ('const hipGraphNode_t**', 'dependencies_out'), ('size_t*', 'numDependencies_out')] + case HIP_API_ID_hipStreamGetCaptureInfo_v2: + if (data->args.hipStreamGetCaptureInfo_v2.captureStatus_out) data->args.hipStreamGetCaptureInfo_v2.captureStatus_out__val = *(data->args.hipStreamGetCaptureInfo_v2.captureStatus_out); + if (data->args.hipStreamGetCaptureInfo_v2.id_out) data->args.hipStreamGetCaptureInfo_v2.id_out__val = *(data->args.hipStreamGetCaptureInfo_v2.id_out); + if (data->args.hipStreamGetCaptureInfo_v2.graph_out) data->args.hipStreamGetCaptureInfo_v2.graph_out__val = *(data->args.hipStreamGetCaptureInfo_v2.graph_out); + if (data->args.hipStreamGetCaptureInfo_v2.dependencies_out) 
data->args.hipStreamGetCaptureInfo_v2.dependencies_out__val = *(data->args.hipStreamGetCaptureInfo_v2.dependencies_out); + if (data->args.hipStreamGetCaptureInfo_v2.numDependencies_out) data->args.hipStreamGetCaptureInfo_v2.numDependencies_out__val = *(data->args.hipStreamGetCaptureInfo_v2.numDependencies_out); + break; +// hipStreamGetDevice[('hipStream_t', 'stream'), ('hipDevice_t*', 'device')] + case HIP_API_ID_hipStreamGetDevice: + if (data->args.hipStreamGetDevice.device) data->args.hipStreamGetDevice.device__val = *(data->args.hipStreamGetDevice.device); + break; +// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')] + case HIP_API_ID_hipStreamGetFlags: + if (data->args.hipStreamGetFlags.flags) data->args.hipStreamGetFlags.flags__val = *(data->args.hipStreamGetFlags.flags); + break; +// hipStreamGetId[('hipStream_t', 'stream'), ('unsigned long long*', 'streamId')] + case HIP_API_ID_hipStreamGetId: + if (data->args.hipStreamGetId.streamId) data->args.hipStreamGetId.streamId__val = *(data->args.hipStreamGetId.streamId); + break; +// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')] + case HIP_API_ID_hipStreamGetPriority: + if (data->args.hipStreamGetPriority.priority) data->args.hipStreamGetPriority.priority__val = *(data->args.hipStreamGetPriority.priority); + break; +// hipStreamIsCapturing[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus')] + case HIP_API_ID_hipStreamIsCapturing: + if (data->args.hipStreamIsCapturing.pCaptureStatus) data->args.hipStreamIsCapturing.pCaptureStatus__val = *(data->args.hipStreamIsCapturing.pCaptureStatus); + break; +// hipStreamQuery[('hipStream_t', 'stream')] + case HIP_API_ID_hipStreamQuery: + break; +// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')] + case HIP_API_ID_hipStreamSetAttribute: + if (data->args.hipStreamSetAttribute.value) data->args.hipStreamSetAttribute.value__val = 
*(data->args.hipStreamSetAttribute.value); + break; +// hipStreamSynchronize[('hipStream_t', 'stream')] + case HIP_API_ID_hipStreamSynchronize: + break; +// hipStreamUpdateCaptureDependencies[('hipStream_t', 'stream'), ('hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamUpdateCaptureDependencies: + if (data->args.hipStreamUpdateCaptureDependencies.dependencies) data->args.hipStreamUpdateCaptureDependencies.dependencies__val = *(data->args.hipStreamUpdateCaptureDependencies.dependencies); + break; +// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWaitEvent: + break; +// hipStreamWaitValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags'), ('unsigned int', 'mask')] + case HIP_API_ID_hipStreamWaitValue32: + break; +// hipStreamWaitValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags'), ('uint64_t', 'mask')] + case HIP_API_ID_hipStreamWaitValue64: + break; +// hipStreamWriteValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWriteValue32: + break; +// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWriteValue64: + break; +// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetAddress: + if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr); + if (data->args.hipTexRefGetAddress.texRef) data->args.hipTexRefGetAddress.texRef__val = *(data->args.hipTexRefGetAddress.texRef); + break; +// hipTexRefGetArray[('hipArray_t*', 'pArray'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetArray: + if 
(data->args.hipTexRefGetArray.pArray) data->args.hipTexRefGetArray.pArray__val = *(data->args.hipTexRefGetArray.pArray); + if (data->args.hipTexRefGetArray.texRef) data->args.hipTexRefGetArray.texRef__val = *(data->args.hipTexRefGetArray.texRef); + break; +// hipTexRefGetBorderColor[('float*', 'pBorderColor'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetBorderColor: + if (data->args.hipTexRefGetBorderColor.pBorderColor) data->args.hipTexRefGetBorderColor.pBorderColor__val = *(data->args.hipTexRefGetBorderColor.pBorderColor); + if (data->args.hipTexRefGetBorderColor.texRef) data->args.hipTexRefGetBorderColor.texRef__val = *(data->args.hipTexRefGetBorderColor.texRef); + break; +// hipTexRefGetFlags[('unsigned int*', 'pFlags'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetFlags: + if (data->args.hipTexRefGetFlags.pFlags) data->args.hipTexRefGetFlags.pFlags__val = *(data->args.hipTexRefGetFlags.pFlags); + if (data->args.hipTexRefGetFlags.texRef) data->args.hipTexRefGetFlags.texRef__val = *(data->args.hipTexRefGetFlags.texRef); + break; +// hipTexRefGetFormat[('hipArray_Format*', 'pFormat'), ('int*', 'pNumChannels'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetFormat: + if (data->args.hipTexRefGetFormat.pFormat) data->args.hipTexRefGetFormat.pFormat__val = *(data->args.hipTexRefGetFormat.pFormat); + if (data->args.hipTexRefGetFormat.pNumChannels) data->args.hipTexRefGetFormat.pNumChannels__val = *(data->args.hipTexRefGetFormat.pNumChannels); + if (data->args.hipTexRefGetFormat.texRef) data->args.hipTexRefGetFormat.texRef__val = *(data->args.hipTexRefGetFormat.texRef); + break; +// hipTexRefGetMaxAnisotropy[('int*', 'pmaxAnsio'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMaxAnisotropy: + if (data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio) data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio__val = *(data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio); + if 
(data->args.hipTexRefGetMaxAnisotropy.texRef) data->args.hipTexRefGetMaxAnisotropy.texRef__val = *(data->args.hipTexRefGetMaxAnisotropy.texRef); + break; +// hipTexRefGetMipMappedArray[('hipMipmappedArray_t*', 'pArray'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipMappedArray: + if (data->args.hipTexRefGetMipMappedArray.pArray) data->args.hipTexRefGetMipMappedArray.pArray__val = *(data->args.hipTexRefGetMipMappedArray.pArray); + if (data->args.hipTexRefGetMipMappedArray.texRef) data->args.hipTexRefGetMipMappedArray.texRef__val = *(data->args.hipTexRefGetMipMappedArray.texRef); + break; +// hipTexRefGetMipmapLevelBias[('float*', 'pbias'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipmapLevelBias: + if (data->args.hipTexRefGetMipmapLevelBias.pbias) data->args.hipTexRefGetMipmapLevelBias.pbias__val = *(data->args.hipTexRefGetMipmapLevelBias.pbias); + if (data->args.hipTexRefGetMipmapLevelBias.texRef) data->args.hipTexRefGetMipmapLevelBias.texRef__val = *(data->args.hipTexRefGetMipmapLevelBias.texRef); + break; +// hipTexRefGetMipmapLevelClamp[('float*', 'pminMipmapLevelClamp'), ('float*', 'pmaxMipmapLevelClamp'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: + if (data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp) data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp__val = *(data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp); + if (data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp) data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp__val = *(data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp); + if (data->args.hipTexRefGetMipmapLevelClamp.texRef) data->args.hipTexRefGetMipmapLevelClamp.texRef__val = *(data->args.hipTexRefGetMipmapLevelClamp.texRef); + break; +// hipTexRefSetAddress[('size_t*', 'ByteOffset'), ('textureReference*', 'texRef'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'bytes')] + case 
HIP_API_ID_hipTexRefSetAddress: + if (data->args.hipTexRefSetAddress.ByteOffset) data->args.hipTexRefSetAddress.ByteOffset__val = *(data->args.hipTexRefSetAddress.ByteOffset); + if (data->args.hipTexRefSetAddress.texRef) data->args.hipTexRefSetAddress.texRef__val = *(data->args.hipTexRefSetAddress.texRef); + break; +// hipTexRefSetAddress2D[('textureReference*', 'texRef'), ('const HIP_ARRAY_DESCRIPTOR*', 'desc'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'Pitch')] + case HIP_API_ID_hipTexRefSetAddress2D: + if (data->args.hipTexRefSetAddress2D.texRef) data->args.hipTexRefSetAddress2D.texRef__val = *(data->args.hipTexRefSetAddress2D.texRef); + if (data->args.hipTexRefSetAddress2D.desc) data->args.hipTexRefSetAddress2D.desc__val = *(data->args.hipTexRefSetAddress2D.desc); + break; +// hipTexRefSetArray[('textureReference*', 'tex'), ('hipArray_const_t', 'array'), ('unsigned int', 'flags')] + case HIP_API_ID_hipTexRefSetArray: + if (data->args.hipTexRefSetArray.tex) data->args.hipTexRefSetArray.tex__val = *(data->args.hipTexRefSetArray.tex); + break; +// hipTexRefSetBorderColor[('textureReference*', 'texRef'), ('float*', 'pBorderColor')] + case HIP_API_ID_hipTexRefSetBorderColor: + if (data->args.hipTexRefSetBorderColor.texRef) data->args.hipTexRefSetBorderColor.texRef__val = *(data->args.hipTexRefSetBorderColor.texRef); + if (data->args.hipTexRefSetBorderColor.pBorderColor) data->args.hipTexRefSetBorderColor.pBorderColor__val = *(data->args.hipTexRefSetBorderColor.pBorderColor); + break; +// hipTexRefSetFlags[('textureReference*', 'texRef'), ('unsigned int', 'Flags')] + case HIP_API_ID_hipTexRefSetFlags: + if (data->args.hipTexRefSetFlags.texRef) data->args.hipTexRefSetFlags.texRef__val = *(data->args.hipTexRefSetFlags.texRef); + break; +// hipTexRefSetFormat[('textureReference*', 'texRef'), ('hipArray_Format', 'fmt'), ('int', 'NumPackedComponents')] + case HIP_API_ID_hipTexRefSetFormat: + if (data->args.hipTexRefSetFormat.texRef) data->args.hipTexRefSetFormat.texRef__val 
= *(data->args.hipTexRefSetFormat.texRef); + break; +// hipTexRefSetMaxAnisotropy[('textureReference*', 'texRef'), ('unsigned int', 'maxAniso')] + case HIP_API_ID_hipTexRefSetMaxAnisotropy: + if (data->args.hipTexRefSetMaxAnisotropy.texRef) data->args.hipTexRefSetMaxAnisotropy.texRef__val = *(data->args.hipTexRefSetMaxAnisotropy.texRef); + break; +// hipTexRefSetMipmapLevelBias[('textureReference*', 'texRef'), ('float', 'bias')] + case HIP_API_ID_hipTexRefSetMipmapLevelBias: + if (data->args.hipTexRefSetMipmapLevelBias.texRef) data->args.hipTexRefSetMipmapLevelBias.texRef__val = *(data->args.hipTexRefSetMipmapLevelBias.texRef); + break; +// hipTexRefSetMipmapLevelClamp[('textureReference*', 'texRef'), ('float', 'minMipMapLevelClamp'), ('float', 'maxMipMapLevelClamp')] + case HIP_API_ID_hipTexRefSetMipmapLevelClamp: + if (data->args.hipTexRefSetMipmapLevelClamp.texRef) data->args.hipTexRefSetMipmapLevelClamp.texRef__val = *(data->args.hipTexRefSetMipmapLevelClamp.texRef); + break; +// hipTexRefSetMipmappedArray[('textureReference*', 'texRef'), ('hipMipmappedArray*', 'mipmappedArray'), ('unsigned int', 'Flags')] + case HIP_API_ID_hipTexRefSetMipmappedArray: + if (data->args.hipTexRefSetMipmappedArray.texRef) data->args.hipTexRefSetMipmappedArray.texRef__val = *(data->args.hipTexRefSetMipmappedArray.texRef); + if (data->args.hipTexRefSetMipmappedArray.mipmappedArray) data->args.hipTexRefSetMipmappedArray.mipmappedArray__val = *(data->args.hipTexRefSetMipmappedArray.mipmappedArray); + break; +// hipThreadExchangeStreamCaptureMode[('hipStreamCaptureMode*', 'mode')] + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: + if (data->args.hipThreadExchangeStreamCaptureMode.mode) data->args.hipThreadExchangeStreamCaptureMode.mode__val = *(data->args.hipThreadExchangeStreamCaptureMode.mode); + break; +// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr'), ('hipHostFn_t', 'destroy'), ('unsigned int', 'initialRefcount'), ('unsigned int', 'flags')] + case 
HIP_API_ID_hipUserObjectCreate: + if (data->args.hipUserObjectCreate.object_out) data->args.hipUserObjectCreate.object_out__val = *(data->args.hipUserObjectCreate.object_out); + break; +// hipUserObjectRelease[('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipUserObjectRelease: + break; +// hipUserObjectRetain[('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipUserObjectRetain: + break; +// hipWaitExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreWaitParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipWaitExternalSemaphoresAsync: + if (data->args.hipWaitExternalSemaphoresAsync.extSemArray) data->args.hipWaitExternalSemaphoresAsync.extSemArray__val = *(data->args.hipWaitExternalSemaphoresAsync.extSemArray); + if (data->args.hipWaitExternalSemaphoresAsync.paramsArray) data->args.hipWaitExternalSemaphoresAsync.paramsArray__val = *(data->args.hipWaitExternalSemaphoresAsync.paramsArray); + break; + default: break; + }; +} + +#include +#include +// HIP API string method, method name and parameters +static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) { + std::ostringstream oss; + switch (id) { + case HIP_API_ID___hipPopCallConfiguration: + oss << "__hipPopCallConfiguration("; + if (data->args.__hipPopCallConfiguration.gridDim == NULL) oss << "gridDim=NULL"; + else { oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.gridDim__val); } + if (data->args.__hipPopCallConfiguration.blockDim == NULL) oss << ", blockDim=NULL"; + else { oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.blockDim__val); } + if (data->args.__hipPopCallConfiguration.sharedMem == NULL) oss << ", sharedMem=NULL"; + else { oss << ", sharedMem="; roctracer::hip_support::detail::operator<<(oss, 
data->args.__hipPopCallConfiguration.sharedMem__val); } + if (data->args.__hipPopCallConfiguration.stream == NULL) oss << ", stream=NULL"; + else { oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.stream__val); } + oss << ")"; + break; + case HIP_API_ID___hipPushCallConfiguration: + oss << "__hipPushCallConfiguration("; + oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.gridDim); + oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.blockDim); + oss << ", sharedMem="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.sharedMem); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.stream); + oss << ")"; + break; + case HIP_API_ID_hipArray3DCreate: + oss << "hipArray3DCreate("; + if (data->args.hipArray3DCreate.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DCreate.array__val); } + if (data->args.hipArray3DCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL"; + else { oss << ", pAllocateArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DCreate.pAllocateArray__val); } + oss << ")"; + break; + case HIP_API_ID_hipArray3DGetDescriptor: + oss << "hipArray3DGetDescriptor("; + if (data->args.hipArray3DGetDescriptor.pArrayDescriptor == NULL) oss << "pArrayDescriptor=NULL"; + else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.pArrayDescriptor__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.array); + oss << ")"; + break; + case HIP_API_ID_hipArrayCreate: + oss << "hipArrayCreate("; + if (data->args.hipArrayCreate.pHandle == NULL) oss << "pHandle=NULL"; + else { oss << 
"pHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayCreate.pHandle__val); } + if (data->args.hipArrayCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL"; + else { oss << ", pAllocateArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayCreate.pAllocateArray__val); } + oss << ")"; + break; + case HIP_API_ID_hipArrayDestroy: + oss << "hipArrayDestroy("; + oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayDestroy.array); + oss << ")"; + break; + case HIP_API_ID_hipArrayGetDescriptor: + oss << "hipArrayGetDescriptor("; + if (data->args.hipArrayGetDescriptor.pArrayDescriptor == NULL) oss << "pArrayDescriptor=NULL"; + else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.pArrayDescriptor__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.array); + oss << ")"; + break; + case HIP_API_ID_hipArrayGetInfo: + oss << "hipArrayGetInfo("; + if (data->args.hipArrayGetInfo.desc == NULL) oss << "desc=NULL"; + else { oss << "desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.desc__val); } + if (data->args.hipArrayGetInfo.extent == NULL) oss << ", extent=NULL"; + else { oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.extent__val); } + if (data->args.hipArrayGetInfo.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.flags__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.array); + oss << ")"; + break; + case HIP_API_ID_hipChooseDeviceR0000: + oss << "hipChooseDeviceR0000("; + if (data->args.hipChooseDeviceR0000.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipChooseDeviceR0000.device__val); } + if (data->args.hipChooseDeviceR0000.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0000.prop__val); } + oss << ")"; + break; + case HIP_API_ID_hipChooseDeviceR0600: + oss << "hipChooseDeviceR0600("; + if (data->args.hipChooseDeviceR0600.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0600.device__val); } + if (data->args.hipChooseDeviceR0600.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0600.prop__val); } + oss << ")"; + break; + case HIP_API_ID_hipConfigureCall: + oss << "hipConfigureCall("; + oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.gridDim); + oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.blockDim); + oss << ", sharedMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.sharedMem); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.stream); + oss << ")"; + break; + case HIP_API_ID_hipCreateSurfaceObject: + oss << "hipCreateSurfaceObject("; + if (data->args.hipCreateSurfaceObject.pSurfObject == NULL) oss << "pSurfObject=NULL"; + else { oss << "pSurfObject="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCreateSurfaceObject.pSurfObject__val); } + if (data->args.hipCreateSurfaceObject.pResDesc == NULL) oss << ", pResDesc=NULL"; + else { oss << ", pResDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCreateSurfaceObject.pResDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxCreate: + oss << "hipCtxCreate("; + if (data->args.hipCtxCreate.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.ctx__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.flags); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.device); + oss << ")"; + break; + case HIP_API_ID_hipCtxDestroy: + oss << "hipCtxDestroy("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxDestroy.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxDisablePeerAccess: + oss << "hipCtxDisablePeerAccess("; + oss << "peerCtx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxDisablePeerAccess.peerCtx); + oss << ")"; + break; + case HIP_API_ID_hipCtxEnablePeerAccess: + oss << "hipCtxEnablePeerAccess("; + oss << "peerCtx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxEnablePeerAccess.peerCtx); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxEnablePeerAccess.flags); + oss << ")"; + break; + case HIP_API_ID_hipCtxGetApiVersion: + oss << "hipCtxGetApiVersion("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetApiVersion.ctx); + if (data->args.hipCtxGetApiVersion.apiVersion == NULL) oss << ", apiVersion=NULL"; + else { oss << ", apiVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetApiVersion.apiVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetCacheConfig: + oss << "hipCtxGetCacheConfig("; + if (data->args.hipCtxGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL"; + else { oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetCacheConfig.cacheConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetCurrent: + oss << "hipCtxGetCurrent("; + if (data->args.hipCtxGetCurrent.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipCtxGetCurrent.ctx__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetDevice: + oss << "hipCtxGetDevice("; + if (data->args.hipCtxGetDevice.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetDevice.device__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetFlags: + oss << "hipCtxGetFlags("; + if (data->args.hipCtxGetFlags.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetSharedMemConfig: + oss << "hipCtxGetSharedMemConfig("; + if (data->args.hipCtxGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL"; + else { oss << "pConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetSharedMemConfig.pConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxPopCurrent: + oss << "hipCtxPopCurrent("; + if (data->args.hipCtxPopCurrent.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxPopCurrent.ctx__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxPushCurrent: + oss << "hipCtxPushCurrent("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxPushCurrent.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetCacheConfig: + oss << "hipCtxSetCacheConfig("; + oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetCacheConfig.cacheConfig); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetCurrent: + oss << "hipCtxSetCurrent("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetCurrent.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetSharedMemConfig: + oss << "hipCtxSetSharedMemConfig("; + oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetSharedMemConfig.config); + oss << ")"; + 
break; + case HIP_API_ID_hipCtxSynchronize: + oss << "hipCtxSynchronize("; + oss << ")"; + break; + case HIP_API_ID_hipDestroyExternalMemory: + oss << "hipDestroyExternalMemory("; + oss << "extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroyExternalMemory.extMem); + oss << ")"; + break; + case HIP_API_ID_hipDestroyExternalSemaphore: + oss << "hipDestroyExternalSemaphore("; + oss << "extSem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroyExternalSemaphore.extSem); + oss << ")"; + break; + case HIP_API_ID_hipDestroySurfaceObject: + oss << "hipDestroySurfaceObject("; + oss << "surfaceObject="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroySurfaceObject.surfaceObject); + oss << ")"; + break; + case HIP_API_ID_hipDeviceCanAccessPeer: + oss << "hipDeviceCanAccessPeer("; + if (data->args.hipDeviceCanAccessPeer.canAccessPeer == NULL) oss << "canAccessPeer=NULL"; + else { oss << "canAccessPeer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.canAccessPeer__val); } + oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.deviceId); + oss << ", peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.peerDeviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceComputeCapability: + oss << "hipDeviceComputeCapability("; + if (data->args.hipDeviceComputeCapability.major == NULL) oss << "major=NULL"; + else { oss << "major="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.major__val); } + if (data->args.hipDeviceComputeCapability.minor == NULL) oss << ", minor=NULL"; + else { oss << ", minor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.minor__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.device); + oss << ")"; + break; + case 
HIP_API_ID_hipDeviceDisablePeerAccess: + oss << "hipDeviceDisablePeerAccess("; + oss << "peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceDisablePeerAccess.peerDeviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceEnablePeerAccess: + oss << "hipDeviceEnablePeerAccess("; + oss << "peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceEnablePeerAccess.peerDeviceId); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceEnablePeerAccess.flags); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGet: + oss << "hipDeviceGet("; + if (data->args.hipDeviceGet.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGet.device__val); } + oss << ", ordinal="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGet.ordinal); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetAttribute: + oss << "hipDeviceGetAttribute("; + if (data->args.hipDeviceGetAttribute.pi == NULL) oss << "pi=NULL"; + else { oss << "pi="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.pi__val); } + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.attr); + oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.deviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetByPCIBusId: + oss << "hipDeviceGetByPCIBusId("; + if (data->args.hipDeviceGetByPCIBusId.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetByPCIBusId.device__val); } + if (data->args.hipDeviceGetByPCIBusId.pciBusId == NULL) oss << ", pciBusId=NULL"; + else { oss << ", pciBusId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetByPCIBusId.pciBusId__val); } + oss << ")"; + break; + case 
HIP_API_ID_hipDeviceGetCacheConfig: + oss << "hipDeviceGetCacheConfig("; + if (data->args.hipDeviceGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL"; + else { oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetCacheConfig.cacheConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetDefaultMemPool: + oss << "hipDeviceGetDefaultMemPool("; + if (data->args.hipDeviceGetDefaultMemPool.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetDefaultMemPool.mem_pool__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetDefaultMemPool.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetGraphMemAttribute: + oss << "hipDeviceGetGraphMemAttribute("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.device); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetLimit: + oss << "hipDeviceGetLimit("; + if (data->args.hipDeviceGetLimit.pValue == NULL) oss << "pValue=NULL"; + else { oss << "pValue="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetLimit.pValue__val); } + oss << ", limit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetLimit.limit); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetMemPool: + oss << "hipDeviceGetMemPool("; + if (data->args.hipDeviceGetMemPool.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetMemPool.mem_pool__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDeviceGetMemPool.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetName: + oss << "hipDeviceGetName("; + if (data->args.hipDeviceGetName.name == NULL) oss << "name=NULL"; + else { oss << "name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.name__val); } + oss << ", len="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.len); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetP2PAttribute: + oss << "hipDeviceGetP2PAttribute("; + if (data->args.hipDeviceGetP2PAttribute.value == NULL) oss << "value=NULL"; + else { oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.value__val); } + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.attr); + oss << ", srcDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.srcDevice); + oss << ", dstDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.dstDevice); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetPCIBusId: + oss << "hipDeviceGetPCIBusId("; + if (data->args.hipDeviceGetPCIBusId.pciBusId == NULL) oss << "pciBusId=NULL"; + else { oss << "pciBusId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.pciBusId__val); } + oss << ", len="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.len); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetSharedMemConfig: + oss << "hipDeviceGetSharedMemConfig("; + if (data->args.hipDeviceGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL"; + else { oss << "pConfig="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDeviceGetSharedMemConfig.pConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetStreamPriorityRange: + oss << "hipDeviceGetStreamPriorityRange("; + if (data->args.hipDeviceGetStreamPriorityRange.leastPriority == NULL) oss << "leastPriority=NULL"; + else { oss << "leastPriority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetStreamPriorityRange.leastPriority__val); } + if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority == NULL) oss << ", greatestPriority=NULL"; + else { oss << ", greatestPriority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetUuid: + oss << "hipDeviceGetUuid("; + if (data->args.hipDeviceGetUuid.uuid == NULL) oss << "uuid=NULL"; + else { oss << "uuid="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetUuid.uuid__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetUuid.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGraphMemTrim: + oss << "hipDeviceGraphMemTrim("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGraphMemTrim.device); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxGetState: + oss << "hipDevicePrimaryCtxGetState("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.dev); + if (data->args.hipDevicePrimaryCtxGetState.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.flags__val); } + if (data->args.hipDevicePrimaryCtxGetState.active == NULL) oss << ", active=NULL"; + else { oss << ", active="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.active__val); } + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxRelease: + 
oss << "hipDevicePrimaryCtxRelease("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRelease.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxReset: + oss << "hipDevicePrimaryCtxReset("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxReset.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxRetain: + oss << "hipDevicePrimaryCtxRetain("; + if (data->args.hipDevicePrimaryCtxRetain.pctx == NULL) oss << "pctx=NULL"; + else { oss << "pctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRetain.pctx__val); } + oss << ", dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRetain.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: + oss << "hipDevicePrimaryCtxSetFlags("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxSetFlags.dev); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxSetFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipDeviceReset: + oss << "hipDeviceReset("; + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetCacheConfig: + oss << "hipDeviceSetCacheConfig("; + oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetCacheConfig.cacheConfig); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetGraphMemAttribute: + oss << "hipDeviceSetGraphMemAttribute("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.device); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetLimit: + oss << "hipDeviceSetLimit("; + oss << 
"limit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetLimit.limit); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetLimit.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetMemPool: + oss << "hipDeviceSetMemPool("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetMemPool.device); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetMemPool.mem_pool); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetSharedMemConfig: + oss << "hipDeviceSetSharedMemConfig("; + oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetSharedMemConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSynchronize: + oss << "hipDeviceSynchronize("; + oss << ")"; + break; + case HIP_API_ID_hipDeviceTotalMem: + oss << "hipDeviceTotalMem("; + if (data->args.hipDeviceTotalMem.bytes == NULL) oss << "bytes=NULL"; + else { oss << "bytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceTotalMem.bytes__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceTotalMem.device); + oss << ")"; + break; + case HIP_API_ID_hipDriverGetVersion: + oss << "hipDriverGetVersion("; + if (data->args.hipDriverGetVersion.driverVersion == NULL) oss << "driverVersion=NULL"; + else { oss << "driverVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDriverGetVersion.driverVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphAddMemFreeNode: + oss << "hipDrvGraphAddMemFreeNode("; + if (data->args.hipDrvGraphAddMemFreeNode.phGraphNode == NULL) oss << "phGraphNode=NULL"; + else { oss << "phGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemFreeNode.phGraphNode__val); } + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDrvGraphAddMemFreeNode.hGraph); + if (data->args.hipDrvGraphAddMemFreeNode.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemFreeNode.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemFreeNode.numDependencies); + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemFreeNode.dptr); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphAddMemcpyNode: + oss << "hipDrvGraphAddMemcpyNode("; + if (data->args.hipDrvGraphAddMemcpyNode.phGraphNode == NULL) oss << "phGraphNode=NULL"; + else { oss << "phGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.phGraphNode__val); } + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.hGraph); + if (data->args.hipDrvGraphAddMemcpyNode.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.numDependencies); + if (data->args.hipDrvGraphAddMemcpyNode.copyParams == NULL) oss << ", copyParams=NULL"; + else { oss << ", copyParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.copyParams__val); } + oss << ", ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.ctx); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphAddMemsetNode: + oss << "hipDrvGraphAddMemsetNode("; + if (data->args.hipDrvGraphAddMemsetNode.phGraphNode == NULL) oss << "phGraphNode=NULL"; + else { oss << "phGraphNode="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDrvGraphAddMemsetNode.phGraphNode__val); } + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemsetNode.hGraph); + if (data->args.hipDrvGraphAddMemsetNode.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemsetNode.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemsetNode.numDependencies); + if (data->args.hipDrvGraphAddMemsetNode.memsetParams == NULL) oss << ", memsetParams=NULL"; + else { oss << ", memsetParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemsetNode.memsetParams__val); } + oss << ", ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemsetNode.ctx); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphExecMemcpyNodeSetParams: + oss << "hipDrvGraphExecMemcpyNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemcpyNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemcpyNodeSetParams.hNode); + if (data->args.hipDrvGraphExecMemcpyNodeSetParams.copyParams == NULL) oss << ", copyParams=NULL"; + else { oss << ", copyParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemcpyNodeSetParams.copyParams__val); } + oss << ", ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemcpyNodeSetParams.ctx); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphExecMemsetNodeSetParams: + oss << "hipDrvGraphExecMemsetNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemsetNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDrvGraphExecMemsetNodeSetParams.hNode); + if (data->args.hipDrvGraphExecMemsetNodeSetParams.memsetParams == NULL) oss << ", memsetParams=NULL"; + else { oss << ", memsetParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemsetNodeSetParams.memsetParams__val); } + oss << ", ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphExecMemsetNodeSetParams.ctx); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: + oss << "hipDrvGraphMemcpyNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeGetParams.hNode); + if (data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: + oss << "hipDrvGraphMemcpyNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeSetParams.hNode); + if (data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvLaunchKernelEx: + oss << "hipDrvLaunchKernelEx("; + if (data->args.hipDrvLaunchKernelEx.config == NULL) oss << "config=NULL"; + else { oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvLaunchKernelEx.config__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvLaunchKernelEx.f); + if (data->args.hipDrvLaunchKernelEx.params == NULL) oss << ", params=NULL"; + else { oss << ", params="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvLaunchKernelEx.params__val); } + if 
(data->args.hipDrvLaunchKernelEx.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvLaunchKernelEx.extra__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy2DUnaligned: + oss << "hipDrvMemcpy2DUnaligned("; + if (data->args.hipDrvMemcpy2DUnaligned.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy2DUnaligned.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy3D: + oss << "hipDrvMemcpy3D("; + if (data->args.hipDrvMemcpy3D.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3D.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy3DAsync: + oss << "hipDrvMemcpy3DAsync("; + if (data->args.hipDrvMemcpy3DAsync.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3DAsync.pCopy__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipDrvPointerGetAttributes: + oss << "hipDrvPointerGetAttributes("; + oss << "numAttributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.numAttributes); + if (data->args.hipDrvPointerGetAttributes.attributes == NULL) oss << ", attributes=NULL"; + else { oss << ", attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.attributes__val); } + if (data->args.hipDrvPointerGetAttributes.data == NULL) oss << ", data=NULL"; + else { oss << ", data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.data__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.ptr); + oss << ")"; + break; + case 
HIP_API_ID_hipEventCreate: + oss << "hipEventCreate("; + if (data->args.hipEventCreate.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreate.event__val); } + oss << ")"; + break; + case HIP_API_ID_hipEventCreateWithFlags: + oss << "hipEventCreateWithFlags("; + if (data->args.hipEventCreateWithFlags.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreateWithFlags.event__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipEventDestroy: + oss << "hipEventDestroy("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventDestroy.event); + oss << ")"; + break; + case HIP_API_ID_hipEventElapsedTime: + oss << "hipEventElapsedTime("; + if (data->args.hipEventElapsedTime.ms == NULL) oss << "ms=NULL"; + else { oss << "ms="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.ms__val); } + oss << ", start="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.start); + oss << ", stop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.stop); + oss << ")"; + break; + case HIP_API_ID_hipEventQuery: + oss << "hipEventQuery("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventQuery.event); + oss << ")"; + break; + case HIP_API_ID_hipEventRecord: + oss << "hipEventRecord("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecord.event); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecord.stream); + oss << ")"; + break; + case HIP_API_ID_hipEventRecordWithFlags: + oss << "hipEventRecordWithFlags("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipEventRecordWithFlags.event); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecordWithFlags.stream); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecordWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipEventSynchronize: + oss << "hipEventSynchronize("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventSynchronize.event); + oss << ")"; + break; + case HIP_API_ID_hipExtGetLastError: + oss << "hipExtGetLastError("; + oss << ")"; + break; + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: + oss << "hipExtGetLinkTypeAndHopCount("; + oss << "device1="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.device1); + oss << ", device2="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.device2); + if (data->args.hipExtGetLinkTypeAndHopCount.linktype == NULL) oss << ", linktype=NULL"; + else { oss << ", linktype="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.linktype__val); } + if (data->args.hipExtGetLinkTypeAndHopCount.hopcount == NULL) oss << ", hopcount=NULL"; + else { oss << ", hopcount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.hopcount__val); } + oss << ")"; + break; + case HIP_API_ID_hipExtLaunchKernel: + oss << "hipExtLaunchKernel("; + oss << "function_address="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.function_address); + oss << ", numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.numBlocks); + oss << ", dimBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.dimBlocks); + if (data->args.hipExtLaunchKernel.args == NULL) oss << ", args=NULL"; + else { oss << ", args="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipExtLaunchKernel.args__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.stream); + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.stopEvent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: + oss << "hipExtLaunchMultiKernelMultiDevice("; + if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtMallocWithFlags: + oss << "hipExtMallocWithFlags("; + if (data->args.hipExtMallocWithFlags.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.ptr__val); } + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.sizeBytes); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtModuleLaunchKernel: + oss << "hipExtModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.f); + oss << 
", globalWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeX); + oss << ", globalWorkSizeY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeY); + oss << ", globalWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeZ); + oss << ", localWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeX); + oss << ", localWorkSizeY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeY); + oss << ", localWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.sharedMemBytes); + oss << ", hStream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.hStream); + if (data->args.hipExtModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.kernelParams__val); } + if (data->args.hipExtModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.extra__val); } + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.stopEvent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtStreamCreateWithCUMask: + oss << "hipExtStreamCreateWithCUMask("; + if (data->args.hipExtStreamCreateWithCUMask.stream == 
NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamCreateWithCUMask.stream__val); } + oss << ", cuMaskSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamCreateWithCUMask.cuMaskSize); + if (data->args.hipExtStreamCreateWithCUMask.cuMask == NULL) oss << ", cuMask=NULL"; + else { oss << ", cuMask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamCreateWithCUMask.cuMask__val); } + oss << ")"; + break; + case HIP_API_ID_hipExtStreamGetCUMask: + oss << "hipExtStreamGetCUMask("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.stream); + oss << ", cuMaskSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.cuMaskSize); + if (data->args.hipExtStreamGetCUMask.cuMask == NULL) oss << ", cuMask=NULL"; + else { oss << ", cuMask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.cuMask__val); } + oss << ")"; + break; + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: + oss << "hipExternalMemoryGetMappedBuffer("; + if (data->args.hipExternalMemoryGetMappedBuffer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.devPtr__val); } + oss << ", extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.extMem); + if (data->args.hipExternalMemoryGetMappedBuffer.bufferDesc == NULL) oss << ", bufferDesc=NULL"; + else { oss << ", bufferDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: + oss << "hipExternalMemoryGetMappedMipmappedArray("; + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap == NULL) oss << "mipmap=NULL"; + else { oss << 
"mipmap="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap__val); } + oss << ", extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.extMem); + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc == NULL) oss << ", mipmapDesc=NULL"; + else { oss << ", mipmapDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipFree: + oss << "hipFree("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFree.ptr); + oss << ")"; + break; + case HIP_API_ID_hipFreeArray: + oss << "hipFreeArray("; + oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeArray.array); + oss << ")"; + break; + case HIP_API_ID_hipFreeAsync: + oss << "hipFreeAsync("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeAsync.dev_ptr); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipFreeHost: + oss << "hipFreeHost("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeHost.ptr); + oss << ")"; + break; + case HIP_API_ID_hipFreeMipmappedArray: + oss << "hipFreeMipmappedArray("; + oss << "mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeMipmappedArray.mipmappedArray); + oss << ")"; + break; + case HIP_API_ID_hipFuncGetAttribute: + oss << "hipFuncGetAttribute("; + if (data->args.hipFuncGetAttribute.value == NULL) oss << "value=NULL"; + else { oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.value__val); } + oss << ", attrib="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.attrib); + oss << ", hfunc="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.hfunc); + oss << ")"; + break; + case HIP_API_ID_hipFuncGetAttributes: + oss << "hipFuncGetAttributes("; + if (data->args.hipFuncGetAttributes.attr == NULL) oss << "attr=NULL"; + else { oss << "attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttributes.attr__val); } + oss << ", func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttributes.func); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetAttribute: + oss << "hipFuncSetAttribute("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.func); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetCacheConfig: + oss << "hipFuncSetCacheConfig("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetCacheConfig.func); + oss << ", config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetCacheConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetSharedMemConfig: + oss << "hipFuncSetSharedMemConfig("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetSharedMemConfig.func); + oss << ", config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetSharedMemConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipGLGetDevices: + oss << "hipGLGetDevices("; + if (data->args.hipGLGetDevices.pHipDeviceCount == NULL) oss << "pHipDeviceCount=NULL"; + else { oss << "pHipDeviceCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.pHipDeviceCount__val); } + if (data->args.hipGLGetDevices.pHipDevices == NULL) oss << ", pHipDevices=NULL"; + else { oss << ", pHipDevices="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.pHipDevices__val); } + oss << ", hipDeviceCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.hipDeviceCount); + oss << ", deviceList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.deviceList); + oss << ")"; + break; + case HIP_API_ID_hipGetChannelDesc: + oss << "hipGetChannelDesc("; + if (data->args.hipGetChannelDesc.desc == NULL) oss << "desc=NULL"; + else { oss << "desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetChannelDesc.desc__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetChannelDesc.array); + oss << ")"; + break; + case HIP_API_ID_hipGetDevice: + oss << "hipGetDevice("; + if (data->args.hipGetDevice.deviceId == NULL) oss << "deviceId=NULL"; + else { oss << "deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevice.deviceId__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDeviceCount: + oss << "hipGetDeviceCount("; + if (data->args.hipGetDeviceCount.count == NULL) oss << "count=NULL"; + else { oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceCount.count__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDeviceFlags: + oss << "hipGetDeviceFlags("; + if (data->args.hipGetDeviceFlags.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDevicePropertiesR0000: + oss << "hipGetDevicePropertiesR0000("; + if (data->args.hipGetDevicePropertiesR0000.prop == NULL) oss << "prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0000.prop__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0000.device); + oss << ")"; + 
break; + case HIP_API_ID_hipGetDevicePropertiesR0600: + oss << "hipGetDevicePropertiesR0600("; + if (data->args.hipGetDevicePropertiesR0600.prop == NULL) oss << "prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.prop__val); } + oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.deviceId); + oss << ")"; + break; + case HIP_API_ID_hipGetDriverEntryPoint: + oss << "hipGetDriverEntryPoint("; + if (data->args.hipGetDriverEntryPoint.symbol == NULL) oss << "symbol=NULL"; + else { oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDriverEntryPoint.symbol__val); } + if (data->args.hipGetDriverEntryPoint.funcPtr == NULL) oss << ", funcPtr=NULL"; + else { oss << ", funcPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDriverEntryPoint.funcPtr__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDriverEntryPoint.flags); + if (data->args.hipGetDriverEntryPoint.driverStatus == NULL) oss << ", driverStatus=NULL"; + else { oss << ", driverStatus="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDriverEntryPoint.driverStatus__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetFuncBySymbol: + oss << "hipGetFuncBySymbol("; + if (data->args.hipGetFuncBySymbol.functionPtr == NULL) oss << "functionPtr=NULL"; + else { oss << "functionPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetFuncBySymbol.functionPtr__val); } + oss << ", symbolPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetFuncBySymbol.symbolPtr); + oss << ")"; + break; + case HIP_API_ID_hipGetLastError: + oss << "hipGetLastError("; + oss << ")"; + break; + case HIP_API_ID_hipGetMipmappedArrayLevel: + oss << "hipGetMipmappedArrayLevel("; + if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL"; + else { oss 
<< "levelArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.levelArray__val); } + oss << ", mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.mipmappedArray); + oss << ", level="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.level); + oss << ")"; + break; + case HIP_API_ID_hipGetProcAddress: + oss << "hipGetProcAddress("; + if (data->args.hipGetProcAddress.symbol == NULL) oss << "symbol=NULL"; + else { oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetProcAddress.symbol__val); } + if (data->args.hipGetProcAddress.pfn == NULL) oss << ", pfn=NULL"; + else { oss << ", pfn="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetProcAddress.pfn__val); } + oss << ", hipVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetProcAddress.hipVersion); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetProcAddress.flags); + if (data->args.hipGetProcAddress.symbolStatus == NULL) oss << ", symbolStatus=NULL"; + else { oss << ", symbolStatus="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetProcAddress.symbolStatus__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetSymbolAddress: + oss << "hipGetSymbolAddress("; + if (data->args.hipGetSymbolAddress.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolAddress.devPtr__val); } + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolAddress.symbol); + oss << ")"; + break; + case HIP_API_ID_hipGetSymbolSize: + oss << "hipGetSymbolSize("; + if (data->args.hipGetSymbolSize.size == NULL) oss << "size=NULL"; + else { oss << "size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolSize.size__val); } + oss << ", symbol="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolSize.symbol); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddBatchMemOpNode: + oss << "hipGraphAddBatchMemOpNode("; + if (data->args.hipGraphAddBatchMemOpNode.phGraphNode == NULL) oss << "phGraphNode=NULL"; + else { oss << "phGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddBatchMemOpNode.phGraphNode__val); } + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddBatchMemOpNode.hGraph); + if (data->args.hipGraphAddBatchMemOpNode.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddBatchMemOpNode.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddBatchMemOpNode.numDependencies); + if (data->args.hipGraphAddBatchMemOpNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddBatchMemOpNode.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddChildGraphNode: + oss << "hipGraphAddChildGraphNode("; + if (data->args.hipGraphAddChildGraphNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.graph); + if (data->args.hipGraphAddChildGraphNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.numDependencies); + oss << ", childGraph="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.childGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddDependencies: + oss << "hipGraphAddDependencies("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.graph); + if (data->args.hipGraphAddDependencies.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.from__val); } + if (data->args.hipGraphAddDependencies.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.to__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEmptyNode: + oss << "hipGraphAddEmptyNode("; + if (data->args.hipGraphAddEmptyNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.graph); + if (data->args.hipGraphAddEmptyNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEventRecordNode: + oss << "hipGraphAddEventRecordNode("; + if (data->args.hipGraphAddEventRecordNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.pGraphNode__val); } + oss << ", graph="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.graph); + if (data->args.hipGraphAddEventRecordNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.numDependencies); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEventWaitNode: + oss << "hipGraphAddEventWaitNode("; + if (data->args.hipGraphAddEventWaitNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.graph); + if (data->args.hipGraphAddEventWaitNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.numDependencies); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: + oss << "hipGraphAddExternalSemaphoresSignalNode("; + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddExternalSemaphoresSignalNode.graph); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.numDependencies); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: + oss << "hipGraphAddExternalSemaphoresWaitNode("; + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.graph); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.numDependencies); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddHostNode: + oss << "hipGraphAddHostNode("; + 
if (data->args.hipGraphAddHostNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.graph); + if (data->args.hipGraphAddHostNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.numDependencies); + if (data->args.hipGraphAddHostNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddKernelNode: + oss << "hipGraphAddKernelNode("; + if (data->args.hipGraphAddKernelNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.graph); + if (data->args.hipGraphAddKernelNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.numDependencies); + if (data->args.hipGraphAddKernelNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemAllocNode: + oss << 
"hipGraphAddMemAllocNode("; + if (data->args.hipGraphAddMemAllocNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.graph); + if (data->args.hipGraphAddMemAllocNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.numDependencies); + if (data->args.hipGraphAddMemAllocNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemFreeNode: + oss << "hipGraphAddMemFreeNode("; + if (data->args.hipGraphAddMemFreeNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.graph); + if (data->args.hipGraphAddMemFreeNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.numDependencies); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNode: + oss << "hipGraphAddMemcpyNode("; + if 
(data->args.hipGraphAddMemcpyNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.graph); + if (data->args.hipGraphAddMemcpyNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.numDependencies); + if (data->args.hipGraphAddMemcpyNode.pCopyParams == NULL) oss << ", pCopyParams=NULL"; + else { oss << ", pCopyParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.pCopyParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNode1D: + oss << "hipGraphAddMemcpyNode1D("; + if (data->args.hipGraphAddMemcpyNode1D.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.graph); + if (data->args.hipGraphAddMemcpyNode1D.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.numDependencies); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddMemcpyNode1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: + oss << "hipGraphAddMemcpyNodeFromSymbol("; + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.graph); + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.numDependencies); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.symbol); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: + oss << "hipGraphAddMemcpyNodeToSymbol("; + if (data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode__val); } + oss << ", graph="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.graph); + if (data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.numDependencies); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemsetNode: + oss << "hipGraphAddMemsetNode("; + if (data->args.hipGraphAddMemsetNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.graph); + if (data->args.hipGraphAddMemsetNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.numDependencies); + if (data->args.hipGraphAddMemsetNode.pMemsetParams == NULL) oss << ", pMemsetParams=NULL"; + else { oss << ", pMemsetParams="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pMemsetParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddNode: + oss << "hipGraphAddNode("; + if (data->args.hipGraphAddNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddNode.graph); + if (data->args.hipGraphAddNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddNode.numDependencies); + if (data->args.hipGraphAddNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddNode.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphBatchMemOpNodeGetParams: + oss << "hipGraphBatchMemOpNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphBatchMemOpNodeGetParams.hNode); + if (data->args.hipGraphBatchMemOpNodeGetParams.nodeParams_out == NULL) oss << ", nodeParams_out=NULL"; + else { oss << ", nodeParams_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphBatchMemOpNodeGetParams.nodeParams_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphBatchMemOpNodeSetParams: + oss << "hipGraphBatchMemOpNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphBatchMemOpNodeSetParams.hNode); + if (data->args.hipGraphBatchMemOpNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphBatchMemOpNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: + oss << "hipGraphChildGraphNodeGetGraph("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphChildGraphNodeGetGraph.node); + if (data->args.hipGraphChildGraphNodeGetGraph.pGraph == NULL) oss << ", pGraph=NULL"; + else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphChildGraphNodeGetGraph.pGraph__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphClone: + oss << "hipGraphClone("; + if (data->args.hipGraphClone.pGraphClone == NULL) oss << "pGraphClone=NULL"; + else { oss << "pGraphClone="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphClone.pGraphClone__val); } + oss << ", originalGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphClone.originalGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphCreate: + oss << "hipGraphCreate("; + if (data->args.hipGraphCreate.pGraph == NULL) oss << "pGraph=NULL"; + else { oss << "pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphCreate.pGraph__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphDebugDotPrint: + oss << "hipGraphDebugDotPrint("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.graph); + if (data->args.hipGraphDebugDotPrint.path == NULL) oss << ", path=NULL"; + else { oss << ", path="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.path__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphDestroy: + oss << "hipGraphDestroy("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphDestroy.graph); + oss << ")"; + break; + case HIP_API_ID_hipGraphDestroyNode: + oss << "hipGraphDestroyNode("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDestroyNode.node); + oss << ")"; + break; + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: + oss << "hipGraphEventRecordNodeGetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeGetEvent.node); + if (data->args.hipGraphEventRecordNodeGetEvent.event_out == NULL) oss << ", event_out=NULL"; + else { oss << ", event_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeGetEvent.event_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphEventRecordNodeSetEvent: + oss << "hipGraphEventRecordNodeSetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeSetEvent.node); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: + oss << "hipGraphEventWaitNodeGetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeGetEvent.node); + if (data->args.hipGraphEventWaitNodeGetEvent.event_out == NULL) oss << ", event_out=NULL"; + else { oss << ", event_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeGetEvent.event_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: + oss << "hipGraphEventWaitNodeSetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeSetEvent.node); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecBatchMemOpNodeSetParams: + oss << "hipGraphExecBatchMemOpNodeSetParams("; + 
oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecBatchMemOpNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecBatchMemOpNodeSetParams.hNode); + if (data->args.hipGraphExecBatchMemOpNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecBatchMemOpNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: + oss << "hipGraphExecChildGraphNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.node); + oss << ", childGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.childGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecDestroy: + oss << "hipGraphExecDestroy("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecDestroy.graphExec); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: + oss << "hipGraphExecEventRecordNodeSetEvent("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.hNode); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: + oss << "hipGraphExecEventWaitNodeSetEvent("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.hGraphExec); + oss << ", 
hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.hNode); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: + oss << "hipGraphExecExternalSemaphoresSignalNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hNode); + if (data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: + oss << "hipGraphExecExternalSemaphoresWaitNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hNode); + if (data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecGetFlags: + oss << "hipGraphExecGetFlags("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecGetFlags.graphExec); + if (data->args.hipGraphExecGetFlags.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecGetFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecHostNodeSetParams: + oss << "hipGraphExecHostNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.node); + if (data->args.hipGraphExecHostNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecKernelNodeSetParams: + oss << "hipGraphExecKernelNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.node); + if (data->args.hipGraphExecKernelNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: + oss << "hipGraphExecMemcpyNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.node); + if (data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: + oss << 
"hipGraphExecMemcpyNodeSetParams1D("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: + oss << "hipGraphExecMemcpyNodeSetParamsFromSymbol("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.symbol); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: + oss << "hipGraphExecMemcpyNodeSetParamsToSymbol("; + oss << "hGraphExec="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.node); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: + oss << "hipGraphExecMemsetNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemsetNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemsetNodeSetParams.node); + if (data->args.hipGraphExecMemsetNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemsetNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecNodeSetParams: + oss << "hipGraphExecNodeSetParams("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecNodeSetParams.graphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecNodeSetParams.node); + if (data->args.hipGraphExecNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphExecNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecUpdate: + oss << "hipGraphExecUpdate("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hGraphExec); + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hGraph); + if (data->args.hipGraphExecUpdate.hErrorNode_out == NULL) oss << ", hErrorNode_out=NULL"; + else { oss << ", hErrorNode_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hErrorNode_out__val); } + if (data->args.hipGraphExecUpdate.updateResult_out == NULL) oss << ", updateResult_out=NULL"; + else { oss << ", updateResult_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.updateResult_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: + oss << "hipGraphExternalSemaphoresSignalNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeGetParams.hNode); + if (data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out == NULL) oss << ", params_out=NULL"; + else { oss << ", params_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: + oss << "hipGraphExternalSemaphoresSignalNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeSetParams.hNode); + if (data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case 
HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: + oss << "hipGraphExternalSemaphoresWaitNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeGetParams.hNode); + if (data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out == NULL) oss << ", params_out=NULL"; + else { oss << ", params_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: + oss << "hipGraphExternalSemaphoresWaitNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeSetParams.hNode); + if (data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetEdges: + oss << "hipGraphGetEdges("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.graph); + if (data->args.hipGraphGetEdges.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.from__val); } + if (data->args.hipGraphGetEdges.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.to__val); } + if (data->args.hipGraphGetEdges.numEdges == NULL) oss << ", numEdges=NULL"; + else { oss << ", numEdges="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.numEdges__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetNodes: + oss << "hipGraphGetNodes("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetNodes.graph); 
+ if (data->args.hipGraphGetNodes.nodes == NULL) oss << ", nodes=NULL"; + else { oss << ", nodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetNodes.nodes__val); } + if (data->args.hipGraphGetNodes.numNodes == NULL) oss << ", numNodes=NULL"; + else { oss << ", numNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetNodes.numNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetRootNodes: + oss << "hipGraphGetRootNodes("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.graph); + if (data->args.hipGraphGetRootNodes.pRootNodes == NULL) oss << ", pRootNodes=NULL"; + else { oss << ", pRootNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.pRootNodes__val); } + if (data->args.hipGraphGetRootNodes.pNumRootNodes == NULL) oss << ", pNumRootNodes=NULL"; + else { oss << ", pNumRootNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.pNumRootNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphHostNodeGetParams: + oss << "hipGraphHostNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeGetParams.node); + if (data->args.hipGraphHostNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphHostNodeSetParams: + oss << "hipGraphHostNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeSetParams.node); + if (data->args.hipGraphHostNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case 
HIP_API_ID_hipGraphInstantiate: + oss << "hipGraphInstantiate("; + if (data->args.hipGraphInstantiate.pGraphExec == NULL) oss << "pGraphExec=NULL"; + else { oss << "pGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pGraphExec__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.graph); + if (data->args.hipGraphInstantiate.pErrorNode == NULL) oss << ", pErrorNode=NULL"; + else { oss << ", pErrorNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pErrorNode__val); } + if (data->args.hipGraphInstantiate.pLogBuffer == NULL) oss << ", pLogBuffer=NULL"; + else { oss << ", pLogBuffer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pLogBuffer__val); } + oss << ", bufferSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.bufferSize); + oss << ")"; + break; + case HIP_API_ID_hipGraphInstantiateWithFlags: + oss << "hipGraphInstantiateWithFlags("; + if (data->args.hipGraphInstantiateWithFlags.pGraphExec == NULL) oss << "pGraphExec=NULL"; + else { oss << "pGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.pGraphExec__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.graph); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphInstantiateWithParams: + oss << "hipGraphInstantiateWithParams("; + if (data->args.hipGraphInstantiateWithParams.pGraphExec == NULL) oss << "pGraphExec=NULL"; + else { oss << "pGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithParams.pGraphExec__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithParams.graph); + if 
(data->args.hipGraphInstantiateWithParams.instantiateParams == NULL) oss << ", instantiateParams=NULL"; + else { oss << ", instantiateParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithParams.instantiateParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: + oss << "hipGraphKernelNodeCopyAttributes("; + oss << "hSrc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeCopyAttributes.hSrc); + oss << ", hDst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeCopyAttributes.hDst); + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeGetAttribute: + oss << "hipGraphKernelNodeGetAttribute("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.hNode); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.attr); + if (data->args.hipGraphKernelNodeGetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.value__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeGetParams: + oss << "hipGraphKernelNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetParams.node); + if (data->args.hipGraphKernelNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeSetAttribute: + oss << "hipGraphKernelNodeSetAttribute("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.hNode); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.attr); + 
if (data->args.hipGraphKernelNodeSetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.value__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeSetParams: + oss << "hipGraphKernelNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetParams.node); + if (data->args.hipGraphKernelNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphLaunch: + oss << "hipGraphLaunch("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphLaunch.graphExec); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphLaunch.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemAllocNodeGetParams: + oss << "hipGraphMemAllocNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemAllocNodeGetParams.node); + if (data->args.hipGraphMemAllocNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemAllocNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemFreeNodeGetParams: + oss << "hipGraphMemFreeNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemFreeNodeGetParams.node); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemFreeNodeGetParams.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeGetParams: + oss << "hipGraphMemcpyNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphMemcpyNodeGetParams.node); + if (data->args.hipGraphMemcpyNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParams: + oss << "hipGraphMemcpyNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams.node); + if (data->args.hipGraphMemcpyNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: + oss << "hipGraphMemcpyNodeSetParams1D("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: + oss << "hipGraphMemcpyNodeSetParamsFromSymbol("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.symbol); + oss << ", count="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: + oss << "hipGraphMemcpyNodeSetParamsToSymbol("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.node); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemsetNodeGetParams: + oss << "hipGraphMemsetNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeGetParams.node); + if (data->args.hipGraphMemsetNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemsetNodeSetParams: + oss << "hipGraphMemsetNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeSetParams.node); + if (data->args.hipGraphMemsetNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; 
+ else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeFindInClone: + oss << "hipGraphNodeFindInClone("; + if (data->args.hipGraphNodeFindInClone.pNode == NULL) oss << "pNode=NULL"; + else { oss << "pNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeFindInClone.pNode__val); } + oss << ", originalNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeFindInClone.originalNode); + oss << ", clonedGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeFindInClone.clonedGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetDependencies: + oss << "hipGraphNodeGetDependencies("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.node); + if (data->args.hipGraphNodeGetDependencies.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.pDependencies__val); } + if (data->args.hipGraphNodeGetDependencies.pNumDependencies == NULL) oss << ", pNumDependencies=NULL"; + else { oss << ", pNumDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.pNumDependencies__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetDependentNodes: + oss << "hipGraphNodeGetDependentNodes("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.node); + if (data->args.hipGraphNodeGetDependentNodes.pDependentNodes == NULL) oss << ", pDependentNodes=NULL"; + else { oss << ", pDependentNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.pDependentNodes__val); } + if (data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes == NULL) oss << ", 
pNumDependentNodes=NULL"; + else { oss << ", pNumDependentNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetEnabled: + oss << "hipGraphNodeGetEnabled("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.hNode); + if (data->args.hipGraphNodeGetEnabled.isEnabled == NULL) oss << ", isEnabled=NULL"; + else { oss << ", isEnabled="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.isEnabled__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetType: + oss << "hipGraphNodeGetType("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetType.node); + if (data->args.hipGraphNodeGetType.pType == NULL) oss << ", pType=NULL"; + else { oss << ", pType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetType.pType__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeSetEnabled: + oss << "hipGraphNodeSetEnabled("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.hNode); + oss << ", isEnabled="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.isEnabled); + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeSetParams: + oss << "hipGraphNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetParams.node); + if (data->args.hipGraphNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphReleaseUserObject: + oss << "hipGraphReleaseUserObject("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.graph); + oss << ", object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.count); + oss << ")"; + break; + case HIP_API_ID_hipGraphRemoveDependencies: + oss << "hipGraphRemoveDependencies("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.graph); + if (data->args.hipGraphRemoveDependencies.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.from__val); } + if (data->args.hipGraphRemoveDependencies.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.to__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphRetainUserObject: + oss << "hipGraphRetainUserObject("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.graph); + oss << ", object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.count); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphUpload: + oss << "hipGraphUpload("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphUpload.graphExec); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphUpload.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsGLRegisterBuffer: + oss << "hipGraphicsGLRegisterBuffer("; + if (data->args.hipGraphicsGLRegisterBuffer.resource == NULL) oss << "resource=NULL"; + else { oss << "resource="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipGraphicsGLRegisterBuffer.resource__val); } + oss << ", buffer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterBuffer.buffer); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterBuffer.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsGLRegisterImage: + oss << "hipGraphicsGLRegisterImage("; + if (data->args.hipGraphicsGLRegisterImage.resource == NULL) oss << "resource=NULL"; + else { oss << "resource="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipGraphicsGLRegisterImage.resource__val); } + oss << ", image="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.image); + oss << ", target="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.target); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsMapResources: + oss << "hipGraphicsMapResources("; + oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.count); + if (data->args.hipGraphicsMapResources.resources == NULL) oss << ", resources=NULL"; + else { oss << ", resources="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.resources__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.stream); + oss << ")"; + break; + case 
HIP_API_ID_hipGraphicsResourceGetMappedPointer: + oss << "hipGraphicsResourceGetMappedPointer("; + if (data->args.hipGraphicsResourceGetMappedPointer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.devPtr__val); } + if (data->args.hipGraphicsResourceGetMappedPointer.size == NULL) oss << ", size=NULL"; + else { oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.size__val); } + oss << ", resource="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.resource); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: + oss << "hipGraphicsSubResourceGetMappedArray("; + if (data->args.hipGraphicsSubResourceGetMappedArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.array__val); } + oss << ", resource="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.resource); + oss << ", arrayIndex="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.arrayIndex); + oss << ", mipLevel="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.mipLevel); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsUnmapResources: + oss << "hipGraphicsUnmapResources("; + oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.count); + if (data->args.hipGraphicsUnmapResources.resources == NULL) oss << ", resources=NULL"; + else { oss << ", resources="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.resources__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.stream); + oss 
<< ")"; + break; + case HIP_API_ID_hipGraphicsUnregisterResource: + oss << "hipGraphicsUnregisterResource("; + oss << "resource="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnregisterResource.resource); + oss << ")"; + break; + case HIP_API_ID_hipHccModuleLaunchKernel: + oss << "hipHccModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.f); + oss << ", globalWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeX); + oss << ", globalWorkSizeY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeY); + oss << ", globalWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.sharedMemBytes); + oss << ", hStream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.hStream); + if (data->args.hipHccModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.kernelParams__val); } + if (data->args.hipHccModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.extra__val); } + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipHccModuleLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.stopEvent); + oss << ")"; + break; + case HIP_API_ID_hipHostAlloc: + oss << "hipHostAlloc("; + if (data->args.hipHostAlloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostFree: + oss << "hipHostFree("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostFree.ptr); + oss << ")"; + break; + case HIP_API_ID_hipHostGetDevicePointer: + oss << "hipHostGetDevicePointer("; + if (data->args.hipHostGetDevicePointer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetDevicePointer.devPtr__val); } + oss << ", hstPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetDevicePointer.hstPtr); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetDevicePointer.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostGetFlags: + oss << "hipHostGetFlags("; + if (data->args.hipHostGetFlags.flagsPtr == NULL) oss << "flagsPtr=NULL"; + else { oss << "flagsPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetFlags.flagsPtr__val); } + oss << ", hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetFlags.hostPtr); + oss << ")"; + break; + case HIP_API_ID_hipHostMalloc: + oss << "hipHostMalloc("; + if (data->args.hipHostMalloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.ptr__val); } + oss << ", size="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostRegister: + oss << "hipHostRegister("; + oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.hostPtr); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.sizeBytes); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostUnregister: + oss << "hipHostUnregister("; + oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostUnregister.hostPtr); + oss << ")"; + break; + case HIP_API_ID_hipImportExternalMemory: + oss << "hipImportExternalMemory("; + if (data->args.hipImportExternalMemory.extMem_out == NULL) oss << "extMem_out=NULL"; + else { oss << "extMem_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalMemory.extMem_out__val); } + if (data->args.hipImportExternalMemory.memHandleDesc == NULL) oss << ", memHandleDesc=NULL"; + else { oss << ", memHandleDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalMemory.memHandleDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipImportExternalSemaphore: + oss << "hipImportExternalSemaphore("; + if (data->args.hipImportExternalSemaphore.extSem_out == NULL) oss << "extSem_out=NULL"; + else { oss << "extSem_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalSemaphore.extSem_out__val); } + if (data->args.hipImportExternalSemaphore.semHandleDesc == NULL) oss << ", semHandleDesc=NULL"; + else { oss << ", semHandleDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalSemaphore.semHandleDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipInit: + oss << 
"hipInit("; + oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipInit.flags); + oss << ")"; + break; + case HIP_API_ID_hipIpcCloseMemHandle: + oss << "hipIpcCloseMemHandle("; + oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcCloseMemHandle.devPtr); + oss << ")"; + break; + case HIP_API_ID_hipIpcGetEventHandle: + oss << "hipIpcGetEventHandle("; + if (data->args.hipIpcGetEventHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetEventHandle.handle__val); } + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetEventHandle.event); + oss << ")"; + break; + case HIP_API_ID_hipIpcGetMemHandle: + oss << "hipIpcGetMemHandle("; + if (data->args.hipIpcGetMemHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetMemHandle.handle__val); } + oss << ", devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetMemHandle.devPtr); + oss << ")"; + break; + case HIP_API_ID_hipIpcOpenEventHandle: + oss << "hipIpcOpenEventHandle("; + if (data->args.hipIpcOpenEventHandle.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenEventHandle.event__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenEventHandle.handle); + oss << ")"; + break; + case HIP_API_ID_hipIpcOpenMemHandle: + oss << "hipIpcOpenMemHandle("; + if (data->args.hipIpcOpenMemHandle.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenMemHandle.devPtr__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenMemHandle.handle); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipIpcOpenMemHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipLaunchByPtr: + oss << "hipLaunchByPtr("; + oss << "hostFunction="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchByPtr.hostFunction); + oss << ")"; + break; + case HIP_API_ID_hipLaunchCooperativeKernel: + oss << "hipLaunchCooperativeKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.f); + oss << ", gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.gridDim); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.blockDimX); + if (data->args.hipLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.kernelParams__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.stream); + oss << ")"; + break; + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: + oss << "hipLaunchCooperativeKernelMultiDevice("; + if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipLaunchHostFunc: + oss << "hipLaunchHostFunc("; + oss << "stream="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.stream); + oss << ", fn="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.fn); + oss << ", userData="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.userData); + oss << ")"; + break; + case HIP_API_ID_hipLaunchKernel: + oss << "hipLaunchKernel("; + oss << "function_address="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.function_address); + oss << ", numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.numBlocks); + oss << ", dimBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.dimBlocks); + if (data->args.hipLaunchKernel.args == NULL) oss << ", args=NULL"; + else { oss << ", args="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.args__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.stream); + oss << ")"; + break; + case HIP_API_ID_hipLaunchKernelExC: + oss << "hipLaunchKernelExC("; + if (data->args.hipLaunchKernelExC.config == NULL) oss << "config=NULL"; + else { oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernelExC.config__val); } + oss << ", fPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernelExC.fPtr); + if (data->args.hipLaunchKernelExC.args == NULL) oss << ", args=NULL"; + else { oss << ", args="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernelExC.args__val); } + oss << ")"; + break; + case HIP_API_ID_hipLibraryGetKernel: + oss << "hipLibraryGetKernel("; + if (data->args.hipLibraryGetKernel.pKernel == NULL) oss << "pKernel=NULL"; + else { oss << "pKernel="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipLibraryGetKernel.pKernel__val); } + oss << ", library="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernel.library); + if (data->args.hipLibraryGetKernel.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernel.name__val); } + oss << ")"; + break; + case HIP_API_ID_hipLibraryGetKernelCount: + oss << "hipLibraryGetKernelCount("; + if (data->args.hipLibraryGetKernelCount.count == NULL) oss << "count=NULL"; + else { oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernelCount.count__val); } + oss << ", library="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernelCount.library); + oss << ")"; + break; + case HIP_API_ID_hipLibraryLoadData: + oss << "hipLibraryLoadData("; + if (data->args.hipLibraryLoadData.library == NULL) oss << "library=NULL"; + else { oss << "library="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.library__val); } + oss << ", code="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.code); + if (data->args.hipLibraryLoadData.jitOptions == NULL) oss << ", jitOptions=NULL"; + else { oss << ", jitOptions="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipLibraryLoadData.jitOptions__val); } + if (data->args.hipLibraryLoadData.jitOptionsValues == NULL) oss << ", jitOptionsValues=NULL"; + else { oss << ", jitOptionsValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.jitOptionsValues__val); } + oss << ", numJitOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.numJitOptions); + if (data->args.hipLibraryLoadData.libraryOptions == NULL) oss << ", libraryOptions=NULL"; + else { oss << ", libraryOptions="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipLibraryLoadData.libraryOptions__val); } + 
if (data->args.hipLibraryLoadData.libraryOptionValues == NULL) oss << ", libraryOptionValues=NULL"; + else { oss << ", libraryOptionValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.libraryOptionValues__val); } + oss << ", numLibraryOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.numLibraryOptions); + oss << ")"; + break; + case HIP_API_ID_hipLibraryLoadFromFile: + oss << "hipLibraryLoadFromFile("; + if (data->args.hipLibraryLoadFromFile.library == NULL) oss << "library=NULL"; + else { oss << "library="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.library__val); } + if (data->args.hipLibraryLoadFromFile.fileName == NULL) oss << ", fileName=NULL"; + else { oss << ", fileName="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.fileName__val); } + if (data->args.hipLibraryLoadFromFile.jitOptions == NULL) oss << ", jitOptions=NULL"; + else { oss << ", jitOptions="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipLibraryLoadFromFile.jitOptions__val); } + if (data->args.hipLibraryLoadFromFile.jitOptionsValues == NULL) oss << ", jitOptionsValues=NULL"; + else { oss << ", jitOptionsValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.jitOptionsValues__val); } + oss << ", numJitOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.numJitOptions); + if (data->args.hipLibraryLoadFromFile.libraryOptions == NULL) oss << ", libraryOptions=NULL"; + else { oss << ", libraryOptions="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipLibraryLoadFromFile.libraryOptions__val); } + if (data->args.hipLibraryLoadFromFile.libraryOptionValues == NULL) oss << ", libraryOptionValues=NULL"; + else { oss << ", libraryOptionValues="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipLibraryLoadFromFile.libraryOptionValues__val); } + oss << ", numLibraryOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.numLibraryOptions); + oss << ")"; + break; + case HIP_API_ID_hipLibraryUnload: + oss << "hipLibraryUnload("; + oss << "library="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryUnload.library); + oss << ")"; + break; + case HIP_API_ID_hipLinkAddData: + oss << "hipLinkAddData("; + oss << "state="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.state); + oss << ", type="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.type); + oss << ", data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.data); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.size); + if (data->args.hipLinkAddData.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.name__val); } + oss << ", numOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.numOptions); + if (data->args.hipLinkAddData.options == NULL) oss << ", options=NULL"; + else { oss << ", options="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.options__val); } + if (data->args.hipLinkAddData.optionValues == NULL) oss << ", optionValues=NULL"; + else { oss << ", optionValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddData.optionValues__val); } + oss << ")"; + break; + case HIP_API_ID_hipLinkAddFile: + oss << "hipLinkAddFile("; + oss << "state="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.state); + oss << ", type="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.type); + if (data->args.hipLinkAddFile.path == NULL) oss << ", path=NULL"; + else { oss << ", path="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.path__val); } + oss << ", numOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.numOptions); + if (data->args.hipLinkAddFile.options == NULL) oss << ", options=NULL"; + else { oss << ", options="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.options__val); } + if (data->args.hipLinkAddFile.optionValues == NULL) oss << ", optionValues=NULL"; + else { oss << ", optionValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkAddFile.optionValues__val); } + oss << ")"; + break; + case HIP_API_ID_hipLinkComplete: + oss << "hipLinkComplete("; + oss << "state="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkComplete.state); + if (data->args.hipLinkComplete.hipBinOut == NULL) oss << ", hipBinOut=NULL"; + else { oss << ", hipBinOut="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkComplete.hipBinOut__val); } + if (data->args.hipLinkComplete.sizeOut == NULL) oss << ", sizeOut=NULL"; + else { oss << ", sizeOut="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkComplete.sizeOut__val); } + oss << ")"; + break; + case HIP_API_ID_hipLinkCreate: + oss << "hipLinkCreate("; + oss << "numOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkCreate.numOptions); + if (data->args.hipLinkCreate.options == NULL) oss << ", options=NULL"; + else { oss << ", options="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkCreate.options__val); } + if (data->args.hipLinkCreate.optionValues == NULL) oss << ", optionValues=NULL"; + else { oss << ", optionValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkCreate.optionValues__val); } + if (data->args.hipLinkCreate.stateOut == NULL) oss << ", stateOut=NULL"; + else { oss << ", stateOut="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkCreate.stateOut__val); } + 
oss << ")"; + break; + case HIP_API_ID_hipLinkDestroy: + oss << "hipLinkDestroy("; + oss << "state="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLinkDestroy.state); + oss << ")"; + break; + case HIP_API_ID_hipMalloc: + oss << "hipMalloc("; + if (data->args.hipMalloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc.size); + oss << ")"; + break; + case HIP_API_ID_hipMalloc3D: + oss << "hipMalloc3D("; + if (data->args.hipMalloc3D.pitchedDevPtr == NULL) oss << "pitchedDevPtr=NULL"; + else { oss << "pitchedDevPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3D.pitchedDevPtr__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3D.extent); + oss << ")"; + break; + case HIP_API_ID_hipMalloc3DArray: + oss << "hipMalloc3DArray("; + if (data->args.hipMalloc3DArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.array__val); } + if (data->args.hipMalloc3DArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.desc__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.extent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocArray: + oss << "hipMallocArray("; + if (data->args.hipMallocArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.array__val); } + if (data->args.hipMallocArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMallocArray.desc__val); } + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.height); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocAsync: + oss << "hipMallocAsync("; + if (data->args.hipMallocAsync.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.size); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMallocFromPoolAsync: + oss << "hipMallocFromPoolAsync("; + if (data->args.hipMallocFromPoolAsync.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.size); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.mem_pool); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMallocHost: + oss << "hipMallocHost("; + if (data->args.hipMallocHost.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocHost.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocHost.size); + oss << ")"; + break; + case HIP_API_ID_hipMallocManaged: + oss << "hipMallocManaged("; + if (data->args.hipMallocManaged.dev_ptr == NULL) oss << "dev_ptr=NULL"; 
+ else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocMipmappedArray: + oss << "hipMallocMipmappedArray("; + if (data->args.hipMallocMipmappedArray.mipmappedArray == NULL) oss << "mipmappedArray=NULL"; + else { oss << "mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.mipmappedArray__val); } + if (data->args.hipMallocMipmappedArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.desc__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.extent); + oss << ", numLevels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.numLevels); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocPitch: + oss << "hipMallocPitch("; + if (data->args.hipMallocPitch.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.ptr__val); } + if (data->args.hipMallocPitch.pitch == NULL) oss << ", pitch=NULL"; + else { oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.pitch__val); } + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.height); + oss << ")"; + break; + case HIP_API_ID_hipMemAddressFree: + oss << "hipMemAddressFree("; + oss << "devPtr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressFree.devPtr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressFree.size); + oss << ")"; + break; + case HIP_API_ID_hipMemAddressReserve: + oss << "hipMemAddressReserve("; + if (data->args.hipMemAddressReserve.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.size); + oss << ", alignment="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.alignment); + oss << ", addr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.addr); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemAdvise: + oss << "hipMemAdvise("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.count); + oss << ", advice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.advice); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.device); + oss << ")"; + break; + case HIP_API_ID_hipMemAdvise_v2: + oss << "hipMemAdvise_v2("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise_v2.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise_v2.count); + oss << ", advice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise_v2.advice); + oss << ", location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise_v2.location); + oss << ")"; + break; + case HIP_API_ID_hipMemAllocHost: + oss << 
"hipMemAllocHost("; + if (data->args.hipMemAllocHost.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocHost.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocHost.size); + oss << ")"; + break; + case HIP_API_ID_hipMemAllocPitch: + oss << "hipMemAllocPitch("; + if (data->args.hipMemAllocPitch.dptr == NULL) oss << "dptr=NULL"; + else { oss << "dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.dptr__val); } + if (data->args.hipMemAllocPitch.pitch == NULL) oss << ", pitch=NULL"; + else { oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.pitch__val); } + oss << ", widthInBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.widthInBytes); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.height); + oss << ", elementSizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.elementSizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemCreate: + oss << "hipMemCreate("; + if (data->args.hipMemCreate.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.handle__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.size); + if (data->args.hipMemCreate.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.prop__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemExportToShareableHandle: + oss << "hipMemExportToShareableHandle("; + oss << "shareableHandle="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemExportToShareableHandle.shareableHandle); + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.handle); + oss << ", handleType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.handleType); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAccess: + oss << "hipMemGetAccess("; + if (data->args.hipMemGetAccess.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.flags__val); } + if (data->args.hipMemGetAccess.location == NULL) oss << ", location=NULL"; + else { oss << ", location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.location__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.ptr); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAddressRange: + oss << "hipMemGetAddressRange("; + if (data->args.hipMemGetAddressRange.pbase == NULL) oss << "pbase=NULL"; + else { oss << "pbase="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.pbase__val); } + if (data->args.hipMemGetAddressRange.psize == NULL) oss << ", psize=NULL"; + else { oss << ", psize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.psize__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.dptr); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAllocationGranularity: + oss << "hipMemGetAllocationGranularity("; + if (data->args.hipMemGetAllocationGranularity.granularity == NULL) oss << "granularity=NULL"; + else { oss << "granularity="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.granularity__val); } + if 
(data->args.hipMemGetAllocationGranularity.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.prop__val); } + oss << ", option="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.option); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: + oss << "hipMemGetAllocationPropertiesFromHandle("; + if (data->args.hipMemGetAllocationPropertiesFromHandle.prop == NULL) oss << "prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationPropertiesFromHandle.prop__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationPropertiesFromHandle.handle); + oss << ")"; + break; + case HIP_API_ID_hipMemGetHandleForAddressRange: + oss << "hipMemGetHandleForAddressRange("; + oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetHandleForAddressRange.handle); + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetHandleForAddressRange.dptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetHandleForAddressRange.size); + oss << ", handleType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetHandleForAddressRange.handleType); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetHandleForAddressRange.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemGetInfo: + oss << "hipMemGetInfo("; + if (data->args.hipMemGetInfo.free == NULL) oss << "free=NULL"; + else { oss << "free="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetInfo.free__val); } + if (data->args.hipMemGetInfo.total == NULL) oss << ", total=NULL"; + else { oss << ", total="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetInfo.total__val); } + oss << 
")"; + break; + case HIP_API_ID_hipMemImportFromShareableHandle: + oss << "hipMemImportFromShareableHandle("; + if (data->args.hipMemImportFromShareableHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.handle__val); } + oss << ", osHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.osHandle); + oss << ", shHandleType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.shHandleType); + oss << ")"; + break; + case HIP_API_ID_hipMemMap: + oss << "hipMemMap("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.size); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.offset); + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.handle); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemMapArrayAsync: + oss << "hipMemMapArrayAsync("; + if (data->args.hipMemMapArrayAsync.mapInfoList == NULL) oss << "mapInfoList=NULL"; + else { oss << "mapInfoList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.mapInfoList__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolCreate: + oss << "hipMemPoolCreate("; + if (data->args.hipMemPoolCreate.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolCreate.mem_pool__val); } + if 
(data->args.hipMemPoolCreate.pool_props == NULL) oss << ", pool_props=NULL"; + else { oss << ", pool_props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolCreate.pool_props__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemPoolDestroy: + oss << "hipMemPoolDestroy("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolDestroy.mem_pool); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolExportPointer: + oss << "hipMemPoolExportPointer("; + if (data->args.hipMemPoolExportPointer.export_data == NULL) oss << "export_data=NULL"; + else { oss << "export_data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportPointer.export_data__val); } + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportPointer.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolExportToShareableHandle: + oss << "hipMemPoolExportToShareableHandle("; + oss << "shared_handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.shared_handle); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.mem_pool); + oss << ", handle_type="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.handle_type); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolGetAccess: + oss << "hipMemPoolGetAccess("; + if (data->args.hipMemPoolGetAccess.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.flags__val); } + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.mem_pool); + if (data->args.hipMemPoolGetAccess.location == NULL) oss << ", location=NULL"; + else { oss << ", 
location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.location__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemPoolGetAttribute: + oss << "hipMemPoolGetAttribute("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.mem_pool); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolImportFromShareableHandle: + oss << "hipMemPoolImportFromShareableHandle("; + if (data->args.hipMemPoolImportFromShareableHandle.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.mem_pool__val); } + oss << ", shared_handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.shared_handle); + oss << ", handle_type="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.handle_type); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolImportPointer: + oss << "hipMemPoolImportPointer("; + if (data->args.hipMemPoolImportPointer.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.dev_ptr__val); } + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.mem_pool); + if (data->args.hipMemPoolImportPointer.export_data == NULL) oss << ", export_data=NULL"; + else { oss << ", export_data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.export_data__val); } + oss << ")"; + break; + case 
HIP_API_ID_hipMemPoolSetAccess: + oss << "hipMemPoolSetAccess("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.mem_pool); + if (data->args.hipMemPoolSetAccess.desc_list == NULL) oss << ", desc_list=NULL"; + else { oss << ", desc_list="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.desc_list__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.count); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolSetAttribute: + oss << "hipMemPoolSetAttribute("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.mem_pool); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolTrimTo: + oss << "hipMemPoolTrimTo("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolTrimTo.mem_pool); + oss << ", min_bytes_to_hold="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolTrimTo.min_bytes_to_hold); + oss << ")"; + break; + case HIP_API_ID_hipMemPrefetchAsync: + oss << "hipMemPrefetchAsync("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.count); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.device); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemPrefetchAsync_v2: + oss << "hipMemPrefetchAsync_v2("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemPrefetchAsync_v2.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync_v2.count); + oss << ", location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync_v2.location); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync_v2.flags); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync_v2.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemPtrGetInfo: + oss << "hipMemPtrGetInfo("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPtrGetInfo.ptr); + if (data->args.hipMemPtrGetInfo.size == NULL) oss << ", size=NULL"; + else { oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPtrGetInfo.size__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemRangeGetAttribute: + oss << "hipMemRangeGetAttribute("; + oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.data); + oss << ", data_size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.data_size); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.attribute); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.count); + oss << ")"; + break; + case HIP_API_ID_hipMemRangeGetAttributes: + oss << "hipMemRangeGetAttributes("; + if (data->args.hipMemRangeGetAttributes.data == NULL) oss << "data=NULL"; + else { oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.data__val); } + if (data->args.hipMemRangeGetAttributes.data_sizes == NULL) oss << ", data_sizes=NULL"; + else { oss << ", data_sizes="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.data_sizes__val); } + if (data->args.hipMemRangeGetAttributes.attributes == NULL) oss << ", attributes=NULL"; + else { oss << ", attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.attributes__val); } + oss << ", num_attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.num_attributes); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.count); + oss << ")"; + break; + case HIP_API_ID_hipMemRelease: + oss << "hipMemRelease("; + oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRelease.handle); + oss << ")"; + break; + case HIP_API_ID_hipMemRetainAllocationHandle: + oss << "hipMemRetainAllocationHandle("; + if (data->args.hipMemRetainAllocationHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRetainAllocationHandle.handle__val); } + oss << ", addr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRetainAllocationHandle.addr); + oss << ")"; + break; + case HIP_API_ID_hipMemSetAccess: + oss << "hipMemSetAccess("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.size); + if (data->args.hipMemSetAccess.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.desc__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.count); + oss << ")"; + break; + case HIP_API_ID_hipMemUnmap: + oss << "hipMemUnmap("; + oss << "ptr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemUnmap.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemUnmap.size); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy: + oss << "hipMemcpy("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2D: + oss << "hipMemcpy2D("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DArrayToArray: + oss << "hipMemcpy2DArrayToArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.dst); + oss << ", wOffsetDst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.wOffsetDst); + oss << ", hOffsetDst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.hOffsetDst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.src); + oss << ", wOffsetSrc="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.wOffsetSrc); + oss << ", hOffsetSrc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.hOffsetSrc); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DArrayToArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DAsync: + oss << "hipMemcpy2DAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DFromArray: + oss << "hipMemcpy2DFromArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.src); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemcpy2DFromArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.hOffset); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DFromArrayAsync: + oss << "hipMemcpy2DFromArrayAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.src); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.hOffset); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DToArray: + oss << "hipMemcpy2DToArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.dst); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemcpy2DToArray.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DToArrayAsync: + oss << "hipMemcpy2DToArrayAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.dst); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3D: + oss << "hipMemcpy3D("; + if (data->args.hipMemcpy3D.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3D.p__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DAsync: + oss << 
"hipMemcpy3DAsync("; + if (data->args.hipMemcpy3DAsync.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.p__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DBatchAsync: + oss << "hipMemcpy3DBatchAsync("; + oss << "numOps="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.numOps); + if (data->args.hipMemcpy3DBatchAsync.opList == NULL) oss << ", opList=NULL"; + else { oss << ", opList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.opList__val); } + if (data->args.hipMemcpy3DBatchAsync.failIdx == NULL) oss << ", failIdx=NULL"; + else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.failIdx__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.flags); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DPeer: + oss << "hipMemcpy3DPeer("; + if (data->args.hipMemcpy3DPeer.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeer.p__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DPeerAsync: + oss << "hipMemcpy3DPeerAsync("; + if (data->args.hipMemcpy3DPeerAsync.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.p__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAsync: + oss << "hipMemcpyAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.dst); + oss << ", src="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAtoA: + oss << "hipMemcpyAtoA("; + oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoA.dstArray); + oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoA.dstOffset); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoA.srcArray); + oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoA.srcOffset); + oss << ", ByteCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoA.ByteCount); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAtoD: + oss << "hipMemcpyAtoD("; + oss << "dstDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoD.dstDevice); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoD.srcArray); + oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoD.srcOffset); + oss << ", ByteCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoD.ByteCount); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAtoH: + oss << "hipMemcpyAtoH("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.dst); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcArray); + oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcOffset); + oss << ", count="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.count); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAtoHAsync: + oss << "hipMemcpyAtoHAsync("; + oss << "dstHost="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.dstHost); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.srcArray); + oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.srcOffset); + oss << ", ByteCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.ByteCount); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyBatchAsync: + oss << "hipMemcpyBatchAsync("; + if (data->args.hipMemcpyBatchAsync.dsts == NULL) oss << "dsts=NULL"; + else { oss << "dsts="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.dsts__val); } + if (data->args.hipMemcpyBatchAsync.srcs == NULL) oss << ", srcs=NULL"; + else { oss << ", srcs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.srcs__val); } + if (data->args.hipMemcpyBatchAsync.sizes == NULL) oss << ", sizes=NULL"; + else { oss << ", sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.sizes__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.count); + if (data->args.hipMemcpyBatchAsync.attrs == NULL) oss << ", attrs=NULL"; + else { oss << ", attrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrs__val); } + if (data->args.hipMemcpyBatchAsync.attrsIdxs == NULL) oss << ", attrsIdxs=NULL"; + else { oss << ", attrsIdxs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrsIdxs__val); } + oss << ", numAttrs="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemcpyBatchAsync.numAttrs); + if (data->args.hipMemcpyBatchAsync.failIdx == NULL) oss << ", failIdx=NULL"; + else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.failIdx__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoA: + oss << "hipMemcpyDtoA("; + oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.dstArray); + oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.dstOffset); + oss << ", srcDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.srcDevice); + oss << ", ByteCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.ByteCount); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoD: + oss << "hipMemcpyDtoD("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoDAsync: + oss << "hipMemcpyDtoDAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoH: + oss << "hipMemcpyDtoH("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.dst); + oss << ", src="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoHAsync: + oss << "hipMemcpyDtoHAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromArray: + oss << "hipMemcpyFromArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.dst); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.srcArray); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.hOffset); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbol: + oss << "hipMemcpyFromSymbol("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.symbol); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.offset); + oss << ", 
kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbolAsync: + oss << "hipMemcpyFromSymbolAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.symbol); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoA: + oss << "hipMemcpyHtoA("; + oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstArray); + oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstOffset); + oss << ", srcHost="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.srcHost); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.count); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoAAsync: + oss << "hipMemcpyHtoAAsync("; + oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoAAsync.dstArray); + oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoAAsync.dstOffset); + oss << ", srcHost="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoAAsync.srcHost); + oss << ", ByteCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoAAsync.ByteCount); + oss << ", stream="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoAAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoD: + oss << "hipMemcpyHtoD("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoDAsync: + oss << "hipMemcpyHtoDAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyParam2D: + oss << "hipMemcpyParam2D("; + if (data->args.hipMemcpyParam2D.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2D.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpyParam2DAsync: + oss << "hipMemcpyParam2DAsync("; + if (data->args.hipMemcpyParam2DAsync.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2DAsync.pCopy__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyPeer: + oss << "hipMemcpyPeer("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.dst); + oss << ", dstDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.dstDeviceId); + oss << ", src="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.src); + oss << ", srcDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.srcDeviceId); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyPeerAsync: + oss << "hipMemcpyPeerAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.dst); + oss << ", dstDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.dstDeviceId); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.src); + oss << ", srcDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.srcDevice); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToArray: + oss << "hipMemcpyToArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.dst); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbol: + oss << "hipMemcpyToSymbol("; + oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.symbol); + oss << ", src="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbolAsync: + oss << "hipMemcpyToSymbolAsync("; + oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyWithStream: + oss << "hipMemcpyWithStream("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemset: + oss << "hipMemset("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.dst); + oss << ", 
value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.value); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemset2D: + oss << "hipMemset2D("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.dst); + oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.pitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.height); + oss << ")"; + break; + case HIP_API_ID_hipMemset2DAsync: + oss << "hipMemset2DAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.dst); + oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.pitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.height); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemset3D: + oss << "hipMemset3D("; + oss << "pitchedDevPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.pitchedDevPtr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.value); + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.extent); + oss << ")"; + break; + case HIP_API_ID_hipMemset3DAsync: + oss << "hipMemset3DAsync("; + oss << "pitchedDevPtr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.pitchedDevPtr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.value); + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.extent); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetAsync: + oss << "hipMemsetAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.dst); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.value); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD16: + oss << "hipMemsetD16("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD16Async: + oss << "hipMemsetD16Async("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D16: + oss << "hipMemsetD2D16("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16.dst); + oss << ", 
dstPitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16.height); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D16Async: + oss << "hipMemsetD2D16Async("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.dst); + oss << ", dstPitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.height); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D16Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D32: + oss << "hipMemsetD2D32("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32.dst); + oss << ", dstPitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32.height); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D32Async: + oss << "hipMemsetD2D32Async("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.dst); + oss << ", dstPitch="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.height); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D32Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D8: + oss << "hipMemsetD2D8("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8.dst); + oss << ", dstPitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8.height); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD2D8Async: + oss << "hipMemsetD2D8Async("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.dst); + oss << ", dstPitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.dstPitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.height); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD2D8Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD32: + oss << "hipMemsetD32("; + oss << "dest="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD32Async: + oss << "hipMemsetD32Async("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.dst); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD8: + oss << "hipMemsetD8("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD8Async: + oss << "hipMemsetD8Async("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayCreate: + oss << "hipMipmappedArrayCreate("; + if (data->args.hipMipmappedArrayCreate.pHandle == NULL) oss << "pHandle=NULL"; + else { oss << "pHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.pHandle__val); } + if 
(data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc == NULL) oss << ", pMipmappedArrayDesc=NULL"; + else { oss << ", pMipmappedArrayDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc__val); } + oss << ", numMipmapLevels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.numMipmapLevels); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayDestroy: + oss << "hipMipmappedArrayDestroy("; + oss << "hMipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayDestroy.hMipmappedArray); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayGetLevel: + oss << "hipMipmappedArrayGetLevel("; + if (data->args.hipMipmappedArrayGetLevel.pLevelArray == NULL) oss << "pLevelArray=NULL"; + else { oss << "pLevelArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.pLevelArray__val); } + oss << ", hMipMappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.hMipMappedArray); + oss << ", level="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.level); + oss << ")"; + break; + case HIP_API_ID_hipModuleGetFunction: + oss << "hipModuleGetFunction("; + if (data->args.hipModuleGetFunction.function == NULL) oss << "function=NULL"; + else { oss << "function="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.function__val); } + oss << ", module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.module); + if (data->args.hipModuleGetFunction.kname == NULL) oss << ", kname=NULL"; + else { oss << ", kname="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.kname__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleGetFunctionCount: + oss << "hipModuleGetFunctionCount("; + if (data->args.hipModuleGetFunctionCount.count == NULL) oss << 
"count=NULL"; + else { oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunctionCount.count__val); } + oss << ", mod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunctionCount.mod); + oss << ")"; + break; + case HIP_API_ID_hipModuleGetGlobal: + oss << "hipModuleGetGlobal("; + if (data->args.hipModuleGetGlobal.dptr == NULL) oss << "dptr=NULL"; + else { oss << "dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.dptr__val); } + if (data->args.hipModuleGetGlobal.bytes == NULL) oss << ", bytes=NULL"; + else { oss << ", bytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.bytes__val); } + oss << ", hmod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.hmod); + if (data->args.hipModuleGetGlobal.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.name__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleGetTexRef: + oss << "hipModuleGetTexRef("; + if (data->args.hipModuleGetTexRef.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipModuleGetTexRef.texRef__val); } + oss << ", hmod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetTexRef.hmod); + if (data->args.hipModuleGetTexRef.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetTexRef.name__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchCooperativeKernel: + oss << "hipModuleLaunchCooperativeKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.f); + oss << ", gridDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimX); + oss << ", gridDimY="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimY); + oss << ", gridDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.stream); + if (data->args.hipModuleLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.kernelParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: + oss << "hipModuleLaunchCooperativeKernelMultiDevice("; + if (data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchKernel: + oss << "hipModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipModuleLaunchKernel.f); + oss << ", gridDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimX); + oss << ", gridDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimY); + oss << ", gridDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.stream); + if (data->args.hipModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.kernelParams__val); } + if (data->args.hipModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.extra__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLoad: + oss << "hipModuleLoad("; + if (data->args.hipModuleLoad.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoad.module__val); } + if (data->args.hipModuleLoad.fname == NULL) oss << ", fname=NULL"; + else { oss << ", fname="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoad.fname__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLoadData: + oss << "hipModuleLoadData("; + if (data->args.hipModuleLoadData.module == 
NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadData.module__val); } + oss << ", image="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadData.image); + oss << ")"; + break; + case HIP_API_ID_hipModuleLoadDataEx: + oss << "hipModuleLoadDataEx("; + if (data->args.hipModuleLoadDataEx.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.module__val); } + oss << ", image="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.image); + oss << ", numOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.numOptions); + if (data->args.hipModuleLoadDataEx.options == NULL) oss << ", options=NULL"; + else { oss << ", options="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.options__val); } + if (data->args.hipModuleLoadDataEx.optionsValues == NULL) oss << ", optionsValues=NULL"; + else { oss << ", optionsValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.optionsValues__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLoadFatBinary: + oss << "hipModuleLoadFatBinary("; + if (data->args.hipModuleLoadFatBinary.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadFatBinary.module__val); } + oss << ", fatbin="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadFatBinary.fatbin); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: + oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor("; + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags("; + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: + oss << "hipModuleOccupancyMaxPotentialBlockSize("; + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val); } + if 
(data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: + oss << "hipModuleOccupancyMaxPotentialBlockSizeWithFlags("; + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val); } + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipModuleUnload: + 
oss << "hipModuleUnload("; + oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleUnload.module); + oss << ")"; + break; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: + oss << "hipOccupancyMaxActiveBlocksPerMultiprocessor("; + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize); + oss << ", dynamicSMemSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize); + oss << ")"; + break; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + oss << "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags("; + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize); + oss << ", dynamicSMemSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags); + oss << ")"; + 
break; + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: + oss << "hipOccupancyMaxPotentialBlockSize("; + if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val); } + if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit); + oss << ")"; + break; + case HIP_API_ID_hipPeekAtLastError: + oss << "hipPeekAtLastError("; + oss << ")"; + break; + case HIP_API_ID_hipPointerGetAttribute: + oss << "hipPointerGetAttribute("; + oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.data); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.attribute); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.ptr); + oss << ")"; + break; + case HIP_API_ID_hipPointerGetAttributes: + oss << "hipPointerGetAttributes("; + if (data->args.hipPointerGetAttributes.attributes == NULL) oss << "attributes=NULL"; + else { oss << "attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttributes.attributes__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttributes.ptr); + oss << ")"; + break; + case 
HIP_API_ID_hipPointerSetAttribute: + oss << "hipPointerSetAttribute("; + oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.value); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.attribute); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.ptr); + oss << ")"; + break; + case HIP_API_ID_hipProfilerStart: + oss << "hipProfilerStart("; + oss << ")"; + break; + case HIP_API_ID_hipProfilerStop: + oss << "hipProfilerStop("; + oss << ")"; + break; + case HIP_API_ID_hipRuntimeGetVersion: + oss << "hipRuntimeGetVersion("; + if (data->args.hipRuntimeGetVersion.runtimeVersion == NULL) oss << "runtimeVersion=NULL"; + else { oss << "runtimeVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipRuntimeGetVersion.runtimeVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipSetDevice: + oss << "hipSetDevice("; + oss << "deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetDevice.deviceId); + oss << ")"; + break; + case HIP_API_ID_hipSetDeviceFlags: + oss << "hipSetDeviceFlags("; + oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetDeviceFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipSetValidDevices: + oss << "hipSetValidDevices("; + if (data->args.hipSetValidDevices.device_arr == NULL) oss << "device_arr=NULL"; + else { oss << "device_arr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetValidDevices.device_arr__val); } + oss << ", len="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetValidDevices.len); + oss << ")"; + break; + case HIP_API_ID_hipSetupArgument: + oss << "hipSetupArgument("; + oss << "arg="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetupArgument.arg); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetupArgument.size); + oss << 
", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetupArgument.offset); + oss << ")"; + break; + case HIP_API_ID_hipSignalExternalSemaphoresAsync: + oss << "hipSignalExternalSemaphoresAsync("; + if (data->args.hipSignalExternalSemaphoresAsync.extSemArray == NULL) oss << "extSemArray=NULL"; + else { oss << "extSemArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.extSemArray__val); } + if (data->args.hipSignalExternalSemaphoresAsync.paramsArray == NULL) oss << ", paramsArray=NULL"; + else { oss << ", paramsArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.paramsArray__val); } + oss << ", numExtSems="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.numExtSems); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamAddCallback: + oss << "hipStreamAddCallback("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.stream); + oss << ", callback="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.callback); + oss << ", userData="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.userData); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamAttachMemAsync: + oss << "hipStreamAttachMemAsync("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.stream); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.dev_ptr); + oss << ", length="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.length); + oss << ", flags="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamBatchMemOp: + oss << "hipStreamBatchMemOp("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.stream); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.count); + if (data->args.hipStreamBatchMemOp.paramArray == NULL) oss << ", paramArray=NULL"; + else { oss << ", paramArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.paramArray__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamBeginCapture: + oss << "hipStreamBeginCapture("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCapture.stream); + oss << ", mode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCapture.mode); + oss << ")"; + break; + case HIP_API_ID_hipStreamBeginCaptureToGraph: + oss << "hipStreamBeginCaptureToGraph("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCaptureToGraph.stream); + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCaptureToGraph.graph); + if (data->args.hipStreamBeginCaptureToGraph.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCaptureToGraph.dependencies__val); } + if (data->args.hipStreamBeginCaptureToGraph.dependencyData == NULL) oss << ", dependencyData=NULL"; + else { oss << ", dependencyData="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCaptureToGraph.dependencyData__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipStreamBeginCaptureToGraph.numDependencies); + oss << ", mode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCaptureToGraph.mode); + oss << ")"; + break; + case HIP_API_ID_hipStreamCreate: + oss << "hipStreamCreate("; + if (data->args.hipStreamCreate.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreate.stream__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamCreateWithFlags: + oss << "hipStreamCreateWithFlags("; + if (data->args.hipStreamCreateWithFlags.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithFlags.stream__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamCreateWithPriority: + oss << "hipStreamCreateWithPriority("; + if (data->args.hipStreamCreateWithPriority.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.stream__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.flags); + oss << ", priority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.priority); + oss << ")"; + break; + case HIP_API_ID_hipStreamDestroy: + oss << "hipStreamDestroy("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamDestroy.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamEndCapture: + oss << "hipStreamEndCapture("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.stream); + if (data->args.hipStreamEndCapture.pGraph == NULL) oss << ", pGraph=NULL"; + else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipStreamEndCapture.pGraph__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetAttribute: + oss << "hipStreamGetAttribute("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.stream); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.attr); + if (data->args.hipStreamGetAttribute.value_out == NULL) oss << ", value_out=NULL"; + else { oss << ", value_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.value_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetCaptureInfo: + oss << "hipStreamGetCaptureInfo("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.stream); + if (data->args.hipStreamGetCaptureInfo.pCaptureStatus == NULL) oss << ", pCaptureStatus=NULL"; + else { oss << ", pCaptureStatus="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.pCaptureStatus__val); } + if (data->args.hipStreamGetCaptureInfo.pId == NULL) oss << ", pId=NULL"; + else { oss << ", pId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.pId__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetCaptureInfo_v2: + oss << "hipStreamGetCaptureInfo_v2("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.stream); + if (data->args.hipStreamGetCaptureInfo_v2.captureStatus_out == NULL) oss << ", captureStatus_out=NULL"; + else { oss << ", captureStatus_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.captureStatus_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.id_out == NULL) oss << ", id_out=NULL"; + else { oss << ", id_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.id_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.graph_out == NULL) oss 
<< ", graph_out=NULL"; + else { oss << ", graph_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.graph_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.dependencies_out == NULL) oss << ", dependencies_out=NULL"; + else { oss << ", dependencies_out="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipStreamGetCaptureInfo_v2.dependencies_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.numDependencies_out == NULL) oss << ", numDependencies_out=NULL"; + else { oss << ", numDependencies_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.numDependencies_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetDevice: + oss << "hipStreamGetDevice("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetDevice.stream); + if (data->args.hipStreamGetDevice.device == NULL) oss << ", device=NULL"; + else { oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetDevice.device__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetFlags: + oss << "hipStreamGetFlags("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetFlags.stream); + if (data->args.hipStreamGetFlags.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetId: + oss << "hipStreamGetId("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetId.stream); + if (data->args.hipStreamGetId.streamId == NULL) oss << ", streamId=NULL"; + else { oss << ", streamId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetId.streamId__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetPriority: + oss << "hipStreamGetPriority("; + oss << "stream="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetPriority.stream); + if (data->args.hipStreamGetPriority.priority == NULL) oss << ", priority=NULL"; + else { oss << ", priority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetPriority.priority__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamIsCapturing: + oss << "hipStreamIsCapturing("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamIsCapturing.stream); + if (data->args.hipStreamIsCapturing.pCaptureStatus == NULL) oss << ", pCaptureStatus=NULL"; + else { oss << ", pCaptureStatus="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamIsCapturing.pCaptureStatus__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamQuery: + oss << "hipStreamQuery("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamQuery.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamSetAttribute: + oss << "hipStreamSetAttribute("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.stream); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.attr); + if (data->args.hipStreamSetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.value__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamSynchronize: + oss << "hipStreamSynchronize("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSynchronize.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamUpdateCaptureDependencies: + oss << "hipStreamUpdateCaptureDependencies("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.stream); + if (data->args.hipStreamUpdateCaptureDependencies.dependencies == NULL) oss << ", 
dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.numDependencies); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitEvent: + oss << "hipStreamWaitEvent("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.stream); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.event); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitValue32: + oss << "hipStreamWaitValue32("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.flags); + oss << ", mask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.mask); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitValue64: + oss << "hipStreamWaitValue64("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipStreamWaitValue64.flags); + oss << ", mask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.mask); + oss << ")"; + break; + case HIP_API_ID_hipStreamWriteValue32: + oss << "hipStreamWriteValue32("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWriteValue64: + oss << "hipStreamWriteValue64("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetAddress: + oss << "hipTexRefGetAddress("; + if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetAddress.dev_ptr__val); } + if (data->args.hipTexRefGetAddress.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetAddress.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetArray: + oss << "hipTexRefGetArray("; + if (data->args.hipTexRefGetArray.pArray == NULL) oss << "pArray=NULL"; + else { oss << "pArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetArray.pArray__val); } + 
if (data->args.hipTexRefGetArray.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetArray.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetBorderColor: + oss << "hipTexRefGetBorderColor("; + if (data->args.hipTexRefGetBorderColor.pBorderColor == NULL) oss << "pBorderColor=NULL"; + else { oss << "pBorderColor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetBorderColor.pBorderColor__val); } + if (data->args.hipTexRefGetBorderColor.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetBorderColor.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetFlags: + oss << "hipTexRefGetFlags("; + if (data->args.hipTexRefGetFlags.pFlags == NULL) oss << "pFlags=NULL"; + else { oss << "pFlags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFlags.pFlags__val); } + if (data->args.hipTexRefGetFlags.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFlags.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetFormat: + oss << "hipTexRefGetFormat("; + if (data->args.hipTexRefGetFormat.pFormat == NULL) oss << "pFormat=NULL"; + else { oss << "pFormat="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.pFormat__val); } + if (data->args.hipTexRefGetFormat.pNumChannels == NULL) oss << ", pNumChannels=NULL"; + else { oss << ", pNumChannels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.pNumChannels__val); } + if (data->args.hipTexRefGetFormat.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMaxAnisotropy: 
+ oss << "hipTexRefGetMaxAnisotropy("; + if (data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio == NULL) oss << "pmaxAnsio=NULL"; + else { oss << "pmaxAnsio="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio__val); } + if (data->args.hipTexRefGetMaxAnisotropy.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMaxAnisotropy.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipMappedArray: + oss << "hipTexRefGetMipMappedArray("; + if (data->args.hipTexRefGetMipMappedArray.pArray == NULL) oss << "pArray=NULL"; + else { oss << "pArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipMappedArray.pArray__val); } + if (data->args.hipTexRefGetMipMappedArray.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipMappedArray.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipmapLevelBias: + oss << "hipTexRefGetMipmapLevelBias("; + if (data->args.hipTexRefGetMipmapLevelBias.pbias == NULL) oss << "pbias=NULL"; + else { oss << "pbias="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelBias.pbias__val); } + if (data->args.hipTexRefGetMipmapLevelBias.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelBias.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: + oss << "hipTexRefGetMipmapLevelClamp("; + if (data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp == NULL) oss << "pminMipmapLevelClamp=NULL"; + else { oss << "pminMipmapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp__val); } + if (data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp == 
NULL) oss << ", pmaxMipmapLevelClamp=NULL"; + else { oss << ", pmaxMipmapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp__val); } + if (data->args.hipTexRefGetMipmapLevelClamp.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelClamp.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetAddress: + oss << "hipTexRefSetAddress("; + if (data->args.hipTexRefSetAddress.ByteOffset == NULL) oss << "ByteOffset=NULL"; + else { oss << "ByteOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.ByteOffset__val); } + if (data->args.hipTexRefSetAddress.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.texRef__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.dptr); + oss << ", bytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.bytes); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetAddress2D: + oss << "hipTexRefSetAddress2D("; + if (data->args.hipTexRefSetAddress2D.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.texRef__val); } + if (data->args.hipTexRefSetAddress2D.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.desc__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.dptr); + oss << ", Pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.Pitch); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetArray: + oss << "hipTexRefSetArray("; + if (data->args.hipTexRefSetArray.tex == 
NULL) oss << "tex=NULL"; + else { oss << "tex="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.tex__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.array); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetBorderColor: + oss << "hipTexRefSetBorderColor("; + if (data->args.hipTexRefSetBorderColor.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetBorderColor.texRef__val); } + if (data->args.hipTexRefSetBorderColor.pBorderColor == NULL) oss << ", pBorderColor=NULL"; + else { oss << ", pBorderColor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetBorderColor.pBorderColor__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetFlags: + oss << "hipTexRefSetFlags("; + if (data->args.hipTexRefSetFlags.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFlags.texRef__val); } + oss << ", Flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFlags.Flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetFormat: + oss << "hipTexRefSetFormat("; + if (data->args.hipTexRefSetFormat.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.texRef__val); } + oss << ", fmt="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.fmt); + oss << ", NumPackedComponents="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.NumPackedComponents); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMaxAnisotropy: + oss << "hipTexRefSetMaxAnisotropy("; + if (data->args.hipTexRefSetMaxAnisotropy.texRef == NULL) oss << "texRef=NULL"; + 
else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMaxAnisotropy.texRef__val); } + oss << ", maxAniso="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMaxAnisotropy.maxAniso); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmapLevelBias: + oss << "hipTexRefSetMipmapLevelBias("; + if (data->args.hipTexRefSetMipmapLevelBias.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelBias.texRef__val); } + oss << ", bias="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelBias.bias); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmapLevelClamp: + oss << "hipTexRefSetMipmapLevelClamp("; + if (data->args.hipTexRefSetMipmapLevelClamp.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.texRef__val); } + oss << ", minMipMapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.minMipMapLevelClamp); + oss << ", maxMipMapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.maxMipMapLevelClamp); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmappedArray: + oss << "hipTexRefSetMipmappedArray("; + if (data->args.hipTexRefSetMipmappedArray.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.texRef__val); } + if (data->args.hipTexRefSetMipmappedArray.mipmappedArray == NULL) oss << ", mipmappedArray=NULL"; + else { oss << ", mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.mipmappedArray__val); } + oss << ", Flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.Flags); + oss << ")"; + 
break; + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: + oss << "hipThreadExchangeStreamCaptureMode("; + if (data->args.hipThreadExchangeStreamCaptureMode.mode == NULL) oss << "mode=NULL"; + else { oss << "mode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipThreadExchangeStreamCaptureMode.mode__val); } + oss << ")"; + break; + case HIP_API_ID_hipUserObjectCreate: + oss << "hipUserObjectCreate("; + if (data->args.hipUserObjectCreate.object_out == NULL) oss << "object_out=NULL"; + else { oss << "object_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.object_out__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.ptr); + oss << ", destroy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.destroy); + oss << ", initialRefcount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.initialRefcount); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRelease: + oss << "hipUserObjectRelease("; + oss << "object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRelease.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRelease.count); + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRetain: + oss << "hipUserObjectRetain("; + oss << "object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRetain.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRetain.count); + oss << ")"; + break; + case HIP_API_ID_hipWaitExternalSemaphoresAsync: + oss << "hipWaitExternalSemaphoresAsync("; + if (data->args.hipWaitExternalSemaphoresAsync.extSemArray == NULL) oss << "extSemArray=NULL"; + else { oss << "extSemArray="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.extSemArray__val); } + if (data->args.hipWaitExternalSemaphoresAsync.paramsArray == NULL) oss << ", paramsArray=NULL"; + else { oss << ", paramsArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.paramsArray__val); } + oss << ", numExtSems="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.numExtSems); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.stream); + oss << ")"; + break; + default: oss << "unknown"; + }; + return strdup(oss.str().c_str()); +} +#endif // HIP_PROF_HIP_API_STRING +#endif // _HIP_PROF_STR_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/hip_runtime_prof.h b/3rdparty/hip-headers/include/hip/amd_detail/hip_runtime_prof.h new file mode 100644 index 0000000000..d53eeffca6 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/hip_runtime_prof.h @@ -0,0 +1,77 @@ +/* +Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H + +// HIP ROCclr operation IDs: the values documented for the `op` argument of hipEnableActivityCallback(); kHipVdiOpIdNumber (3) equals the number of IDs declared before it +enum HipVdiOpId { + kHipVdiOpIdDispatch = 0, + kHipVdiOpIdCopy = 1, + kHipVdiOpIdBarrier = 2, + kHipVdiOpIdNumber = 3 +}; + +// Types of ROCclr commands: the ids documented for hipGetCmdName(). NOTE(review): the values look like OpenCL cl_command_type codes (TODO confirm); kHipHipVdiMemcpyHostToDevice and kHipVidMemcpyDeviceToHostRect are spelled exactly as declared here, so do not rename them without checking users +enum HipVdiCommandKind { + kHipVdiCommandKernel = 0x11F0, + kHipVdiCommandTask = 0x11F1, + kHipVdiMemcpyDeviceToHost = 0x11F3, + kHipHipVdiMemcpyHostToDevice = 0x11F4, + kHipVdiMemcpyDeviceToDevice = 0x11F5, + kHipVidMemcpyDeviceToHostRect = 0x1201, + kHipVdiMemcpyHostToDeviceRect = 0x1202, + kHipVdiMemcpyDeviceToDeviceRect = 0x1203, + kHipVdiFillMemory = 0x1207, +}; + +/** + * @brief Initializes activity callback + * + * @param[in] id_callback Event ID callback function, passed as an untyped pointer (the expected function signature is not visible in this header) + * @param[in] op_callback Event operation callback function, passed as an untyped pointer (the expected function signature is not visible in this header) + * @param[in] arg Arguments passed into callback + * + * @returns None + */ +void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg); + +/** + * @brief Enables activity callback + * + * @param[in] op Operation, which will trigger a callback (@see HipVdiOpId) + * @param[in] enable Enable state for the callback + * + * @returns True if successful + */ +bool hipEnableActivityCallback(uint32_t op, bool enable); + +/** + * @brief Returns the description string for the operation kind + * + * @param[in] id Command kind id (@see HipVdiCommandKind) + * + * @returns A pointer to a const string with the command description + */ +const char* hipGetCmdName(uint32_t id); + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/host_defines.h 
b/3rdparty/hip-headers/include/hip/amd_detail/host_defines.h new file mode 100644 index 0000000000..8081966cf7 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/host_defines.h @@ -0,0 +1,262 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @file amd_detail/host_defines.h + * @brief Fixed-width integer typedefs and type-trait/index_sequence helpers in namespace __hip_internal, plus the __host__/__device__/__global__ family of qualifier macros + */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H + +// Define GENERIC_GRID_LAUNCH as 1 unless it is already defined +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif // GENERIC_GRID_LAUNCH + +#if defined(__cplusplus) +namespace __hip_internal { +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef signed long long int64_t; +#if defined(_MSC_VER) +typedef unsigned long long size_t; +#else +typedef unsigned long size_t; +#endif + +template struct integral_constant { + static constexpr const _Tp value = __v; + typedef _Tp value_type; + typedef integral_constant type; + constexpr operator value_type() const { return value; } + constexpr value_type operator()() const { return value; } +}; +template constexpr const _Tp integral_constant<_Tp, __v>::value; + +typedef integral_constant true_type; +typedef integral_constant false_type; + +template using bool_constant = integral_constant; +typedef bool_constant true_type; +typedef bool_constant false_type; + +template struct enable_if {}; +template struct enable_if { + typedef __T type; +}; + +template struct true_or_false_type : public false_type {}; +template <> struct true_or_false_type : public true_type {}; + +template struct is_integral : public false_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type 
{}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; + +template struct is_arithmetic : public false_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; + +template struct is_floating_point : public false_type {}; +template <> struct is_floating_point : public true_type {}; +template <> struct is_floating_point : public true_type {}; +template <> struct is_floating_point : public true_type {}; + +template struct is_same : public false_type {}; +template struct is_same<__T, __T> : public true_type {}; + +template ::value> struct is_signed : public false_type {}; +template struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> { +}; + +template auto test_returnable(int) + -> decltype(void(static_cast(nullptr)), true_type{}); +template auto test_returnable(...) 
-> false_type; + +template struct type_identity { + using type = T; +}; + +template // Note that `cv void&` is a substitution failure +auto try_add_lvalue_reference(int) -> type_identity; +template // Handle T = cv void case +auto try_add_lvalue_reference(...) -> type_identity; + +template auto try_add_rvalue_reference(int) -> type_identity; +template auto try_add_rvalue_reference(...) -> type_identity; + +template struct add_lvalue_reference : decltype(try_add_lvalue_reference(0)) {}; + +template struct add_rvalue_reference : decltype(try_add_rvalue_reference(0)) {}; + +template typename add_rvalue_reference::type declval() noexcept; + +template auto test_implicitly_convertible(int) + -> decltype(void(declval()(declval())), true_type{}); + +template auto test_implicitly_convertible(...) -> false_type; + +template struct remove_cv { + typedef T type; +}; +template struct remove_cv { + typedef T type; +}; +template struct remove_cv { + typedef T type; +}; +template struct remove_cv { + typedef T type; +}; + +template struct is_void : public is_same::type> {}; + +template struct is_convertible + : public integral_constant(0))::value && + decltype(test_implicitly_convertible(0))::value) || + (is_void::value && is_void::value)> {}; + +template struct char_traits; +template > class basic_istream; +template > class basic_ostream; +typedef basic_istream istream; +typedef basic_ostream ostream; + +template struct is_standard_layout + : public integral_constant {}; + +template struct is_trivial : public integral_constant {}; + + +template struct conditional { + using type = T; +}; +template struct conditional { + using type = F; +}; + +template struct alignment_of : integral_constant {}; + +template struct integer_sequence { + using value_type = T; + static constexpr size_t size() noexcept { return sizeof...(Ints); } +}; + +template using index_sequence = integer_sequence; + +template struct make_index_sequence_impl + : make_index_sequence_impl<_hip_N - 1, _hip_N - 1, 
Ints...> {}; + +template struct make_index_sequence_impl<0, Ints...> { + using type = index_sequence; +}; + +template using make_index_sequence = + typename make_index_sequence_impl<_hip_N>::type; + +template +constexpr index_sequence make_index_sequence_value(index_sequence) { + return {}; +} +} // namespace __hip_internal +typedef __hip_internal::uint8_t __hip_uint8_t; +typedef __hip_internal::uint16_t __hip_uint16_t; +typedef __hip_internal::uint32_t __hip_uint32_t; +typedef __hip_internal::uint64_t __hip_uint64_t; +typedef __hip_internal::int8_t __hip_int8_t; +typedef __hip_internal::int16_t __hip_int16_t; +typedef __hip_internal::int32_t __hip_int32_t; +typedef __hip_internal::int64_t __hip_int64_t; +#endif // defined(__cplusplus) + +#if defined(__clang__) && defined(__HIP__) +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +#if !defined(__has_feature) || !__has_feature(cuda_noinline_keyword) +#define __noinline__ __attribute__((noinline)) +#endif + +#define __forceinline__ inline __attribute__((always_inline)) + +#if __HIP_NO_IMAGE_SUPPORT +#define __hip_img_chk__ \ + __attribute__((unavailable("The image/texture API not supported on the device"))) +#else +#define __hip_img_chk__ +#endif // __HIP_NO_IMAGE_SUPPORT + +#else + +// Not clang in HIP mode: every qualifier macro below expands to nothing, except __forceinline__ which becomes plain inline +/** + * Function and kernel markers + */ +#define __host__ +#define __device__ + +#define __global__ + +#define __noinline__ +#define __forceinline__ inline + +#define __shared__ +#define __constant__ + +#define __hip_img_chk__ +#endif // defined(__clang__) && defined(__HIP__) + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H diff --git a/3rdparty/hip-headers/include/hip/amd_detail/math_fwd.h b/3rdparty/hip-headers/include/hip/amd_detail/math_fwd.h new file mode 100644 index 0000000000..a20594d617 --- /dev/null +++ 
b/3rdparty/hip-headers/include/hip/amd_detail/math_fwd.h @@ -0,0 +1,289 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if !defined(__HIPCC_RTC__) +#include "host_defines.h" +#include "amd_hip_vector_types.h" // For Native_vec_ +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +// DOT FUNCTIONS (__ockl_sdot/__ockl_udot 2, 4 and 8; declared only for clang in HIP mode) +#if defined(__clang__) && defined(__HIP__) +__device__ __attribute__((const)) int __ockl_sdot2(HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, int, + bool); + +__device__ __attribute__((const)) unsigned int __ockl_udot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, unsigned int, bool); + +__device__ __attribute__((const)) int __ockl_sdot4(HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, int, + bool); + +__device__ __attribute__((const)) unsigned int __ockl_udot4( + HIP_vector_base::Native_vec_, HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ __attribute__((const)) int __ockl_sdot8(int, int, int, bool); + +__device__ __attribute__((const)) unsigned int __ockl_udot8(unsigned int, unsigned int, + unsigned int, bool); +#endif // defined(__clang__) && defined(__HIP__) + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +// BEGIN FLOAT (__ocml_*_f32 declarations) +__device__ __attribute__((const)) float __ocml_acos_f32(float); +__device__ __attribute__((pure)) float __ocml_acosh_f32(float); +__device__ __attribute__((const)) float __ocml_asin_f32(float); +__device__ __attribute__((pure)) float __ocml_asinh_f32(float); +__device__ __attribute__((const)) float __ocml_atan2_f32(float, float); +__device__ __attribute__((const)) float __ocml_atan_f32(float); +__device__ __attribute__((pure)) float __ocml_atanh_f32(float); +__device__ __attribute__((pure)) float __ocml_cbrt_f32(float); +__device__ __attribute__((const)) float __ocml_ceil_f32(float); +__device__ __attribute__((const)) __device__ float __ocml_copysign_f32(float, float); +__device__ float __ocml_cos_f32(float); +__device__ float __ocml_native_cos_f32(float); +__device__ __attribute__((pure)) __device__ float __ocml_cosh_f32(float); +__device__ float __ocml_cospi_f32(float); +__device__ float __ocml_i0_f32(float); 
+__device__ float __ocml_i1_f32(float); +__device__ __attribute__((pure)) float __ocml_erfc_f32(float); +__device__ __attribute__((pure)) float __ocml_erfcinv_f32(float); +__device__ __attribute__((pure)) float __ocml_erfcx_f32(float); +__device__ __attribute__((pure)) float __ocml_erf_f32(float); +__device__ __attribute__((pure)) float __ocml_erfinv_f32(float); +__device__ __attribute__((pure)) float __ocml_exp10_f32(float); +__device__ __attribute__((pure)) float __ocml_native_exp10_f32(float); +__device__ __attribute__((pure)) float __ocml_exp2_f32(float); +__device__ __attribute__((pure)) float __ocml_exp_f32(float); +__device__ __attribute__((pure)) float __ocml_native_exp_f32(float); +__device__ __attribute__((pure)) float __ocml_expm1_f32(float); +__device__ __attribute__((const)) float __ocml_fabs_f32(float); +__device__ __attribute__((const)) float __ocml_fdim_f32(float, float); +__device__ __attribute__((const)) float __ocml_floor_f32(float); +__device__ __attribute__((const)) float __ocml_fma_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_fmax_f32(float, float); +__device__ __attribute__((const)) float __ocml_fmin_f32(float, float); +__device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); +__device__ float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*); +__device__ __attribute__((const)) float __ocml_hypot_f32(float, float); +__device__ __attribute__((const)) int __ocml_ilogb_f32(float); +__device__ __attribute__((const)) int __ocml_isfinite_f32(float); +__device__ __attribute__((const)) int __ocml_isinf_f32(float); +__device__ __attribute__((const)) int __ocml_isnan_f32(float); +__device__ float __ocml_j0_f32(float); +__device__ float __ocml_j1_f32(float); +__device__ __attribute__((const)) float __ocml_ldexp_f32(float, int); +__device__ float __ocml_lgamma_f32(float); +__device__ __attribute__((pure)) float __ocml_log10_f32(float); +__device__ __attribute__((pure)) float 
__ocml_native_log10_f32(float); +__device__ __attribute__((pure)) float __ocml_log1p_f32(float); +__device__ __attribute__((pure)) float __ocml_log2_f32(float); +__device__ __attribute__((pure)) float __ocml_native_log2_f32(float); +__device__ __attribute__((const)) float __ocml_logb_f32(float); +__device__ __attribute__((pure)) float __ocml_log_f32(float); +__device__ __attribute__((pure)) float __ocml_native_log_f32(float); +__device__ float __ocml_modf_f32(float, __attribute__((address_space(5))) float*); +__device__ __attribute__((const)) float __ocml_nearbyint_f32(float); +__device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); +__device__ __attribute__((const)) float __ocml_len3_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_len4_f32(float, float, float, float); +__device__ __attribute__((pure)) float __ocml_ncdf_f32(float); +__device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float); +__device__ __attribute__((pure)) float __ocml_pow_f32(float, float); +__device__ __attribute__((pure)) float __ocml_pown_f32(float, int); +__device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); +__device__ __attribute__((const)) float __ocml_remainder_f32(float, float); +__device__ float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*); +__device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); +__device__ __attribute__((const)) float __ocml_rint_f32(float); +__device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_rlen4_f32(float, float, float, float); +__device__ __attribute__((const)) float __ocml_round_f32(float); +__device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); +__device__ __attribute__((const)) float __ocml_scalb_f32(float, float); +__device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); +__device__ __attribute__((const)) int __ocml_signbit_f32(float); +__device__ float 
__ocml_sincos_f32(float, __attribute__((address_space(5))) float*); +__device__ float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*); +__device__ float __ocml_sin_f32(float); +__device__ float __ocml_native_sin_f32(float); +__device__ __attribute__((pure)) float __ocml_sinh_f32(float); +__device__ float __ocml_sinpi_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_f32(float); +__device__ __attribute__((const)) float __ocml_native_sqrt_f32(float); +__device__ float __ocml_tan_f32(float); +__device__ __attribute__((pure)) float __ocml_tanh_f32(float); +__device__ float __ocml_tgamma_f32(float); +__device__ __attribute__((const)) float __ocml_trunc_f32(float); +__device__ float __ocml_y0_f32(float); +__device__ float __ocml_y1_f32(float); + +// BEGIN INTRINSICS (add/sub/mul/div/sqrt/fma, one declaration per _rte/_rtn/_rtp/_rtz suffix) +__device__ __attribute__((const)) float __ocml_add_rte_f32(float, float); +__device__ __attribute__((const)) float __ocml_add_rtn_f32(float, float); +__device__ __attribute__((const)) float __ocml_add_rtp_f32(float, float); +__device__ __attribute__((const)) float __ocml_add_rtz_f32(float, float); +__device__ __attribute__((const)) float __ocml_sub_rte_f32(float, float); +__device__ __attribute__((const)) float __ocml_sub_rtn_f32(float, float); +__device__ __attribute__((const)) float __ocml_sub_rtp_f32(float, float); +__device__ __attribute__((const)) float __ocml_sub_rtz_f32(float, float); +__device__ __attribute__((const)) float __ocml_mul_rte_f32(float, float); +__device__ __attribute__((const)) float __ocml_mul_rtn_f32(float, float); +__device__ __attribute__((const)) float __ocml_mul_rtp_f32(float, float); +__device__ __attribute__((const)) float __ocml_mul_rtz_f32(float, float); +__device__ __attribute__((const)) float __ocml_div_rte_f32(float, float); +__device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float); +__device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float); +__device__ __attribute__((const)) float __ocml_div_rtz_f32(float, 
float); +__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float); +__device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); +__device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float); +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE (__ocml_*_f64 declarations) +__device__ __attribute__((const)) double __ocml_acos_f64(double); +__device__ __attribute__((pure)) double __ocml_acosh_f64(double); +__device__ __attribute__((const)) double __ocml_asin_f64(double); +__device__ __attribute__((pure)) double __ocml_asinh_f64(double); +__device__ __attribute__((const)) double __ocml_atan2_f64(double, double); +__device__ __attribute__((const)) double __ocml_atan_f64(double); +__device__ __attribute__((pure)) double __ocml_atanh_f64(double); +__device__ __attribute__((pure)) double __ocml_cbrt_f64(double); +__device__ __attribute__((const)) double __ocml_ceil_f64(double); +__device__ __attribute__((const)) double __ocml_copysign_f64(double, double); +__device__ double __ocml_cos_f64(double); +__device__ __attribute__((pure)) double __ocml_cosh_f64(double); +__device__ double __ocml_cospi_f64(double); +__device__ double __ocml_i0_f64(double); +__device__ double __ocml_i1_f64(double); +__device__ __attribute__((pure)) double __ocml_erfc_f64(double); +__device__ __attribute__((pure)) double __ocml_erfcinv_f64(double); +__device__ __attribute__((pure)) double __ocml_erfcx_f64(double); +__device__ __attribute__((pure)) double __ocml_erf_f64(double); +__device__ __attribute__((pure)) double __ocml_erfinv_f64(double); +__device__ __attribute__((pure)) double __ocml_exp10_f64(double); 
+__device__ __attribute__((pure)) double __ocml_exp2_f64(double); +__device__ __attribute__((pure)) double __ocml_exp_f64(double); +__device__ __attribute__((pure)) double __ocml_expm1_f64(double); +__device__ __attribute__((const)) double __ocml_fabs_f64(double); +__device__ __attribute__((const)) double __ocml_fdim_f64(double, double); +__device__ __attribute__((const)) double __ocml_floor_f64(double); +__device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); +__device__ __attribute__((const)) double __ocml_fmax_f64(double, double); +__device__ __attribute__((const)) double __ocml_fmin_f64(double, double); +__device__ __attribute__((const)) double __ocml_fmod_f64(double, double); +__device__ double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*); +__device__ __attribute__((const)) double __ocml_hypot_f64(double, double); +__device__ __attribute__((const)) int __ocml_ilogb_f64(double); +__device__ __attribute__((const)) int __ocml_isfinite_f64(double); +__device__ __attribute__((const)) int __ocml_isinf_f64(double); +__device__ __attribute__((const)) int __ocml_isnan_f64(double); +__device__ double __ocml_j0_f64(double); +__device__ double __ocml_j1_f64(double); +__device__ __attribute__((const)) double __ocml_ldexp_f64(double, int); +__device__ double __ocml_lgamma_f64(double); +__device__ __attribute__((pure)) double __ocml_log10_f64(double); +__device__ __attribute__((pure)) double __ocml_log1p_f64(double); +__device__ __attribute__((pure)) double __ocml_log2_f64(double); +__device__ __attribute__((const)) double __ocml_logb_f64(double); +__device__ __attribute__((pure)) double __ocml_log_f64(double); +__device__ double __ocml_modf_f64(double, __attribute__((address_space(5))) double*); +__device__ __attribute__((const)) double __ocml_nearbyint_f64(double); +__device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); +__device__ __attribute__((const)) double __ocml_len3_f64(double, double, double); 
+__device__ __attribute__((const)) double __ocml_len4_f64(double, double, double, double); +__device__ __attribute__((pure)) double __ocml_ncdf_f64(double); +__device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double); +__device__ __attribute__((pure)) double __ocml_pow_f64(double, double); +__device__ __attribute__((pure)) double __ocml_pown_f64(double, int); +__device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); +__device__ __attribute__((const)) double __ocml_remainder_f64(double, double); +__device__ double __ocml_remquo_f64(double, double, __attribute__((address_space(5))) int*); +__device__ __attribute__((const)) double __ocml_rhypot_f64(double, double); +__device__ __attribute__((const)) double __ocml_rint_f64(double); +__device__ __attribute__((const)) double __ocml_rlen3_f64(double, double, double); +__device__ __attribute__((const)) double __ocml_rlen4_f64(double, double, double, double); +__device__ __attribute__((const)) double __ocml_round_f64(double); +__device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); +__device__ __attribute__((const)) double __ocml_scalb_f64(double, double); +__device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); +__device__ __attribute__((const)) int __ocml_signbit_f64(double); +__device__ double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*); +__device__ double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*); +__device__ double __ocml_sin_f64(double); +__device__ __attribute__((pure)) double __ocml_sinh_f64(double); +__device__ double __ocml_sinpi_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_f64(double); +__device__ double __ocml_tan_f64(double); +__device__ __attribute__((pure)) double __ocml_tanh_f64(double); +__device__ double __ocml_tgamma_f64(double); +__device__ __attribute__((const)) double __ocml_trunc_f64(double); +__device__ double __ocml_y0_f64(double); +__device__ double __ocml_y1_f64(double); + +// 
BEGIN INTRINSICS (add/sub/mul/div/sqrt/fma, one declaration per _rte/_rtn/_rtp/_rtz suffix) +__device__ __attribute__((const)) double __ocml_add_rte_f64(double, double); +__device__ __attribute__((const)) double __ocml_add_rtn_f64(double, double); +__device__ __attribute__((const)) double __ocml_add_rtp_f64(double, double); +__device__ __attribute__((const)) double __ocml_add_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_sub_rte_f64(double, double); +__device__ __attribute__((const)) double __ocml_sub_rtn_f64(double, double); +__device__ __attribute__((const)) double __ocml_sub_rtp_f64(double, double); +__device__ __attribute__((const)) double __ocml_sub_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_mul_rte_f64(double, double); +__device__ __attribute__((const)) double __ocml_mul_rtn_f64(double, double); +__device__ __attribute__((const)) double __ocml_mul_rtp_f64(double, double); +__device__ __attribute__((const)) double __ocml_mul_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_div_rte_f64(double, double); +__device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double); +__device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double); +__device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double); +__device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double, double); +__device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double, double); +__device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double, double); +__device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double, double); +// END INTRINSICS +// END DOUBLE + +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +#if 
defined(__cplusplus) +} // extern "C" +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/ockl_image.h b/3rdparty/hip-headers/include/hip/amd_detail/ockl_image.h new file mode 100644 index 0000000000..d874bee487 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/ockl_image.h @@ -0,0 +1,257 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if !defined(__HIPCC_RTC__) +#include +#endif + +extern "C" { + +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) // qualifies every i/s pointer parameter declared below + +__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int f, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int f, int 
l); + +__device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, int c, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, int2::Native_vec_ c, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, int2::Native_vec_ c, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, int4::Native_vec_ c, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, int4::Native_vec_ c, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, int2::Native_vec_ c, + int f, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, int4::Native_vec_ c, + int f, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, int c, int l, + float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, + int2::Native_vec_ c, int f, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, + int4::Native_vec_ c, int f, int l, + float4::Native_vec_ p); + +__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int 
ADDRESS_SPACE_CONSTANT* s, + float c); + +__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c, float dx, + float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c, + float2::Native_vec_ dx, + float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c, + float2::Native_vec_ dx, + float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + 
float4::Native_vec_ c, + float4::Native_vec_ dx, + float4::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT* i, + unsigned int ADDRESS_SPACE_CONSTANT* s, + float2::Native_vec_ c); + +__device__ int 
__ockl_image_channel_data_type_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); +} // extern "C" diff --git a/3rdparty/hip-headers/include/hip/amd_detail/texture_fetch_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/texture_fetch_functions.h new file mode 100644 
index 0000000000..dd1580c1ed --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/texture_fetch_functions.h @@ -0,0 +1,466 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ + (void)s; + +template struct __hip_is_tex_surf_scalar_channel_type { + static constexpr bool value = + __hip_internal::is_same::value || __hip_internal::is_same::value || + __hip_internal::is_same::value || + __hip_internal::is_same::value || __hip_internal::is_same::value || + __hip_internal::is_same::value || __hip_internal::is_same::value; +}; + +template struct __hip_is_tex_surf_channel_type { + static constexpr bool value = __hip_is_tex_surf_scalar_channel_type::value; +}; + +template +struct __hip_is_tex_surf_channel_type> { + static constexpr bool value = __hip_is_tex_surf_scalar_channel_type::value && + ((rank == 1) || (rank == 2) || (rank == 4)); +}; + +template struct __hip_is_tex_normalized_channel_type { + static constexpr bool value = + __hip_internal::is_same::value || __hip_internal::is_same::value || + __hip_internal::is_same::value || __hip_internal::is_same::value; +}; + +template +struct __hip_is_tex_normalized_channel_type> { + static constexpr bool value = + __hip_is_tex_normalized_channel_type::value && ((rank == 1) || (rank == 2) || (rank == 4)); +}; + +template struct __hip_tex_ret { + static_assert(__hip_internal::is_same::value, "Invalid channel type!"); +}; + +/* + * Map from device function return U to scalar texture type T + */ +template __forceinline__ __device__ + typename __hip_internal::enable_if<__hip_is_tex_surf_scalar_channel_type::value, + const T>::type + __hipMapFrom(const U& u) { + if constexpr (sizeof(T) < sizeof(float)) { + union { + U u; + int i; + } d = {u}; + return static_cast(d.i); + } else { // sizeof(T) == sizeof(float) + union { + U u; + T t; + } d 
= {u}; + return d.t; + } +} + +/* + * Map from device function return U to vector texture type T + */ +template __forceinline__ __device__ typename __hip_internal::enable_if< + __hip_is_tex_surf_scalar_channel_type::value, const T>::type +__hipMapFrom(const U& u) { + if constexpr (sizeof(typename T::value_type) < sizeof(float)) { + union { + U u; + int4 i4; + } d = {u}; + return __hipMapVector(d.i4); + } else { // sizeof(typename T::value_type) == sizeof(float) + union { + U u; + T t; + } d = {u}; + return d.t; + } +} + +/* + * Map from scalar texture type T to device function input U + */ +template __forceinline__ __device__ + typename __hip_internal::enable_if<__hip_is_tex_surf_scalar_channel_type::value, + const U>::type + __hipMapTo(const T& t) { + if constexpr (sizeof(T) < sizeof(float)) { + union { + U u; + int i; + } d = {0}; + d.i = static_cast(t); + return d.u; + } else { // sizeof(T) == sizeof(float) + union { + U u; + T t; + } d = {0}; + d.t = t; + return d.u; + } +} + +/* + * Map from vector texture type T to device function input U + */ +template __forceinline__ __device__ typename __hip_internal::enable_if< + __hip_is_tex_surf_scalar_channel_type::value, const U>::type +__hipMapTo(const T& t) { + if constexpr (sizeof(typename T::value_type) < sizeof(float)) { + union { + U u; + int4 i4; + } d = {0}; + d.i4 = __hipMapVector(t); + return d.u; + } else { // sizeof(typename T::value_type) == sizeof(float) + union { + U u; + T t; + } d = {0}; + d.t = t; + return d.u; + } +} + +template using __hip_tex_ret_t = + typename __hip_tex_ret::type; + +template struct __hip_tex_ret< + T, hipReadModeElementType, + typename __hip_internal::enable_if<__hip_is_tex_surf_channel_type::value, bool>::type> { + using type = T; +}; + +template struct __hip_tex_ret< + HIP_vector_type, hipReadModeElementType, + typename __hip_internal::enable_if< + __hip_is_tex_surf_channel_type>::value, bool>::type> { + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template 
+struct __hip_tex_ret::value, bool>::type> { + using type = float; +}; + +template struct __hip_tex_ret< + HIP_vector_type, hipReadModeNormalizedFloat, + typename __hip_internal::enable_if< + __hip_is_tex_normalized_channel_type>::value, bool>::type> { + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1Dfetch( + texture t, int x) { + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_load_1Db(i, x); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1D( + texture t, float x) { + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2D( + texture t, float x, float y) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, y}; + auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayered( + texture t, float x, int layer) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayered( + texture t, float x, float y, int layer) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3D( + texture t, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template 
+static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemap( + texture t, float x, float y, float z) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLod( + texture t, float x, float level) { + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLod( + texture t, float x, float y, float level) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, y}; + auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredLod( + texture t, float x, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_lod_1Da(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredLod( + texture t, float x, float y, int layer, float level) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_lod_2Da(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DLod( + texture t, float x, float y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLod( + texture t, float x, float 
y, float z, float level) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayered( + texture t, float x, float y, float z, int layer) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayeredLod( + texture t, float x, float y, float z, int layer, + float level) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapGrad( + texture t, float x, float y, float z, float4 dPdx, + float4 dPdy) { + TEXTURE_PARAMETERS_INIT; + (void)x; + (void)y; + (void)z; + (void)dPdx; + (void)dPdy; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t>(tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t +texCubemapLayeredGrad(texture t, float x, float y, + float z, int layer, float4 dPdx, float4 dPdy) { + TEXTURE_PARAMETERS_INIT; + (void)x; + (void)y; + (void)z; + (void)layer; + (void)dPdx; + (void)dPdy; + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom<__hip_tex_ret_t>(tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DGrad( + texture t, float x, float dPdx, float dPdy) { + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DGrad( + texture t, float x, float y, float2 dPdx, float2 dPdy) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, y}; + auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx), + get_native_vector(dPdy)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredGrad( + texture t, float x, int layer, float dPdx, float dPdy) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredGrad( + texture t, float x, float y, int layer, float2 dPdx, + float2 dPdy) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords), get_native_vector(dPdx), + get_native_vector(dPdy)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DGrad( + texture t, float x, float y, float z, float4 dPdx, float4 dPdy) { + TEXTURE_PARAMETERS_INIT; + float4 coords{x, y, z, 0.0f}; + float4 gradx{dPdx.x, dPdx.y, dPdx.z, 0.0f}; + float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f}; + auto 
tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords), get_native_vector(gradx), + get_native_vector(grady)); + return __hipMapFrom<__hip_tex_ret_t>(tmp); +} + +template +struct __hip_tex2dgather_ret { + static_assert(__hip_internal::is_same::value, "Invalid channel type!"); +}; + +template using __hip_tex2dgather_ret_t = + typename __hip_tex2dgather_ret::type; + +template struct __hip_tex2dgather_ret< + T, hipReadModeElementType, + typename __hip_internal::enable_if<__hip_is_tex_surf_channel_type::value, bool>::type> { + using type = HIP_vector_type; +}; + +template struct __hip_tex2dgather_ret< + HIP_vector_type, hipReadModeElementType, + typename __hip_internal::enable_if< + __hip_is_tex_surf_channel_type>::value, bool>::type> { + using type = HIP_vector_type; +}; + +template +struct __hip_tex2dgather_ret::value, bool>::type> { + using type = float4; +}; + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t tex2Dgather( + texture t, float x, float y, int comp = 0) { + TEXTURE_PARAMETERS_INIT; + float2 coords{x, y}; + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords)); + return __hipMapFrom<__hip_tex2dgather_ret_t>(tmp); + } + } + return {}; +} + +#endif diff --git a/3rdparty/hip-headers/include/hip/amd_detail/texture_indirect_functions.h b/3rdparty/hip-headers/include/hip/amd_detail/texture_indirect_functions.h new file mode 100644 index 0000000000..f48b3bcf8c --- /dev/null +++ b/3rdparty/hip-headers/include/hip/amd_detail/texture_indirect_functions.h @@ 
-0,0 +1,474 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#define TEXTURE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ + (void)s; + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1Dfetch(hipTextureObject_t textureObject, int x) { + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_load_1Db(i, x); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1Dfetch(T* ptr, hipTextureObject_t textureObject, int x) { + *ptr = tex1Dfetch(textureObject, x); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1D(hipTextureObject_t textureObject, float x) { + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1D(T* ptr, hipTextureObject_t textureObject, float x) { + *ptr = tex1D(textureObject, x); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, y}; + auto tmp = __ockl_image_sample_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2D(T* ptr, hipTextureObject_t textureObject, float x, + float y) { + *ptr = tex2D(textureObject, x, y); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_3D(i, s, get_native_vector(coords)); + 
return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3D(T* ptr, hipTextureObject_t textureObject, float x, + float y, float z) { + *ptr = tex3D(textureObject, x, y, z); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, + int layer) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_1Da(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayered(T* ptr, hipTextureObject_t textureObject, + float x, int layer) { + *ptr = tex1DLayered(textureObject, x, layer); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, + int layer) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_2Da(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayered(T* ptr, hipTextureObject_t textureObject, + float x, float y, int layer) { + *ptr = tex2DLayered(textureObject, x, y, layer); // store the result of the 2D layered fetch (x, y, layer) +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, + float z) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_CM(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemap(T* ptr, hipTextureObject_t textureObject, float x, + float y, float z) { + *ptr = texCubemap(textureObject, x, y, z); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, + float y, float z, int layer) { +
TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_CMa(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayered(T* ptr, hipTextureObject_t textureObject, + float x, float y, float z, int layer) { + *ptr = texCubemapLayered(textureObject, x, y, z, layer); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, + int comp = 0) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, y}; + switch (comp) { // comp picks the gathered channel; same mapping as the texture-reference tex2Dgather + case 1: { // comp 1: second (g) channel + auto tmp = __ockl_image_gather4g_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; + } + case 2: { // comp 2: third (b) channel + auto tmp = __ockl_image_gather4b_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; + } + case 3: { // comp 3: fourth (a) channel + auto tmp = __ockl_image_gather4a_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; + } + default: { // comp 0 (the default argument) and any other value: first (r) channel + auto tmp = __ockl_image_gather4r_2D(i, s, get_native_vector(coords)); + return __hipMapFrom(tmp); + break; + } + } + return {}; +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2Dgather(T* ptr, hipTextureObject_t textureObject, + float x, float y, int comp = 0) { + *ptr = tex2Dgather(textureObject, x, y, comp); // store the result of the value-returning gather +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLod(hipTextureObject_t textureObject, float x, + float level) { + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLod(T* ptr, hipTextureObject_t textureObject, float x, + float level) { + *ptr = tex1DLod(textureObject, x, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, +
float level) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, y}; + auto tmp = __ockl_image_sample_lod_2D(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLod(T* ptr, hipTextureObject_t textureObject, float x, + float y, float level) { + *ptr = tex2DLod(textureObject, x, y, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, + float z, float level) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_3D(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3DLod(T* ptr, hipTextureObject_t textureObject, float x, + float y, float z, float level) { + *ptr = tex3DLod(textureObject, x, y, z, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, + int layer, float level) { + TEXTURE_OBJECT_PARAMETERS_INIT; + // Sample with the LOD variant so the caller's `level` is honoured; coords = (x, layer). + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_lod_1Da(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayeredLod(T* ptr, hipTextureObject_t textureObject, + float x, int layer, float level) { + *ptr = tex1DLayeredLod(textureObject, x, layer, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, + float y, int layer, float level) { + TEXTURE_OBJECT_PARAMETERS_INIT; + // Sample with the LOD variant so the caller's `level` is honoured; coords = (x, y, layer, 0). + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_lod_2Da(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayeredLod(T*
ptr, hipTextureObject_t textureObject, + float x, float y, int layer, float level) { + *ptr = tex2DLayeredLod(textureObject, x, y, layer, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, + float y, float z, float level) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, 0.0f}; + auto tmp = __ockl_image_sample_lod_CM(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLod(T* ptr, hipTextureObject_t textureObject, + float x, float y, float z, float level) { + *ptr = texCubemapLod(textureObject, x, y, z, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObject, float x, + float y, float z, float4 dPdx, float4 dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT; + (void)x; + (void)y; + (void)z; + (void)dPdx; + (void)dPdy; + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CM(i, s, get_native_vector(float4(x, y, z, 0.0f)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom(tmp); + return {}; +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapGrad(T* ptr, hipTextureObject_t textureObject, + float x, float y, float z, float4 dPdx, + float4 dPdy) { + *ptr = texCubemapGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, + float y, float z, int layer, float level) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, z, layer}; + auto tmp = __ockl_image_sample_lod_CMa(i, s, get_native_vector(coords), level); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayeredLod(T* ptr, + hipTextureObject_t textureObject, + float x, float y, float z, int layer, + float level) { + *ptr = texCubemapLayeredLod(textureObject, x, y, z, layer, level); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, + float dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DGrad(T* ptr, hipTextureObject_t textureObject, float x, + float dPdx, float dPdy) { + *ptr = tex1DGrad(textureObject, x, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, + float2 dPdx, float2 dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, y}; + auto tmp = __ockl_image_sample_grad_2D(i, s, get_native_vector(coords), get_native_vector(dPdx), + 
get_native_vector(dPdy)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DGrad(T* ptr, hipTextureObject_t textureObject, float x, + float y, float2 dPdx, float2 dPdy) { + *ptr = tex2DGrad(textureObject, x, y, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, + float z, float4 dPdx, float4 dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT; + // Only x/y/z of each gradient are forwarded; the fourth lane is zeroed. + float4 coords{x, y, z, 0.0f}; + float4 gradx{dPdx.x, dPdx.y, dPdx.z, 0.0f}; + float4 grady{dPdy.x, dPdy.y, dPdy.z, 0.0f}; + auto tmp = __ockl_image_sample_grad_3D(i, s, get_native_vector(coords), get_native_vector(gradx), + get_native_vector(grady)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3DGrad(T* ptr, hipTextureObject_t textureObject, float x, + float y, float z, float4 dPdx, float4 dPdy) { + *ptr = tex3DGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, + int layer, float dPdx, float dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT + float2 coords{x, layer}; + auto tmp = __ockl_image_sample_grad_1Da(i, s, get_native_vector(coords), dPdx, dPdy); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayeredGrad(T* ptr, hipTextureObject_t textureObject, + float x, int layer, float dPdx, + float dPdy) { + *ptr = tex1DLayeredGrad(textureObject, x, layer, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, + float y, int layer, float2 dPdx, float2 dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT + float4 coords{x, y, layer, 0.0f}; + auto tmp = __ockl_image_sample_grad_2Da(i, s, get_native_vector(coords),
get_native_vector(dPdx), + get_native_vector(dPdy)); + return __hipMapFrom(tmp); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayeredGrad(T* ptr, hipTextureObject_t textureObject, + float x, float y, int layer, float2 dPdx, + float2 dPdy) { + *ptr = tex2DLayeredGrad(textureObject, x, y, layer, dPdx, dPdy); +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, + float y, float z, int layer, float4 dPdx, + float4 dPdy) { + TEXTURE_OBJECT_PARAMETERS_INIT; + (void)x; + (void)y; + (void)z; + (void)layer; + (void)dPdx; + (void)dPdy; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CMa(i, s, get_native_vector(float4(x, y, z, layer)), + // get_native_vector(float4(dPdx.x, dPdx.y, dPdx.z, 0.0f)), get_native_vector(float4(dPdy.x, + // dPdy.y, dPdy.z, 0.0f))); return __hipMapFrom(tmp); + return {}; +} + +template ::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayeredGrad(T* ptr, + hipTextureObject_t textureObject, + float x, float y, float z, int layer, + float4 dPdx, float4 dPdy) { + *ptr = texCubemapLayeredGrad(textureObject, x, y, z, layer, dPdx, dPdy); +} + +#endif diff --git a/3rdparty/hip-headers/include/hip/channel_descriptor.h b/3rdparty/hip-headers/include/hip/channel_descriptor.h new file mode 100644 index 0000000000..21d5f2052e --- /dev/null +++ b/3rdparty/hip-headers/include/hip/channel_descriptor.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hip/driver_types.h b/3rdparty/hip-headers/include/hip/driver_types.h new file mode 100644 index 0000000000..1b64d165a4 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/driver_types.h @@ -0,0 +1,681 @@ +/* +Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_DRIVER_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#if __cplusplus +#include +#else +#include // size_t +#endif +#endif + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "driver_types.h" +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup DriverTypes Driver Types + * @{ + * This section describes the driver data types. 
+ * + */ + +typedef void* hipDeviceptr_t; +/** + * HIP channel format kinds + */ +typedef enum hipChannelFormatKind { + hipChannelFormatKindSigned = 0, ///< Signed channel format + hipChannelFormatKindUnsigned = 1, ///< Unsigned channel format + hipChannelFormatKindFloat = 2, ///< Float channel format + hipChannelFormatKindNone = 3 ///< No channel format +} hipChannelFormatKind; +/** + * HIP channel format descriptor + */ +typedef struct hipChannelFormatDesc { + int x; + int y; + int z; + int w; + enum hipChannelFormatKind f; ///< Channel format kind +} hipChannelFormatDesc; +/** @brief The hipTexRefSetArray function flags parameter override format value*/ +#define HIP_TRSA_OVERRIDE_FORMAT 0x01 +/** @brief The hipTexRefSetFlags function flags parameter read as integer value*/ +#define HIP_TRSF_READ_AS_INTEGER 0x01 +/** @brief The hipTexRefSetFlags function flags parameter normalized coordinate value*/ +#define HIP_TRSF_NORMALIZED_COORDINATES 0x02 +/** @brief The hipTexRefSetFlags function flags parameter srgb value*/ +#define HIP_TRSF_SRGB 0x10 + +typedef struct hipArray* hipArray_t; +typedef const struct hipArray* hipArray_const_t; +/** + * HIP array format + */ +typedef enum hipArray_Format { + HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit array format + HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit array format + HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit array format + HIP_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit array format + HIP_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit array format + HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit array format + HIP_AD_FORMAT_HALF = 0x10, ///< Half array format + HIP_AD_FORMAT_FLOAT = 0x20 ///< Float array format +} hipArray_Format; +/** + * HIP array descriptor + */ +typedef struct HIP_ARRAY_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int 
NumChannels; ///< Number of channels of the array +} HIP_ARRAY_DESCRIPTOR; + +/** + * HIP 3D array descriptor + */ +typedef struct HIP_ARRAY3D_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + size_t Depth; ///< Depth of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int NumChannels; ///< Number of channels of the array + unsigned int Flags; ///< Flags of the array +} HIP_ARRAY3D_DESCRIPTOR; +#if !defined(__HIPCC_RTC__) +/** + * HIP 2D memory copy parameters + */ +typedef struct hip_Memcpy2D { + size_t srcXInBytes; ///< Source width in bytes + size_t srcY; ///< Source height + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t dstXInBytes; ///< Destination width in bytes + size_t dstY; ///< Destination height + hipMemoryType dstMemoryType; ///< Destination memory type + void* dstHost; ///< Destination pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t WidthInBytes; ///< Width in bytes of the 2D memory copy + size_t Height; ///< Height of the 2D memory copy +} hip_Memcpy2D; +#endif // !defined(__HIPCC_RTC__) +/** + * HIP mipmapped array + */ +typedef struct hipMipmappedArray { + void* data; ///< Data pointer of the mipmapped array + struct hipChannelFormatDesc desc; ///< Description of the mipmapped array + unsigned int type; ///< Type of the mipmapped array + unsigned int width; ///< Width of the mipmapped array + unsigned int height; ///< Height of the mipmapped array + unsigned int depth; ///< Depth of the mipmapped array + unsigned int min_mipmap_level; ///< Minimum level of the mipmapped array + unsigned int max_mipmap_level; ///< Maximum level of the mipmapped array + unsigned int flags; ///< Flags of the 
mipmapped array + enum hipArray_Format format; ///< Format of the mipmapped array + unsigned int num_channels; ///< Number of channels of the mipmapped array +} hipMipmappedArray; +/** + * HIP mipmapped array pointer + */ +typedef struct hipMipmappedArray* hipMipmappedArray_t; +typedef hipMipmappedArray_t hipmipmappedArray; +typedef const struct hipMipmappedArray* hipMipmappedArray_const_t; +/** + * HIP resource types + */ +typedef enum hipResourceType { + hipResourceTypeArray = 0x00, ///< Array resource + hipResourceTypeMipmappedArray = 0x01, ///< Mipmapped array resource + hipResourceTypeLinear = 0x02, ///< Linear resource + hipResourceTypePitch2D = 0x03 ///< Pitch 2D resource +} hipResourceType; +typedef enum HIPresourcetype_enum { + HIP_RESOURCE_TYPE_ARRAY = 0x00, ///< Array resource + HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, ///< Mipmapped array resource + HIP_RESOURCE_TYPE_LINEAR = 0x02, ///< Linear resource + HIP_RESOURCE_TYPE_PITCH2D = 0x03 ///< Pitch 2D resource +} HIPresourcetype, + hipResourcetype; +/** + * HIP texture address modes + */ +typedef enum HIPaddress_mode_enum { + HIP_TR_ADDRESS_MODE_WRAP = 0, ///< Wrap address mode + HIP_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp address mode + HIP_TR_ADDRESS_MODE_MIRROR = 2, ///< Mirror address mode + HIP_TR_ADDRESS_MODE_BORDER = 3 ///< Border address mode +} HIPaddress_mode; +/** + * HIP filter modes + */ +typedef enum HIPfilter_mode_enum { + HIP_TR_FILTER_MODE_POINT = 0, ///< Filter mode point + HIP_TR_FILTER_MODE_LINEAR = 1 ///< Filter mode linear +} HIPfilter_mode; +/** + * HIP texture descriptor + */ +typedef struct HIP_TEXTURE_DESC_st { + HIPaddress_mode addressMode[3]; ///< Address modes + HIPfilter_mode filterMode; ///< Filter mode + unsigned int flags; ///< Flags + unsigned int maxAnisotropy; ///< Maximum anisotropy ratio + HIPfilter_mode mipmapFilterMode; ///< Mipmap filter mode + float mipmapLevelBias; ///< Mipmap level bias + float minMipmapLevelClamp; ///< Mipmap minimum level clamp + float 
maxMipmapLevelClamp; ///< Mipmap maximum level clamp + float borderColor[4]; ///< Border Color + int reserved[12]; +} HIP_TEXTURE_DESC; +/** + * HIP texture resource view formats + */ +typedef enum hipResourceViewFormat { + hipResViewFormatNone = 0x00, ///< No resource view format (use underlying resource format) + hipResViewFormatUnsignedChar1 = 0x01, ///< 1 channel, unsigned 8-bit integers + hipResViewFormatUnsignedChar2 = 0x02, ///< 2 channels, unsigned 8-bit integers + hipResViewFormatUnsignedChar4 = 0x03, ///< 4 channels, unsigned 8-bit integers + hipResViewFormatSignedChar1 = 0x04, ///< 1 channel, signed 8-bit integers + hipResViewFormatSignedChar2 = 0x05, ///< 2 channels, signed 8-bit integers + hipResViewFormatSignedChar4 = 0x06, ///< 4 channels, signed 8-bit integers + hipResViewFormatUnsignedShort1 = 0x07, ///< 1 channel, unsigned 16-bit integers + hipResViewFormatUnsignedShort2 = 0x08, ///< 2 channels, unsigned 16-bit integers + hipResViewFormatUnsignedShort4 = 0x09, ///< 4 channels, unsigned 16-bit integers + hipResViewFormatSignedShort1 = 0x0a, ///< 1 channel, signed 16-bit integers + hipResViewFormatSignedShort2 = 0x0b, ///< 2 channels, signed 16-bit integers + hipResViewFormatSignedShort4 = 0x0c, ///< 4 channels, signed 16-bit integers + hipResViewFormatUnsignedInt1 = 0x0d, ///< 1 channel, unsigned 32-bit integers + hipResViewFormatUnsignedInt2 = 0x0e, ///< 2 channels, unsigned 32-bit integers + hipResViewFormatUnsignedInt4 = 0x0f, ///< 4 channels, unsigned 32-bit integers + hipResViewFormatSignedInt1 = 0x10, ///< 1 channel, signed 32-bit integers + hipResViewFormatSignedInt2 = 0x11, ///< 2 channels, signed 32-bit integers + hipResViewFormatSignedInt4 = 0x12, ///< 4 channels, signed 32-bit integers + hipResViewFormatHalf1 = 0x13, ///< 1 channel, 16-bit floating point + hipResViewFormatHalf2 = 0x14, ///< 2 channels, 16-bit floating point + hipResViewFormatHalf4 = 0x15, ///< 4 channels, 16-bit floating point + hipResViewFormatFloat1 = 0x16, ///< 1 
channel, 32-bit floating point + hipResViewFormatFloat2 = 0x17, ///< 2 channels, 32-bit floating point + hipResViewFormatFloat4 = 0x18, ///< 4 channels, 32-bit floating point + hipResViewFormatUnsignedBlockCompressed1 = 0x19, ///< Block-compressed 1 + hipResViewFormatUnsignedBlockCompressed2 = 0x1a, ///< Block-compressed 2 + hipResViewFormatUnsignedBlockCompressed3 = 0x1b, ///< Block-compressed 3 + hipResViewFormatUnsignedBlockCompressed4 = 0x1c, ///< Block-compressed 4 unsigned + hipResViewFormatSignedBlockCompressed4 = 0x1d, ///< Block-compressed 4 signed + hipResViewFormatUnsignedBlockCompressed5 = 0x1e, ///< Block-compressed 5 unsigned + hipResViewFormatSignedBlockCompressed5 = 0x1f, ///< Block-compressed 5 signed + hipResViewFormatUnsignedBlockCompressed6H = 0x20, ///< Block-compressed 6 unsigned half-float + hipResViewFormatSignedBlockCompressed6H = 0x21, ///< Block-compressed 6 signed half-float + hipResViewFormatUnsignedBlockCompressed7 = 0x22 ///< Block-compressed 7 +} hipResourceViewFormat; +/** + * HIP texture resource view formats + */ +typedef enum HIPresourceViewFormat_enum { + HIP_RES_VIEW_FORMAT_NONE = 0x00, ///< No resource view format (use underlying resource format) + HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, ///< 1 channel, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, ///< 2 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, ///< 4 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, ///< 1 channel, signed 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, ///< 2 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, ///< 4 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, ///< 1 channel, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, ///< 2 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, ///< 4 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, ///< 1 channel, signed 16-bit integers 
+ HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, ///< 2 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, ///< 4 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, ///< 1 channel, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, ///< 2 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, ///< 4 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, ///< 1 channel, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, ///< 2 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, ///< 4 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, ///< 1 channel, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, ///< 2 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, ///< 4 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, ///< 1 channel, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, ///< 2 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, ///< 4 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, ///< Block-compressed 1 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, ///< Block-compressed 2 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, ///< Block-compressed 3 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, ///< Block-compressed 4 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, ///< Block-compressed 4 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, ///< Block-compressed 5 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, ///< Block-compressed 5 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, ///< Block-compressed 6 unsigned half-float + HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, ///< Block-compressed 6 signed half-float + HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 ///< Block-compressed 7 +} HIPresourceViewFormat; +/** + * HIP resource descriptor + */ +typedef struct hipResourceDesc { + enum hipResourceType resType; ///< 
Resource type + union { + struct { + hipArray_t array; ///< HIP array + } array; + struct { + hipMipmappedArray_t mipmap; ///< HIP mipmapped array + } mipmap; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + } res; +} hipResourceDesc; + +/** + * HIP resource descriptor struct + */ +typedef struct HIP_RESOURCE_DESC_st { + HIPresourcetype resType; ///< Resource type + union { + struct { + hipArray_t hArray; ///< HIP array + } array; + struct { + hipMipmappedArray_t hMipmappedArray; ///< HIP mipmapped array + } mipmap; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + struct { + int reserved[32]; + } reserved; + } res; + unsigned int flags; ///< Flags (must be zero) +} HIP_RESOURCE_DESC; +/** + * HIP resource view descriptor + */ +struct hipResourceViewDesc { + enum hipResourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel; 
///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index +}; +/** + * Resource view descriptor + */ +typedef struct HIP_RESOURCE_VIEW_DESC_st { + HIPresourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel; ///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index + unsigned int reserved[16]; +} HIP_RESOURCE_VIEW_DESC; +/** + * Memory copy types + */ +#if !defined(__HIPCC_RTC__) +typedef enum hipMemcpyKind { + hipMemcpyHostToHost = 0, ///< Host-to-Host Copy + hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy + hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy + hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy + hipMemcpyDefault = 4, ///< Runtime will automatically determine + ///< copy-kind based on virtual addresses. 
+ hipMemcpyDeviceToDeviceNoCU = 1024 ///< Device-to-Device Copy without using compute units +} hipMemcpyKind; +/** + * HIP pitched pointer + */ +typedef struct hipPitchedPtr { + void* ptr; ///< Pointer to the allocated memory + size_t pitch; ///< Pitch in bytes + size_t xsize; ///< Logical size of the first dimension of allocation in elements + size_t ysize; ///< Logical size of the second dimension of allocation in elements +} hipPitchedPtr; +/** + * HIP extent + */ +typedef struct hipExtent { + size_t width; ///< Width in elements when referring to array memory, in bytes when referring to + ///< linear memory + size_t height; ///< Height + size_t depth; ///< Depth +} hipExtent; +/** + * HIP position + */ +typedef struct hipPos { + size_t x; ///< X coordinate + size_t y; ///< Y coordinate + size_t z; ///< Z coordinate +} hipPos; +/** + * HIP 3D memory copy parameters + */ +typedef struct hipMemcpy3DParms { + hipArray_t srcArray; ///< Source array + struct hipPos srcPos; ///< Source position + struct hipPitchedPtr srcPtr; ///< Source pointer + hipArray_t dstArray; ///< Destination array + struct hipPos dstPos; ///< Destination position + struct hipPitchedPtr dstPtr; ///< Destination pointer + struct hipExtent extent; ///< Extent of 3D memory copy + enum hipMemcpyKind kind; ///< Kind of 3D memory copy +} hipMemcpy3DParms; +/** + * HIP 3D memory copy + */ +typedef struct HIP_MEMCPY3D { + size_t srcXInBytes; ///< Source X in bytes + size_t srcY; ///< Source Y + size_t srcZ; ///< Source Z + size_t srcLOD; ///< Source LOD + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source host pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t srcHeight; ///< Source height + size_t dstXInBytes; ///< Destination X in bytes + size_t dstY; ///< Destination Y + size_t dstZ; ///< Destination Z + size_t dstLOD; ///< Destination LOD + hipMemoryType dstMemoryType; ///< Destination memory type + 
void* dstHost; ///< Destination host pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t dstHeight; ///< Destination height + size_t WidthInBytes; ///< Width in bytes of 3D memory copy + size_t Height; ///< Height in bytes of 3D memory copy + size_t Depth; ///< Depth in bytes of 3D memory copy +} HIP_MEMCPY3D; +/** + * Specifies the type of location + */ +typedef enum hipMemLocationType { + hipMemLocationTypeInvalid = 0, + hipMemLocationTypeNone = 0, + hipMemLocationTypeDevice = 1, ///< Device location, thus it's HIP device ID + hipMemLocationTypeHost = 2, ///< Host location, id is ignored + hipMemLocationTypeHostNuma = 3, ///< Host NUMA node location, id is host NUMA node id + hipMemLocationTypeHostNumaCurrent = + 4 ///< Host NUMA node closest to current thread’s CPU, id is ignored +} hipMemLocationType; +/** + * Specifies a memory location. + * + * To specify a gpu, set type = @p hipMemLocationTypeDevice and set id = the gpu's device ID + */ +typedef struct hipMemLocation { + hipMemLocationType type; ///< Specifies the location type, which describes the meaning of id + int id; ///< Identifier for the provided location type @p hipMemLocationType +} hipMemLocation; + +/** + * Flags to specify for copies within a batch. Used with hipMemcpyBatchAsync + */ +typedef enum hipMemcpyFlags { + hipMemcpyFlagDefault = 0x0, ///< Default flag + hipMemcpyFlagPreferOverlapWithCompute = 0x1 ///< Tries to overlap copy with compute work. +} hipMemcpyFlags; + +/** + * Flags to specify order in which source pointer is accessed by Batch memcpy + */ +typedef enum hipMemcpySrcAccessOrder { + hipMemcpySrcAccessOrderInvalid = 0x0, ///< Default Invalid. + hipMemcpySrcAccessOrderStream = 0x1, ///< Access to source pointer must be in stream order. 
+ hipMemcpySrcAccessOrderDuringApiCall = + 0x2, ///< Access to source pointer can be out of stream order and all accesses must be + ///< complete before API call returns. + hipMemcpySrcAccessOrderAny = + 0x3, ///< Access to the source pointer can be out of stream order and the accesses can happen + ///< even after the API call returns. + hipMemcpySrcAccessOrderMax = 0x7FFFFFFF +} hipMemcpySrcAccessOrder; + +/** + * Attributes for copies within a batch. + */ +typedef struct hipMemcpyAttributes { + hipMemcpySrcAccessOrder + srcAccessOrder; ///< Source access ordering to be observed for copies with this attribute. + hipMemLocation srcLocHint; ///< Location hint for src operand. + hipMemLocation dstLocHint; ///< Location hint for destination operand. + unsigned int flags; ///< Additional Flags for copies. See hipMemcpyFlags. +} hipMemcpyAttributes; +/** + * Operand types for individual copies within a batch + */ +typedef enum hipMemcpy3DOperandType { + hipMemcpyOperandTypePointer = 0x1, ///< Memcpy operand is a valid pointer. + hipMemcpyOperandTypeArray = 0x2, ///< Memcpy operand is a valid hipArray. + hipMemcpyOperandTypeMax = 0x7FFFFFFF +} hipMemcpy3DOperandType; + +/** + * Struct representing offset into a hipArray_t in elements. + */ +typedef struct hipOffset3D { + size_t x; ///< X offset in elements + size_t y; ///< Y offset in elements + size_t z; ///< Z offset in elements +} hipOffset3D; +/** + * Struct representing an operand for copy with hipMemcpy3DBatchAsync. + */ +typedef struct hipMemcpy3DOperand { + hipMemcpy3DOperandType type; + union { + struct { + void* ptr; + size_t rowLength; ///< Length of each row in elements. + size_t layerHeight; ///< Height of each layer in elements. + hipMemLocation locHint; ///< Location Hint for the operand. + } ptr; + struct { + hipArray_t array; ///< Array struct for hipMemcpyOperandTypeArray. + hipOffset3D offset; ///< Offset into array in elements. 
+ } array; + } op; +} hipMemcpy3DOperand; + +/** + * HIP 3D Batch Op + */ +typedef struct hipMemcpy3DBatchOp { + hipMemcpy3DOperand src; + hipMemcpy3DOperand dst; + hipExtent extent; + hipMemcpySrcAccessOrder srcAccessOrder; + unsigned int flags; +} hipMemcpy3DBatchOp; + +typedef struct hipMemcpy3DPeerParms { + hipArray_t srcArray; ///< Source memory address + hipPos srcPos; ///< Source position offset + hipPitchedPtr srcPtr; ///< Pitched source memory address + int srcDevice; ///< Source device + hipArray_t dstArray; ///< Destination memory address + hipPos dstPos; ///< Destination position offset + hipPitchedPtr dstPtr; ///< Pitched destination memory address + int dstDevice; ///< Destination device + hipExtent extent; ///< Requested memory copy size +} hipMemcpy3DPeerParms; + +/** + * @brief Make hipPitchedPtr + * + * @param [in] d Pointer to the allocated memory + * @param [in] p Pitch in bytes + * @param [in] xsz Logical size of the first dimension of allocation in elements + * @param [in] ysz Logical size of the second dimension of allocation in elements + * + * @returns The created hipPitchedPtr + */ +static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) { + struct hipPitchedPtr s; + s.ptr = d; + s.pitch = p; + s.xsize = xsz; + s.ysize = ysz; + return s; +} +/** + * @brief Make hipPos struct + * + * @param [in] x X coordinate of the new hipPos + * @param [in] y Y coordinate of the new hipPos + * @param [in] z Z coordinate of the new hipPos + * + * @returns The created hipPos struct + */ +static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) { + struct hipPos p; + p.x = x; + p.y = y; + p.z = z; + return p; +} +/** + * @brief Make hipExtent struct + * + * @param [in] w Width of the new hipExtent + * @param [in] h Height of the new hipExtent + * @param [in] d Depth of the new hipExtent + * + * @returns The created hipExtent struct + */ +static inline struct hipExtent make_hipExtent(size_t w, size_t h, 
size_t d) { + struct hipExtent e; + e.width = w; + e.height = h; + e.depth = d; + return e; +} +typedef enum hipFunction_attribute { + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, ///< The maximum number of threads per block. Depends + ///< on function and device. + HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, ///< The statically allocated shared memory size in bytes + ///< per block required by the function. + HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, ///< The user-allocated constant memory by the function in + ///< bytes. + HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, ///< The local memory usage of each thread by this function + ///< in bytes. + HIP_FUNC_ATTRIBUTE_NUM_REGS, ///< The number of registers used by each thread of this function. + HIP_FUNC_ATTRIBUTE_PTX_VERSION, ///< PTX version + HIP_FUNC_ATTRIBUTE_BINARY_VERSION, ///< Binary version + HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, ///< Cache mode + HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, ///< The maximum dynamic shared memory per + ///< block for this function in bytes. + HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference + ///< in percent of the maximum shared + ///< memory. 
+ HIP_FUNC_ATTRIBUTE_MAX +} hipFunction_attribute; + +typedef enum hipPointer_attribute { + HIP_POINTER_ATTRIBUTE_CONTEXT = 1, ///< The context on which a pointer was allocated + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, ///< memory type describing the location of a pointer + HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ///< address at which the pointer is allocated on the + ///< device + HIP_POINTER_ATTRIBUTE_HOST_POINTER, ///< address at which the pointer is allocated on the host + HIP_POINTER_ATTRIBUTE_P2P_TOKENS, ///< A pair of tokens for use with Linux kernel interface + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, ///< Synchronize every synchronous memory operation + ///< initiated on this region + HIP_POINTER_ATTRIBUTE_BUFFER_ID, ///< Unique ID for an allocated memory region + HIP_POINTER_ATTRIBUTE_IS_MANAGED, ///< Indicates if the pointer points to managed memory + HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL, ///< device ordinal of a device on which a pointer + ///< was allocated or registered + HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation + ///< that is suitable for hipIpcGetMemHandle + ///< @warning This attribute is not supported in + ///< HIP + HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR, ///< Starting address for this requested pointer + HIP_POINTER_ATTRIBUTE_RANGE_SIZE, ///< Size of the address range for this requested pointer + HIP_POINTER_ATTRIBUTE_MAPPED, ///< tells if this pointer is in a valid address range + ///< that is mapped to a backing allocation + HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES, ///< Bitmask of allowed hipmemAllocationHandleType + ///< for this allocation @warning This attribute is + ///< not supported in HIP + HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by + ///< this pointer can be used with the + ///< GPUDirect RDMA API + ///< @warning This attribute is not 
supported + ///< in HIP + HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS, ///< Returns the access flags the device associated with + ///< for the corresponding memory referenced by the ptr + HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE ///< Returns the mempool handle for the allocation if + ///< it was allocated from a mempool + ///< @warning This attribute is not supported in HIP +} hipPointer_attribute; + +// doxygen end DriverTypes +/** + * @} + */ + +#endif // !defined(__HIPCC_RTC__) +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_common.h b/3rdparty/hip-headers/include/hip/hip_common.h new file mode 100644 index 0000000000..4a7dcff6cb --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_common.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H +#define HIP_INCLUDE_HIP_HIP_COMMON_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif +// Common code included at start of every hip file. +// Auto enable __HIP_PLATFORM_AMD__ if compiling on AMD platform +// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly +#if defined(__clang__) && defined(__HIP__) +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ +#endif +#endif // defined(__clang__) && defined(__HIP__) + +// Auto enable __HIP_PLATFORM_NVIDIA__ if compiling with NVIDIA platform +#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) +#ifndef __HIP_PLATFORM_NVIDIA__ +#define __HIP_PLATFORM_NVIDIA__ +#endif + +#ifdef __CUDACC__ +#define __HIPCC__ +#endif + +#endif //__NVCC__ + +// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path +#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ + (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) +#define __HIP_DEVICE_COMPILE__ 1 +#endif + +#ifdef __GNUC__ +#define HIP_PUBLIC_API __attribute__((visibility("default"))) +#define HIP_INTERNAL_EXPORTED_API __attribute__((visibility("default"))) +#else +#define HIP_PUBLIC_API +#define HIP_INTERNAL_EXPORTED_API +#endif + +#if __HIP_DEVICE_COMPILE__ == 0 +// 32-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) + +// 64-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (0) + +// Warp cross-lane operations +#define __HIP_ARCH_HAS_WARP_VOTE__ (0) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (0) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) 
+#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// Sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// Misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (0) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_deprecated.h b/3rdparty/hip-headers/include/hip/hip_deprecated.h new file mode 100644 index 0000000000..91c58e28b5 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_deprecated.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +// This file will add older hip functions used in the versioning system +// Find the deprecated functions and structs in hip_device.cpp + +// This struct is also kept in hip_device.cpp +typedef struct hipDeviceProp_tR0000 { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of constant memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP.
+ hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< DEPRECATED: use gcnArchName instead + char gcnArchName[256]; ///< AMD GCN Arch Name. + int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to + ///< pitched memory + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int ECCEnabled; ///< Device has ECC support enabled + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched functions + int
cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device + int managedMemory; ///< Device supports allocating managed memory on this system + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables +} hipDeviceProp_tR0000; + + +#ifdef __cplusplus +extern "C" { +#endif + +hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device); +hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* prop); + +#ifdef __cplusplus +} +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_runtime.h b/3rdparty/hip-headers/include/hip/hip_runtime.h new file mode 100644 index 0000000000..7834d0e0d9 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_runtime.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! HIP = Heterogeneous-compute Interface for Portability +//! +//! Defines an extremely thin runtime layer that allows source code to be compiled unmodified +//! through either AMD CLANG or NVCC. Key features tend to be in the spirit +//! and terminology of CUDA, but with a portable path to other accelerators as well: +// +//! Both paths support rich C++ features including classes, templates, lambdas, etc. +//! Runtime API is C +//! Memory management is based on pure pointers and resembles malloc/free/copy. +// +//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch +//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ +//! features.
+ +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_H + +#if !defined(__HIPCC_RTC__) +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: +#if __cplusplus +#include +#include +#else +#include +#include +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) + +#include +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) +#include + +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_runtime_api.h b/3rdparty/hip-headers/include/hip/hip_runtime_api.h new file mode 100644 index 0000000000..d91d9eeb5a --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_runtime_api.h @@ -0,0 +1,10267 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + +* @file hip_runtime_api.h + * + * @brief Defines the API signatures for HIP runtime. + * This file can be compiled with a standard compiler. + */ + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H + +#if __cplusplus +#include +#include +#include +#include +#else +#include +#include +#include +#endif + +#include +#include +#include + +enum { + HIP_SUCCESS = 0, + HIP_ERROR_INVALID_VALUE, + HIP_ERROR_NOT_INITIALIZED, + HIP_ERROR_LAUNCH_OUT_OF_RESOURCES +}; +// hack to get these to show up in Doxygen: +/** + * @defgroup GlobalDefs Global enum and defines + * @{ + * + */ +/** + * hipDeviceArch_t + * + */ +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. + + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). 
+ unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, __syncthreads_and, __syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + +typedef struct hipUUID_t { + char bytes[16]; +} hipUUID; + +//--- +// Common headers for both NVCC and HIP-Clang paths: + +#define hipGetDeviceProperties hipGetDevicePropertiesR0600 +#define hipDeviceProp_t hipDeviceProp_tR0600 +#define hipChooseDevice hipChooseDeviceR0600 + +/** + * hipDeviceProp + * + */ +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + hipUUID uuid; ///< UUID of a device + char luid[8]; ///< 8-byte unique identifier. Only valid on Windows + unsigned int luidDeviceNodeMask; ///< LUID node mask + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + ///< involving pitched memory + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + size_t totalConstMem; ///< Size of shared constant memory region on the device + ///< (in bytes). + int major; ///< Major compute capability version. This indicates the core instruction set + ///< of the GPU architecture. For example, a value of 11 would correspond to + ///< Navi III (RDNA3).
See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability version. This indicates a particular configuration, + ///< feature set, or variation within the group represented by the major compute + ///< capability version. For example, different models within the same major version + ///< might have varying levels of support for certain features or optimizations. + ///< See the arch feature flags for portable ways to query feature caps. + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to pitched memory + int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int integrated; ///< APU vs dGPU + int canMapHostMemory; ///< Check whether HIP can map host memory + int computeMode; ///< Compute mode.
+ int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images + int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory + int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims + int maxTextureCubemap; ///< Maximum cubemap texture dims + int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images + int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images + int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims + int maxSurface1D; ///< Maximum 1D surface size + int maxSurface2D[2]; ///< Maximum 2D surface size + int maxSurface3D[3]; ///< Maximum 3D surface size + int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size + int maxSurface2DLayered[3]; ///< Maximum 2D layered surface size + int maxSurfaceCubemap; ///< Maximum cubemap surface size + int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size + size_t surfaceAlignment; ///< Alignment requirement for surface + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int ECCEnabled; ///< Device has ECC support enabled + int pciBusID; ///< PCI Bus ID.
+ int pciDeviceID; ///< PCI Device ID + int pciDomainID; ///< PCI Domain ID + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int asyncEngineCount; ///< Number of async engines + int unifiedAddressing; ///< Whether device and host share a unified address space + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + int l2CacheSize; ///< L2 cache size. + int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int streamPrioritiesSupported; ///< Device supports stream priority + int globalL1CacheSupported; ///< Indicates globals are cached in L1 + int localL1CacheSupported; ///< Locals are cached in L1 + size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor. + int regsPerMultiprocessor; ///< Registers available per multiprocessor + int managedMemory; ///< Device supports allocating managed memory on this system + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU + int hostNativeAtomicSupported; ///< Link between host and device supports native atomics + int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
+ int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int computePreemptionSupported; ///< Is compute preemption supported on the device + int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same + ///< address as the host + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + size_t sharedMemPerBlockOptin; ///< Per-device max shared mem per block usable by special opt-in + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU + int accessPolicyMaxWindowSize; ///< Max value of access policy window + size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block + int hostRegisterSupported; ///< Device supports hipHostRegister + int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays + int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag + ///< with hipHostRegister + int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support + int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs + int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs + unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to + ///< hipFlushGPUDirectRDMAWritesOptions + int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering + unsigned int + memoryPoolSupportedHandleTypes; ///<
Bitmask of handle types supported with mempool-based IPC + int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP + ///< mipmapped arrays + int ipcEventSupported; ///< Device supports IPC events + int clusterLaunch; ///< Device supports cluster launch + int unifiedFunctionPointers; ///< Indicates device supports unified function pointers + int reserved[63]; ///< CUDA Reserved. + + int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA. + + /* HIP Only struct members */ + char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device +} hipDeviceProp_t; + +/** + * hipMemoryType (for pointer attributes) + * + * @note hipMemoryType enum values are a combination of cudaMemoryType and cuMemoryType and AMD + * specific enum values.
+ * + */ +typedef enum hipMemoryType { + hipMemoryTypeUnregistered = 0, ///< Unregistered memory + hipMemoryTypeHost = 1, ///< Memory is physically located on host + hipMemoryTypeDevice = 2, ///< Memory is physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeManaged = 3, ///< Managed memory, automatically managed by the unified + ///< memory system + // Values 4-9 are placeholders for new values. + hipMemoryTypeArray = 10, ///< Array memory, physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeUnified = 11 ///< unified address space + +} hipMemoryType; + +/** + * Pointer attributes + */ +typedef struct hipPointerAttribute_t { + enum hipMemoryType type; + int device; + void* devicePointer; + void* hostPointer; + int isManaged; + unsigned allocationFlags; /* flags specified when memory was allocated*/ + /* peers? */ +} hipPointerAttribute_t; + +// Ignoring error-code return values from hip APIs is discouraged. On C++17, +// we can make that yield a warning +#if __cplusplus >= 201703L +#define __HIP_NODISCARD [[nodiscard]] +#else +#define __HIP_NODISCARD +#endif + +/** + * HIP error type + * + */ +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in +// NVCC and HIP-Clang paths. Also update the hipCUDAErrorTohipError function in NVCC path. + +typedef enum __HIP_NODISCARD hipError_t { + hipSuccess = 0, ///< Successful completion. + hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL + ///< or not in an acceptable range. + hipErrorOutOfMemory = 2, ///< Out of memory. + // Deprecated + hipErrorMemoryAllocation = 2, ///< Memory allocation error.
+ hipErrorNotInitialized = 3, ///< Not initialized + // Deprecated + hipErrorInitializationError = 3, + hipErrorDeinitialized = 4, ///< Deinitialized + hipErrorProfilerDisabled = 5, + hipErrorProfilerNotInitialized = 6, + hipErrorProfilerAlreadyStarted = 7, + hipErrorProfilerAlreadyStopped = 8, + hipErrorInvalidConfiguration = 9, ///< Invalid configuration + hipErrorInvalidPitchValue = 12, ///< Invalid pitch value + hipErrorInvalidSymbol = 13, ///< Invalid symbol + hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer + hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction + hipErrorInsufficientDriver = 35, + hipErrorMissingConfiguration = 52, + hipErrorPriorLaunchFailure = 53, + hipErrorInvalidDeviceFunction = 98, ///< Invalid device function + hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices + hipErrorInvalidDevice = 101, ///< DeviceID must be in range from 0 to compute-devices. + hipErrorInvalidImage = 200, ///< Invalid image + hipErrorInvalidContext = 201, ///< Produced when input context is invalid. + hipErrorContextAlreadyCurrent = 202, + hipErrorMapFailed = 205, + // Deprecated + hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr. + hipErrorUnmapFailed = 206, + hipErrorArrayIsMapped = 207, + hipErrorAlreadyMapped = 208, + hipErrorNoBinaryForGpu = 209, + hipErrorAlreadyAcquired = 210, + hipErrorNotMapped = 211, + hipErrorNotMappedAsArray = 212, + hipErrorNotMappedAsPointer = 213, + hipErrorECCNotCorrectable = 214, + hipErrorUnsupportedLimit = 215, ///< Unsupported limit + hipErrorContextAlreadyInUse = 216, ///< The context is already in use + hipErrorPeerAccessUnsupported = 217, + hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX + hipErrorInvalidGraphicsContext = 219, + hipErrorInvalidSource = 300, ///< Invalid source. + hipErrorFileNotFound = 301, ///< The file is not found.
+ hipErrorSharedObjectSymbolNotFound = 302, + hipErrorSharedObjectInitFailed = 303, ///< Failed to initialize shared object. + hipErrorOperatingSystem = 304, ///< Not the correct operating system + hipErrorInvalidHandle = 400, ///< Invalid handle + // Deprecated + hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid. + hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation. + hipErrorNotFound = 500, ///< Not found + hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not + ///< ready. This is not actually an error, but is used to distinguish + ///< from hipSuccess (which indicates completion). APIs that return + ///< this error include hipEventQuery and hipStreamQuery. + hipErrorIllegalAddress = 700, + hipErrorLaunchOutOfResources = 701, ///< Out of resources error. + hipErrorLaunchTimeOut = 702, ///< Timeout for the launch. + hipErrorPeerAccessAlreadyEnabled = 704, ///< Peer access was already enabled from the current + ///< device. + hipErrorPeerAccessNotEnabled = 705, ///< Peer access was never enabled from the current device. + hipErrorSetOnActiveProcess = 708, ///< The process is active. + hipErrorContextIsDestroyed = 709, ///< The context is already destroyed + hipErrorAssert = 710, ///< Produced when the kernel calls assert. + hipErrorHostMemoryAlreadyRegistered = 712, ///< Produced when trying to lock a page-locked + ///< memory. + hipErrorHostMemoryNotRegistered = 713, ///< Produced when trying to unlock a non-page-locked + ///< memory. + hipErrorLaunchFailure = 719, ///< An exception occurred on the device while executing a kernel. + hipErrorCooperativeLaunchTooLarge = 720, ///< This error indicates that the number of blocks + ///< launched per grid for a kernel that was launched + ///< via cooperative launch APIs exceeds the maximum + ///< number of allowed blocks for the current device.
+ hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented + hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream + ///< is capturing. + hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream + ///< has been invalidated due to a previous error. + hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of + ///< two independent capture sequences. + hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream. + hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not + ///< joined to the primary stream. + hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses + ///< the capture sequence boundary. Only implicit + ///< in-stream ordering dependencies are allowed + ///< to cross the boundary + hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed + ///< implicit dependency on a current capture sequence + ///< from hipStreamLegacy. + hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last + ///< recorded in a capturing stream. + hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with + ///< the hipStreamCaptureModeRelaxed argument to + ///< hipStreamBeginCapture was passed to + ///< hipStreamEndCapture in a different thread. + hipErrorGraphExecUpdateFailure = 910, ///< This error indicates that the graph update + ///< was not performed because it included changes which + ///< violated constraints specific to instantiated graph + ///< update. + hipErrorInvalidChannelDescriptor = 911, ///< Invalid channel descriptor. + hipErrorInvalidTexture = 912, ///< Invalid texture. + hipErrorUnknown = 999, ///< Unknown error. + // HSA Runtime Error Codes start here. + hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error.
Typically not seen + ///< in production systems. + hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically + ///< not seen in production systems. + hipErrorTbd ///< Marker that more error codes are needed. +} hipError_t; + +#undef __HIP_NODISCARD + +/** + * hipDeviceAttribute_t + * hipDeviceAttributeUnused number: 5 + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeCudaCompatibleBegin = 0, + + hipDeviceAttributeEccEnabled = + hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. + hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window + ///< policy in bytes. + hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number. + hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address + ///< space + hipDeviceAttributeCanUseHostPointerForRegisteredMem, ///< Device can access host registered + ///< memory at the same virtual address as + ///< the CPU + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels + ///< concurrently. + hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory + ///< concurrently with the CPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple + ///< devices + hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel. + ///< Deprecated. Use instead asyncEngineCount. 
+ hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on + ///< the device without migration + hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1 + hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports + ///< native atomic operations + hipDeviceAttributeIntegrated, ///< Device is an integrated GPU + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. + hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 + ///< cache. + hipDeviceAttributeLocalL1CacheSupported, ///< Caching locals in L1 is supported + hipDeviceAttributeLuid, ///< 8-byte locally unique identifier. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system + hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor. NOTE(review): the attribute name suggests a maximum block count rather than a size; confirm. + hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. + hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. + hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. + hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. + hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. + hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. + hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. + hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. + hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. + hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only.
Maximum dimensions of 2D layered surface. + hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. + hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. + hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered + ///< surface. + hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. + hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture. + hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear + ///< texture. Use cudaDeviceGetTexture1DLinearMaxWidth() + ///< instead on Cuda. + hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture. + hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. + hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D texture. + hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations are + ///< performed. + hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture. + hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D + ///< textures bound to pitched memory. + hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture. + hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. + hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. + hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. + hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture. + hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture + hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
+ hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. + hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU + ///< board + hipDeviceAttributeMultiprocessorCount, ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half the number of CUs, because a single WGP contains two CUs. + hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName + hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory + ///< via the host's page tables + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. Returns pcie slot id + hipDeviceAttributePciDomainId, ///< PCI Domain Id. + hipDeviceAttributePciDomainID = + hipDeviceAttributePciDomainId, ///< PCI Domain ID, for backward compatibility. + hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes + hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This + ///< number is shared by all thread blocks simultaneously + ///< resident on a multiprocessor. + hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block. NOTE(review): the attribute name suggests "per multiprocessor"; confirm.
+ hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per + ///< block. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in + ///< bytes. + hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special + ///< opt in. + hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor. + hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single + ///< precision to double precision. + hipDeviceAttributeStreamPrioritiesSupported, ///< Whether stream priorities are supported. + hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces + hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver + hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures + hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture + ///< references bound to pitched memory. + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeTotalGlobalMem, ///< Global memory available on device. + hipDeviceAttributeUnifiedAddressing, ///< Cuda only. A unified address space shared with the + ///< host. + hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid + hipDeviceAttributeWarpSize, ///< Warp size in threads.
+ hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator + hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory + ///< management + hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via + ///< hipHostRegister + hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream + ///< Ordered Memory Allocator + + hipDeviceAttributeCudaCompatibleEnd = 9999, + hipDeviceAttributeAmdSpecificBegin = 10000, + + hipDeviceAttributeClockInstructionRate = + hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the + ///< device-side "clock*" + hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor. + hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch + hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName + hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< functions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< grid dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< block dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< shared memories + hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar + hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device + 
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and + ///< hipStreamWaitValue64(), '0' otherwise. + hipDeviceAttributeImageSupport, ///< '1' if Device supports image, '0' otherwise. + hipDeviceAttributePhysicalMultiProcessorCount, ///< All available physical compute + ///< units for the device + hipDeviceAttributeFineGrainSupport, ///< '1' if Device supports fine grain, '0' otherwise + hipDeviceAttributeWallClockRate, ///< Constant frequency of wall clock in kilohertz. + hipDeviceAttributeNumberOfXccs, ///< The number of XCC(s) on the device + hipDeviceAttributeMaxAvailableVgprsPerThread, ///< Max number of available (directly or + ///< indirectly addressable) VGPRs per thread in + ///< DWORDs. + hipDeviceAttributePciChipId, ///< GPU Manufacturer device id + + hipDeviceAttributeAmdSpecificEnd = 19999, + hipDeviceAttributeVendorSpecificBegin = 20000, + // Extended attributes for vendors +} hipDeviceAttribute_t; + +typedef enum hipDriverProcAddressQueryResult { + HIP_GET_PROC_ADDRESS_SUCCESS = 0, + HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1, + HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2 +} hipDriverProcAddressQueryResult; + +enum hipComputeMode { + hipComputeModeDefault = 0, + hipComputeModeExclusive = 1, + hipComputeModeProhibited = 2, + hipComputeModeExclusiveProcess = 3 +}; + +enum hipFlushGPUDirectRDMAWritesOptions { + hipFlushGPUDirectRDMAWritesOptionHost = 1 << 0, + hipFlushGPUDirectRDMAWritesOptionMemOps = 1 << 1 +}; + +enum hipGPUDirectRDMAWritesOrdering { + hipGPUDirectRDMAWritesOrderingNone = 0, + hipGPUDirectRDMAWritesOrderingOwner = 100, + hipGPUDirectRDMAWritesOrderingAllDevices = 200 +}; + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif +#include +#include +#include +#include +#if defined(_MSC_VER) +#define HIP_DEPRECATED(msg) __declspec(deprecated(msg)) +#else // !defined(_MSC_VER) +#define HIP_DEPRECATED(msg) 
__attribute__((deprecated(msg))) +#endif // !defined(_MSC_VER) +#define HIP_DEPRECATED_MSG \ + "This API is marked as deprecated and might not be supported in future releases. For more " \ + "details please refer " \ + "https://github.com/ROCm/HIP/blob/develop/docs/reference/deprecated_api_list.md" +#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) +#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) +#define HIP_LAUNCH_PARAM_END ((void*)0x03) +#ifdef __cplusplus +#define __dparm(x) = x +#else +#define __dparm(x) +#endif +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#ifdef __cplusplus +namespace hip_impl { +hipError_t hip_init(); +} // namespace hip_impl +#endif +// Structure definitions: +#ifdef __cplusplus +extern "C" { +#endif +//--- +// API-visible structures +typedef struct ihipCtx_t* hipCtx_t; +// Note many APIs also use integer deviceIds as an alternative to the device pointer: +typedef int hipDevice_t; +typedef enum hipDeviceP2PAttr { + hipDevP2PAttrPerformanceRank = 0, + hipDevP2PAttrAccessSupported, + hipDevP2PAttrNativeAtomicSupported, + hipDevP2PAttrHipArrayAccessSupported +} hipDeviceP2PAttr; +typedef enum hipDriverEntryPointQueryResult { + hipDriverEntryPointSuccess = 0, + hipDriverEntryPointSymbolNotFound = 1, + hipDriverEntryPointVersionNotSufficent = 2 +} hipDriverEntryPointQueryResult; +typedef struct ihipStream_t* hipStream_t; +#define hipIpcMemLazyEnablePeerAccess 0x01 +#define HIP_IPC_HANDLE_SIZE 64 +typedef struct hipIpcMemHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcMemHandle_t; +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; +typedef struct ihipModule_t* hipModule_t; +typedef struct ihipModuleSymbol_t* hipFunction_t; +typedef struct ihipLinkState_t* hipLinkState_t; +typedef struct ihipLibrary_t* hipLibrary_t; +typedef struct ihipKernel_t* hipKernel_t; +/** + * HIP memory pool + */ +typedef struct ihipMemPoolHandle_t* hipMemPool_t; + +typedef struct 
hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +} hipFuncAttributes; +typedef struct ihipEvent_t* hipEvent_t; + +/** + * hipLimit + * + * @note In HIP device limit-related APIs, any input limit value other than those defined in the + * enum is treated as "UnsupportedLimit" by default. + */ +enum hipLimit_t { + hipLimitStackSize = 0x0, ///< Limit of stack size in bytes on the current device, per + ///< thread. The size is in units of 256 dwords, up to the + ///< limit of (128K - 16) + hipLimitPrintfFifoSize = 0x01, ///< Size limit in bytes of fifo used by printf call on the + ///< device. Currently not supported + hipLimitMallocHeapSize = 0x02, ///< Limit of heap size in bytes on the current device, should + ///< be less than the global memory size on the device + hipExtLimitScratchMin = 0x1000, ///< Minimum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchMax = 0x1001, ///< Maximum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchCurrent = 0x1002, ///< Current scratch limit threshold in bytes on this + ///< device. Must be between hipExtLimitScratchMin and + ///< hipExtLimitScratchMax. Valid only on Rocm + ///< device. This can be modified. + hipLimitRange ///< Supported limit range +}; + +/** + * Flags that can be used with hipStreamCreateWithFlags. + */ +// Flags that can be used with hipStreamCreateWithFlags. +/** Default stream creation flags. These are used with hipStreamCreate().*/ +#define hipStreamDefault 0x00 + +/** Stream does not implicitly synchronize with null stream.*/ +#define hipStreamNonBlocking 0x01 + +// Flags that can be used with hipEventCreateWithFlags.
+/** Default flags.*/ +#define hipEventDefault 0x0 + +/** Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.*/ +#define hipEventBlockingSync 0x1 + +/** Disable event's capability to record timing information. May improve performance.*/ +#define hipEventDisableTiming 0x2 + +/** Event can support IPC. hipEventDisableTiming also must be set.*/ +#define hipEventInterprocess 0x4 + +// Flags that can be used with hipEventRecordWithFlags. +/** Default flag. */ +#define hipEventRecordDefault 0x00 + +/** Event is captured in the graph as an external event node when performing stream capture. */ +#define hipEventRecordExternal 0x01 + +//Flags that can be used with hipStreamWaitEvent. +/** Default flag. */ +#define hipEventWaitDefault 0x00 + +/** Wait is captured in the graph as an external event node when performing stream capture. */ +#define hipEventWaitExternal 0x01 + +/** Disable performing a system scope sequentially consistent memory fence when the event + * transitions from recording to recorded. This can be used for events that are only being + * used to measure timing, and do not require the event inspection operations + * (see ::hipEventSynchronize, ::hipEventQuery, and ::hipEventElapsedTime) to synchronize-with + * the work on which the recorded event (see ::hipEventRecord) is waiting. + * On some AMD GPU devices this can improve the accuracy of timing measurements by avoiding the + * cost of cache writeback and invalidation, and the performance impact of those actions on the + * execution of following work. */ +#define hipEventDisableSystemFence 0x20000000 + +/** Use a device-scope release when recording this event. This flag is useful to obtain more + * precise timings of commands between events. The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToDevice 0x40000000 + +/** Use a system-scope release when recording this event. This flag is useful to make + * non-coherent host memory visible to the host. 
The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToSystem 0x80000000 + +// Flags that can be used with hipGetDriverEntryPoint. +/** Default flag. Equivalent to hipEnablePerThreadDefaultStream if compiled with + * -fgpu-default-stream=per-thread flag or HIP_API_PER_THREAD_DEFAULT_STREAM macro is + * defined.*/ +#define hipEnableDefault 0x0 + +/** Search for all symbols except the corresponding per-thread versions.*/ +#define hipEnableLegacyStream 0x1 + +/** Search for all symbols including the per-thread versions. If a per-thread version cannot be + * found, returns the legacy version.*/ +#define hipEnablePerThreadDefaultStream 0x2 + +// Flags that can be used with hipHostMalloc/hipHostAlloc. +/** Default pinned memory allocation on the host.*/ +#define hipHostAllocDefault 0x0 + +/** Default pinned memory allocation on the host. + * @note This is the same definition as #hipHostAllocDefault.*/ +#define hipHostMallocDefault 0x0 + +/** Memory is considered allocated by all contexts.*/ +#define hipHostAllocPortable 0x1 + +/** Memory is considered allocated by all contexts. + * @note This is the same definition as #hipHostAllocPortable.*/ +#define hipHostMallocPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostAllocMapped 0x2 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer. + * @note This is the same definition as #hipHostAllocMapped.*/ +#define hipHostMallocMapped 0x2 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory.
+ * @note This flag is only for CUDA source compatibility but not functional within HIP runtime, + * because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostAllocWriteCombined 0x4 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory. + * @note This flag is the same definition as #hipHostAllocWriteCombined which is equivalent to + * cudaHostAllocWriteCombined. It is only for CUDA source compatibility but not functional within + * HIP runtime, because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostMallocWriteCombined 0x4 + +/** + * Host memory will be forcedly allocated on extended fine grained system memory + * pool which is with MTYPE_UC. + * @note This allocation flag is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipHostMallocUncached 0x10000000 +#define hipHostAllocUncached hipHostMallocUncached + +/** + * Host memory allocation will follow numa policy set by user. + * @note This numa allocation flag is applicable on Linux, under development on Windows. + */ +#define hipHostMallocNumaUser 0x20000000 + +/** Allocate coherent memory. Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocCoherent 0x40000000 + +/** Allocate non-coherent memory. 
Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocNonCoherent 0x80000000 + +/** Memory can be accessed by any stream on any device*/ +#define hipMemAttachGlobal 0x01 + +/** Memory cannot be accessed by any stream on any device.*/ +#define hipMemAttachHost 0x02 + +/** Memory can only be accessed by a single stream on the associated device.*/ +#define hipMemAttachSingle 0x04 + +#define hipDeviceMallocDefault 0x0 + +/** Memory is allocated in fine grained region of device.*/ +#define hipDeviceMallocFinegrained 0x1 + +/** Memory represents a HSA signal.*/ +#define hipMallocSignalMemory 0x2 + +/** Memory allocated will be uncached. */ +#define hipDeviceMallocUncached 0x3 + +/** Memory allocated will be contiguous. */ +#define hipDeviceMallocContiguous 0x4 + +// Flags that can be used with hipHostRegister. +/** Memory is Mapped and Portable.*/ +#define hipHostRegisterDefault 0x0 + +/** Memory is considered registered by all contexts.*/ +#define hipHostRegisterPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostRegisterMapped 0x2 + +/** Not supported.*/ +#define hipHostRegisterIoMemory 0x4 + +/** This flag is ignored on AMD devices.*/ +#define hipHostRegisterReadOnly 0x08 + +/** Coarse Grained host memory lock. NOTE(review): same numeric value (0x8) as hipHostRegisterReadOnly; confirm the overlap is intended.*/ +#define hipExtHostRegisterCoarseGrained 0x8 + +/** Map host memory onto extended fine grained access host memory pool when enabled. + * It is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipExtHostRegisterUncached 0x80000000 + +/** Automatically select between Spin and Yield.*/ +#define hipDeviceScheduleAuto 0x0 + +/** Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may + * consume more power.*/ +#define hipDeviceScheduleSpin 0x1 + +/** Yield the CPU to the operating system when waiting.
May increase latency, but lowers power + * and is friendlier to other threads in the system.*/ +#define hipDeviceScheduleYield 0x2 +#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x10 +/** Default HIP array allocation flag.*/ +#define hipArrayDefault 0x00 +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 +#define hipOccupancyDefault 0x00 +#define hipOccupancyDisableCachingOverride 0x01 +#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 +#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 +#define hipCpuDeviceId ((int)-1) +#define hipInvalidDeviceId ((int)-2) +// Flags that can be used with hipExtLaunch Set of APIs. +/** AnyOrderLaunch of kernels.*/ +#define hipExtAnyOrderLaunch 0x01 +// Flags to be used with hipStreamWaitValue32 and hipStreamWaitValue64. +#define hipStreamWaitValueGte 0x0 +#define hipStreamWaitValueEq 0x1 +#define hipStreamWaitValueAnd 0x2 +#define hipStreamWaitValueNor 0x3 + +/** Operations for hipStreamBatchMemOp*/ +typedef enum hipStreamBatchMemOpType { + hipStreamMemOpWaitValue32 = 0x1, + hipStreamMemOpWriteValue32 = 0x2, + hipStreamMemOpWaitValue64 = 0x4, + hipStreamMemOpWriteValue64 = 0x5, + hipStreamMemOpBarrier = 0x6, ///< Currently not supported + hipStreamMemOpFlushRemoteWrites = 0x3 ///< Currently not supported +} hipStreamBatchMemOpType; + +/** + * @brief Union representing batch memory operation parameters for HIP streams. + * + * hipStreamBatchMemOpParams is used to specify the parameters for batch memory + * operations in a HIP stream. This union supports various operations including + * waiting for a specific value, writing a value, and different flags for wait conditions. 
+ * + * @details + * The union includes fields for different types of operations defined in the + * enum hipStreamBatchMemOpType: + * - hipStreamMemOpWaitValue32: Wait for a 32-bit value. + * - hipStreamMemOpWriteValue32: Write a 32-bit value. + * - hipStreamMemOpWaitValue64: Wait for a 64-bit value. + * - hipStreamMemOpWriteValue64: Write a 64-bit value. + * + * Each operation type includes an address, the value to wait for or write, flags, and an + * optional alias that is not relevant on AMD GPUs. Flags can be used to specify different + * wait conditions such as equality, bitwise AND, greater than or equal, and bitwise NOR. + * + * Example usage: + * @code + * hipStreamBatchMemOpParams myArray[2]; + * myArray[0].operation = hipStreamMemOpWaitValue32; + * myArray[0].waitValue.address = waitAddr1; + * myArray[0].waitValue.value = 0x1; + * myArray[0].waitValue.flags = hipStreamWaitValueEq; + * + * myArray[1].operation = hipStreamMemOpWriteValue32; + * myArray[1].writeValue.address = writeAddr1; + * myArray[1].writeValue.value = 0x1; + * myArray[1].writeValue.flags = 0x0; + * + * result = hipStreamBatchMemOp(stream, 2, myArray, 0); + * @endcode + */ + +typedef union hipStreamBatchMemOpParams_union { + hipStreamBatchMemOpType operation; + struct hipStreamMemOpWaitValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant + } waitValue; + struct hipStreamMemOpWriteValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend.
Initial value is unimportant + } writeValue; + struct hipStreamMemOpFlushRemoteWritesParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } flushRemoteWrites; ///< Currently not supported on AMD + struct hipStreamMemOpMemoryBarrierParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } memoryBarrier; ///< Currently not supported on AMD + uint64_t pad[6]; +} hipStreamBatchMemOpParams; + +/** + * @brief Structure representing node parameters for batch memory operations in HIP graphs. + * + * hipBatchMemOpNodeParams is used to specify the parameters for batch memory + * operations in HIP graphs. This struct includes the context to use for the operations, the + * number of operations, and an array of hipStreamBatchMemOpParams that describe the operations. + * + * @details + * The structure includes the following fields: + * - ctx: The HIP context to use for the operations. + * - count: The number of operations in the paramArray. + * - paramArray: A pointer to an array of hipStreamBatchMemOpParams. + * - flags: Flags to control the node. 
+ * + * Example usage: + * @code + * hipBatchMemOpNodeParams nodeParams; + * nodeParams.ctx = context; + * nodeParams.count = ARRAY_SIZE; + * nodeParams.paramArray = myArray; + * nodeParams.flags = 0; + * + * Pass nodeParams to a HIP graph APIs hipGraphAddBatchMemOpNode, hipGraphBatchMemOpNodeGetParams, + * hipGraphBatchMemOpNodeSetParams, hipGraphExecBatchMemOpNodeSetParams + * @endcode + */ + +typedef struct hipBatchMemOpNodeParams { + hipCtx_t ctx; + unsigned int count; + hipStreamBatchMemOpParams* paramArray; + unsigned int flags; +} hipBatchMemOpNodeParams; + +// Stream per thread +/** Implicit stream per application thread.*/ +#define hipStreamPerThread ((hipStream_t)2) + +#define hipStreamLegacy ((hipStream_t)1) + +// Indicates that the external memory object is a dedicated resource +#define hipExternalMemoryDedicated 0x1 +/** + * HIP Memory Advise values + * + * @note This memory advise enumeration is used on Linux, not Windows. + */ +typedef enum hipMemoryAdvise { + hipMemAdviseSetReadMostly = 1, ///< Data will mostly be read and only occasionally + ///< be written to + hipMemAdviseUnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly + hipMemAdviseSetPreferredLocation = 3, ///< Set the preferred location for the data as + ///< the specified device + hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data + hipMemAdviseSetAccessedBy = 5, ///< Data will be accessed by the specified device + ///< so prevent page faults as much as possible + hipMemAdviseUnsetAccessedBy = 6, ///< Let HIP decide on the page faulting policy + ///< for the specified device + hipMemAdviseSetCoarseGrain = 100, ///< The default memory model is fine-grain. That allows + ///< coherent operations between host and device, while + ///< executing kernels.
The coarse-grain can be used + ///< for data that only needs to be coherent at dispatch + ///< boundaries for better performance + hipMemAdviseUnsetCoarseGrain = 101 ///< Restores cache coherency policy back to fine-grain +} hipMemoryAdvise; +/** + * HIP Coherency Mode + */ +typedef enum hipMemRangeCoherencyMode { + hipMemRangeCoherencyModeFineGrain = 0, ///< Updates to memory with this attribute can be + ///< done coherently from all devices + hipMemRangeCoherencyModeCoarseGrain = 1, ///< Writes to memory with this attribute can be + ///< performed by a single device at a time + hipMemRangeCoherencyModeIndeterminate = 2 ///< Memory region queried contains subregions with + ///< both hipMemRangeCoherencyModeFineGrain and + ///< hipMemRangeCoherencyModeCoarseGrain attributes +} hipMemRangeCoherencyMode; +/** + * HIP range attributes + */ +typedef enum hipMemRangeAttribute { + hipMemRangeAttributeReadMostly = 1, ///< Whether the range will mostly be read and + ///< only occasionally be written to + hipMemRangeAttributePreferredLocation = 2, ///< The preferred location of the range + hipMemRangeAttributeAccessedBy = 3, ///< Memory range has hipMemAdviseSetAccessedBy + ///< set for the specified device + hipMemRangeAttributeLastPrefetchLocation = 4, ///< The last location to where the range was + ///< prefetched + hipMemRangeAttributeCoherencyMode = 100, ///< Returns coherency mode + ///< @ref hipMemRangeCoherencyMode for the range +} hipMemRangeAttribute; + +/** + * HIP memory pool attributes + */ +typedef enum hipMemPoolAttr { + /** + * (value type = int) + * Allow @p hipMemAllocAsync to use memory asynchronously freed + * in other streams as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * hip events and null stream interactions can create the required + * stream ordered dependencies.
(default enabled) + */ + hipMemPoolReuseFollowEventDependencies = 0x1, + /** + * (value type = int) + * Allow reuse of already completed frees when there is no dependency + * between the free and allocation. (default enabled) + */ + hipMemPoolReuseAllowOpportunistic = 0x2, + /** + * (value type = int) + * Allow @p hipMemAllocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by hipFreeAsync (default enabled). + */ + hipMemPoolReuseAllowInternalDependencies = 0x3, + /** + * (value type = uint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + */ + hipMemPoolAttrReleaseThreshold = 0x4, + /** + * (value type = uint64_t) + * Amount of backing memory currently allocated for the mempool. + */ + hipMemPoolAttrReservedMemCurrent = 0x5, + /** + * (value type = uint64_t) + * High watermark of backing memory allocated for the mempool since the + * last time it was reset. High watermark can only be reset to zero. + */ + hipMemPoolAttrReservedMemHigh = 0x6, + /** + * (value type = uint64_t) + * Amount of memory from the pool that is currently in use by the application. + */ + hipMemPoolAttrUsedMemCurrent = 0x7, + /** + * (value type = uint64_t) + * High watermark of the amount of memory from the pool that was in use by the application since + * the last time it was reset. High watermark can only be reset to zero.
+ */ + hipMemPoolAttrUsedMemHigh = 0x8 +} hipMemPoolAttr; + +/** + * Specifies the memory protection flags for mapping + * + */ +typedef enum hipMemAccessFlags { + hipMemAccessFlagsProtNone = 0, ///< Default, make the address range not accessible + hipMemAccessFlagsProtRead = 1, ///< Set the address range read accessible + hipMemAccessFlagsProtReadWrite = 3 ///< Set the address range read-write accessible +} hipMemAccessFlags; +/** + * Memory access descriptor structure is used to specify memory access + * permissions for a virtual memory region in Virtual Memory Management API. + * This structure changes read, and write permissions for + * specific memory regions. + */ +typedef struct hipMemAccessDesc { + hipMemLocation location; ///< Location on which the accessibility has to change + hipMemAccessFlags flags; ///< Accessibility flags to set +} hipMemAccessDesc; +/** + * Defines the allocation types + */ +typedef enum hipMemAllocationType { + hipMemAllocationTypeInvalid = 0x0, + /** This allocation type is 'pinned', i.e. cannot migrate from its current + * location while the application is actively using it + */ + hipMemAllocationTypePinned = 0x1, + hipMemAllocationTypeUncached = 0x40000000, + hipMemAllocationTypeMax = 0x7FFFFFFF +} hipMemAllocationType; +/** + * Flags for specifying handle types for memory pool allocations + * + */ +typedef enum hipMemAllocationHandleType { + hipMemHandleTypeNone = 0x0, ///< Does not allow any export mechanism + hipMemHandleTypePosixFileDescriptor = + 0x1, ///< Allows a file descriptor for exporting. Permitted only on POSIX systems + hipMemHandleTypeWin32 = 0x2, ///< Allows a Win32 NT handle for exporting. (HANDLE) + hipMemHandleTypeWin32Kmt = 0x4 ///< Allows a Win32 KMT handle for exporting. (D3DKMT_HANDLE) +} hipMemAllocationHandleType; +/** + * Specifies the properties of allocations made from the pool. + */ +typedef struct hipMemPoolProps { + hipMemAllocationType + allocType; ///< Allocation type. 
Currently must be specified as @p hipMemAllocationTypePinned + hipMemAllocationHandleType + handleTypes; ///< Handle types that will be supported by allocations from the pool + hipMemLocation location; ///< Location where allocations should reside + /** + * Windows-specific LPSECURITYATTRIBUTES required when @p hipMemHandleTypeWin32 is specified + */ + void* win32SecurityAttributes; + size_t maxSize; ///< Maximum pool size. When set to 0, defaults to a system dependent value + unsigned char reserved[56]; ///< Reserved for future use, must be 0 +} hipMemPoolProps; +/** + * Opaque data structure for exporting a pool allocation + */ +typedef struct hipMemPoolPtrExportData { + unsigned char reserved[64]; +} hipMemPoolPtrExportData; + +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncAttribute { + hipFuncAttributeMaxDynamicSharedMemorySize = + 8, ///< The maximum number of bytes requested for dynamically allocated shared memory + hipFuncAttributePreferredSharedMemoryCarveout = + 9, ///< Sets the percentage of total shared memory allocated as the shared memory carveout + hipFuncAttributeMax +} hipFuncAttribute; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncCache_t { + hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) + hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache + hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory + hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory +} hipFuncCache_t; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipSharedMemConfig { + hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. 
+ hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-byte intervals and performs best + ///< when adjacent threads access data 4 bytes apart. + hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best + ///< when adjacent threads access data 8 bytes apart. +} hipSharedMemConfig; +/** + * Struct for data in 3D + */ +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + constexpr __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) + : x(_x), y(_y), z(_z) {}; +#endif +} dim3; +/** + * struct hipLaunchParams_t + */ +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + void** args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; +/** + * struct hipFunctionLaunchParams_t + */ +typedef struct hipFunctionLaunchParams_t { + hipFunction_t function; ///< Kernel to launch + unsigned int gridDimX; ///< Width(X) of grid in blocks + unsigned int gridDimY; ///< Height(Y) of grid in blocks + unsigned int gridDimZ; ///< Depth(Z) of grid in blocks + unsigned int blockDimX; ///< X dimension of each thread block + unsigned int blockDimY; ///< Y dimension of each thread block + unsigned int blockDimZ; ///< Z dimension of each thread block + unsigned int sharedMemBytes; ///< Shared memory + hipStream_t hStream; ///< Stream identifier + void** kernelParams; ///< Kernel parameters +} hipFunctionLaunchParams; +typedef enum hipExternalMemoryHandleType_enum { + hipExternalMemoryHandleTypeOpaqueFd = 1, + hipExternalMemoryHandleTypeOpaqueWin32 = 2, + hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3, + hipExternalMemoryHandleTypeD3D12Heap = 4, + hipExternalMemoryHandleTypeD3D12Resource = 5, + hipExternalMemoryHandleTypeD3D11Resource = 6, + hipExternalMemoryHandleTypeD3D11ResourceKmt = 7, +
hipExternalMemoryHandleTypeNvSciBuf = 8 +} hipExternalMemoryHandleType; +typedef struct hipExternalMemoryHandleDesc_st { + hipExternalMemoryHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* nvSciBufObject; + } handle; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryHandleDesc; +typedef struct hipExternalMemoryBufferDesc_st { + unsigned long long offset; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryBufferDesc; +typedef struct hipExternalMemoryMipmappedArrayDesc_st { + unsigned long long offset; + hipChannelFormatDesc formatDesc; + hipExtent extent; + unsigned int flags; + unsigned int numLevels; +} hipExternalMemoryMipmappedArrayDesc; +typedef void* hipExternalMemory_t; +typedef enum hipExternalSemaphoreHandleType_enum { + hipExternalSemaphoreHandleTypeOpaqueFd = 1, + hipExternalSemaphoreHandleTypeOpaqueWin32 = 2, + hipExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3, + hipExternalSemaphoreHandleTypeD3D12Fence = 4, + hipExternalSemaphoreHandleTypeD3D11Fence = 5, + hipExternalSemaphoreHandleTypeNvSciSync = 6, + hipExternalSemaphoreHandleTypeKeyedMutex = 7, + hipExternalSemaphoreHandleTypeKeyedMutexKmt = 8, + hipExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9, + hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10 +} hipExternalSemaphoreHandleType; +typedef struct hipExternalSemaphoreHandleDesc_st { + hipExternalSemaphoreHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* NvSciSyncObj; + } handle; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreHandleDesc; +typedef void* hipExternalSemaphore_t; +typedef struct hipExternalSemaphoreSignalParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + } 
keyedMutex; + unsigned int reserved[12]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreSignalParams; +/** + * External semaphore wait parameters, compatible with driver type + */ +typedef struct hipExternalSemaphoreWaitParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + unsigned int timeoutMs; + } keyedMutex; + unsigned int reserved[10]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreWaitParams; + +#if __HIP_HAS_GET_PCH +/** + * Internal use only. This API may change in the future + * Pre-Compiled header for online compilation + */ +void __hipGetPCH(const char** pch, unsigned int* size); +#endif + +/** + * HIP Access flags for Interop resources. + */ +typedef enum hipGraphicsRegisterFlags { + hipGraphicsRegisterFlagsNone = 0, + hipGraphicsRegisterFlagsReadOnly = 1, ///< HIP will not write to this registered resource + hipGraphicsRegisterFlagsWriteDiscard = + 2, ///< HIP will only write and will not read from this registered resource + hipGraphicsRegisterFlagsSurfaceLoadStore = 4, ///< HIP will bind this resource to a surface + hipGraphicsRegisterFlagsTextureGather = + 8 ///< HIP will perform texture gather operations on this registered resource +} hipGraphicsRegisterFlags; + +typedef struct _hipGraphicsResource hipGraphicsResource; + +typedef hipGraphicsResource* hipGraphicsResource_t; + +/** + * An opaque value that represents a hip graph + */ +typedef struct ihipGraph* hipGraph_t; +/** + * An opaque value that represents a hip graph node + */ +typedef struct hipGraphNode* hipGraphNode_t; +/** + * An opaque value that represents a hip graph Exec + */ +typedef struct hipGraphExec* hipGraphExec_t; + +/** + * An opaque value that represents a user obj + */ +typedef struct hipUserObject* hipUserObject_t; + + +/** + * hipGraphNodeType + */ +typedef enum
hipGraphNodeType { + hipGraphNodeTypeKernel = 0, ///< GPU kernel node + hipGraphNodeTypeMemcpy = 1, ///< Memcpy node + hipGraphNodeTypeMemset = 2, ///< Memset node + hipGraphNodeTypeHost = 3, ///< Host (executable) node + hipGraphNodeTypeGraph = 4, ///< Node which executes an embedded graph + hipGraphNodeTypeEmpty = 5, ///< Empty (no-op) node + hipGraphNodeTypeWaitEvent = 6, ///< External event wait node + hipGraphNodeTypeEventRecord = 7, ///< External event record node + hipGraphNodeTypeExtSemaphoreSignal = 8, ///< External Semaphore signal node + hipGraphNodeTypeExtSemaphoreWait = 9, ///< External Semaphore wait node + hipGraphNodeTypeMemAlloc = 10, ///< Memory alloc node + hipGraphNodeTypeMemFree = 11, ///< Memory free node + hipGraphNodeTypeMemcpyFromSymbol = 12, ///< MemcpyFromSymbol node + hipGraphNodeTypeMemcpyToSymbol = 13, ///< MemcpyToSymbol node + hipGraphNodeTypeBatchMemOp = 14, ///< BatchMemOp node + hipGraphNodeTypeCount +} hipGraphNodeType; + +typedef void (*hipHostFn_t)(void* userData); +typedef struct hipHostNodeParams { + hipHostFn_t fn; + void* userData; +} hipHostNodeParams; +typedef struct hipKernelNodeParams { + dim3 blockDim; + void** extra; + void* func; + dim3 gridDim; + void** kernelParams; + unsigned int sharedMemBytes; +} hipKernelNodeParams; +typedef struct hipMemsetParams { + void* dst; + unsigned int elementSize; + size_t height; + size_t pitch; + unsigned int value; + size_t width; +} hipMemsetParams; + +typedef struct hipMemAllocNodeParams { + hipMemPoolProps poolProps; ///< Pool properties, which contain where + ///< the location should reside + const hipMemAccessDesc* accessDescs; ///< Array of memory access descriptors. + size_t accessDescCount; ///< The number of access descriptors.
+ ///< Must not be bigger than the number of GPUs + size_t bytesize; ///< The size of the requested allocation in bytes + void* dptr; ///< Returned device address of the allocation +} hipMemAllocNodeParams; + +/** + * Specifies performance hint with hipAccessPolicyWindow + */ +typedef enum hipAccessProperty { + hipAccessPropertyNormal = 0, ///< Normal cache persistence. + hipAccessPropertyStreaming = 1, ///< Streaming access is less likely to persist from cache + hipAccessPropertyPersisting = 2, ///< Persisting access is more likely to persist in cache +} hipAccessProperty; + +/*** + * Specifies access policy for a window, a contiguous extent of memory + * beginning at base_ptr and ending at base_ptr + num_bytes. + */ +typedef struct hipAccessPolicyWindow { + void* base_ptr; ///< Starting address of the access policy window + hipAccessProperty hitProp; ///< hipAccessProperty set for hit + float hitRatio; ///< hitRatio specifies percentage of lines assigned hitProp + hipAccessProperty missProp; ///< hipAccessProperty set for miss + size_t num_bytes; ///< Size in bytes of the window policy. +} hipAccessPolicyWindow; + +/** + * Memory Synchronization Domain map + */ +typedef struct hipLaunchMemSyncDomainMap { + unsigned char default_; /**< The default domain ID to use for designated kernels */ + unsigned char remote; /**< The remote domain ID to use for designated kernels */ +} hipLaunchMemSyncDomainMap; + +/** + * Memory Synchronization Domain + */ +typedef enum hipLaunchMemSyncDomain { + hipLaunchMemSyncDomainDefault = 0, /**< Launch kernels in the default domain */ + hipLaunchMemSyncDomainRemote = 1 /**< Launch kernels in the remote domain */ +} hipLaunchMemSyncDomain; + +/** + * Stream Synchronization Policy. + * Can be set with hipStreamSetAttribute + */ +typedef enum hipSynchronizationPolicy { + hipSyncPolicyAuto = 1, /**< Default Synchronization Policy. 
Host thread waits actively */ + hipSyncPolicySpin = 2, /**< Host thread spins in tight loop waiting for completion */ + hipSyncPolicyYield = 3, /**< Host spins but yields to other threads, reducing CPU usage */ + hipSyncPolicyBlockingSync = 4 /**< Host thread blocks (sleeps) until the stream completes */ +} hipSynchronizationPolicy; + +/** + * Launch Attribute ID + */ +typedef enum hipLaunchAttributeID { + hipLaunchAttributeAccessPolicyWindow = 1, ///< Valid for Streams, graph nodes, launches + hipLaunchAttributeCooperative = 2, ///< Valid for graph nodes, launches + hipLaunchAttributeSynchronizationPolicy = 3, ///< Valid for streams + hipLaunchAttributePriority = 8, ///< Valid for graph node, streams, launches + hipLaunchAttributeMemSyncDomainMap = 9, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMemSyncDomain = 10, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMax +} hipLaunchAttributeID; + + +/** + * Launch Attribute Value + */ +typedef union hipLaunchAttributeValue { + char pad[64]; ///< 64 byte padding + hipAccessPolicyWindow + accessPolicyWindow; ///< Value of launch attribute ::hipLaunchAttributeAccessPolicyWindow. + int cooperative; ///< Value of launch attribute ::hipLaunchAttributeCooperative. Indicates + ///< whether the kernel is cooperative. + int priority; ///< Value of launch attribute ::hipLaunchAttributePriority. Execution priority of + ///< kernel + hipSynchronizationPolicy + syncPolicy; ///< Value of launch attribute ::hipLaunchAttributeSynchronizationPolicy.
Used + ///< to work queued up in stream + hipLaunchMemSyncDomainMap + memSyncDomainMap; ///< Value of launch attribute hipLaunchAttributeMemSyncDomainMap + hipLaunchMemSyncDomain + memSyncDomain; ///< Value of launch attribute hipLaunchAttributeMemSyncDomain +} hipLaunchAttributeValue; + +/** + * Stream attributes + */ +#define hipStreamAttrID hipLaunchAttributeID +#define hipStreamAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipStreamAttributeSynchronizationPolicy hipLaunchAttributeSynchronizationPolicy +#define hipStreamAttributeMemSyncDomainMap hipLaunchAttributeMemSyncDomainMap +#define hipStreamAttributeMemSyncDomain hipLaunchAttributeMemSyncDomain +#define hipStreamAttributePriority hipLaunchAttributePriority + +#define hipStreamAttrValue hipLaunchAttributeValue + +/** + * Kernel node attributeID + */ +#define hipKernelNodeAttrID hipLaunchAttributeID +#define hipKernelNodeAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipKernelNodeAttributeCooperative hipLaunchAttributeCooperative +#define hipKernelNodeAttributePriority hipLaunchAttributePriority + +/** + * Kernel node attribute value + */ +#define hipKernelNodeAttrValue hipLaunchAttributeValue + +/** + * hip Drv attributes + */ +#define hipDrvLaunchAttributeCooperative hipLaunchAttributeCooperative + +#define hipDrvLaunchAttributeID hipLaunchAttributeID +#define hipDrvLaunchAttributeValue hipLaunchAttributeValue +#define hipDrvLaunchAttribute hipLaunchAttribute + +/** + * Graph execution update result + */ +typedef enum hipGraphExecUpdateResult { + hipGraphExecUpdateSuccess = 0x0, ///< The update succeeded + hipGraphExecUpdateError = 0x1, ///< The update failed for an unexpected reason which is described + ///< in the return value of the function + hipGraphExecUpdateErrorTopologyChanged = 0x2, ///< The update failed because the topology changed + hipGraphExecUpdateErrorNodeTypeChanged = 0x3, ///< The update failed because a node type changed + 
hipGraphExecUpdateErrorFunctionChanged = + 0x4, ///< The update failed because the function of a kernel node changed + hipGraphExecUpdateErrorParametersChanged = + 0x5, ///< The update failed because the parameters changed in a way that is not supported + hipGraphExecUpdateErrorNotSupported = + 0x6, ///< The update failed because something about the node is not supported + hipGraphExecUpdateErrorUnsupportedFunctionChange = 0x7 +} hipGraphExecUpdateResult; + +typedef enum hipStreamCaptureMode { + hipStreamCaptureModeGlobal = 0, + hipStreamCaptureModeThreadLocal, + hipStreamCaptureModeRelaxed +} hipStreamCaptureMode; +typedef enum hipStreamCaptureStatus { + hipStreamCaptureStatusNone = 0, ///< Stream is not capturing + hipStreamCaptureStatusActive, ///< Stream is actively capturing + hipStreamCaptureStatusInvalidated ///< Stream is part of a capture sequence that has been + ///< invalidated, but not terminated +} hipStreamCaptureStatus; + +typedef enum hipStreamUpdateCaptureDependenciesFlags { + hipStreamAddCaptureDependencies = 0, ///< Add new nodes to the dependency set + hipStreamSetCaptureDependencies, ///< Replace the dependency set with the new nodes +} hipStreamUpdateCaptureDependenciesFlags; + +typedef enum hipGraphMemAttributeType { + hipGraphMemAttrUsedMemCurrent = + 0, ///< Amount of memory, in bytes, currently associated with graphs + hipGraphMemAttrUsedMemHigh, ///< High watermark of memory, in bytes, associated with graphs since + ///< the last time. + hipGraphMemAttrReservedMemCurrent, ///< Amount of memory, in bytes, currently allocated for + ///< graphs. + hipGraphMemAttrReservedMemHigh, ///< High watermark of memory, in bytes, currently allocated for + ///< graphs +} hipGraphMemAttributeType; +typedef enum hipUserObjectFlags { + hipUserObjectNoDestructorSync = 0x1, ///< Destructor execution is not synchronized. +} hipUserObjectFlags; + +typedef enum hipUserObjectRetainFlags { + hipGraphUserObjectMove = 0x1, ///< Add new reference or retain. 
+} hipUserObjectRetainFlags; + +typedef enum hipGraphInstantiateFlags { + hipGraphInstantiateFlagAutoFreeOnLaunch = + 1, ///< Automatically free memory allocated in a graph before relaunching. + hipGraphInstantiateFlagUpload = 2, ///< Automatically upload the graph after instantiation. + hipGraphInstantiateFlagDeviceLaunch = + 4, ///< Instantiate the graph to be launched from the device. + hipGraphInstantiateFlagUseNodePriority = + 8, ///< Run the graph using the per-node priority attributes rather than the priority of the + ///< stream it is launched into. +} hipGraphInstantiateFlags; + +enum hipGraphDebugDotFlags { + hipGraphDebugDotFlagsVerbose = + 1 << 0, /**< Output all debug data as if every debug flag is enabled */ + hipGraphDebugDotFlagsKernelNodeParams = 1 << 2, /**< Adds hipKernelNodeParams to output */ + hipGraphDebugDotFlagsMemcpyNodeParams = 1 << 3, /**< Adds hipMemcpy3DParms to output */ + hipGraphDebugDotFlagsMemsetNodeParams = 1 << 4, /**< Adds hipMemsetParams to output */ + hipGraphDebugDotFlagsHostNodeParams = 1 << 5, /**< Adds hipHostNodeParams to output */ + hipGraphDebugDotFlagsEventNodeParams = + 1 << 6, /**< Adds hipEvent_t handle from record and wait nodes to output */ + hipGraphDebugDotFlagsExtSemasSignalNodeParams = + 1 << 7, /**< Adds hipExternalSemaphoreSignalNodeParams values to output */ + hipGraphDebugDotFlagsExtSemasWaitNodeParams = + 1 << 8, /**< Adds hipExternalSemaphoreWaitNodeParams to output */ + hipGraphDebugDotFlagsKernelNodeAttributes = + 1 << 9, /**< Adds hipKernelNodeAttrID values to output */ + hipGraphDebugDotFlagsHandles = + 1 << 10 /**< Adds node handles and every kernel function handle to output */ +}; + +/** + * hipGraphInstantiateWithParams results + */ +typedef enum hipGraphInstantiateResult { + hipGraphInstantiateSuccess = 0, /**< Instantiation Success */ + hipGraphInstantiateError = 1, /**< Instantiation failed for an + unexpected reason which is described in the return value of the function */ + 
hipGraphInstantiateInvalidStructure = 2, /**< Instantiation failed due + to invalid structure, such as cycles */ + hipGraphInstantiateNodeOperationNotSupported = 3, /**< Instantiation for device launch failed + because the graph contained an unsupported operation */ + hipGraphInstantiateMultipleDevicesNotSupported = 4, /**< Instantiation for device launch failed + due to the nodes belonging to different contexts */ +} hipGraphInstantiateResult; + +/** + * Graph Instantiation parameters + */ +typedef struct hipGraphInstantiateParams { + hipGraphNode_t errNode_out; /**< The node which caused instantiation to fail, if any*/ + unsigned long long flags; /**< Instantiation flags */ + hipGraphInstantiateResult result_out; /**< Whether instantiation was successful. + If it failed, the reason why */ + hipStream_t uploadStream; /**< Upload stream */ +} hipGraphInstantiateParams; + + +/** + * Memory allocation properties + */ +typedef struct hipMemAllocationProp { + hipMemAllocationType type; ///< Memory allocation type + union { + hipMemAllocationHandleType requestedHandleType; ///< Requested handle type + hipMemAllocationHandleType requestedHandleTypes; ///< Requested handle types + }; + hipMemLocation location; ///< Memory location + void* win32HandleMetaData; ///< Metadata for Win32 handles + struct { + unsigned char compressionType; ///< Compression type + unsigned char gpuDirectRDMACapable; ///< RDMA capable + unsigned short usage; ///< Usage + } allocFlags; +} hipMemAllocationProp; + +/** + * External semaphore signal node parameters + */ +typedef struct hipExternalSemaphoreSignalNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external signal semaphore. + const hipExternalSemaphoreSignalParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. 
+ unsigned int numExtSems; +} hipExternalSemaphoreSignalNodeParams; + +/** + * External semaphore wait node parameters + */ +typedef struct hipExternalSemaphoreWaitNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external wait semaphore. + const hipExternalSemaphoreWaitParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. + unsigned int numExtSems; +} hipExternalSemaphoreWaitNodeParams; + +/** + * Generic handle for memory allocation + */ +typedef struct ihipMemGenericAllocationHandle* hipMemGenericAllocationHandle_t; + +/** + * Flags for granularity + */ +typedef enum hipMemAllocationGranularity_flags { + hipMemAllocationGranularityMinimum = 0x0, ///< Minimum granularity + hipMemAllocationGranularityRecommended = 0x1 ///< Recommended granularity for performance +} hipMemAllocationGranularity_flags; + +/** + * Memory handle type + */ +typedef enum hipMemHandleType { + hipMemHandleTypeGeneric = 0x0 ///< Generic handle type +} hipMemHandleType; + +/** + * Memory operation types + */ +typedef enum hipMemOperationType { + hipMemOperationTypeMap = 0x1, ///< Map operation + hipMemOperationTypeUnmap = 0x2 ///< Unmap operation +} hipMemOperationType; + +/** + * Subresource types for sparse arrays + */ +typedef enum hipArraySparseSubresourceType { + hipArraySparseSubresourceTypeSparseLevel = 0x0, ///< Sparse level + hipArraySparseSubresourceTypeMiptail = 0x1 ///< Miptail +} hipArraySparseSubresourceType; + +/** + * Map info for arrays + */ +typedef struct hipArrayMapInfo { + hipResourceType resourceType; ///< Resource type + union { + hipMipmappedArray mipmap; + hipArray_t array; + } resource; + hipArraySparseSubresourceType subresourceType; ///< Sparse subresource type + union { + struct { + unsigned int + level; ///< For mipmapped arrays must be a valid mipmap level. 
For arrays must be zero + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned int offsetX; ///< X offset in elements + unsigned int offsetY; ///< Y offset in elements + unsigned int offsetZ; ///< Z offset in elements + unsigned int extentWidth; ///< Width in elements + unsigned int extentHeight; ///< Height in elements + unsigned int extentDepth; ///< Depth in elements + } sparseLevel; + struct { + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned long long offset; ///< Offset within mip tail + unsigned long long size; ///< Extent in bytes + } miptail; + } subresource; + hipMemOperationType memOperationType; ///< Memory operation type + hipMemHandleType memHandleType; ///< Memory handle type + union { + hipMemGenericAllocationHandle_t memHandle; + } memHandle; + unsigned long long offset; ///< Offset within the memory + unsigned int deviceBitMask; ///< Device ordinal bit mask + unsigned int flags; ///< flags for future use, must be zero now. + unsigned int reserved[2]; ///< Reserved for future use, must be zero now. +} hipArrayMapInfo; + +/** + * Memcpy node params + */ +typedef struct hipMemcpyNodeParams { + int flags; ///< Must be zero. + int reserved[3]; ///< Must be zero. + hipMemcpy3DParms copyParams; ///< Params set for the memory copy. 
+} hipMemcpyNodeParams; + +/** + * Child graph node params + */ +typedef struct hipChildGraphNodeParams { + hipGraph_t graph; ///< Either the child graph to clone into the node, or + ///< a handle to the graph possessed by the node used during query +} hipChildGraphNodeParams; + +/** + * Event wait node params + */ +typedef struct hipEventWaitNodeParams { + hipEvent_t event; ///< Event to wait on +} hipEventWaitNodeParams; + +/** + * Event record node params + */ +typedef struct hipEventRecordNodeParams { + hipEvent_t event; ///< The event to be recorded when node executes +} hipEventRecordNodeParams; + +/** + * Memory free node params + */ +typedef struct hipMemFreeNodeParams { + void* dptr; ///< the pointer to be freed +} hipMemFreeNodeParams; + +/** + * Params for different graph nodes + */ +typedef struct hipGraphNodeParams { + hipGraphNodeType type; + int reserved0[3]; + union { + long long reserved1[29]; + hipKernelNodeParams kernel; + hipMemcpyNodeParams memcpy; + hipMemsetParams memset; + hipHostNodeParams host; + hipChildGraphNodeParams graph; + hipEventWaitNodeParams eventWait; + hipEventRecordNodeParams eventRecord; + hipExternalSemaphoreSignalNodeParams extSemSignal; + hipExternalSemaphoreWaitNodeParams extSemWait; + hipMemAllocNodeParams alloc; + hipMemFreeNodeParams free; + }; + + long long reserved2; +} hipGraphNodeParams; + +/** + * This port activates when the kernel has finished executing. + */ +#define hipGraphKernelNodePortDefault 0 + +/** + * This port activates when all blocks of the kernel have begun execution. + */ +#define hipGraphKernelNodePortLaunchCompletion 2 + +/** + * This port activates when all blocks of the kernel have performed + * hipTriggerProgrammaticLaunchCompletion() or have terminated. + * It must be used with edge type hipGraphDependencyTypeProgrammatic.
+ */ +#define hipGraphKernelNodePortProgrammatic 1 + +typedef enum hipGraphDependencyType { + hipGraphDependencyTypeDefault = 0, + hipGraphDependencyTypeProgrammatic = 1 +} hipGraphDependencyType; + +typedef struct hipGraphEdgeData { + unsigned char + from_port; ///< This indicates when the dependency is triggered from the upstream node on the + ///< edge. The meaning is specific to the node type. A value of 0 in all cases + ///< means full completion of the upstream node, with memory visibility to the + ///< downstream node or portion thereof (indicated by to_port). Only kernel nodes + ///< define non-zero ports. A kernel node can use the following output port types: + ///< hipGraphKernelNodePortDefault, hipGraphKernelNodePortProgrammatic, or + ///< hipGraphKernelNodePortLaunchCompletion. + unsigned char reserved[5]; ///< These bytes are unused and must be zeroed + unsigned char + to_port; ///< Currently no node types define non-zero ports. This field must be set to zero. + unsigned char type; ///< This should be populated with a value from hipGraphDependencyType +} hipGraphEdgeData; + + +/** + * Used to specify custom attributes for launching kernels + */ +typedef struct hipLaunchAttribute_st { + hipLaunchAttributeID id; ///< Identifier of the launch attribute + char pad[8 - sizeof(hipLaunchAttributeID)]; ///< Padding to align the structure to 8 bytes + union { + hipLaunchAttributeValue val; ///< Value associated with the launch attribute + hipLaunchAttributeValue value; ///< Value associated with the launch attribute + }; +} hipLaunchAttribute; + +/** + * HIP extensible launch configuration + */ +typedef struct hipLaunchConfig_st { + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + size_t dynamicSmemBytes; ///< Dynamic shared-memory size per thread block + hipStream_t stream; ///< Stream identifier + hipLaunchAttribute* attrs; ///< Attributes list + unsigned int numAttrs; ///< Number of attributes +} hipLaunchConfig_t; + +/** + * HIP
driver extensible launch configuration + */ +typedef struct HIP_LAUNCH_CONFIG_st { + unsigned int gridDimX; ///< Grid width in blocks + unsigned int gridDimY; ///< Grid height in blocks + unsigned int gridDimZ; ///< Grid depth in blocks + unsigned int blockDimX; ///< Thread block dimension in X + unsigned int blockDimY; ///< Thread block dimension in Y + unsigned int blockDimZ; ///< Thread block dimension in Z + unsigned int sharedMemBytes; ///< Dynamic shared-memory size in bytes per block + hipStream_t hStream; ///< HIP stream identifier + hipLaunchAttribute* attrs; ///< Attribute list + unsigned int numAttrs; ///< Number of attributes +} HIP_LAUNCH_CONFIG; + +/** + * Requested handle type for address range. + */ +typedef enum hipMemRangeHandleType { + hipMemRangeHandleTypeDmaBufFd = 0x1, + hipMemRangeHandleTypeMax = 0x7fffffff +} hipMemRangeHandleType; + +/** + * Mem Range Flags used in hipMemGetHandleForAddressRange. + */ +typedef enum hipMemRangeFlags { + hipMemRangeFlagDmaBufMappingTypePcie = 0x1, + hipMemRangeFlagsMax = 0x7fffffff +} hipMemRangeFlags; + +// Doxygen end group GlobalDefs +/** + * @} + */ +/** + * @defgroup API HIP API + * @{ + * + * Defines the HIP API. See the individual sections for more information. + */ +/** + * @defgroup Driver Initialization and Version + * @{ + * This section describes the initialization and version functions of HIP runtime API. + * + */ +/** + * @brief Explicitly initializes the HIP runtime. + * + * @param [in] flags Initialization flag, should be zero. + * + * Most HIP APIs implicitly initialize the HIP runtime. + * This API provides control over the timing of the initialization. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO-ctx - more description on error codes. +hipError_t hipInit(unsigned int flags); + +/** + * @brief Returns the approximate HIP driver version.
+ * + * @param [out] driverVersion driver version + * + * HIP driver version shows up in the format: + * HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The HIP driver version does not correspond to an exact CUDA driver revision. + * On AMD platform, the API returns the HIP driver version, while on NVIDIA platform, it calls + * the corresponding CUDA runtime API and returns the CUDA driver version. + * There is no mapping/correlation between HIP driver version and CUDA driver version. + * + * @see hipRuntimeGetVersion + */ +hipError_t hipDriverGetVersion(int* driverVersion); +/** + * @brief Returns the approximate HIP Runtime version. + * + * @param [out] runtimeVersion HIP runtime version + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The version definition of HIP runtime is different from CUDA. + * On AMD platform, the function returns HIP runtime version, + * while on NVIDIA platform, it returns CUDA runtime version. + * And there is no mapping/correlation between HIP version and CUDA version. + * + * @see hipDriverGetVersion + */ +hipError_t hipRuntimeGetVersion(int* runtimeVersion); +/** + * @brief Returns a handle to a compute device + * @param [out] device Handle of device + * @param [in] ordinal Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); + +/** + * @brief Returns the compute capability of the device + * @param [out] major Major compute capability version number + * @param [out] minor Minor compute capability version number + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); +/** + * @brief Returns an identifier string for the device.
+ * @param [out] name String of the device name + * @param [in] len Maximum length of string to store in device name + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); +/** + * @brief Returns a UUID for the device. [BETA] + * @param [out] uuid UUID for the device + * @param [in] device device ordinal + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorDeinitialized + */ +hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device); +/** + * @brief Returns a value for attribute of link between two devices + * @param [out] value Pointer of the value for the attribute + * @param [in] attr enum of hipDeviceP2PAttr to query + * @param [in] srcDevice The source device of the link + * @param [in] dstDevice The destination device of the link + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, int srcDevice, + int dstDevice); +/** + * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. + * @param [out] pciBusId The string of PCI Bus Id format for the device + * @param [in] len Maximum length of string + * @param [in] device The device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); +/** + * @brief Returns a handle to a compute device. + * @param [out] device The handle of the device + * @param [in] pciBusId The string of PCI Bus Id for the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); +/** + * @brief Returns the total amount of memory on the device.
+ * @param [out] bytes The size of memory in bytes, on the device + * @param [in] device The ordinal of the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); +// doxygen end initialization +/** + * @} + */ +/** + * @defgroup Device Device Management + * @{ + * This section describes the device management functions of HIP runtime API. + */ +/** + * @brief Waits on all active streams on current device + * + * When this command is invoked, the host thread gets blocked until all the commands associated + * with streams associated with the device have completed. HIP does not support multiple blocking modes (yet!). + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipDeviceReset + */ +hipError_t hipDeviceSynchronize(void); +/** + * @brief The state of current device is discarded and updated to a fresh state. + * + * Calling this function deletes all streams created, memory allocated, kernels running, events + * created. Make sure that no other thread is using the device or streams, memory, kernels, events + * associated with the current device. + * + * @returns #hipSuccess + * + * @see hipDeviceSynchronize + */ +hipError_t hipDeviceReset(void); +/** + * @brief Set default device to be used for subsequent hip API calls from this thread. + * + * @param[in] deviceId Valid device in range 0...(hipGetDeviceCount()-1). + * + * Sets @p deviceId as the default device for the calling host thread. Valid device id's are 0... + * (hipGetDeviceCount()-1). + * + * Many HIP APIs implicitly use the "default device" : + * + * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be + * allocated on device. + * - Any streams or events created from this host thread will be associated with device.
+ * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device + * (unless a specific stream is specified, in which case the device associated with that stream will + * be used). + * + * This function may be called from any host thread. Multiple host threads may use the same device. + * This function does no synchronization with the previous or new device, and has very little + * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before + * making a HIP runtime call which uses the default device. + * + * The default device is stored in thread-local-storage for each thread. + * Thread-pool implementations may inherit the default device of the previous thread. A good + * practice is to always call hipSetDevice at the start of a HIP coding sequence to establish a known + * standard device. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorNoDevice + * + * @see #hipGetDevice, #hipGetDeviceCount + */ +hipError_t hipSetDevice(int deviceId); +/** + * @brief Set a list of devices that can be used. + * + * @param[in] device_arr List of devices to try + * @param[in] len Number of devices in specified list + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see #hipGetDevice, #hipGetDeviceCount, #hipSetDevice, #hipGetDeviceProperties, + * #hipSetDeviceFlags, #hipChooseDevice + * + * */ +hipError_t hipSetValidDevices(int* device_arr, int len); +/** + * @brief Return the default device id for the calling host thread. + * + * @param [out] deviceId *deviceId is written with the default device + * + * HIP maintains a default device for each thread using thread-local-storage. + * This device is used implicitly for HIP runtime APIs called by this thread. + * hipGetDevice returns in @p *deviceId the default device for the calling host thread.
+ * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipSetDevice, hipGetDevicesizeBytes + */ +hipError_t hipGetDevice(int* deviceId); +/** + * @brief Return number of compute-capable devices. + * + * @param [out] count Returns number of compute-capable devices. + * + * @returns #hipSuccess, #hipErrorNoDevice + * + * + * Returns in @p *count the number of devices that have ability to run compute commands. If there + * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more + * devices can be found, then hipGetDeviceCount returns #hipSuccess. + */ +hipError_t hipGetDeviceCount(int* count); +/** + * @brief Query for a specific device attribute. + * + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); +/** + * @brief Returns the default memory pool of the specified device + * + * @param [out] mem_pool Default memory pool to return + * @param [in] device Device index for query the default memory pool + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Sets the current memory pool of a device + * + * The memory pool must be local to the specified device. + * @p hipMallocAsync allocates from the current mempool of the provided stream's device. 
+ * By default, a device's current memory pool is its default memory pool. + * + * @note Use @p hipMallocFromPoolAsync for asynchronous memory allocations from a device + * different than the one the stream runs on. + * + * @param [in] device Device index for the update + * @param [in] mem_pool Memory pool for update as the current on the specified device + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool); +/** + * @brief Gets the current memory pool for the specified device + * + * Returns the last pool provided to @p hipDeviceSetMemPool for this device + * or the device's default memory pool if @p hipDeviceSetMemPool has never been called. + * By default the current mempool is the default mempool for a device, + * otherwise the returned pool must have been set with @p hipDeviceSetMemPool. + * + * @param [out] mem_pool Current memory pool on the specified device + * @param [in] device Device index to query the current memory pool + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Returns device properties. 
+ * + * @param [out] prop written with device properties + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * @bug HIP-Clang always returns 0 for maxThreadsPerMultiProcessor + * @bug HIP-Clang always returns 0 for regsPerBlock + * @bug HIP-Clang always returns 0 for l2CacheSize + * + * Populates hipGetDeviceProperties with information for the specified device. + */ +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); +/** + * @brief Gets the maximum width for 1D linear textures on the specified device + * + * This function queries the maximum width, in elements, of 1D linear textures that can be allocated + * on the specified device. The maximum width depends on the texture element size and the hardware + * limitations of the device. + * + * @param [out] max_width Maximum width, in elements, of 1D linear textures that the device can + * support + * @param [in] desc Requested channel format + * @param [in] device Device index to query for maximum 1D texture width + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + * + * @see hipDeviceGetAttribute, hipMalloc, hipTexRefSetAddressMode + */ +hipError_t hipDeviceGetTexture1DLinearMaxWidth(size_t* max_width, const hipChannelFormatDesc* desc, + int device); +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig Cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotSupported + * + * Note: AMD devices do not support reconfigurable cache. This API is not implemented + * on AMD platform. If the function is called, it will return hipErrorNotSupported. + * + */ +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Get Cache configuration for a specific Device + * + * @param [out] cacheConfig Pointer of cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices do not support reconfigurable cache. 
This hint is ignored + * on these architectures. + * + */ +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Gets resource limits of current device + * + * The function queries the size of limit value, as required by the input enum value hipLimit_t, + * which can be either #hipLimitStackSize, or #hipLimitMallocHeapSize. For any other + * input, the function will return #hipErrorUnsupportedLimit. + * + * @param [out] pValue Returns the size of the limit in bytes + * @param [in] limit The limit to query + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); +/** + * @brief Sets resource limits of current device. + * + * As the input enum limit, + * #hipLimitStackSize sets the limit value of the stack size on the current GPU device, per thread. + * The limit size can be queried via hipDeviceGetLimit. The size is in units of 256 dwords, up to the limit + * (128K - 16). + * + * #hipLimitMallocHeapSize sets the limit value of the heap used by the malloc()/free() + * calls. For limit size, use the #hipDeviceGetLimit API. + * + * For any other input, the function will return #hipErrorUnsupportedLimit. + * + * @param [in] limit Enum of hipLimit_t to set + * @param [in] value The size of limit value in bytes + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceSetLimit(enum hipLimit_t limit, size_t value); +/** + * @brief Returns bank width of shared memory for current device + * + * @param [out] pConfig The pointer of the bank width for shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures.
+ * + */ +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Gets the flags set for current device + * + * @param [out] flags Pointer of the flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipGetDeviceFlags(unsigned int* flags); +/** + * @brief The bank width of shared memory on current device is set + * + * @param [in] config Configuration for the bank width of shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief The current device behavior is changed according to the flags passed. + * + * @param [in] flags Flag to set on the current device + * + * The schedule flags impact how HIP waits for the completion of a command running on a device. + * + * #hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted + * the work until the command completes. This offers the lowest latency, but will consume a CPU + * core and may increase power. + * + * #hipDeviceScheduleYield : The HIP runtime will yield the CPU to system so that other + * tasks can use it. This may increase latency to detect the completion but will consume less + * power and is friendlier to other tasks in the system. + * + * #hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. + * + * #hipDeviceScheduleAuto : This is the default value if the input 'flags' is zero. + * Uses a heuristic to select between Spin and Yield modes. If the number of HIP contexts is + * greater than the number of logical processors in the system, uses Spin scheduling, otherwise + * uses Yield scheduling. + * + * #hipDeviceMapHost : Allows mapping host memory. On ROCm, this is always allowed and + * the flag is ignored. 
+ * + * #hipDeviceLmemResizeToMax : This flag is silently ignored on ROCm. + * + * @returns #hipSuccess, #hipErrorNoDevice, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess + * + * + */ +hipError_t hipSetDeviceFlags(unsigned flags); +/** + * @brief Device which matches hipDeviceProp_t is returned + * + * @param [out] device Pointer of the device + * @param [in] prop Pointer of the properties + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); +/** + * @brief Returns the link type and hop count between two devices + * + * @param [in] device1 Ordinal for device1 + * @param [in] device2 Ordinal for device2 + * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two + * devices + * @param [out] hopcount Returns the hop count between the two devices + * + * Queries and returns the HSA link type and the hop count between the two specified devices. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, + uint32_t* hopcount); +// TODO: implement IPC apis +/** + * @brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with hipMalloc and exports it for use in another process. This is a + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with hipFree and a subsequent call + * to hipMalloc returns memory with the same device address, + * hipIpcGetMemHandle will return a unique handle for the + * new memory. + * + * @param handle - Pointer to user allocated hipIpcMemHandle to return + * the handle in. 
+ * @param devPtr - Base pointer to previously allocated device memory + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorOutOfMemory, #hipErrorMapFailed + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); +/** + * @brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with hipIpcGetMemHandle into + * the current device address space. For contexts on different devices + * hipIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called hipDeviceEnablePeerAccess. This behavior is + * controlled by the hipIpcMemLazyEnablePeerAccess flag. + * hipDeviceCanAccessPeer can determine if a mapping is possible. + * + * Contexts that may open hipIpcMemHandles are restricted in the following way. + * hipIpcMemHandles from each device in a given process may only be opened + * by one context per device per other process. + * + * Memory returned from hipIpcOpenMemHandle must be freed with + * hipIpcCloseMemHandle. + * + * Calling hipFree on an exported memory region before calling + * hipIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * @param devPtr - Returned device pointer + * @param handle - hipIpcMemHandle to open + * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, + * #hipErrorInvalidDevicePointer + * + * @note During multiple processes, using the same memory handle opened by the current context, + * there is no guarantee that the same device pointer will be returned in @p *devPtr. + * This is different from CUDA. + * @note This IPC memory related feature API on Windows may behave differently from Linux.
+ * + */ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +/** + * @brief Close memory mapped with hipIpcOpenMemHandle + * + * Unmaps memory returned by hipIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. + * + * @param devPtr - Device pointer returned by hipIpcOpenMemHandle + * + * @returns #hipSuccess, #hipErrorMapFailed, #hipErrorInvalidHandle + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcCloseMemHandle(void* devPtr); + +/** + * @brief Gets an opaque interprocess handle for an event. + * + * This opaque handle may be copied into other processes and opened with hipIpcOpenEventHandle. + * Then hipEventRecord, hipEventSynchronize, hipStreamWaitEvent and hipEventQuery may be used in + * either process. Operations on the imported event after the exported event has been freed with + * hipEventDestroy will result in undefined behavior. + * + * @param[out] handle Pointer to hipIpcEventHandle to return the opaque event handle + * @param[in] event Event allocated with hipEventInterprocess and hipEventDisableTiming flags + * + * @returns #hipSuccess, #hipErrorInvalidConfiguration, #hipErrorInvalidValue + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); + +/** + * @brief Opens an interprocess event handle. + * + * Opens an interprocess event handle exported from another process with hipIpcGetEventHandle. The + * returned hipEvent_t behaves like a locally created event with the hipEventDisableTiming flag + * specified. This event needs to be freed with hipEventDestroy.
Operations on the imported event after + * the exported event has been freed with hipEventDestroy will result in undefined behavior. If the + * function is called within the same process where handle is returned by hipIpcGetEventHandle, it + * will return hipErrorInvalidContext. + * + * @param[out] event Pointer to hipEvent_t to return the event + * @param[in] handle The opaque interprocess handle to open + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); + +// end doxygen Device +/** + * @} + */ +/** + * + * @defgroup Execution Execution Control + * @{ + * This section describes the execution control functions of HIP runtime API. + * + */ +/** + * @brief Set attribute for a specific function + * + * @param [in] func Pointer of the function + * @param [in] attr Attribute to set + * @param [in] value Value to set + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value); +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] func Pointer of the function. + * @param [in] config Configuration to set. + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. 
+ * + */ +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); +/** + * @brief Set shared memory configuration for a specific function + * + * @param [in] func Pointer of the function + * @param [in] config Configuration + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config); +// doxygen end execution +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Error Error Handling + * @{ + * This section describes the error handling functions of HIP runtime API. + */ +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipExtGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call.
+ * + * @returns #hipSuccess + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread. Unlike hipGetLastError, this function does not reset the saved error code. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipPeekAtLastError(void); +/** + * @brief Return hip error as text string form. + * + * @param hip_error Error code to convert to name. + * @returns const char pointer to the NULL-terminated error name + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorName(hipError_t hip_error); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param hipError Error code to convert to string. + * @returns const char pointer to the NULL-terminated error string + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorString(hipError_t hipError); +/** + * @brief Return hip error as text string form. + * + * @param [in] hipError Error code to convert to string. + * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorName(hipError_t hipError, const char** errorString); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param [in] hipError Error code to convert to string.
+ * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString); +// end doxygen Error +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Stream Management + * @{ + * This section describes the stream management functions of HIP runtime API. + * The following Stream APIs are not (yet) supported in HIP: + * - hipStreamAttachMemAsync is a nop + * - hipDeviceGetStreamPriorityRange returns #hipSuccess + */ + +/** + * @brief Creates an asynchronous stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device. The @p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, the application must call + * hipStreamDestroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreate(hipStream_t* stream); +/** + * @brief Creates an asynchronous stream with flag. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device.
@p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle + * goes out-of-scope. To release the memory used by the stream, application must call + * hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, + * hipStreamDestroy. + * + */ +hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); +/** + * @brief Creates an asynchronous stream with the specified priority. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @param[in] priority Priority of the stream. Lower numbers represent higher priorities. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified priority, with its associated current + * device. + * @p stream returns an opaque handle that can be used to reference the newly created stream in + * subsequent hipStream* commands. The stream is allocated on the heap and will remain allocated + * even if the handle goes out-of-scope. To release the memory used by the stream, application must + * call hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); +/** + * @brief Returns numerical values that correspond to the least and greatest stream priority. + * + * @param[in, out] leastPriority Pointer in which a value corresponding to least priority + * is returned. 
+ * @param[in, out] greatestPriority Pointer in which a value corresponding to greatest priority + * is returned. + * @returns #hipSuccess + * + * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the + * least and greatest stream priority respectively. Stream priorities follow a convention where + * lower numbers imply greater priorities. The range of meaningful stream priorities is given by + * [*leastPriority,*greatestPriority]. If the user attempts to create a stream with a priority + * value that is outside the meaningful range as specified by this API, the priority is + * automatically clamped to within the valid range. + * + * @warning This API is under development on AMD GPUs and simply returns #hipSuccess. + */ +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); +/** + * @brief Destroys the specified stream. + * + * @param[in] stream Stream identifier + * @returns #hipSuccess #hipErrorInvalidHandle + * + * Destroys the specified stream. + * + * If commands are still executing on the specified stream, some may complete execution before the + * queue is deleted. + * + * The queue may be destroyed while some commands are still inflight, or may wait for all commands + * queued to the stream before destroying it. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, + * hipStreamWaitEvent, hipStreamSynchronize + */ +hipError_t hipStreamDestroy(hipStream_t stream); +/** + * @brief Returns #hipSuccess if all of the operations in the specified @p stream have completed, or + * #hipErrorNotReady if not. + * + * @param[in] stream Stream to query + * + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other + * host threads are sending work to the stream, the status may change immediately after the function + * is called. 
It is typically used for debug. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamQuery(hipStream_t stream); +/** + * @brief Waits for all commands in the stream to complete. + * + * @param[in] stream Stream identifier. + * + * @returns #hipSuccess, #hipErrorInvalidHandle + * + * This command is host-synchronous : the host will block until all operations on the specified + * stream with its associated device are completed. On multiple device systems, the @p stream is + * associated with its device, no need to call hipSetDevice before this API. + * + * This command follows standard null-stream semantics. Specifying the null stream will cause the + * command to wait for other streams on the same device to complete all pending operations. + * + * This command honors the #hipDeviceScheduleBlockingSync flag, which controls whether the wait is + * active or blocking. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamDestroy + * + */ +hipError_t hipStreamSynchronize(hipStream_t stream); +/** + * @brief Makes the specified compute stream wait for the specified event + * + * @param[in] stream Stream to make wait + * @param[in] event Event to wait on + * @param[in] flags Parameters to control the operation + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorStreamCaptureIsolation + * + * This function inserts a wait operation into the specified stream. + * All future work submitted to @p stream will wait until @p event reports completion before + * beginning execution. + * + * Flags include: + * hipEventWaitDefault: Default event creation flag. + * hipEventWaitExternal: Wait is captured in the graph as an external event node when + * performing stream capture + * + * This function only waits for commands in the current stream to complete. 
Notably, this function + * does not implicitly wait for commands in the default stream to complete, even if the specified + * stream is created with hipStreamNonBlocking = 0. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags __dparm(0)); +/** + * @brief Returns flags associated with this stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); +/** + * @brief Queries the Id of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] streamId Pointer to an unsigned long long in which the stream's id is returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags, hipStreamGetFlags, hipStreamCreateWithPriority, hipStreamGetPriority + */ +hipError_t hipStreamGetId(hipStream_t stream, unsigned long long* streamId); +/** + * @brief Queries the priority of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] priority Pointer to an integer in which the stream's priority is + * returned + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithPriority + */ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); +/** + * @brief Gets the device associated with the stream.
+ * + * @param[in] stream Stream to be queried + * @param[out] device Device associated with the stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorContextIsDestroyed, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorDeinitialized, #hipErrorInvalidContext + * + * @see hipStreamCreate, hipStreamDestroy, hipDeviceGetStreamPriorityRange + */ +hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device); +/** + * @brief Creates an asynchronous stream with the specified CU mask. + * + * @param[in, out] stream Pointer to new stream + * @param[in] cuMaskSize Size of CU mask bit array passed in. + * @param[in] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU. + * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical + * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored. + * It is user's responsibility to make sure the input is meaningful. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified CU mask. @p stream returns an opaque + * handle that can be used to reference the newly created stream in subsequent hipStream* commands. + * The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, application must call hipStreamDestroy. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, + const uint32_t* cuMask); +/** + * @brief Gets CU mask associated with an asynchronous stream + * + * @param[in] stream Stream to be queried + * @param[in] cuMaskSize Number of the block of memories (uint32_t *) allocated by user + * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which + * the stream's CU mask is returned. 
The CU mask is returned in chunks of 32 bits where + * each active bit represents one active CU. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask); +/** + * Stream callback function pointer type + */ +typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); +/** + * @brief Adds a callback to be called on the host after all currently enqueued items in the stream + * have completed. For each hipStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); + +/** + * @brief Sets stream attribute. Updated attribute is applied to work submitted to the stream. + * @param[in] stream - Stream to set attributes to + * @param[in] attr - Attribute ID for the attribute to set + * @param[in] value - Attribute value for the attribute to set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue* value); + +/** + * @brief Queries stream attribute.
+ * @param[in] stream - Stream to get attributes from + * @param[in] attr - Attribute ID for the attribute to query + * @param[out] value_out - Attribute value output + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue* value_out); + +// end doxygen Stream +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamM Stream Memory Operations + * @{ + * This section describes Stream Memory Wait and Write functions of HIP runtime API. + */ + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using #hipMallocSignalMemory flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte, + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor + * @param [in] mask - Mask to be applied on value at memory before it is compared with value, + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. + * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'.
+ * + * @note Support for #hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)); + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor. + * @param [in] mask - Mask to be applied on value at memory before it is compared with value + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. + * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'. + * + * @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. 
While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); + +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags); +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags); + +/** + * @brief Enqueues an array of stream memory operations in the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] count - The number of operations in the array. Must be less than 256 + * @param [in] paramArray - The types and parameters of the individual operations. + * @param [in] flags - Reserved for future expansion; must be 0. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Batch operations to synchronize the stream via memory operations. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64, hipStreamWriteValue64 + */ + +hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, + hipStreamBatchMemOpParams* paramArray, unsigned int flags); + +/** + * @brief Creates a batch memory operation node and adds it to a graph.[BETA] + * + * @param [out] phGraphNode - Returns the newly created node + * @param [in] hGraph - Graph to which to add the node + * @param [in] dependencies - Dependencies of the node + * @param [in] numDependencies - Number of dependencies + * @param [in] nodeParams - Parameters for the node + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64,
hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Returns a batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to get the parameters for + * @param [out] nodeParams_out - Pointer to return the parameters + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Returns the parameters of batch mem op node hNode in nodeParams_out. + * The paramArray returned in nodeParams_out is owned by the node. + * This memory remains valid until the node is destroyed or its parameters are modified, + * and should not be modified directly. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64, hipStreamWriteValue64, hipGraphBatchMemOpNodeSetParams + */ + +hipError_t hipGraphBatchMemOpNodeGetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams_out); + +/** + * @brief Sets the batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to set the parameters for + * @param [in] nodeParams - Parameters to copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of batch mem op node hNode to nodeParams. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64,
hipStreamWriteValue64, hipGraphBatchMemOpNodeGetParams + */ + +hipError_t hipGraphBatchMemOpNodeSetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Sets the parameters for a batch mem op node in the given graphExec.[BETA] + * + * @param [in] hGraphExec - The executable graph in which to set the specified node + * @param [in] hNode - Batch mem op node from the graph from which graphExec was instantiated + * @param [in] nodeParams - Updated Parameters to set + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of a batch mem op node in an executable graph hGraphExec. + * The node is identified by the corresponding node hNode in the non-executable graph, + * from which the executable graph was instantiated. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphExecBatchMemOpNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipBatchMemOpNodeParams* nodeParams); + +// end doxygen Stream Memory Operations +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Event Event Management + * @{ + * This section describes the event management functions of HIP runtime API. + */ +/** + * @brief Create an event with the specified flags + * + * @param[in,out] event Returns the newly created event. + * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, + #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess + * #hipEventDefault : Default flag. The event will use active synchronization and will support + timing. 
Active synchronization provides lowest possible latency at the expense of dedicating a + CPU to poll on the event. + * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is + called on this event, the thread will block until the event completes. This can increase latency + for the synchronization but can result in lower power and more resources for other CPU threads. + * #hipEventDisableTiming : Disable recording of timing information. Events created with this flag + would not record profiling data and provide best performance if used for synchronization. + * #hipEventInterprocess : The event can be used as an interprocess event. hipEventDisableTiming + flag also must be set when hipEventInterprocess flag is set. + * #hipEventDisableSystemFence : Disable acquire and release system scope fence. This may + improve performance but device memory may not be visible to the host and other devices + if this flag is set. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); +/** + * @brief Create an event + * + * @param[in,out] event Returns the newly created event. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreate(hipEvent_t* event); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event.
+ * @param[in] flags parameter for operations + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition to + * from recording to "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * Flags include: + * hipEventRecordDefault: Default event creation flag. + * hipEventRecordExternal: Event is captured in the graph as an external event node when + * performing stream capture + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note: If this function is not called before use hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream __dparm(0), + unsigned int flags __dparm(0)); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition to + * from recording to "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note If this function is not called before use hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +#ifdef __cplusplus +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); +#endif +/** + * @brief Destroy the specified event. + * + * @param[in] event Event to destroy. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure + * + * Releases memory associated with the event. If the event is recording but has not completed + * recording when hipEventDestroy() is called, the function will return immediately and the + * completion_future resources will be released later, when the hipDevice is synchronized. 
+ * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, + * hipEventElapsedTime + * + * @returns #hipSuccess + */ +hipError_t hipEventDestroy(hipEvent_t event); +/** + * @brief Wait for an event to complete. + * + * This function will block until the event is ready, waiting for all previous work in the stream + * specified when event was recorded with hipEventRecord(). + * + * If hipEventRecord() has not been called on @p event, this function returns #hipSuccess when no + * event is captured. + * + * + * @param[in] event Event on which to wait. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventElapsedTime + */ +hipError_t hipEventSynchronize(hipEvent_t event); +/** + * @brief Return the elapsed time between two events. + * + * @param[out] ms : Return time between start and stop in ms. + * @param[in] start : Start event. + * @param[in] stop : Stop event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Computes the elapsed time between two events. Time is computed in ms, with + * a resolution of approximately 1 us. + * + * Events which are recorded in a NULL stream will block until all commands + * on all other streams complete execution, and then record the timestamp. + * + * Events which are recorded in a non-NULL stream will record their timestamp + * when they reach the head of the specified stream, after all previous + * commands in that stream have completed executing. Thus the time that + * the event recorded may be significantly after the host calls hipEventRecord(). + * + * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is + * returned. 
If hipEventRecord() has been called on both events, but the timestamp has not yet been + * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at + * least one of the events), then #hipErrorNotReady is returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventSynchronize + */ +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); +/** + * @brief Query event status + * + * @param[in] event Event to query. + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Query the status of the specified event. This function will return #hipSuccess if all + * commands in the appropriate stream (specified to hipEventRecord()) have completed. If any + * execution has not completed, then #hipErrorNotReady is returned. + * + * @note This API returns #hipSuccess, if hipEventRecord() is not called before this API. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, + * hipEventSynchronize, hipEventElapsedTime + */ +hipError_t hipEventQuery(hipEvent_t event); +// end doxygen Events +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Memory Management + * @{ + * This section describes the memory management functions of HIP runtime API. + * The following CUDA APIs are not currently supported: + * - cudaMalloc3D + * - cudaMalloc3DArray + * - TODO - more 2D, 3D, array APIs here. 
+ * + * + */ + +/** + * @brief Sets information on the specified pointer.[BETA] + * + * @param [in] value Sets pointer attribute value + * @param [in] attribute Attribute to set + * @param [in] ptr Pointer to set attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + */ +hipError_t hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr); + + +/** + * @brief Returns attributes for the specified pointer + * + * @param [out] attributes attributes for the specified pointer + * @param [in] ptr pointer to get attributes for + * + * The output parameter 'attributes' has a member named 'type' that describes what memory the + * pointer is associated with, such as device memory, host memory, managed memory, and others. + * Otherwise, the API cannot handle the pointer and returns #hipErrorInvalidValue. + * + * @note The unrecognized memory type is unsupported to keep the HIP functionality backward + * compatibility due to #hipMemoryType enum values. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @note The current behavior of this HIP API corresponds to the CUDA API before version 11.0. + * + * @see hipPointerGetAttribute + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in, out] data Returned pointer attribute value + * @param [in] attribute Attribute to query for + * @param [in] ptr Pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @see hipPointerGetAttributes + */ +hipError_t hipPointerGetAttribute(void* data, hipPointer_attribute attribute, hipDeviceptr_t ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in] numAttributes number of attributes to query for + * @param [in] attributes attributes to query for + * @param [in, out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] ptr pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipPointerGetAttribute + */ +hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, hipPointer_attribute* attributes, + void** data, hipDeviceptr_t ptr); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup External External Resource Interoperability + * @{ + * @ingroup API + * + * This section describes the external resource interoperability functions of HIP runtime API. + * + */ +/** + * @brief Imports an external semaphore. + * + * @param[out] extSem_out Returned handle to an external semaphore + * @param[in] semHandleDesc Semaphore import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc); +/** + * @brief Signals a set of external semaphore objects.
+ * + * @param[in] extSemArray External semaphores to be signaled + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to signal + * @param[in] stream Stream to enqueue the signal operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Waits on a set of external semaphore objects + * + * @param[in] extSemArray External semaphores to be waited on + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to wait on + * @param[in] stream Stream to enqueue the wait operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Destroys an external semaphore object and releases any references to the underlying + * resource. Any outstanding signals or waits must have completed before the semaphore is destroyed. + * + * @param[in] extSem handle to an external semaphore object + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem); + +/** + * @brief Imports an external memory object.
+ * + * @param[out] extMem_out Returned handle to an external memory object + * @param[in] memHandleDesc Memory import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + */ +hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, + const hipExternalMemoryHandleDesc* memHandleDesc); +/** + * @brief Maps a buffer onto an imported memory object. + * + * @param[out] devPtr Returned device pointer to buffer + * @param[in] extMem Handle to external memory object + * @param[in] bufferDesc Buffer descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipExternalMemoryGetMappedBuffer(void** devPtr, hipExternalMemory_t extMem, + const hipExternalMemoryBufferDesc* bufferDesc); +/** + * @brief Destroys an external memory object. + * + * @param[in] extMem External memory object to be destroyed + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem); +/** + * @brief Maps a mipmapped array onto an external memory object. + * + * @param[out] mipmap mipmapped array to return + * @param[in] extMem external memory object handle + * @param[in] mipmapDesc external mipmapped array descriptor + * + * Returned mipmapped array must be freed using hipFreeMipmappedArray. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + * + * @see hipImportExternalMemory, hipDestroyExternalMemory, hipExternalMemoryGetMappedBuffer, + * hipFreeMipmappedArray + */ +hipError_t hipExternalMemoryGetMappedMipmappedArray( + hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem, + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc); +// end of external resource +/** + * @} + */ +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipMalloc(void** ptr, size_t size); +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] sizeBytes Requested memory size + * @param[in] flags Type of memory allocation + * + * If requested memory size is 0, no memory is allocated, *ptr returns nullptr, and #hipSuccess + * is returned. + * + * The memory allocation flag should be either #hipDeviceMallocDefault, + * #hipDeviceMallocFinegrained, #hipDeviceMallocUncached, or #hipMallocSignalMemory. + * If the flag is any other value, the API returns #hipErrorInvalidValue. 
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags); + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryD Memory Management [Deprecated] + * @ingroup Memory + * @{ + * This section describes the deprecated memory management functions of HIP runtime API. + * + */ + +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMallocHost(void** ptr, size_t size); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMemAllocHost(void** ptr, size_t size); +// end doxygen deprecated management memory +/** + * @} + */ +/** + * @brief Allocates device accessible page locked (pinned) host memory + * + * This API allocates pinned host memory which is mapped into the address space of all GPUs + * in the system, the memory can be accessed directly by the GPU device, and can be read or + * written with much higher bandwidth than pageable memory obtained with functions such as + * malloc(). + * + * Using the pinned host memory, applications can implement faster data transfers for HostToDevice + * and DeviceToHost. The runtime tracks the hipHostMalloc allocations and can avoid some of the + * setup required for regular unpinned memory. + * + * When the memory accesses are infrequent, zero-copy memory can be a good choice, for coherent + * allocation. GPU can directly access the host memory over the CPU/GPU interconnect, without need + * to copy the data. + * + * Currently the allocation granularity is 4KB for the API. + * + * Developers need to choose proper allocation flag with consideration of synchronization. + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * @param[in] flags Type of host memory allocation. See the description of flags in + * hipSetDeviceFlags. + * + * If no input for flags, it will be the default pinned memory allocation on the host. 
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * + * @see hipSetDeviceFlags, hipHostFree + */ +hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryM Managed Memory + * + * @ingroup Memory + * @{ + * This section describes the managed memory management functions of HIP runtime API. + * + * @note The managed memory management APIs are implemented on Linux, under development + * on Windows. + * + */ +/** + * @brief Allocates memory that will be automatically managed by HIP. + * + * This API is used for managed memory, allows data to be shared and accessible to both CPU and + * GPU using a single pointer. + * + * The API returns the allocation pointer, managed by HMM, can be used further to execute kernels + * on device and fetch data between the host and device as needed. + * + * If HMM is not supported, the function behaves the same as @p hipMallocHost . + * + * @note It is recommended to do the capability check before calling this API. + * + * @param [out] dev_ptr - pointer to allocated device memory + * @param [in] size - requested allocation size in bytes, it should be granularity of 4KB + * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost + * (defaults to hipMemAttachGlobal) + * + * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipMallocManaged(void** dev_ptr, size_t size, + unsigned int flags __dparm(hipMemAttachGlobal)); +/** + * @brief Prefetches memory to the specified destination device using HIP.
+ * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] device destination device to prefetch to + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, + hipStream_t stream __dparm(0)); +/** + * @brief Prefetches memory to the specified destination device using HIP. + * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] location destination location to prefetch to + * @param [in] flags flags for future use, must be zero now. + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync_v2(const void* dev_ptr, size_t count, hipMemLocation location, + unsigned int flags, hipStream_t stream __dparm(0)); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] device device to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes.
+ * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, int device); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] location location to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes. + * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise_v2(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + hipMemLocation location); +/** + * @brief Query an attribute of a given memory range in HIP.
+ * + * @param [in,out] data a pointer to a memory location where the result of each + * attribute query will be written to + * @param [in] data_size the size of data + * @param [in] attribute the attribute to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, hipMemRangeAttribute attribute, + const void* dev_ptr, size_t count); +/** + * @brief Query attributes of a given memory range in HIP. + * + * @param [in,out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] data_sizes an array, containing the sizes of each result + * @param [in] attributes an array of attributes to query (num_attributes and the number + * of attributes in this array should match) + * @param [in] num_attributes the number of attributes to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, size_t num_attributes, + const void* dev_ptr, size_t count); +/** + * @brief Attach memory to a stream asynchronously in HIP.
+ * + * @param [in] stream - stream in which to enqueue the attach operation + * @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or + * to a valid host-accessible region of system-allocated memory) + * @param [in] length - length of memory (defaults to zero) + * @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or + * hipMemAttachSingle (defaults to hipMemAttachSingle) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is under development. Currently it is a no-operation (NOP) + * function on AMD GPUs and returns #hipSuccess. + */ +hipError_t hipStreamAttachMemAsync(hipStream_t stream, void* dev_ptr, size_t length __dparm(0), + unsigned int flags __dparm(hipMemAttachSingle)); +// end doxygen Managed Memory +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamO Stream Ordered Memory Allocator + * @{ + * @ingroup Memory + * This section describes Stream Ordered Memory Allocator functions of HIP runtime API. + * + * The asynchronous allocator allows the user to allocate and free in stream order. + * All asynchronous accesses of the allocation must happen between the stream executions of + * the allocation and the free. If the memory is accessed outside of the promised stream order, + * a use before allocation / use after free error will cause undefined behavior. + * + * The allocator is free to reallocate the memory as long as it can guarantee that compliant memory + * accesses will not overlap temporally. The allocator may refer to internal stream ordering as well + * as inter-stream dependencies (such as HIP events and null stream dependencies) when establishing + * the temporal guarantee. The allocator may also insert inter-stream dependencies to establish + * the temporal guarantee. 
Whether or not a device supports the integrated stream ordered memory + * allocator may be queried by calling @p hipDeviceGetAttribute with the device attribute + * @p hipDeviceAttributeMemoryPoolsSupported + * + * @note APIs in this section are implemented on Linux, under development on Windows. + */ + +/** + * @brief Allocates memory with stream ordered semantics + * + * Inserts a memory allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in *dptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the memory pool associated with the stream's device. + * + * @note The default memory pool of a device contains device memory from that device. + * @note Basic stream ordering allows future work submitted into the same stream to use the + * allocation. Stream query, stream synchronize, and HIP events can be used to guarantee that + * the allocation operation completes before work submitted in a separate stream runs. + * @note During stream capture, this function results in the creation of an allocation node. + * In this case, the allocation is owned by the graph instead of the memory pool. The memory + * pool's properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer of memory allocation + * @param [in] size Number of bytes to allocate + * @param [in] stream The stream establishing the stream ordering contract and + * the memory pool to allocate from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocFromPoolAsync, hipFreeAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream); +/** + * @brief Frees memory with stream ordered semantics + * + * Inserts a free operation into @p stream. + * The allocation must not be used after stream execution reaches the free. + * After this API returns, accessing the memory from any subsequent work launched on the GPU + * or querying its pointer attributes results in undefined behavior. + * + * @note During stream capture, this function results in the creation of a free node and + * must therefore be passed the address of a graph allocation. + * + * @param [in] dev_ptr Pointer to device memory to free + * @param [in] stream The stream, where the destruction will occur according to the execution order + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream); +/** + * @brief Releases freed memory back to the OS + * + * Releases memory back to the OS until the pool contains fewer than @p min_bytes_to_hold + * reserved bytes, or there is no more memory that the allocator can safely release. + * The allocator cannot release OS allocations that back outstanding asynchronous allocations. + * The OS allocations may happen at different granularity from the user allocations. + * + * @note Allocations that have not been freed count as outstanding.
+ * @note Allocations that have been asynchronously freed but whose completion has + * not been observed on the host (eg. by a synchronize) can count as outstanding. + * + * @param[in] mem_pool The memory pool to trim allocations + * @param[in] min_bytes_to_hold If the pool has less than min_bytes_to_hold reserved, + * then the TrimTo operation is a no-op. Otherwise the memory pool will contain + * at least min_bytes_to_hold bytes reserved after the operation. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold); +/** + * @brief Sets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to modify + * @param [in] attr The attribute to modify + * @param [in] value Pointer to the value to assign + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Gets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to get attributes of + * @param [in] attr The attribute to get + * @param [out] value Retrieved value + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Controls visibility of the specified pool between devices + * + * @param [in] mem_pool Memory pool for access change + * @param [in] desc_list Array of access descriptors. Each descriptor instructs the access to + * enable for a single gpu + * @param [in] count Number of descriptors in the map array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows.
+ */ +hipError_t hipMemPoolSetAccess(hipMemPool_t mem_pool, const hipMemAccessDesc* desc_list, + size_t count); +/** + * @brief Returns the accessibility of a pool from a device + * + * Returns the accessibility of the pool's memory from the specified location. + * + * @param [out] flags Accessibility of the memory pool from the specified location/device + * @param [in] mem_pool Memory pool being queried + * @param [in] location Location/device for memory pool access + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAccess(hipMemAccessFlags* flags, hipMemPool_t mem_pool, + hipMemLocation* location); +/** + * @brief Creates a memory pool + * + * Creates a HIP memory pool and returns the handle in @p mem_pool. The @p pool_props determines + * the properties of the pool such as the backing device and IPC capabilities. + * + * By default, the memory pool will be accessible from the device it is allocated on. + * + * @param [out] mem_pool Contains the created memory pool + * @param [in] pool_props Memory pool properties + * + * @note Specifying hipMemHandleTypeNone creates a memory pool that will not support IPC. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolDestroy, hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues.
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props); +/** + * @brief Destroys the specified memory pool + * + * If any pointers obtained from this pool haven't been freed or + * the pool has free operations that haven't completed + * when @p hipMemPoolDestroy is invoked, the function will return immediately and the + * resources associated with the pool will be released automatically + * once there are no more outstanding allocations. + * + * Destroying the current mempool of a device sets the default mempool of + * that device as the current mempool for that device. + * + * @param [in] mem_pool Memory pool for destruction + * + * @note A device's default memory pool cannot be destroyed. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolCreate, hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool); +/** + * @brief Allocates memory from a specified pool with stream ordered semantics. + * + * Inserts an allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in @p dev_ptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the specified memory pool. + * + * @note The specified memory pool may be from a device different than that of the specified @p + * stream. + * + * Basic stream ordering allows future work submitted into the same stream to use the allocation.
+ * Stream query, stream synchronize, and HIP events can be used to guarantee that the allocation + * operation completes before work submitted in a separate stream runs. + * + * @note During stream capture, this function results in the creation of an allocation node. In this + * case, the allocation is owned by the graph instead of the memory pool. The memory pool's + * properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer + * @param [in] size Number of bytes to allocate + * @param [in] mem_pool The pool to allocate from + * @param [in] stream The stream establishing the stream ordering semantic + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, hipMemPoolCreate, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocFromPoolAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream); +/** + * @brief Exports a memory pool to the requested handle type. + * + * Given an IPC capable mempool, create an OS handle to share the pool with another process. + * A recipient process can convert the shareable handle into a mempool with @p + * hipMemPoolImportFromShareableHandle. Individual pointers can then be shared with the @p + * hipMemPoolExportPointer and @p hipMemPoolImportPointer APIs. The implementation of what the + * shareable handle is and how it can be transferred is defined by the requested handle type. + * + * @note To create an IPC capable mempool, create a mempool with a @p hipMemAllocationHandleType + * other than @p hipMemHandleTypeNone. 
+ * + * @param [out] shared_handle Pointer to the location in which to store the requested handle + * @param [in] mem_pool Pool to export + * @param [in] handle_type The type of handle to create + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportFromShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Imports a memory pool from a shared handle. + * + * Specific allocations can be imported from the imported pool with @p hipMemPoolImportPointer. + * + * @note Imported memory pools do not support creating new allocations. + * As such imported memory pools may not be used in @p hipDeviceSetMemPool + * or @p hipMallocFromPoolAsync calls. + * + * @param [out] mem_pool Returned memory pool + * @param [in] shared_handle OS handle of the pool to open + * @param [in] handle_type The type of handle being imported + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolExportToShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportFromShareableHandle(hipMemPool_t* mem_pool, void* shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Export data to share a memory pool allocation between processes. + * + * Constructs @p export_data for sharing a specific allocation from an already shared memory pool. 
+ * The recipient process can import the allocation with the @p hipMemPoolImportPointer api. + * The data is not a handle and may be shared through any IPC mechanism. + * + * @param[out] export_data Returned export data + * @param[in] dev_ptr Pointer to memory being exported + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportPointer + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* dev_ptr); +/** + * @brief Import a memory pool allocation from another process. + * + * Returns in @p dev_ptr a pointer to the imported memory. + * The imported memory must not be accessed before the allocation operation completes + * in the exporting process. The imported memory must be freed from all importing processes before + * being freed in the exporting process. The pointer may be freed with @p hipFree + * or @p hipFreeAsync. If @p hipFreeAsync is used, the free must be completed + * on the importing process before the free operation on the exporting process. + * + * @note The @p hipFreeAsync api may be used in the exporting process before + * the @p hipFreeAsync operation completes in its stream as long as the + * @p hipFreeAsync in the exporting process specifies a stream with + * a stream dependency on the importing process's @p hipFreeAsync. + * + * @param [out] dev_ptr Pointer to imported memory + * @param [in] mem_pool Memory pool from which to import a pointer + * @param [in] export_data Data specifying the memory to import + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, #hipErrorOutOfMemory + * + * @see hipMemPoolExportPointer + * + * @warning This API is marked as Beta. 
While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportPointer(void** dev_ptr, hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data); +// Doxygen end of ordered memory allocator +/** + * @} + */ + +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * @param[in] flags Type of host memory allocation see below + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * Flags: + * - #hipHostAllocDefault Default pinned memory allocation on the host. + * - #hipHostAllocPortable Memory is considered allocated by all contexts. + * - #hipHostAllocMapped Map the allocation into the address space for the current device. + * - #hipHostAllocWriteCombined Allocates the memory as write-combined. 
+ * - #hipHostAllocUncached Allocate the host memory on extended fine grained access system + * memory pool + * + * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue + */ +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc + * + * @param[out] devPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostMalloc + * @param[in] flags Flags to be passed for extension + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipSetDeviceFlags, hipHostMalloc + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); +/** + * @brief Return flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostMalloc + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipHostMalloc + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); +/** + * @brief Register host memory so it can be accessed from the current device. + * + * @param[out] hostPtr Pointer to host memory to be registered. + * @param[in] sizeBytes Size of the host memory + * @param[in] flags See below. + * + * Flags: + * - #hipHostRegisterDefault Memory is Mapped and Portable + * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports + * one context so this is always assumed true. + * - #hipHostRegisterMapped Map the allocation into the address space for the current device. + * The device pointer can be obtained with #hipHostGetDevicePointer. + * - #hipExtHostRegisterUncached Map the host memory onto extended fine grained access system + * memory pool. + * + * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer. 
+ * On many systems, the mapped device pointer will have a different value than the mapped host + * pointer. Applications must use the device pointer in device code, and the host pointer in host + * code. + * + * On some systems, registered memory is pinned. On some systems, registered memory may not + * actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory. + * + * Developers are strongly encouraged to register memory blocks which are aligned to the host + * cache-line size (typically 64 bytes, but it can be obtained from the CPUID instruction). + * + * If registering non-aligned pointers, applications must take care when registering pointers from + * the same cache line on different devices. HIP's coarse-grained synchronization model does not + * guarantee correct results if different devices write to different parts of the same cache block - + * typically one of the writes will "win" and overwrite data from the other registered memory + * region. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +/** + * @brief Un-register host pointer + * + * @param[in] hostPtr Host pointer previously registered with #hipHostRegister + * @returns Error code + * + * @see hipHostRegister + */ +hipError_t hipHostUnregister(void* hostPtr); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. 
+ * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] dptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] widthInBytes Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * @param[in] elementSizeBytes Size of each element in bytes; should be 4, 8 or 16 + * + * If size is 0, no memory is allocated, *dptr returns nullptr, and hipSuccess is returned. + * The intended usage of pitch is as a separate parameter of the allocation, used to compute + * addresses within the 2D array. Given the row and column of an array element of type T, the + * address is computed as: T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, + unsigned int elementSizeBytes); +/** + * @brief Free memory allocated by the HIP-Clang hip memory allocation API. 
+ * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess + * @returns #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated + * with hipHostMalloc) + * + * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipFree(void* ptr); +/** + * @brief Frees page-locked memory + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated + * with hipMalloc) + * + */ +hipError_t hipFreeHost(void* ptr); +/** + * @brief Free memory allocated by the HIP-Clang hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @ingroup MemoryD + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + * hipMalloc) + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + * + */ +hipError_t hipHostFree(void* ptr); +/** + * @brief Copy data from src to dst. + * + * It supports memory from host to device, + * device to host, device to device and host to host + * The src and dst must not overlap. + * + * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. 
For optimal peer-to-peer copies, the copy + * device must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with + * copy agent as the current device and src/dst as the peerDevice argument). If this is not done, + * the hipMemcpy will still work, but will perform the copy using a staging buffer on the host. + * Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in + * undefined behavior. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); +/** + * @brief Memory copy on the stream. + * It allows single or multiple devices to do memory copy on single or multiple streams. 
+ * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @param[in] stream Valid stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + * @see hipMemcpy, hipStreamCreate, hipStreamSynchronize, hipStreamDestroy, hipSetDevice, + * hipLaunchKernelGGL + * + */ +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream); +/** + * @brief Copy data from Host to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, const void* src, size_t sizeBytes); +/** + * @brief Copy data from Device to Host + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, 
hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copy data from Device to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copies from one 1D array to device memory. 
+ * + * @param[out] dstDevice Destination device pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount); +/** + * @brief Copies from device memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcDevice Source device pointer + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, + size_t ByteCount); + +/** + * @brief Copies from one 1D array to another. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray, + size_t srcOffset, size_t ByteCount); +/** + * @brief Copy data from Host to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, const void* src, size_t sizeBytes, + hipStream_t stream); 
+/** + * @brief Copy data from Device to Host asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); +/** + * @brief Copy data from Device to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +/** + * @brief Copies from one 1D array to host 
memory. + * + * @param[out] dstHost Destination pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount, hipStream_t stream); +/** + * @brief Copies from host memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t ByteCount, hipStream_t stream); +/** + * @brief Returns a global pointer from a module. + * @ingroup Module + * + * Returns in *dptr and *bytes the pointer and size of the global of name name located in module + * hmod. If no variable of that name exists, it returns hipErrorNotFound. Both parameters dptr and + * bytes are optional. If one of them is NULL, it is ignored and hipSuccess is returned. + * + * @param[out] dptr Returns global device pointer + * @param[out] bytes Returns global size in bytes + * @param[in] hmod Module to retrieve global from + * @param[in] name Name of global to retrieve + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotFound, #hipErrorInvalidContext + * + */ +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +/** + * @brief Gets device pointer associated with symbol on the device. 
+ * + * @param[out] devPtr device pointer associated with the symbol + * @param[in] symbol pointer to the symbol of the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); + + +/** + * @brief Gets the size of the given symbol on the device. + * + * @param[in] symbol pointer to the device symbol + * @param[out] size pointer to the size + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); + +/** + * @brief Gets the pointer of requested HIP driver function. + * + * @param[in] symbol The Symbol name of the driver function to request. + * @param[out] pfn Output pointer to the requested driver function. + * @param[in] hipVersion The HIP version for the requested driver function symbol. + * HIP version is defined as 100*version_major + version_minor. For example, in HIP 6.1, the + * hipversion is 601, for the symbol function "hipGetDeviceProperties", the specified hipVersion 601 + * is greater or equal to the version 600, the symbol function will be handled properly as backend + * compatible function. + * + * @param[in] flags Currently only the default flag is supported. + * @param[out] symbolStatus Optional enumeration for returned status of searching for symbol driver + * function based on the input hipVersion. + * + * Returns hipSuccess if the returned pfn is addressed to the pointer of found driver function. + * + * @returns #hipSuccess, #hipErrorInvalidValue. + */ +hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, + hipDriverProcAddressQueryResult* symbolStatus); + +/** + * @brief Copies data to the given symbol on the device. + * Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on + * the host side. The symbol can be in __constant or device space. + * Note that the symbol name needs to be encased in the HIP_SYMBOL macro. 
+ * This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize. + * For detailed usage, see the + * memcpyToSymbol + * example in the HIP Porting Guide. + * + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from the start of the symbol + * @param[in] kind type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); + +/** + * @brief Copies data to the given symbol on the device asynchronously. + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from the start of the symbol + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +/** + * @brief Copies data from the given symbol on the device. + * + * @param[out] dst Pointer to the destination memory address + * @param[in] symbol Pointer to the symbol address on the device + * @param[in] sizeBytes Size in bytes to copy + * @param[in] offset Offset in bytes from the start of the symbol + * @param[in] kind Type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); + +/** + * @brief Copies data from the given symbol on the device asynchronously. 
+ * + * @param[out] dst Returns pointer to destinition memory address + * @param[in] symbol pointer to the symbole address on the device + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from the start of symbole + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data from src to dst asynchronously. + * + * The copy is always performed by the device associated with the specified stream. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst + * pointers (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and + * src/dest as the peerDevice argument. If enabling device peer access is not done, the memory copy + * will still work, but will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. 
+ * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Type of memory transfer + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, + * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant + * byte value value. + * + * @param[out] dst Data being filled + * @param[in] value Value to be set + * @param[in] sizeBytes Data size in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes); +/** + * @brief Fills the first count bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument.
If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first count elements of the memory area pointed to by dest with the constant + * short value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count); +/** + * @brief Fills the first count elements of the memory area pointed to by dest with the constant + * short value value. + * + * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dest with the constant integer + * value for specified number of times.
+ * + * @param[out] dest Data being filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant + * byte value value. + * + * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] sizeBytes Size in bytes to set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dst with the constant integer + * value for the specified number of times. + * + * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Constant integer value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dst with the constant value.
+ * + * @param[out] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Constant value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); +/** + * @brief Asynchronously fills the memory area pointed to by dst with the constant value. + * + * @param[out] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, + hipStream_t stream __dparm(0)); +/** + * @brief Synchronously fills the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
+ * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 8-bit values synchronously to the specified char value. + * Height specifies the number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 8-bit values asynchronously to the specified char value. + * Height specifies the number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8Async(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 16-bit values synchronously to the specified short + * value. Height specifies the number of rows to set and dstPitch specifies the number of bytes + * between each row.
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 16-bit values asynchronously to the specified short + * value. Height specifies the number of rows to set and dstPitch specifies the number of bytes + * between each row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16Async(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, + size_t width, size_t height, hipStream_t stream __dparm(0)); +/** + * @brief Fills 2D memory range of 'width' 32-bit values synchronously to the specified int value. + * Height specifies the number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 32-bit values asynchronously to the specified int + * value. Height specifies the number of rows to set and dstPitch specifies the number of bytes + * between each row.
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Query memory info. + * + * On ROCM, this function gets the actual free memory left on the current device, so supports + * the cases while running multi-workload (such as multiple processes, multiple threads, and + * multiple GPUs). + * + * @warning On Windows, the free memory only accounts for memory allocated by this process and may + * be optimistic. + * + * @param[out] free Returns free memory on the current device in bytes + * @param[out] total Returns total allocatable memory on the current device in bytes + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + **/ +hipError_t hipMemGetInfo(size_t* free, size_t* total); + +/** + * @brief Get allocated memory size via memory pointer. + * + * This function gets the allocated shared virtual memory size from memory pointer. + * + * @param[in] ptr Pointer to allocated memory + * @param[out] size Returns the allocated memory size in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + **/ +hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); +/** + * @brief Allocate an array on the device.
+ * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] width Requested array allocation width + * @param[in] height Requested array allocation height + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMallocArray(hipArray_t* array, const hipChannelFormatDesc* desc, size_t width, + size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); +/** + * @brief Create an array memory pointer on the device. + * + * @param[out] pHandle Pointer to the array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayCreate(hipArray_t* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); +/** + * @brief Destroy an array memory pointer on the device. + * + * @param[in] array Pointer to the array memory + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayDestroy(hipArray_t array); +/** + * @brief Create a 3D array memory pointer on the device. + * + * @param[out] array Pointer to the 3D array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArray3DCreate(hipArray_t* array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray); +/** + * @brief Create a 3D memory pointer on the device.
+ * + * @param[out] pitchedDevPtr Pointer to the 3D memory + * @param[in] extent Requested extent + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocPitch, hipMemGetInfo, hipFree + */ +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); +/** + * @brief Frees an array on the device. + * + * @param[in] array Pointer to array to free + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree + */ +hipError_t hipFreeArray(hipArray_t array); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +/** + * @brief Gets info about the specified array. + * + * @param[out] desc - Returned array type + * @param[out] extent - Returned array shape.
2D arrays will have depth of zero + * @param[out] flags - Returned array flags + * @param[in] array - The HIP array to get info for + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * @see hipArrayGetDescriptor, hipArray3DGetDescriptor + */ +hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, + hipArray_t array); +/** + * @brief Gets a 1D or 2D array descriptor. + * + * @param[out] pArrayDescriptor - Returned array descriptor + * @param[in] array - Array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * @see hipArray3DCreate, hipArray3DGetDescriptor, hipArrayCreate, hipArrayDestroy, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Gets a 3D array descriptor + * + * @param[out] pArrayDescriptor - Returned 3D array descriptor + * @param[in] array - 3D array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue #hipErrorInvalidHandle, #hipErrorContextIsDestroyed + * + * @see hipArray3DCreate, hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D,
hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Copies data between host and device. + * + * hipMemcpy2D supports memory matrix copy from the pointed area src to the pointed area dst. + * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * Device to Device copies don't need to wait for host synchronization. + * The copy is executed on the default null stream. The src and dst must not overlap. + * dpitch and spitch are the widths in bytes in memory matrix, width cannot exceed dpitch or + * spitch. + * + * For hipMemcpy2D, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. For optimal peer-to-peer copies, the copy device + * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess) with copy + * agent as the current device and src/dst as the peerDevice argument. If this is not done, the + * hipMemcpy2D will still work, but will perform the copy using a staging buffer on the host. + * + * @warning Calling hipMemcpy2D with dst and src pointers that do not match the hipMemcpyKind + * results in undefined behavior.
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width size in bytes of matrix transfer (columns) + * @param[in] height Height size in bytes of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device asynchronously. + * + * hipMemcpy2DAsync supports memory matrix copy from the pointed area src to the pointed area dst.
+ * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * dpitch and spitch are the widths in bytes of the memory matrices corresponding to dst and src. + * width cannot exceed dpitch or spitch. + * + * The copy is always performed by the device associated with the specified stream. + * The API is asynchronous with respect to the host, so the call may return before the copy is + * complete. The copy can optionally be executed in a specific stream by passing a non-zero stream + * argument, for HostToDevice or DeviceToHost copies, the copy can overlap with operations + * in other streams. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst pointers + * (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and src/dst as the + * peerDevice argument. If enabling device peer access is not done, the API will still work, but + * will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.
+ * + * @param[in] dst Pointer to destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Pointer to source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device.
+ * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Copies a 2D region of data from one array to another. + * + * @param[in] dst Destination array + * @param[in] wOffsetDst Destination starting X offset + * @param[in] hOffsetDst Destination starting Y offset + * @param[in] src Source array + * @param[in] wOffsetSrc Source starting X offset + * @param[in] hOffsetSrc Source starting Y offset (columns in bytes) + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, + size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination
memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] count size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] count Size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device.
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source array + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream in which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device.
+ * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyAtoH(void* dst, hipArray_t srcArray, size_t srcOffset, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyHtoA(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] p 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); +/** + * @brief Copies data between host and device asynchronously.
+ * + * @param[in] p 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] pCopy 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); +/** + * @brief Get information on memory allocations. 
+ * + * @param [out] pbase - Base pointer address + * @param [out] psize - Size of allocation + * @param [in] dptr - Device pointer + * + * @returns #hipSuccess, #hipErrorNotFound + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); + +/** + * @brief Perform Batch of 1D copies + * + * @param [in] dsts - Array of destination pointers + * @param [in] srcs - Array of source pointers. + * @param [in] sizes - Array of sizes for memcpy operations + * @param [in] count - Size of dsts, srcs and sizes arrays + * @param [in] attrs - Array of memcpy attributes (not supported) + * @param [in] attrsIdxs - Array of indices to map attrs to copies (not supported) + * @param [in] numAttrs - Size of attrs and attrsIdxs arrays (not supported) + * @param [out] failIdx - Pointer to a location to return failure index inside the batch + * @param [in] stream - stream used to enqueue operations in. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, size_t* attrsIdxs, size_t numAttrs, + size_t* failIdx, hipStream_t stream __dparm(0)); + +/** + * @brief Perform Batch of 3D copies + * + * @param [in] numOps - Total number of memcpy operations. + * @param [in] opList - Array of size numOps containing the actual memcpy operations. + * @param [out] failIdx - Pointer to a location to return the index of the copy where a failure + * - was encountered. + * @param [in] flags - Flags for future use, must be zero now. + * @param [in] stream - The stream to enqueue the operations in.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp* opList, size_t* failIdx, + unsigned long long flags, hipStream_t stream __dparm(0)); + +/** + * @brief Performs 3D memory copies between devices + * This API is asynchronous with respect to host + * + * @param [in] p - Parameters for memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p); + +/** + * @brief Performs 3D memory copies between devices asynchronously + * + * @param [in] p - Parameters for memory copy + * @param [in] stream - Stream to enqueue operation in. + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream __dparm(0)); +// doxygen end Memory +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup PeerToPeer PeerToPeer Device Memory Access + * @{ + * @ingroup API + * This section describes the PeerToPeer device memory access functions of HIP runtime API. + */ +/** + * @brief Determines if a device can access a peer device's memory. + * + * @param [out] canAccessPeer - Returns the peer access capability (0 or 1) + * @param [in] deviceId - The device accessing the peer device memory. + * @param [in] peerDeviceId - Peer device where memory is physically located + * + * The value of @p canAccessPeer, + * + * Returns "1" if the specified @p deviceId is capable of directly accessing memory physically + * located on @p peerDeviceId, + * + * Returns "0" if the specified @p deviceId is not capable of directly accessing memory physically + * located on @p peerDeviceId. 
+ * + * Returns "0" if @p deviceId == @p peerDeviceId, both are valid devices, + * however, a device is not a peer of itself. + * + * Returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enables direct access to memory allocations on a peer device. + * + * When this API is successful, all memory allocations on peer device will be mapped into the + * address space of the current device. In addition, any future memory allocation on the + * peer device will remain accessible from the current device, until the access is disabled using + * hipDeviceDisablePeerAccess or device is reset using hipDeviceReset. + * + * @param [in] peerDeviceId - Peer device to enable direct access to from the current device + * @param [in] flags - Reserved for future use, must be zero + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); +/** + * @brief Disables direct access to memory allocations on a peer device. + * + * If direct access to memory allocations on peer device has not been enabled yet from the current + * device, it returns #hipErrorPeerAccessNotEnabled. + * + * @param [in] peerDeviceId Peer device to disable direct access to + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + */ +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); + +/** + * @brief Copies memory between two peer accessible devices. 
+ * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDeviceId - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); +/** + * @brief Copies memory between two peer accessible devices asynchronously. + * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDevice - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * @param [in] stream - Stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream __dparm(0)); + +// doxygen end PeerToPeer +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Context Context Management [Deprecated] + * @{ + * This section describes the context management functions of HIP runtime API. + * + * @warning + * + * On the AMD platform, context management APIs are deprecated as there are better alternate + * interfaces, such as using hipSetDevice and stream APIs to achieve the required functionality. + * + * On the NVIDIA platform, CUDA supports the driver API that defines "Context" and "Devices" as + * separate entities. Each context contains a single device, which can theoretically have multiple + * contexts. HIP initially added limited support for these APIs to facilitate easy porting from + * existing driver codes. 
+ * + * These APIs are only for equivalent driver APIs on the NVIDIA platform. + * + */ + +/** + * @brief Create a context and set it as current/default context + * + * @param [out] ctx Context to create + * @param [in] flags Context creation flags + * @param [in] device device handle + * + * @returns #hipSuccess + * + * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, + * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); +/** + * @brief Destroy a HIP context [Deprecated] + * + * @param [in] ctx Context to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDestroy(hipCtx_t ctx); +/** + * @brief Pop the current/default context and return the popped context [Deprecated] + * + * @param [out] ctx The current context to pop + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPopCurrent(hipCtx_t* ctx); +/** + * @brief Push the context to be set as current/ default context [Deprecated] + * + * @param [in] ctx The current context to push + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPushCurrent(hipCtx_t ctx); +/** + * @brief Set the passed context as current/default [Deprecated] + * + * @param [in] ctx The context to set as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCurrent(hipCtx_t ctx); +/** + * @brief Get the handle of the current/ default context [Deprecated] + * + * @param [out] ctx The context to get as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCurrent(hipCtx_t* ctx); +/** + * @brief Get the handle of the device associated with current/default context [Deprecated] + * + * @param [out] device The device from the current context + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetDevice(hipDevice_t* device); +/** + * @brief Returns the approximate HIP api version. + * + * @param [in] ctx Context to check [Deprecated] + * @param [out] apiVersion API version to get + * + * @returns #hipSuccess + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. + * This function always set *apiVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the api revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, unsigned int* apiVersion); +/** + * @brief Get Cache configuration for a specific function [Deprecated] + * + * @param [out] cacheConfig Cache configuration + * + * @returns #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. 
+ * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Set L1/Shared cache partition [Deprecated] + * + * @param [in] cacheConfig Cache configuration to set + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Set Shared memory bank configuration [Deprecated] + * + * @param [in] config Shared memory configuration to set + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief Get Shared memory bank configuration [Deprecated] + * + * @param [out] pConfig Pointer of shared memory configuration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Blocks until the default context has completed all preceding requested tasks [Deprecated] + * + * @return #hipSuccess + * + * @warning This function waits for all streams on the default context to complete execution, and + * then returns. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSynchronize(void); +/** + * @brief Return flags used for creating default context [Deprecated] + * + * @param [out] flags Pointer of flags + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetFlags(unsigned int* flags); +/** + * @brief Enables direct access to memory allocations in a peer context [Deprecated] + * + * Memory which is already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerCtx Peer context + * @param [in] flags Flags, must be set to 0 + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * #hipErrorPeerAccessAlreadyEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); +/** + * @brief Disable direct access from current context's virtual address space to memory allocations + * physically located on a peer context. Disables direct access to memory allocations in a peer + * context and unregisters any registered allocations [Deprecated] + * + * Returns #hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device.
+ * + * @param [in] peerCtx Peer context to be disabled + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); + +/** + * @brief Get the state of the primary context [Deprecated] + * + * @param [in] dev Device to get primary context flags for + * @param [out] flags Pointer to store flags + * @param [out] active Pointer to store context state; 0 = inactive, 1 = active + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); +/** + * @brief Release the primary context on the GPU. + * + * @param [in] dev Device which primary context is released [Deprecated] + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on + * HIP/HIP-CLANG path. + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); +/** + * @brief Retain the primary context on the GPU [Deprecated] + * + * @param [out] pctx Returned context handle of the new context + * @param [in] dev Device whose primary context is retained + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); +/** + * @brief Resets the primary context on the GPU [Deprecated] + * + * @param [in] dev Device whose primary context is reset + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); +/** + * @brief Set flags for the primary context [Deprecated] + * + * @param [in] dev Device for which the primary context flags are set + * @param [in] flags New flags for the device + * + * @returns #hipSuccess, #hipErrorContextAlreadyInUse + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); +// doxygen end Context Management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * + * @defgroup Module Module Management + * @{ + * @ingroup API + * This section describes the module management functions of HIP runtime API. + * + */ +/** + * @brief Loads fatbin object + * + * @param [in] fatbin fatbin to be loaded as a module + * @param [out] module Module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoadFatBinary(hipModule_t* module, const void* fatbin); +/** + * @brief Loads code object from file into a module in the current context. + * + * @param [in] fname Filename of code object to load + + * @param [out] module Module + * + * @warning File/memory resources allocated in this function are released only in hipModuleUnload. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoad(hipModule_t* module, const char* fname); +/** + * @brief Frees the module + * + * @param [in] module Module to free + * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle + * + * The module is freed, and the code objects associated with it are destroyed.
+ */ +hipError_t hipModuleUnload(hipModule_t module); +/** + * @brief Function with kname will be extracted if present in module + * + * @param [in] module Module to get function from + * @param [in] kname Pointer to the name of function + * @param [out] function Pointer to function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound, + */ +hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); + +/** + * @brief Returns the number of functions within a module. + * + * @param [in] mod Module to get function count from + * @param [out] count function count from module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound, + */ +hipError_t hipModuleGetFunctionCount(unsigned int* count, hipModule_t mod); + +/** + * @brief Load hip Library from inmemory object + * + * @param [out] library Output Library + * @param [in] code In memory object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * @param [in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue, + */ +hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption** jitOptions, + void** jitOptionsValues, unsigned int numJitOptions, + hipLibraryOption** libraryOptions, void** libraryOptionValues, + unsigned int numLibraryOptions); + +/** + * @brief Load hip Library from file + * + * @param [out] library Output Library + * @param [in] fileName file which contains code object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * 
@param [in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName, + hipJitOption** jitOptions, void** jitOptionsValues, + unsigned int numJitOptions, hipLibraryOption** libraryOptions, + void** libraryOptionValues, unsigned int numLibraryOptions); + +/** + * @brief Unload HIP Library + * + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryUnload(hipLibrary_t library); + +/** + * @brief Get Kernel object from library + * + * @param [out] pKernel Output kernel object + * @param [in] library Input hip library + * @param [in] name kernel name to be searched for + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name); + +/** + * @brief Get Kernel count in library + * + * @param [out] count Count of kernels in library + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipLibraryGetKernelCount(unsigned int *count, hipLibrary_t library); + +/** + * @brief Find out attributes for a given function. + * @ingroup Execution + * @param [out] attr Attributes of function + * @param [in] func Pointer to the function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func); +/** + * @brief Find out a specific attribute for a given function.
+ * @ingroup Execution + * @param [out] value Pointer to the value + * @param [in] attrib Attributes of the given function + * @param [in] hfunc Function to get attributes from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc); +/** + * @brief Gets pointer to device entry function that matches entry function symbolPtr. + * + * @param [out] functionPtr Device entry function + * @param [in] symbolPtr Pointer to device entry function to search for + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGetFuncBySymbol(hipFunction_t* functionPtr, const void* symbolPtr); +/** + * @brief Gets function pointer of a requested HIP API + * + * @param [in] symbol The API base name + * @param [out] funcPtr Pointer to the requested function + * @param [in] flags Flags for the search + * @param [out] driverStatus Optional returned status of the search + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, + hipDriverEntryPointQueryResult* driverStatus); +/** + * @brief returns the handle of the texture reference with the name from the module. + * + * @param [in] hmod Module + * @param [in] name Pointer of name of texture reference + * @param [out] texRef Pointer of texture reference + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound, #hipErrorInvalidValue + */ +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); +/** + * @brief builds module from code object data which resides in host memory. + * + * The "image" is a pointer to the location of code object data. This data can be either + * a single code object or a fat binary (fatbin), which serves as the entry point for loading and + * launching device-specific kernel executions.
+ * + * By default, the following command generates a fatbin: + * + * "amdclang++ -O3 -c --offload-device-only --offload-arch=GPU_ARCH INPUT_FILE -o OUTPUT_FILE" + * + * For more details, refer to: + * + * Kernel Compilation in the HIP kernel language C++ support, or + * HIP runtime compilation (HIP RTC). + * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorOutOfMemory + */ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image); +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. Options are not used. hipModuleLoadData is called. + * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * @param [in] numOptions Number of options + * @param [in] options Options for JIT + * @param [in] optionValues Option values for JIT + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorOutOfMemory + */ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionValues); +/** + * @brief Adds bitcode data to be linked with options.
+ * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] data Input data which is null terminated + * @param [in] size Size of the input data + * @param [in] name Optional name for this input + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * If adding the file fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddData(hipLinkState_t state, hipJitInputType type, void* data, size_t size, + const char* name, unsigned int numOptions, hipJitOption* options, + void** optionValues); + +/** + * @brief Adds a file with bitcode to be linked with options. + * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] path Path to the input file where bitcode is present + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If adding the file fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddFile(hipLinkState_t state, hipJitInputType type, const char* path, + unsigned int numOptions, hipJitOption* options, void** optionValues); + +/** + * @brief Completes the linking of the given program.
+ * @param [in] state hip link state + * @param [out] hipBinOut Upon success, points to the output binary + * @param [out] sizeOut Size of the binary is stored (optional) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If adding the data fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ + +hipError_t hipLinkComplete(hipLinkState_t state, void** hipBinOut, size_t* sizeOut); + +/** + * @brief Creates a linker instance with options. + * @param [in] numOptions Number of options + * @param [in] options Array of options + * @param [in] optionValues Array of option values cast to void* + * @param [out] stateOut hip link state created upon success + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidConfiguration + * + * @see hipSuccess + */ +hipError_t hipLinkCreate(unsigned int numOptions, hipJitOption* options, void** optionValues, + hipLinkState_t* stateOut); +/** + * @brief Deletes the linker instance. + * @param [in] state link state instance + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipSuccess + */ +hipError_t hipLinkDestroy(hipLinkState_t state); + +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams or extra + * @ingroup Execution + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items + * @param [in] blockDimY Y block dimension specified in work-items + * @param [in] blockDimZ Z block dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations.
+ * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * @param [in] kernelParams Kernel parameters to launch + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * All passed arguments must be naturally aligned according to their type. The memory address of + * each argument should be a multiple of its size in bytes. Please refer to + * hip_porting_driver_api.md for sample usage. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. So gridDim.x * blockDim.x, gridDim.y * blockDim.y + * and gridDim.z * blockDim.z are always less than 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra); +/** \addtogroup ModuleCooperativeG Cooperative groups kernel launch of Module management. + * \ingroup Module + * @{ */ +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items. + * @param [in] blockDimY Y block dimension specified in work-items.
+ * @param [in] blockDimZ Z block dimension specified in work-items. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, + * in which case the default stream is used with associated synchronization rules. + * @param [in] kernelParams A list of kernel arguments. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$. + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorLaunchFailure, #hipErrorLaunchOutOfResources, + * #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. 
+ * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure, + * #hipErrorLaunchOutOfResources, #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, + * #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags); +/** + * @brief Launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams, where thread blocks can cooperate and synchronize as they execute. + * + * @param [in] f - Kernel to launch. + * @param [in] gridDim - Grid dimensions specified as multiple of blockDim. + * @param [in] blockDimX - Block dimensions specified in work-items + * @param [in] kernelParams - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'kernelParams' should be an array of pointers, each pointing to the corresponding + * argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); + +// Doxygen end group ModuleCooperativeG +/** @} */ + +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); +/** + * @brief Launches a HIP kernel using a generic function pointer and the specified configuration. + * @ingroup Execution + * + * This function is equivalent to hipLaunchKernelEx but accepts the kernel as a generic function + * pointer. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] fPtr Pointer to the device kernel function. + * @param [in] args Array of pointers to the kernel arguments.
+ * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args); +/** + * @brief Launches a HIP kernel using the driver API with the specified configuration. + * @ingroup Execution + * + * This function dispatches the device kernel represented by a HIP function object. + * It passes both the kernel parameters and any extra configuration arguments to the kernel launch. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] f HIP function object representing the device kernel to be launched. + * @param [in] params Array of pointers to the kernel parameters. + * @param [in] extra Array of pointers for additional launch parameters or extra configuration + * data. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, + void** extra); +/** + * @brief Returns a handle for the address range requested. + * + * This function returns a handle to a device pointer created using either hipMalloc set of APIs + * or through hipMemAddressReserve (as long as the ptr is mapped). + * + * @param [out] handle Ptr to the handle where the fd or other types will be returned. + * @param [in] dptr Device ptr for which we get the handle. + * @param [in] size Size of the address range. + * @param [in] handleType Type of the handle requested for the address range. + * @param [in] flags Any flags set regarding the handle requested. + * + * @returns #hipSuccess if the handle is returned successfully, otherwise an appropriate error code.
+ */ +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags); +// doxygen end Module +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Occupancy Occupancy + * @{ + * This section describes the occupancy functions of HIP runtime API. + * + */ +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function (hipFunction) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f, + int blockSize, + size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f, + int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, + unsigned int flags __dparm(hipOccupancyDefault)); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +// doxygen end Occupancy +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Profiler Profiler Control [Deprecated] + * @{ + * This section describes the profiler control functions of HIP runtime API. + * + * @warning The cudaProfilerInitialize API format for "configFile" is not supported.
+ * + */ +// TODO - expand descriptions: +/** + * @brief Start recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStart API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStart(); +/** + * @brief Stop recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStop API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStop(); +// doxygen end profiler +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Clang Launch API to support the triple-chevron syntax + * @{ + * This section describes the API to support the triple-chevron syntax. + */ +/** + * @brief Configure a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Set a kernel argument. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + * @param [in] arg Pointer to the argument in host memory. + * @param [in] size Size of the argument. + * @param [in] offset Offset of the argument on the argument stack. + * + */ +hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); +/** + * @brief Launch a kernel. + * + * @param [in] func Kernel to launch. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchByPtr(const void* func); +/** + * @brief Push configuration of a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Pop configuration of a kernel launch. + * + * @param [out] gridDim grid dimension specified as multiple of blockDim. + * @param [out] blockDim block dimensions specified in work-items + * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel.
The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream); +/** + * @brief C compliant kernel launch API + * + * @param [in] function_address - Kernel stub function pointer. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes __dparm(0), + hipStream_t stream __dparm(0)); + +/** + * @brief Enqueues a host function call in a stream. + * + * @param [in] stream - The stream to enqueue work in. + * @param [in] fn - The function to call once enqueued preceding operations are complete. + * @param [in] userData - User-specified data to be passed to the function.
+ * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue, + * #hipErrorNotSupported + * + * The host function to call in this API will be executed after the preceding operations in + * the stream are complete. The function is a blocking operation that blocks operations in the + * stream that follow it, until the function is returned. + * Event synchronization and internal callback functions make sure enqueued operations will + * execute in order, in the stream. + * + * The host function must not make any HIP API calls. The host function is non-reentrant. It must + * not perform synchronization with any operation that may depend on other processing execution + * but is not enqueued to run earlier in the stream. + * + * Host functions that are enqueued respectively in different non-blocking streams can run + * concurrently. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + */ +hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData); + +/** + * @brief Copies memory for 2D arrays. + * + * @param [in] pCopy - Parameters for the memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy); +// TODO: Move this to hip_ext.h +/** + * @brief Launches kernel from the pointer address, with arguments and shared memory on stream. + * + * @param [in] function_address - Pointer to the Kernel to launch. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block. + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations.
+ * @param [in] stream - Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent - If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent - If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags - The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue. + * + */ +hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); +// doxygen end Clang launch +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Texture Texture Management + * @{ + * This section describes the texture management functions of HIP runtime API. + */ + +/** + * @brief Creates a texture object. + * + * @param [out] pTexObject pointer to the texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @note 3D linear filter isn't supported on GFX90A boards, on which the API @p + * hipCreateTextureObject will return hipErrorNotSupported. 
+ * + */ +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const struct hipResourceViewDesc* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] textureObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +/** + * @brief Gets the channel descriptor of an array. + * + * @param [out] desc pointer to the returned channel format descriptor + * @param [in] array memory array on the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); + +/** + * @brief Gets resource descriptor for the texture object. + * + * @param [out] pResDesc pointer to resource descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets resource view descriptor for the texture object. + * + * @param [out] pResViewDesc pointer to resource view descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceViewDesc(struct hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets texture descriptor for the texture object. + * + * @param [out] pTexDesc pointer to texture descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); + +/** + * @brief Creates a texture object.
+ * + * @param [out] pTexObject pointer to texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] texObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectDestroy(hipTextureObject_t texObject); + +/** + * @brief Gets the resource descriptor of a texture object. + * + * @param [out] pResDesc pointer to the returned resource descriptor + * @param [in] texObject texture object to query + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject); + +/** + * @brief Gets the resource view descriptor of a texture object. + * + * @param [out] pResViewDesc pointer to the returned resource view descriptor + * @param [in] texObject texture object to query + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); + +/** + * @brief Gets the texture descriptor of a texture object. + * + * @param [out] pTexDesc pointer to the returned texture descriptor + * @param [in] texObject texture object to query + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject); + +/** + * @brief Allocate a mipmapped array on the device.
+ * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t* mipmappedArray, + const struct hipChannelFormatDesc* desc, struct hipExtent extent, + unsigned int numLevels, unsigned int flags __dparm(0)); + +/** + * @brief Frees a mipmapped array on the device. + * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); + +/** + * @brief Gets a mipmap level of a HIP mipmapped array. + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array to query + * @param[in] level - Mipmap level to retrieve + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipGetMipmappedArrayLevel(hipArray_t* levelArray, + hipMipmappedArray_const_t mipmappedArray, unsigned int level); + +/** + * @brief Create a mipmapped array. + * + * @param [out] pHandle pointer to mipmapped array + * @param [in] pMipmappedArrayDesc mipmapped array descriptor + * @param [in] numMipmapLevels mipmap level + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows.
+ */ +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels); + +/** + * @brief Destroys a mipmapped array. + * + * @param [in] hMipmappedArray mipmapped array to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray); + +/** + * @brief Gets the array of a mipmapped array at the requested mipmap level. + * + * @param [out] pLevelArray Pointer to the returned array for the requested mipmap level + * @param [in] hMipMappedArray Mipmapped array to query + * @param [in] level Mipmap level + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, hipMipmappedArray_t hMipMappedArray, + unsigned int level); + +/** + * + * @addtogroup TextureD Texture Management [Deprecated] + * @{ + * @ingroup Texture + * This section describes the deprecated texture management functions of HIP runtime API.
+ */ + +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @param [in] tex pointer to the texture reference to bind + * @param [in] mipmappedArray memory mipmapped array on the device + * @param [in] desc pointer to the channel format + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +/** + * @brief Gets the texture reference associated with the symbol [Deprecated] + * + * @param [out] texref texture reference + * @param [in] symbol pointer to the symbol associated with the texture for the reference + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); + +/** + * @brief Gets the border color used by a texture reference [Deprecated] + * + * @param [out] pBorderColor Returned Type and Value of RGBA color. + * @param [in] texRef Texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetBorderColor(float* pBorderColor, const textureReference* texRef); + +/** + * @brief Gets the array bound to a texture reference [Deprecated] + + * + * @param [out] pArray Returned array. + * @param [in] texRef texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetArray(hipArray_t* pArray, const textureReference* texRef); + +/** + * @brief Sets address mode for a texture reference [Deprecated] + * + * @param [in] texRef texture reference. + * @param [in] dim Dimension of the texture. + * @param [in] am Value of the texture address mode.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddressMode(textureReference* texRef, int dim, + enum hipTextureAddressMode am); +/** + * @brief Binds an array to a texture reference [Deprecated] + * + * @param [in] tex Pointer to the texture reference. + * @param [in] array Array to bind. + * @param [in] flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags); +/** + * @brief Sets the filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] fm Value of texture filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Sets flags for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] Flags Value of flags. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFlags(textureReference* texRef, unsigned int Flags); +/** + * @brief Set format for a texture reference [Deprecated] + * + * @param [in] texRef Pointer texture reference. + * @param [in] fmt Value of format. + * @param [in] NumPackedComponents Number of components per array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFormat(textureReference* texRef, hipArray_Format fmt, + int NumPackedComponents); +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer to memory on the device. + * @param [in] desc Pointer to the channel format descriptor. + * @param [in] size Size of memory in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size __dparm(UINT_MAX)); +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer to the 2D memory area on the device. + * @param [in] desc Pointer to the channel format descriptor. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture2D(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch); +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @param [in] tex Pointer of texture reference. + * @param [in] array Array to bind. + * @param [in] desc Pointer of channel format descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToArray(const textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc); +/** + * @brief Gets the offset of the alignment in a texture [Deprecated] + * + * @param [out] offset Pointer to the returned offset in bytes. + * @param [in] texref Pointer to the texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); +/** + * @brief Unbinds a texture [Deprecated] + * + * @param [in] tex Texture to unbind. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipUnbindTexture(const textureReference* tex); +/** + * @brief Gets the address for a texture reference [Deprecated] + * + * @param [out] dev_ptr Pointer to the returned device address. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, const textureReference* texRef); +/** + * @brief Gets the address mode for a texture reference [Deprecated] + * + * @param [out] pam Pointer to the returned address mode. + * @param [in] texRef Pointer to the texture reference. + * @param [in] dim Dimension. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddressMode(enum hipTextureAddressMode* pam, const textureReference* texRef, + int dim); +/** + * @brief Gets filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of filter mode. + * @param [in] texRef Pointer of texture reference.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFilterMode(enum hipTextureFilterMode* pfm, const textureReference* texRef); +/** + * @brief Gets flags for a texture reference [Deprecated] + * + * @param [out] pFlags Pointer to the returned flags. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFlags(unsigned int* pFlags, const textureReference* texRef); +/** + * @brief Gets texture format for a texture reference [Deprecated] + * + * @param [out] pFormat Pointer to the returned format. + * @param [out] pNumChannels Pointer to the returned number of channels. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, int* pNumChannels, + const textureReference* texRef); +/** + * @brief Gets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [out] pmaxAnsio Pointer to the returned maximum anisotropy. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, const textureReference* texRef); +/** + * @brief Gets the mipmap filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of the mipmap filter mode. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapFilterMode(enum hipTextureFilterMode* pfm, + const textureReference* texRef); +/** + * @brief Gets the mipmap level bias for a texture reference [Deprecated] + * + * @param [out] pbias Pointer to the returned mipmap level bias. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelBias(float* pbias, const textureReference* texRef); +/** + * @brief Gets the minimum and maximum mipmap level clamps for a texture reference [Deprecated] + * + * @param [out] pminMipmapLevelClamp Pointer to the returned minimum mipmap level clamp. + * @param [out] pmaxMipmapLevelClamp Pointer to the returned maximum mipmap level clamp. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, + const textureReference* texRef); +/** + * @brief Gets the mipmapped array bound to a texture reference [Deprecated] + * + * @param [out] pArray Pointer to the returned mipmapped array. + * @param [in] texRef Pointer to the texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipMappedArray(hipMipmappedArray_t* pArray, const textureReference* texRef); +/** + * @brief Sets an bound address for a texture reference [Deprecated] + * + * @param [out] ByteOffset Pointer of the offset in bytes. + * @param [in] texRef Pointer of texture reference. + * @param [in] dptr Pointer of device address to bind. + * @param [in] bytes Size in bytes.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress(size_t* ByteOffset, textureReference* texRef, hipDeviceptr_t dptr, + size_t bytes); +/** + * @brief Binds an address as a 2D texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] desc Pointer to the array descriptor. + * @param [in] dptr Device address to bind. + * @param [in] Pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress2D(textureReference* texRef, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, size_t Pitch); +/** + * @brief Sets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] maxAniso Value of the maximum anisotropy. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef, unsigned int maxAniso); +/** + * @brief Sets border color for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] pBorderColor Pointer to the border color. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetBorderColor(textureReference* texRef, float* pBorderColor); +/** + * @brief Sets mipmap filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] fm Value of filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Sets the mipmap level bias for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] bias Value of the mipmap level bias. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef, float bias); +/** + * @brief Sets the mipmap level clamp for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to the texture reference. + * @param [in] minMipMapLevelClamp Value of the minimum mipmap level clamp. + * @param [in] maxMipMapLevelClamp Value of the maximum mipmap level clamp. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef, float minMipMapLevelClamp, + float maxMipMapLevelClamp); +/** + * @brief Binds mipmapped array to a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference to bind. + * @param [in] mipmappedArray Pointer of mipmapped array to bind. + * @param [in] Flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, + struct hipMipmappedArray* mipmappedArray, unsigned int Flags); + +// doxygen end deprecated texture management +/** + * @} + */ + +// doxygen end Texture management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Runtime Runtime Compilation + * @{ + * This section describes the runtime compilation functions of the HIP runtime API. + * + */ +// This group is for HIPrtc + +// doxygen end Runtime +/** + * @} + */ + +/** + * + * @defgroup Callback Callback Activity APIs + * @{ + * This section describes the callback/Activity APIs of the HIP runtime API. + */ +/** + * @brief Returns HIP API name by ID. + * + * @param [in] id ID of HIP API + * + * @returns The HIP API name as a C string (const char*), not a hipError_t code. + * + */ +const char* hipApiName(uint32_t id); +/** + * @brief Returns kernel name reference by function name. + * + * @param [in] f Function (of type hipFunction_t) to look up + * + * @returns The kernel name as a C string (const char*), not a hipError_t code. + * + */ +const char* hipKernelNameRef(const hipFunction_t f); +/** + * @brief Retrieves kernel for a given host pointer, unless stated otherwise. + * + * @param [in] hostFunction Pointer to the host function. + * @param [in] stream Stream the kernel is executed on. + * + * @returns A C string (const char*); presumably the kernel name - TODO confirm. + * + */ +const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream); +/** + * @brief Returns device ID on the stream. + * + * @param [in] stream Stream of device executed on.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +int hipGetStreamDeviceId(hipStream_t stream); + +// doxygen end Callback +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Graph Graph Management + * @{ + * This section describes the graph management types & functions of the HIP runtime API. + */ + +/** + * @brief Begins graph capture on a stream. + * + * @param [in] stream - Stream on which to initiate capture. + * @param [in] mode - Controls the interaction of this capture sequence with other API calls that + * are not safe. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode); + +/** +* @brief Begins graph capture on a stream to an existing graph. +* +* @param [in] stream - Stream to initiate capture. +* @param [in] graph - Graph to capture into. +* @param [in] dependencies - Dependencies of the first node captured in the stream. Can be NULL if +* numDependencies is 0. +* @param [in] dependencyData - Optional array of data associated with each dependency. +* @param [in] numDependencies - Number of dependencies. +* @param [in] mode - Controls the interaction of this capture sequence with other API calls that +are not safe. +* +* @returns #hipSuccess, #hipErrorInvalidValue +* +* @warning param "const hipGraphEdgeData* dependencyData" is currently not supported and has to be +passed as nullptr. This API is marked as beta, meaning, while this is feature complete, it is still +open to changes and may have outstanding issues.
+* +*/ +hipError_t hipStreamBeginCaptureToGraph(hipStream_t stream, hipGraph_t graph, + const hipGraphNode_t* dependencies, + const hipGraphEdgeData* dependencyData, + size_t numDependencies, hipStreamCaptureMode mode); + +/** + * @brief Ends capture on a stream, returning the captured graph. + * + * @param [in] stream - Stream on which to end capture. + * @param [out] pGraph - Captured graph. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph); + +/** + * @brief Gets the capture status of a stream. + * + * @param [in] stream - Stream from which to get the capture status. + * @param [out] pCaptureStatus - Returns current capture status. + * @param [out] pId - Unique capture ID. + * + * @returns #hipSuccess, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); + +/** + * @brief Gets a stream's capture state. + * + * @param [in] stream - Stream from which to get the capture status. + * @param [out] captureStatus_out - Returns current capture status. + * @param [out] id_out - Unique capture ID. + * @param [out] graph_out - Returns the graph being captured into. + * @param [out] dependencies_out - Pointer to an array of nodes representing the graph's + * dependencies. + * @param [out] numDependencies_out - Returns size of the array returned in dependencies_out. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo_v2(hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out __dparm(0), + hipGraph_t* graph_out __dparm(0), + const hipGraphNode_t** dependencies_out __dparm(0), + size_t* numDependencies_out __dparm(0)); + +/** + * @brief Get stream's capture state + * + * @param [in] stream - Stream of which to get capture status from.
+ * @param [out] pCaptureStatus - Returns current capture status. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus); + +/** + * @brief Updates the set of dependencies in a capturing stream. + * + * @param [in] stream Stream that is being captured. + * @param [in] dependencies Pointer to an array of nodes to add/replace. + * @param [in] numDependencies Size of the dependencies array. + * @param [in] flags Flag to update dependency set. Should be one of the values + * in enum #hipStreamUpdateCaptureDependenciesFlags. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorIllegalState + * + */ +hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, hipGraphNode_t* dependencies, + size_t numDependencies, + unsigned int flags __dparm(0)); + +/** + * @brief Swaps the stream capture mode of a thread. + * + * @param [in,out] mode - Pointer to mode value to swap with the current mode. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode); + +/** + * @brief Creates a graph. + * + * @param [out] pGraph - Pointer to the graph to create. + * @param [in] flags - Flags for graph creation, must be 0. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags); + +/** + * @brief Destroys a graph. + * + * @param [in] graph - Instance of the graph to destroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroy(hipGraph_t graph); + +/** + * @brief Adds dependency edges to a graph. + * + * @param [in] graph - Instance of the graph to add dependencies to. + * @param [in] from - Pointer to the graph nodes with dependencies to add from. + * @param [in] to - Pointer to the graph nodes to add dependencies to.
+ * @param [in] numDependencies - Number of dependencies to add. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Removes dependency edges from a graph. + * + * @param [in] graph - Instance of the graph to remove dependencies from. + * @param [in] from - Array of nodes that provide the dependencies. + * @param [in] to - Array of dependent nodes. + * @param [in] numDependencies - Number of dependencies to remove. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Returns a graph's dependency edges. + * + * @param [in] graph - Instance of the graph to get the edges from. + * @param [out] from - Pointer to the graph nodes to return edge endpoints. + * @param [out] to - Pointer to the graph nodes to return edge endpoints. + * @param [in,out] numEdges - Entry count of from/to on input; returns number of edges. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * from and to may both be NULL, in which case this function only returns the number of edges in + * numEdges. Otherwise, numEdges entries will be filled in. If numEdges is higher than the actual + * number of edges, the remaining entries in from and to will be set to NULL, and the number of + * edges actually returned will be written to numEdges. + * + */ +hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, + size_t* numEdges); + +/** + * @brief Returns a graph's nodes. + * + * @param [in] graph - Instance of graph to get the nodes from. + * @param [out] nodes - Pointer to return the graph nodes. + * @param [out] numNodes - Returns the number of graph nodes.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + * nodes may be NULL, in which case this function will return the number of nodes in numNodes. + * Otherwise, numNodes entries will be filled in. If numNodes is higher than the actual number of + * nodes, the remaining entries in nodes will be set to NULL, and the number of nodes actually + * obtained will be returned in numNodes. + * + */ +hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes); + +/** + * @brief Returns a graph's root nodes. + * + * @param [in] graph - Instance of the graph to get the nodes from. + * @param [out] pRootNodes - Pointer to return the graph's root nodes. + * @param [in,out] pNumRootNodes - Entry count of pRootNodes on input; returns number of root nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pRootNodes may be NULL, in which case this function will return the number of root nodes in + * pNumRootNodes. Otherwise, pNumRootNodes entries will be filled in. If pNumRootNodes is higher + * than the actual number of root nodes, the remaining entries in pRootNodes will be set to NULL, + * and the number of nodes actually obtained will be returned in pNumRootNodes. + * + */ +hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes); + +/** + * @brief Returns a node's dependencies. + * + * @param [in] node - Graph node to get the dependencies from. + * @param [out] pDependencies - Pointer to return the dependencies. + * @param [out] pNumDependencies - Returns the number of graph node dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependencies may be NULL, in which case this function will return the number of dependencies in + * pNumDependencies. Otherwise, pNumDependencies entries will be filled in.
If pNumDependencies is + * higher than the actual number of dependencies, the remaining entries in pDependencies will be set + * to NULL, and the number of nodes actually obtained will be returned in pNumDependencies. + * + */ +hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, + size_t* pNumDependencies); + +/** + * @brief Returns a node's dependent nodes. + * + * @param [in] node - Graph node to get the dependent nodes from. + * @param [out] pDependentNodes - Pointer to return the graph dependent nodes. + * @param [out] pNumDependentNodes - Returns the number of graph node dependent nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependentNodes may be NULL, in which case this function will return the number of dependent + * nodes in pNumDependentNodes. Otherwise, pNumDependentNodes entries will be filled in. If + * pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in + * pDependentNodes will be set to NULL, and the number of nodes actually obtained will be returned + * in pNumDependentNodes. + * + */ +hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, + size_t* pNumDependentNodes); + +/** + * @brief Returns a node's type. + * + * @param [in] node - Node to get type of. + * @param [out] pType - Returns the node's type. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType); + +/** + * @brief Remove a node from the graph. + * + * @param [in] node - graph node to remove + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroyNode(hipGraphNode_t node); + +/** + * @brief Clones a graph. + * + * @param [out] pGraphClone - Returns newly created cloned graph. + * @param [in] originalGraph - original graph to clone from. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph); + +/** + * @brief Finds a cloned version of a node. + * + * @param [out] pNode - Returns the cloned node. + * @param [in] originalNode - original node handle. + * @param [in] clonedGraph - Cloned graph to query. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [out] pErrorNode - Pointer to error node. In case an error occurred during + * graph instantiation, it could modify the corresponding node. + * @param [out] pLogBuffer - Pointer to log buffer. + * @param [in] bufferSize - Size of the log buffer. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + */ +hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [in] flags - Flags to control instantiation. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API does not support any flags and behaves the same as hipGraphInstantiate. + */ +hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate.
+ * @param [in] instantiateParams - Graph instantiation Params + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphInstantiateParams* instantiateParams); +/** + * @brief Launches an executable graph in the specified stream. + * + * @param [in] graphExec - Instance of executable graph to launch. + * @param [in] stream - Instance of stream in which to launch executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Uploads an executable graph to a stream. + * + * @param [in] graphExec - Instance of executable graph to be uploaded. + * @param [in] stream - Instance of stream to which the executable graph is uploaded. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Creates a node described by nodeParams and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies of the created node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] nodeParams - Pointer to the node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue. + * + */ +hipError_t hipGraphAddNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraphNodeParams* nodeParams); + +/** + * @brief Return the flags of an executable graph. + * + * @param [in] graphExec - Executable graph to get the flags from. + * @param [out] flags - Flags used to instantiate this executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue.
+ * + */ +hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); + +/** + * @brief Updates parameters of a graph's node. + * + * @param [in] node - Instance of the node to set parameters for. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams* nodeParams); + +/** + * @brief Updates parameters of an executable graph's node. + * + * @param [in] graphExec - Instance of the executable graph. + * @param [in] node - Instance of the node to set parameters to. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, + hipGraphNodeParams* nodeParams); + +/** + * @brief Destroys an executable graph. + * + * @param [in] graphExec - Instance of executable graph to destroy. + * + * @returns #hipSuccess. + * + */ +hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec); + +// Check whether an executable graph can be updated with a graph and perform the update if possible. +/** + * @brief Check whether an executable graph can be updated with a graph and perform the update if + * possible. + * + * @param [in] hGraphExec - instance of executable graph to update. + * @param [in] hGraph - graph that contains the updated parameters. + * @param [out] hErrorNode_out - node which caused the permissibility check to forbid the update. + * @param [out] updateResult_out - Result code indicating whether the graph update was performed.
+ * @returns #hipSuccess, #hipErrorGraphExecUpdateFailure + * + */ +hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out); + +/** + * @brief Creates a kernel execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies of the kernel execution node. + * @param [in] numDependencies - The number of the dependencies. + * @param [in] pNodeParams - Pointer to the parameters of the kernel execution node. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Gets kernel node's parameters. + * + * @param [in] node - instance of the node to get parameters from. + * @param [out] pNodeParams - pointer to the parameters + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets a kernel node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a kernel node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the kernel node parameters. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Creates a memcpy node and adds it to a graph. + * + * @param [out] phGraphNode - Pointer to graph node that is created. + * @param [in] hGraph - Instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] copyParams - const pointer to the parameters for the memory copy. + * @param [in] ctx - context related to current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemcpyNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); +/** + * @brief Creates a memcpy node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] pCopyParams - const pointer to the parameters for the memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams); +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] node - instance of the node to get parameters from. + * @param [out] pNodeParams - pointer to the parameters. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] node - instance of the node to set parameters to. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a kernel node's attribute. + * + * @param [in] hNode - Instance of the node to set the attribute of. + * @param [in] attr - The attribute type to be set. + * @param [in] value - const pointer to the attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value); +/** + * @brief Gets a kernel node's attribute. + * + * @param [in] hNode - Instance of the node to get the attribute of. + * @param [in] attr - The attribute type to be queried. + * @param [out] value - Pointer to return the attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value); +/** + * @brief Sets the parameters of a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - pointer to the memcpy node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams); + +/** + * @brief Creates a 1D memcpy node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to.
+ * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to perform a 1-dimensional copy. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, + size_t count, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional + * copy. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* src, size_t count, + hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy from a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - Number of the dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy from a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy from a symbol on the + * device.
+ * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy to a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies on the memcpy execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy to a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. 
+ * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy to a symbol on the + * device. + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies on the memset execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pMemsetParams - const pointer to the parameters for the memory set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams); + +/** + * @brief Gets a memset node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); + +/** + * @brief Sets a memset node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipMemsetParams* pNodeParams); + +/** + * @brief Creates a host execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the host execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Returns a host node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams); + +/** + * @brief Sets a host node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a host node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Creates a child graph node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the child graph node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] childGraph - Graph to clone into this node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraph_t childGraph); + +/** + * @brief Gets a handle to the embedded graph of a child graph node. + * + * @param [in] node - Instance of the node to get child graph of. + * @param [out] pGraph - Pointer to get the graph.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph); + +/** + * @brief Updates node parameters in the child graph node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] node - node from the graph which was used to instantiate graphExec. + * @param [in] childGraph - child graph with updated parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipGraph_t childGraph); + +/** + * @brief Creates an empty node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies); + + +/** + * @brief Creates an event record node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event of the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + +/** + * @brief Returns the event associated with an event record node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event record node's event. + * + * @param [in] node - Instance of the node to set event to. + * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event record node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - node from the graph which was used to instantiate graphExec. + * @param [in] event - pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates an event wait node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node to be added. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event for the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + + +/** + * @brief Returns the event associated with an event wait node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event wait node's event. + * + * @param [in] node - Instance of the node to set event of. 
+ * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event wait node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - node from the graph which was used to instantiate graphExec. + * @param [in] event - pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates a memory allocation node and adds it to a graph + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph to add the node to + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in, out] pNodeParams - Node parameters for memory allocation, returns a pointer to the + * allocated memory.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Returns parameters for memory allocation node + * + * @param [in] node - Memory allocation node to query + * @param [out] pNodeParams - Parameters for the specified memory allocation node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Creates a memory free node and adds it to a graph + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph node to be added + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dev_ptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dev_ptr); + +/** + * @brief Returns parameters for memory free node + * + * @param [in] node - Memory free node to query + * @param [out] dev_ptr - Device pointer of the specified memory free node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr); + +/** + * @brief Get the mem attribute for graphs. + * + * @param [in] device - Device to get attributes from + * @param [in] attr - Attribute type to be queried + * @param [out] value - Value of the queried attribute + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Set the mem attribute for graphs. 
+ * + * @param [in] device - Device to set attribute of. + * @param [in] attr - Attribute type to be set. + * @param [in] value - Value of the attribute. + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Free unused memory reserved for graphs on a specific device and return it back to the OS. + * + * @param [in] device - Device for which memory should be trimmed + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGraphMemTrim(int device); + +/** + * @brief Create an instance of userObject to manage lifetime of a resource. + * + * @param [out] object_out - pointer to instance of userobj. + * @param [in] ptr - pointer to pass to destroy function. + * @param [in] destroy - destroy callback to remove resource. + * @param [in] initialRefcount - reference to resource. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags); + +/** + * @brief Release number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be released. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be retained. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain user object for graphs. + * + * @param [in] graph - pointer to graph to retain the user object for.
+ * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to retain. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1), unsigned int flags __dparm(0)); + +/** + * @brief Release user object from graphs. + * + * @param [in] graph - pointer to graph to release the user object from. + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to release. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1)); + +/** + * @brief Write a DOT file describing graph structure. + * + * @param [in] graph - graph object for which DOT file has to be generated. + * @param [in] path - path to write the DOT file. + * @param [in] flags - Flags from hipGraphDebugDotFlags to get additional node information. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOperatingSystem + * + */ +hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, unsigned int flags); + +/** + * @brief Copies attributes from source node to destination node. + * + * Copies attributes from source node to destination node. + * Both nodes must have the same context. + * + * @param [in] hSrc - Source node. + * @param [out] hDst - Destination node. + * For list of attributes see ::hipKernelNodeAttrID. + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + */ +hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, hipGraphNode_t hDst); + +/** + * @brief Enables or disables the specified node in the given graphExec + * + * Sets hNode to be either enabled or disabled. Disabled nodes are functionally equivalent + * to empty nodes until they are reenabled.
Existing node parameters are not affected by + * disabling/enabling the node. + * + * The node is identified by the corresponding hNode in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] isEnabled - Node is enabled if != 0, otherwise the node is disabled. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int isEnabled); +/** + * @brief Query whether a node in the given graphExec is enabled + * + * Sets isEnabled to 1 if hNode is enabled, or 0 if it is disabled. + * + * The node is identified by the corresponding node in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to query the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] isEnabled - Location to return the enabled status of the node. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled); + +/** + * @brief Creates an external semaphore wait node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the node dependencies.
+ * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresWaitNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Creates an external semaphore signal node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresSignalNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore signal node. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreWaitNodeParams* nodeParams); +/** + * @brief Returns external semaphore signal node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreSignalNodeParams* params_out); +/** + * @brief Returns external semaphore wait node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreWaitNodeParams* params_out); +/** + * @brief Updates node parameters in the external semaphore signal node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresSignalNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresWaitNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to get parameters from. + * @param [out] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* nodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to set parameters for. + * @param [in] nodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] phGraphNode - pointer to graph node to create. + * @param [in] hGraph - instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies on the memset execution node. + * @param [in] numDependencies - number of the dependencies. + * @param [in] memsetParams - const pointer to the parameters for the memory set. + * @param [in] ctx - context related to current device.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemsetNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +/** + * @brief Creates a memory free node and adds it to a graph + * + * @param [out] phGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] hGraph - Instance of the graph to which the node is added + * @param [in] dependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemFreeNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + hipDeviceptr_t dptr); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] copyParams - const pointer to the memcpy node params. + * @param [in] ctx - context related to current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] memsetParams - pointer to the parameters. + * @param [in] ctx - context related to current device.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +// doxygen end graph API +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Virtual Virtual Memory Management + * @{ + * This section describes the virtual memory management functions of HIP runtime API. + * + * @note Please note, the virtual memory management functions of HIP runtime + * API are implemented on Linux, under development on Windows. The + * following Virtual Memory Management APIs are not (yet) + * supported in HIP: + * - hipMemMapArrayAsync + */ + +/** + * @brief Frees an address range reservation made via hipMemAddressReserve + * + * @param [in] devPtr - starting address of the range. + * @param [in] size - size of the range. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressFree(void* devPtr, size_t size); + +/** + * @brief Reserves an address range + * + * @param [out] ptr - starting address of the reserved range. + * @param [in] size - size of the reservation. + * @param [in] alignment - alignment of the address. + * @param [in] addr - requested starting address of the range. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr, + unsigned long long flags); + +/** + * @brief Creates a memory allocation described by the properties and size + * + * @param [out] handle - value of the returned handle. + * @param [in] size - size of the allocation. + * @param [in] prop - properties of the allocation. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, + const hipMemAllocationProp* prop, unsigned long long flags); + +/** + * @brief Exports an allocation to a requested shareable handle type. + * + * @param [out] shareableHandle - value of the returned handle. + * @param [in] handle - handle to share. + * @param [in] handleType - type of the shareable handle. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemExportToShareableHandle(void* shareableHandle, + hipMemGenericAllocationHandle_t handle, + hipMemAllocationHandleType handleType, + unsigned long long flags); + +/** + * @brief Get the access flags set for the given location and ptr. + * + * @param [out] flags - flags for this location. + * @param [in] location - target location. + * @param [in] ptr - address to check the access flags. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr); + +/** + * @brief Calculates either the minimal or recommended granularity. + * + * @param [out] granularity - returned granularity. + * @param [in] prop - location properties. + * @param [in] option - determines which granularity to return. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop, + hipMemAllocationGranularity_flags option); + +/** + * @brief Retrieve the property structure of the given handle. + * + * @param [out] prop - properties of the given handle. + * @param [in] handle - handle to perform the query on. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, + hipMemGenericAllocationHandle_t handle); + +/** + * @brief Imports an allocation from a requested shareable handle type. + * + * @param [out] handle - returned value. + * @param [in] osHandle - shareable handle representing the memory allocation. + * @param [in] shHandleType - handle type. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle, + hipMemAllocationHandleType shHandleType); + +/** + * @brief Maps an allocation handle to a reserved virtual address range. + * + * @param [in] ptr - address where the memory will be mapped. + * @param [in] size - size of the mapping. + * @param [in] offset - offset into the memory, currently must be zero. + * @param [in] handle - memory allocation to be mapped. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle, + unsigned long long flags); + +/** + * @brief Maps or unmaps subregions of sparse HIP arrays and sparse HIP mipmapped arrays. + * + * @param [in] mapInfoList - list of hipArrayMapInfo. + * @param [in] count - number of hipArrayMapInfo in mapInfoList. + * @param [in] stream - stream identifier for the stream to use for map or unmap operations. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is under development. Currently it is not supported on AMD + * GPUs and returns #hipErrorNotSupported. + */ +hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count, + hipStream_t stream); + +/** + * @brief Release a memory handle representing a memory allocation which was previously allocated + * through hipMemCreate. 
+ * + * @param [in] handle - handle of the memory allocation. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle); + +/** + * @brief Returns the allocation handle of the backing memory allocation given the address. + * + * @param [out] handle - handle representing addr. + * @param [in] addr - address to look up. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); + +/** + * @brief Set the access flags for each location specified in desc for the given virtual address + * range. + * + * @param [in] ptr - starting address of the virtual address range. + * @param [in] size - size of the range. + * @param [in] desc - array of hipMemAccessDesc. + * @param [in] count - number of hipMemAccessDesc in desc. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count); + +/** + * @brief Unmap memory allocation of a given address range. + * + * @param [in] ptr - starting address of the range to unmap. + * @param [in] size - size of the virtual address range. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemUnmap(void* ptr, size_t size); + +// doxygen end virtual memory management API +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup GraphicsInterop Graphics Interoperability + * @{ + * This section describes graphics interoperability functions of HIP runtime API. + */ + +/** + * @brief Maps a graphics resource for access. + * + * @param [in] count - Number of resources to map. + * @param [in] resources - Pointer of resources to map. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle + * + */ +hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Get an array through which to access a subresource of a mapped graphics resource. + * + * @param [out] array - Pointer of array through which a subresource of resource may be accessed. + * @param [in] resource - Mapped resource to access. + * @param [in] arrayIndex - Array index for the subresource to access. + * @param [in] mipLevel - Mipmap level for the subresource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note In this API, the value of arrayIndex higher than zero is currently not supported. + * + */ +hipError_t hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel); +/** + * @brief Gets device accessible address of a graphics resource. 
+ * + * @param [out] devPtr - Device pointer through which the graphics resource may be accessed. + * @param [out] size - Size of the buffer accessible from devPtr. + * @param [in] resource - Mapped resource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, + hipGraphicsResource_t resource); +/** + * @brief Unmaps graphics resources. + * + * @param [in] count - Number of resources to unmap. + * @param [in] resources - Pointer of resources to unmap. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + */ +hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Unregisters a graphics resource. + * + * @param [in] resource - Graphics resource to unregister. + * + * @returns #hipSuccess + * + */ +hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource); +// doxygen end GraphicsInterop +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Surface Surface Object + * @{ + * + * This section describes surface object functions of HIP runtime API. + * + * @note APIs in this section are under development. + * + */ + +/** + * @brief Create a surface object. + * + * @param [out] pSurfObject Pointer of surface object to be created. + * @param [in] pResDesc Pointer of surface object descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); +/** + * @brief Destroy a surface object. + * + * @param [in] surfaceObject Surface object to be destroyed.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +// end of surface +/** + * @} + */ +#ifdef __cplusplus +} /* extern "C" */ +#endif +#ifdef __cplusplus +#if defined(__clang__) && defined(__HIP__) +template <typename T> static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f), + dynSharedMemPerBlk, blockSizeLimit); +} +template <typename T> static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, + unsigned int flags = 0) { + (void)flags; /* flags is accepted but not forwarded to the occupancy query */ + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f), + dynSharedMemPerBlk, blockSizeLimit); +} +#endif // defined(__clang__) && defined(__HIP__) + +/** + * @brief Gets the address of a symbol. + * @ingroup Memory + * @param [out] devPtr - Returns device pointer associated with symbol. + * @param [in] symbol - Device symbol. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template <typename T> hipError_t hipGetSymbolAddress(void** devPtr, const T& symbol) { + return ::hipGetSymbolAddress(devPtr, (const void*)&symbol); +} +/** + * @ingroup Memory + * @brief Gets the size of a symbol. + * + * @param [out] size - Returns the size of a symbol. + * @param [in] symbol - Device symbol address. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template <typename T> hipError_t hipGetSymbolSize(size_t* size, const T& symbol) { + return ::hipGetSymbolSize(size, (const void*)&symbol); +} + +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device.
+ * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbol + */ +template <typename T> +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device asynchronously on the stream. + * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbolAsync + */ +template <typename T> +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} +/** + * @brief Copies data from the given symbol on the device. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbol + */ +template <typename T> +hipError_t hipMemcpyFromSymbol(void* dst, const T& symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} +/** + * @brief Copies data from the given symbol on the device asynchronously on the stream. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbolAsync + */ +template <typename T> +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} + +/** + * @brief Returns occupancy for a kernel function. + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks.
+ * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template <class T> +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, T f, int blockSize, + size_t dynSharedMemPerBlk) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, reinterpret_cast<const void*>(f), + blockSize, dynSharedMemPerBlk); +} +/** + * @brief Returns occupancy for a device function with the specified flags. + * + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks. + * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * @param [in] flags - Flag to handle the behavior for the occupancy calculator. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template <class T> inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast<const void*>(f), blockSize, dynSharedMemPerBlk, flags); +} +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function).
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * @param [in] flags forwarded to hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template <typename UnaryFunction, class T> +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0, unsigned int flags = 0) { + if (min_grid_size == nullptr || block_size == nullptr || + reinterpret_cast<const void*>(func) == nullptr) { + return hipErrorInvalidValue; + } + + int dev; + hipError_t status; + if ((status = hipGetDevice(&dev)) != hipSuccess) { + return status; + } + + int max_threads_per_cu; + if ((status = hipDeviceGetAttribute(&max_threads_per_cu, + hipDeviceAttributeMaxThreadsPerMultiProcessor, dev)) != + hipSuccess) { + return status; + } + + int warp_size; + if ((status = hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, dev)) != hipSuccess) { + return status; + } + + int max_cu_count; + if ((status = hipDeviceGetAttribute(&max_cu_count, hipDeviceAttributeMultiprocessorCount, dev)) != + hipSuccess) { + return status; + } + + struct hipFuncAttributes attr; + if ((status = hipFuncGetAttributes(&attr, reinterpret_cast<const void*>(func))) != hipSuccess) { + return status; + } + + // Initial limits for the execution + const int func_max_threads_per_block = attr.maxThreadsPerBlock; + if (block_size_limit == 0) { + block_size_limit = func_max_threads_per_block; + } + + if
(func_max_threads_per_block < block_size_limit) { + block_size_limit = func_max_threads_per_block; + } + + const int block_size_limit_aligned = + ((block_size_limit + (warp_size - 1)) / warp_size) * warp_size; + + // For maximum search + int max_threads = 0; + int max_block_size{}; + int max_num_blocks{}; + for (int block_size_check_aligned = block_size_limit_aligned; block_size_check_aligned > 0; + block_size_check_aligned -= warp_size) { + // Make sure the logic uses the requested limit and not aligned + int block_size_check = + (block_size_limit < block_size_check_aligned) ? block_size_limit : block_size_check_aligned; + + size_t dyn_smem_size = block_size_to_dynamic_smem_size(block_size_check); + int optimal_blocks; + if ((status = hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + &optimal_blocks, func, block_size_check, dyn_smem_size, flags)) != hipSuccess) { + return status; + } + + int total_threads = block_size_check * optimal_blocks; + if (total_threads > max_threads) { + max_block_size = block_size_check; + max_num_blocks = optimal_blocks; + max_threads = total_threads; + } + + // Break if the logic reached possible maximum + if (max_threads_per_cu == max_threads) { + break; + } + } + + // Grid size is the number of blocks per CU * CU count + *min_grid_size = max_num_blocks * max_cu_count; + *block_size = max_block_size; + + return status; +} + +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function).
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * @note Forwards to hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags using that function's default flags. + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMem( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0) { + return hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + min_grid_size, block_size, func, block_size_to_dynamic_smem_size, block_size_limit); +} +/** + * @brief Returns grid and block size that achieve maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * + * Returns in \p *gridSize and \p *blockSize a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function).
+ * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * @note \p kernel is cast to hipFunction_t before being forwarded to the same-named API. + * @see hipOccupancyMaxPotentialBlockSize + */ +template inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, + int* blockSize, F kernel, + size_t dynSharedMemPerBlk, + uint32_t blockSizeLimit) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, (hipFunction_t)kernel, + dynSharedMemPerBlk, blockSizeLimit); +} +/** + * @brief Launches a device function + * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * \tparam T The type of the kernel function. + * + * @param [in] f Kernel function to launch. + * @param [in] gridDim Grid dimensions specified as a multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items. + * @param [in] kernelParams A list of kernel arguments. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for + * this kernel. The HIP-Clang compiler provides + * support for extern shared declarations. + * @param [in] stream Stream on which the kernel is launched. + * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, void** kernelParams, + unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDim, + kernelParams, sharedMemBytes, stream); +} +/** + * @brief Launches a kernel function on multiple devices, where thread blocks can + * cooperate and synchronize on execution. + * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * @param [in] launchParamsList List of kernel launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior.
+ * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +template +inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind; its channelDesc member is used as the channel format. + * @param [in] devPtr Pointer to memory on the device. + * @param [in] size Size of memory in bytes (defaults to UINT_MAX). + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer to memory on the device. + * @param [in] desc Texture channel format. + * @param [in] size Size of memory in bytes.
+ * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, + const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind; its channelDesc member is used as the channel format. + * @param [in] devPtr Pointer to the 2D memory area on the device. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, size_t width, size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer to the 2D memory area on the device. + * @param [in] desc Texture channel format. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, size_t width, + size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device.
+ * @note The channel format is queried from \p array with hipGetChannelDesc; if that query fails its error is returned. + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array) { + struct hipChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err; +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToArray(&tex, array, &desc); +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device; the channel format is taken from its level 0 array. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray) { + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err; +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated.
+ * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} +/** + * @brief Unbinds a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to unbind. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipUnbindTexture(const struct texture& tex) { + return hipUnbindTexture(&tex); +} +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @ingroup StreamO + * @{ + * + * This section describes wrappers for the stream-ordered memory pool allocation functions of + * the HIP runtime API. + * + * @note APIs in this section are implemented on Linux, under development on Windows. + * + */ + +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ call for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +static inline hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(dev_ptr, size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool on the stream + * + * This is an alternate C++ call for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows.
+ */ +template static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ call for @p hipMallocAsync made available through + * function overloading. + * + * @see hipMallocAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template +static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, hipStream_t stream) { + return hipMallocAsync(reinterpret_cast(dev_ptr), size, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ call for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template static inline hipError_t hipMallocFromPoolAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief Launches a HIP kernel using the specified configuration. + * @ingroup Execution + * + * This function dispatches the provided kernel with the given launch configuration and forwards the + * kernel arguments. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] kernel Pointer to the device kernel function to be launched. + * @param [in] args Variadic list of arguments to be passed to the kernel; each is converted to the matching kernel parameter type first. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +template +static inline __host__ hipError_t hipLaunchKernelEx(const hipLaunchConfig_t* config, + void (*kernel)(KernelArgs...), + Params&&... args) { + return [&](KernelArgs...
convertedArgs) { + void* pArgs[] = {&convertedArgs...}; + return ::hipLaunchKernelExC(config, reinterpret_cast(kernel), pArgs); + }(std::forward(args)...); +} +/** + * @} + */ + + +#endif // __cplusplus + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_runtime_api.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + + +/** + * @brief: C++ wrapper for hipMalloc + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (i.e. void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMalloc + */ +#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__) +template static inline hipError_t hipMalloc(T** devPtr, size_t size) { + return hipMalloc((void**)devPtr, size); +} +/** + * @brief: C++ wrapper for hipMallocPitch + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (i.e. void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocPitch + */ +template +static inline hipError_t hipMallocPitch(T** devPtr, size_t* pitch, size_t width, size_t height) { + return hipMallocPitch((void**)devPtr, pitch, width, height); +} +/** + * @brief: C++ wrapper for hipHostMalloc + * @ingroup Memory + * Provide an overload to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs.
+ * @note \p flags defaults to hipHostMallocDefault. + * @see hipHostMalloc + */ +template +static inline hipError_t hipHostMalloc(T** ptr, size_t size, + unsigned int flags = hipHostMallocDefault) { + return hipHostMalloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipHostAlloc + * @ingroup Memory + * Provide an overload to automatically typecast the pointer type from void**, and also provide a + * default for the flags (hipHostAllocDefault). + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipHostAlloc + */ +template static inline hipError_t hipHostAlloc(T** ptr, size_t size, + unsigned int flags = hipHostAllocDefault) { + return hipHostAlloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipMallocManaged + * + * @ingroup MemoryM + * Provide an overload to automatically typecast the pointer type from void**, and also provide a + * default for the flags (hipMemAttachGlobal). + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocManaged + * + */ +template +static inline hipError_t hipMallocManaged(T** devPtr, size_t size, + unsigned int flags = hipMemAttachGlobal) { + return hipMallocManaged((void**)devPtr, size, flags); +} + + +#endif +#endif +// doxygen end HIP API +/** + * @} + */ +#include + +#if USE_PROF_API +#include +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_texture_types.h b/3rdparty/hip-headers/include/hip/hip_texture_types.h new file mode 100644 index 0000000000..9cefbe674b --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_texture_types.h @@ -0,0 +1,29 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_vector_types.h b/3rdparty/hip-headers/include/hip/hip_vector_types.h new file mode 100644 index 0000000000..98a0bcdea1 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_vector_types.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! hip_vector_types.h : Defines the HIP vector types. 
+ +#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H + +#include + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#if __cplusplus +#include +#endif +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hip/hip_version.h b/3rdparty/hip-headers/include/hip/hip_version.h new file mode 100644 index 0000000000..bd3cbbc3a3 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/hip_version.h @@ -0,0 +1,17 @@ +// Auto-generated by cmake + +#ifndef HIP_VERSION_H +#define HIP_VERSION_H + +#define HIP_VERSION_MAJOR 7 +#define HIP_VERSION_MINOR 1 +#define HIP_VERSION_PATCH 25424 +#define HIP_VERSION_GITHASH "4179531dcd" +#define HIP_VERSION_BUILD_ID 0 +#define HIP_VERSION_BUILD_NAME "" +#define HIP_VERSION (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) + +#define __HIP_HAS_GET_PCH 1 + +#endif + diff --git a/3rdparty/hip-headers/include/hip/library_types.h b/3rdparty/hip-headers/include/hip/library_types.h new file mode 100644 index 0000000000..c3c8d5d835 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/library_types.h @@ -0,0 +1,84 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H +#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +/** Data type identifiers: general values run from 0 to 33, HIP-specific values start at 1000. */ +typedef enum hipDataType { + HIP_R_32F = 0, + HIP_R_64F = 1, + HIP_R_16F = 2, + HIP_R_8I = 3, + HIP_C_32F = 4, + HIP_C_64F = 5, + HIP_C_16F = 6, + HIP_C_8I = 7, + HIP_R_8U = 8, + HIP_C_8U = 9, + HIP_R_32I = 10, + HIP_C_32I = 11, + HIP_R_32U = 12, + HIP_C_32U = 13, + HIP_R_16BF = 14, + HIP_C_16BF = 15, + HIP_R_4I = 16, + HIP_C_4I = 17, + HIP_R_4U = 18, + HIP_C_4U = 19, + HIP_R_16I = 20, + HIP_C_16I = 21, + HIP_R_16U = 22, + HIP_C_16U = 23, + HIP_R_64I = 24, + HIP_C_64I = 25, + HIP_R_64U = 26, + HIP_C_64U = 27, + HIP_R_8F_E4M3 = 28, + HIP_R_8F_E5M2 = 29, + HIP_R_8F_UE8M0 = 30, + HIP_R_6F_E2M3 = 31, + HIP_R_6F_E3M2 = 32, + HIP_R_4F_E2M1 = 33, + // HIP specific Data Types + HIP_R_8F_E4M3_FNUZ = 1000, + HIP_R_8F_E5M2_FNUZ = 1001, +} hipDataType; +/** Library version properties: major version, minor version, patch level. */ +typedef enum hipLibraryPropertyType { + HIP_LIBRARY_MAJOR_VERSION, + HIP_LIBRARY_MINOR_VERSION, + HIP_LIBRARY_PATCH_LEVEL +} hipLibraryPropertyType; + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "library_types.h" /* NOTE(review): a quoted include of this header's own file name is normally resolved in the including file's directory first - confirm the CUDA header is actually reached */ +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hip/linker_types.h b/3rdparty/hip-headers/include/hip/linker_types.h new file mode 100644 index 0000000000..1131910322 --- /dev/null +++
b/3rdparty/hip-headers/include/hip/linker_types.h @@ -0,0 +1,138 @@ + +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_LINKER_TYPES_H +#define HIP_INCLUDE_HIP_LINKER_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif + + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup LinkerTypes Jit Linker Data Types + * @{ + * This section describes the Jit Linker data types. 
+ * + */ + +/** + * hipJitOption + */ +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, ///< CUDA Only Maximum registers that may be used in a thread, + ///< passed to compiler + hipJitOptionThreadsPerBlock, ///< CUDA Only Number of threads per block + hipJitOptionWallTime, ///< CUDA Only Value for total wall clock time + hipJitOptionInfoLogBuffer, ///< CUDA Only Pointer to the buffer with logged information + hipJitOptionInfoLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged info + hipJitOptionErrorLogBuffer, ///< CUDA Only Pointer to the buffer with logged error(s) + hipJitOptionErrorLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged + ///< error(s) + hipJitOptionOptimizationLevel, ///< Value of optimization level for generated code, acceptable + ///< options -O0, -O1, -O2, -O3 + hipJitOptionTargetFromContext, ///< CUDA Only The target context, which is the default + hipJitOptionTarget, ///< CUDA Only JIT target + hipJitOptionFallbackStrategy, ///< CUDA Only Fallback strategy + hipJitOptionGenerateDebugInfo, ///< CUDA Only Generate debug information + hipJitOptionLogVerbose, ///< CUDA Only Generate verbose log + hipJitOptionGenerateLineInfo, ///< CUDA Only Generate line number information + hipJitOptionCacheMode, ///< CUDA Only Set cache mode + hipJitOptionSm3xOpt, ///< @deprecated CUDA Only New SM3X option. + hipJitOptionFastCompile, ///< CUDA Only Set fast compile + hipJitOptionGlobalSymbolNames, ///< CUDA Only Array of device symbol names to be relocated to the + ///< host + hipJitOptionGlobalSymbolAddresses, ///< CUDA Only Array of host addresses to be relocated to the + ///< device + hipJitOptionGlobalSymbolCount, ///< CUDA Only Number of symbols. + hipJitOptionLto, ///< @deprecated CUDA Only Enable link-time optimization for device code + hipJitOptionFtz, ///< @deprecated CUDA Only Set single-precision denormals.
+ hipJitOptionPrecDiv, ///< @deprecated CUDA Only Set single-precision floating-point division + ///< and reciprocals + hipJitOptionPrecSqrt, ///< @deprecated CUDA Only Set single-precision floating-point square root + hipJitOptionFma, ///< @deprecated CUDA Only Enable floating-point multiplies and + ///< adds/subtracts operations + hipJitOptionPositionIndependentCode, ///< CUDA Only Generates Position Independent code + hipJitOptionMinCTAPerSM, ///< CUDA Only Hints to JIT compiler the minimum number of CTAs from the + ///< kernel's grid to be mapped to an SM + hipJitOptionMaxThreadsPerBlock, ///< CUDA Only Maximum number of threads in a thread block + hipJitOptionOverrideDirectiveValues, ///< CUDA Only Override Directive values + hipJitOptionNumOptions, ///< Number of options + hipJitOptionIRtoISAOptExt = 10000, ///< HIP Only Linker options to be passed on to compiler + hipJitOptionIRtoISAOptCountExt, ///< HIP Only Count of linker options to be passed on to compiler +} hipJitOption; +/** + * hipJitInputType + */ +typedef enum hipJitInputType { + hipJitInputCubin = 0, ///< CUDA Only Input cubin + hipJitInputPtx, ///< CUDA Only Input PTX + hipJitInputFatBinary, ///< CUDA Only Input FAT Binary + hipJitInputObject, ///< CUDA Only Host Object with embedded device code + hipJitInputLibrary, ///< CUDA Only Archive of Host Objects with embedded + ///< device code + hipJitInputNvvm, ///< @deprecated CUDA Only High Level intermediate + ///< code for LTO + hipJitNumLegacyInputTypes, ///< Count of Legacy Input Types + hipJitInputLLVMBitcode = 100, ///< HIP Only LLVM Bitcode or IR assembly + hipJitInputLLVMBundledBitcode = 101, ///< HIP Only LLVM Clang Bundled Code + hipJitInputLLVMArchivesOfBundledBitcode = 102, ///< HIP Only LLVM Archive of Bundled Bitcode + hipJitInputSpirv = 103, ///< HIP Only SPIRV Code Object + hipJitNumInputTypes = 10 ///< Count of Input Types (6 legacy + 4 HIP-only) +} hipJitInputType; +/** + * hipJitCacheMode + */ +typedef enum hipJitCacheMode { + hipJitCacheOptionNone = 0, +
hipJitCacheOptionCG, + hipJitCacheOptionCA +} hipJitCacheMode; +/** + * hipJitFallback + */ +typedef enum hipJitFallback { + hipJitPreferPTX = 0, + hipJitPreferBinary, +} hipJitFallback; +/** hipLibraryOption */ +typedef enum hipLibraryOption_e { + hipLibraryHostUniversalFunctionAndDataTable = 0, + hipLibraryBinaryIsPreserved = 1 +} hipLibraryOption; + +// doxygen end LinkerTypes +/** + * @} + */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +/* NOTE(review): the clang diagnostic push near the top of this header has no matching pop in this file - confirm whether one is intended. */ +#endif // HIP_INCLUDE_HIP_LINKER_TYPES_H \ No newline at end of file diff --git a/3rdparty/hip-headers/include/hip/surface_types.h b/3rdparty/hip-headers/include/hip/surface_types.h new file mode 100644 index 0000000000..d5cc457b85 --- /dev/null +++ b/3rdparty/hip-headers/include/hip/surface_types.h @@ -0,0 +1,65 @@ +/* +Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +/** + * @file surface_types.h + * @brief Defines surface types for HIP runtime. + */ + +#ifndef HIP_INCLUDE_HIP_SURFACE_TYPES_H +#define HIP_INCLUDE_HIP_SURFACE_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +#if !defined(__HIPCC_RTC__) +#include +#endif + +/** + * An opaque value that represents a hip surface object + */ +struct __hip_surface; +typedef struct __hip_surface* hipSurfaceObject_t; + +/** + * hip surface reference + */ +struct surfaceReference { + hipSurfaceObject_t surfaceObject; +}; + +/** + * hip surface boundary modes + */ +enum hipSurfaceBoundaryMode { + hipBoundaryModeZero = 0, + hipBoundaryModeTrap = 1, + hipBoundaryModeClamp = 2 +}; + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif /* !HIP_INCLUDE_HIP_SURFACE_TYPES_H */ diff --git a/3rdparty/hip-headers/include/hip/texture_types.h b/3rdparty/hip-headers/include/hip/texture_types.h new file mode 100644 index 0000000000..65290cd52c --- /dev/null +++ b/3rdparty/hip-headers/include/hip/texture_types.h @@ -0,0 +1,193 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wc++98-compat" +#endif + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "texture_types.h" +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#define hipTextureType1D 0x01 +#define hipTextureType2D 0x02 +#define hipTextureType3D 0x03 +#define hipTextureTypeCubemap 0x0C +#define hipTextureType1DLayered 0xF1 +#define hipTextureType2DLayered 0xF2 +#define hipTextureTypeCubemapLayered 0xFC + +/** + * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD + */ +#define HIP_IMAGE_OBJECT_SIZE_DWORD 12 +#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 +#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD +#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) + +/** + * An opaque value that represents a hip texture object + */ +struct __hip_texture; +typedef struct __hip_texture* hipTextureObject_t; + +/** + * hip texture address modes + */ +enum hipTextureAddressMode { + hipAddressModeWrap = 0, + hipAddressModeClamp = 1, + hipAddressModeMirror = 2, + hipAddressModeBorder = 3 
+}; + +/** + * hip texture filter modes + */ +enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 }; + +/** + * hip texture read modes + */ +enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 }; + +/** + * hip texture reference + */ +typedef struct textureReference { + int normalized; + enum hipTextureReadMode readMode; // used only for driver API's + enum hipTextureFilterMode filterMode; + enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + struct hipChannelFormatDesc channelDesc; + int sRGB; // Perform sRGB->linear conversion during texture read + unsigned int maxAnisotropy; // Limit to the anisotropy ratio + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; + + hipTextureObject_t textureObject; + int numChannels; + enum hipArray_Format format; +} textureReference; + +/** + * hip texture descriptor + */ +typedef struct hipTextureDesc { + enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + enum hipTextureFilterMode filterMode; + enum hipTextureReadMode readMode; + int sRGB; // Perform sRGB->linear conversion during texture read + float borderColor[4]; + int normalizedCoords; + unsigned int maxAnisotropy; + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; +} hipTextureDesc; + +#if __cplusplus + +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ +#if __HIP__ +#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type)) +#else +#define __HIP_TEXTURE_ATTRIB +#endif + +typedef textureReference* hipTexRef; + +template +struct __HIP_TEXTURE_ATTRIB texture : public textureReference { + texture(int norm = 0, enum hipTextureFilterMode fMode = 
hipFilterModePoint, + enum hipTextureAddressMode aMode = hipAddressModeClamp) { + normalized = norm; + readMode = mode; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = hipCreateChannelDesc(); + sRGB = 0; + textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; + } + + texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, + struct hipChannelFormatDesc desc) { + normalized = norm; + readMode = mode; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = desc; + sRGB = 0; + textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; + } +}; + +#endif /* __cplusplus */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/3rdparty/hip-headers/include/hsa/hsa.h b/3rdparty/hip-headers/include/hsa/hsa.h new file mode 100644 index 0000000000..00753e992e --- /dev/null +++ b/3rdparty/hip-headers/include/hsa/hsa.h @@ -0,0 +1,5752 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_INC_HSA_H_ +#define HSA_RUNTIME_INC_HSA_H_ + +#include /* size_t */ +#include /* uintXX_t */ + +#ifndef __cplusplus +#include /* bool */ +#endif /* __cplusplus */ + +// Placeholder for calling convention and import/export macros +#ifndef HSA_CALL +#define HSA_CALL +#endif + +#ifndef HSA_EXPORT_DECORATOR +#ifdef __GNUC__ +#define HSA_EXPORT_DECORATOR __attribute__ ((visibility ("default"))) +#else +#define HSA_EXPORT_DECORATOR +#endif +#endif +#define HSA_API_EXPORT HSA_EXPORT_DECORATOR HSA_CALL +#define HSA_API_IMPORT HSA_CALL + +#if !defined(HSA_API) && defined(HSA_EXPORT) +#define HSA_API HSA_API_EXPORT +#else +#define HSA_API HSA_API_IMPORT +#endif + +// Detect and set large model builds. +#undef HSA_LARGE_MODEL +#if defined(__LP64__) || defined(_M_X64) +#define HSA_LARGE_MODEL +#endif + +// Try to detect CPU endianness +#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU) +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define LITTLEENDIAN_CPU +#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define BIGENDIAN_CPU +#elif defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64) || defined(__loongarch64) || defined(__riscv) +#define LITTLEENDIAN_CPU +#endif +#endif + +#undef HSA_LITTLE_ENDIAN +#if defined(LITTLEENDIAN_CPU) +#define HSA_LITTLE_ENDIAN +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +#ifndef HSA_DEPRECATED +#define HSA_DEPRECATED +//#ifdef __GNUC__ +//#define HSA_DEPRECATED __attribute__((deprecated)) +//#else +//#define HSA_DEPRECATED __declspec(deprecated) +//#endif +#endif + +#define HSA_VERSION_1_0 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** \addtogroup error-codes Error codes + * @{ + */ + +/** + * @brief Status codes. 
+ */ +typedef enum { + /** + * The function has been executed successfully. + */ + HSA_STATUS_SUCCESS = 0x0, + /** + * A traversal over a list of elements has been interrupted by the + * application before completing. + */ + HSA_STATUS_INFO_BREAK = 0x1, + /** + * A generic error has occurred. + */ + HSA_STATUS_ERROR = 0x1000, + /** + * One of the actual arguments does not meet a precondition stated in the + * documentation of the corresponding formal argument. + */ + HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001, + /** + * The requested queue creation is not valid. + */ + HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002, + /** + * The requested allocation is not valid. + */ + HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003, + /** + * The agent is invalid. + */ + HSA_STATUS_ERROR_INVALID_AGENT = 0x1004, + /** + * The memory region is invalid. + */ + HSA_STATUS_ERROR_INVALID_REGION = 0x1005, + /** + * The signal is invalid. + */ + HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006, + /** + * The queue is invalid. + */ + HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007, + /** + * The HSA runtime failed to allocate the necessary resources. This error + * may also occur when the HSA runtime needs to spawn threads or create + * internal OS-specific events. + */ + HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008, + /** + * The AQL packet is malformed. + */ + HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009, + /** + * An error has been detected while releasing a resource. + */ + HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A, + /** + * An API other than ::hsa_init has been invoked while the reference count + * of the HSA runtime is 0. + */ + HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B, + /** + * The maximum reference count for the object has been reached. + */ + HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C, + /** + * The arguments passed to a function are not compatible. + */ + HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D, + /** + * The index is invalid.
+ */ + HSA_STATUS_ERROR_INVALID_INDEX = 0x100E, + /** + * The instruction set architecture is invalid. + */ + HSA_STATUS_ERROR_INVALID_ISA = 0x100F, + /** + * The instruction set architecture name is invalid. + */ + HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017, + /** + * The code object is invalid. + */ + HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010, + /** + * The executable is invalid. + */ + HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011, + /** + * The executable is frozen. + */ + HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012, + /** + * There is no symbol with the given name. + */ + HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013, + /** + * The variable is already defined. + */ + HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014, + /** + * The variable is undefined. + */ + HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015, + /** + * An HSAIL operation resulted in a hardware exception. + */ + HSA_STATUS_ERROR_EXCEPTION = 0x1016, + /** + * The code object symbol is invalid. + */ + HSA_STATUS_ERROR_INVALID_CODE_SYMBOL = 0x1018, + /** + * The executable symbol is invalid. + */ + HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL = 0x1019, + /** + * The file descriptor is invalid. + */ + HSA_STATUS_ERROR_INVALID_FILE = 0x1020, + /** + * The code object reader is invalid. + */ + HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER = 0x1021, + /** + * The cache is invalid. + */ + HSA_STATUS_ERROR_INVALID_CACHE = 0x1022, + /** + * The wavefront is invalid. + */ + HSA_STATUS_ERROR_INVALID_WAVEFRONT = 0x1023, + /** + * The signal group is invalid. + */ + HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP = 0x1024, + /** + * The HSA runtime is not in the configuration state. + */ + HSA_STATUS_ERROR_INVALID_RUNTIME_STATE = 0x1025, + /** + * The queue received an error that may require process termination. + */ + HSA_STATUS_ERROR_FATAL = 0x1026 +} hsa_status_t; + +/** + * @brief Query additional information about a status code. + * + * @param[in] status Status code. 
+ * + * @param[out] status_string A NUL-terminated string that describes the error + * status. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid + * status code, or @p status_string is NULL. + */ +hsa_status_t HSA_API hsa_status_string( + hsa_status_t status, + const char ** status_string); + +/** @} */ + +/** \defgroup common Common Definitions + * @{ + */ + +/** + * @brief Three-dimensional coordinate. + */ +typedef struct hsa_dim3_s { + /** + * X dimension. + */ + uint32_t x; + + /** + * Y dimension. + */ + uint32_t y; + + /** + * Z dimension. + */ + uint32_t z; +} hsa_dim3_t; + +/** + * @brief Access permissions. + */ +typedef enum { + /** + * Used to remove existing access + */ + HSA_ACCESS_PERMISSION_NONE = 0, + /** + * Read-only access. + */ + HSA_ACCESS_PERMISSION_RO = 1, + /** + * Write-only access. + */ + HSA_ACCESS_PERMISSION_WO = 2, + /** + * Read and write access. + */ + HSA_ACCESS_PERMISSION_RW = 3 +} hsa_access_permission_t; + +/** + * @brief POSIX file descriptor. + */ +typedef int hsa_file_t; + +/** @} **/ + + +/** \defgroup initshutdown Initialization and Shut Down + * @{ + */ + +/** + * @brief Initialize the HSA runtime. + * + * @details Initializes the HSA runtime if it is not already initialized, and + * increases the reference counter associated with the HSA runtime for the + * current process. Invocation of any HSA function other than ::hsa_init results + * in undefined behavior if the current HSA runtime reference counter is less + * than one. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. 
+ * + * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference + * count reaches INT32_MAX. + */ +hsa_status_t HSA_API hsa_init(); + +/** + * @brief Shut down the HSA runtime. + * + * @details Decreases the reference count of the HSA runtime instance. When the + * reference count reaches 0, the HSA runtime is no longer considered valid + * but the application might call ::hsa_init to initialize the HSA runtime + * again. + * + * Once the reference count of the HSA runtime reaches 0, all the resources + * associated with it (queues, signals, agent information, etc.) are + * considered invalid and any attempt to reference them in subsequent API calls + * results in undefined behavior. When the reference count reaches 0, the HSA + * runtime may release resources associated with it. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + */ +hsa_status_t HSA_API hsa_shut_down(); + +/** @} **/ + +/** \defgroup agentinfo System and Agent Information + * @{ + */ + +/** + * @brief Endianness. A convention used to interpret the bytes making up a data + * word. + */ +typedef enum { + /** + * The least significant byte is stored in the smallest address. + */ + HSA_ENDIANNESS_LITTLE = 0, + /** + * The most significant byte is stored in the smallest address. + */ + HSA_ENDIANNESS_BIG = 1 +} hsa_endianness_t; + +/** + * @brief Machine model. A machine model determines the size of certain data + * types in HSA runtime and an agent. + */ +typedef enum { + /** + * Small machine model. Addresses use 32 bits. + */ + HSA_MACHINE_MODEL_SMALL = 0, + /** + * Large machine model. Addresses use 64 bits. + */ + HSA_MACHINE_MODEL_LARGE = 1 +} hsa_machine_model_t; + +/** + * @brief Profile. A profile indicates a particular level of feature + * support. 
For example, in the base profile the application must use the HSA + * runtime allocator to reserve shared virtual memory, while in the full profile + * any host pointer can be shared across all the agents. + */ +typedef enum { + /** + * Base profile. + */ + HSA_PROFILE_BASE = 0, + /** + * Full profile. + */ + HSA_PROFILE_FULL = 1 +} hsa_profile_t; + +/** + * @brief System attributes. + */ +typedef enum { + /** + * Major version of the HSA runtime specification supported by the + * implementation. The type of this attribute is uint16_t. + */ + HSA_SYSTEM_INFO_VERSION_MAJOR = 0, + /** + * Minor version of the HSA runtime specification supported by the + * implementation. The type of this attribute is uint16_t. + */ + HSA_SYSTEM_INFO_VERSION_MINOR = 1, + /** + * Current timestamp. The value of this attribute monotonically increases at a + * constant rate. The type of this attribute is uint64_t. + */ + HSA_SYSTEM_INFO_TIMESTAMP = 2, + /** + * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is + * in the range 1-400MHz. The type of this attribute is uint64_t. + */ + HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3, + /** + * Maximum duration of a signal wait operation. Expressed as a count based on + * the timestamp frequency. The type of this attribute is uint64_t. + */ + HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4, + /** + * Endianness of the system. The type of this attribute is ::hsa_endianness_t. + */ + HSA_SYSTEM_INFO_ENDIANNESS = 5, + /** + * Machine model supported by the HSA runtime. The type of this attribute is + * ::hsa_machine_model_t. + */ + HSA_SYSTEM_INFO_MACHINE_MODEL = 6, + /** + * Bit-mask indicating which extensions are supported by the + * implementation. An extension with an ID of @p i is supported if the bit at + * position @p i is set. The type of this attribute is uint8_t[128]. + */ + HSA_SYSTEM_INFO_EXTENSIONS = 7, + /** + * String containing the ROCr build identifier. 
+ */ + HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200, + /** + * Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of + * this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201, + // TODO: Should this be per Agent? + /** + * Returns true if all Agents have access to system allocated memory (such as + * that allocated by mmap, malloc, or new) by default. + * If false then system allocated memory may only be made SVM accessible to + * an Agent by declaration of accessibility with hsa_amd_svm_set_attributes. + * The type of this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202, + /** + * Returns true if mwaitx is enabled on this system + * The type of this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203, + /** + * Returns true if DMABUF APIs are supported by the driver. The type of + * this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED = 0x204, + /** + * Returns true if Virtual Memory APIs are supported by the driver. The type of + * this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED = 0x205, + /** + * Returns true if XNACK is enabled on this system. The type of + * this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_XNACK_ENABLED = 0x206, + /** + * Major version of the HSA runtime extension specification supported by the + * implementation. The type of this attribute is uint16_t. + */ + HSA_AMD_SYSTEM_INFO_EXT_VERSION_MAJOR = 0x207, + /** + * Minor version of the HSA runtime extension specification supported by the + * implementation. The type of this attribute is uint16_t. + */ + HSA_AMD_SYSTEM_INFO_EXT_VERSION_MINOR = 0x208, +} hsa_system_info_t; + +/** + * @brief Get the current value of a system attribute. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. 
If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * system attribute, or @p value is NULL. + */ +hsa_status_t HSA_API hsa_system_get_info( + hsa_system_info_t attribute, + void* value); + +/** + * @brief HSA extensions. + */ +typedef enum { + /** + * Finalizer extension. + */ + HSA_EXTENSION_FINALIZER = 0, + /** + * Images extension. + */ + HSA_EXTENSION_IMAGES = 1, + + /** + * Performance counter extension. + */ + HSA_EXTENSION_PERFORMANCE_COUNTERS = 2, + + /** + * Profiling events extension. + */ + HSA_EXTENSION_PROFILING_EVENTS = 3, + /** + * Last standard extension number (not a count; same value as ::HSA_EXTENSION_PROFILING_EVENTS). + */ + HSA_EXTENSION_STD_LAST = 3, + /** + * First AMD extension number. + */ + HSA_AMD_FIRST_EXTENSION = 0x200, + /** + * Profiler extension. + */ + HSA_EXTENSION_AMD_PROFILER = 0x200, + /** + * Loader extension. + */ + HSA_EXTENSION_AMD_LOADER = 0x201, + /** + * AqlProfile extension. + */ + HSA_EXTENSION_AMD_AQLPROFILE = 0x202, + /** + * PC Sampling extension. + */ + HSA_EXTENSION_AMD_PC_SAMPLING = 0x203, + /** + * Last AMD extension. + */ + HSA_AMD_LAST_EXTENSION = 0x203 +} hsa_extension_t; + +/** + * @brief Query the name of a given extension. + * + * @param[in] extension Extension identifier. If the extension is not supported + * by the implementation (see ::HSA_SYSTEM_INFO_EXTENSIONS), the behavior + * is undefined. + * + * @param[out] name Pointer to a memory location where the HSA runtime stores + * the extension name. The extension name is a NUL-terminated string. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized.
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p name is NULL. + */ +hsa_status_t HSA_API hsa_extension_get_name( + uint16_t extension, + const char **name); + +/** + * @deprecated + * + * @brief Query if a given version of an extension is supported by the HSA + * implementation. + * + * @param[in] extension Extension identifier. + * + * @param[in] version_major Major version number. + * + * @param[in] version_minor Minor version number. + * + * @param[out] result Pointer to a memory location where the HSA runtime stores + * the result of the check. The result is true if the specified version of the + * extension is supported, and false otherwise. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p result is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_system_extension_supported( + uint16_t extension, + uint16_t version_major, + uint16_t version_minor, + bool* result); + +/** + * @brief Query if a given version of an extension is supported by the HSA + * implementation. All minor versions from 0 up to the returned @p version_minor + * must be supported by the implementation. + * + * @param[in] extension Extension identifier. + * + * @param[in] version_major Major version number. + * + * @param[out] version_minor Minor version number. + * + * @param[out] result Pointer to a memory location where the HSA runtime stores + * the result of the check. The result is true if the specified version of the + * extension is supported, and false otherwise. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. 
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p version_minor is NULL, or @p result is NULL. + */ +hsa_status_t HSA_API hsa_system_major_extension_supported( + uint16_t extension, + uint16_t version_major, + uint16_t *version_minor, + bool* result); + + +/** + * @deprecated + * + * @brief Retrieve the function pointers corresponding to a given version of an + * extension. Portable applications are expected to invoke the extension API + * using the returned function pointers + * + * @details The application is responsible for verifying that the given version + * of the extension is supported by the HSA implementation (see + * ::hsa_system_extension_supported). If the given combination of extension, + * major version, and minor version is not supported by the implementation, the + * behavior is undefined. + * + * @param[in] extension Extension identifier. + * + * @param[in] version_major Major version number for which to retrieve the + * function pointer table. + * + * @param[in] version_minor Minor version number for which to retrieve the + * function pointer table. + * + * @param[out] table Pointer to an application-allocated function pointer table + * that is populated by the HSA runtime. Must not be NULL. The memory associated + * with table can be reused or freed after the function returns. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p table is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_system_get_extension_table( + uint16_t extension, + uint16_t version_major, + uint16_t version_minor, + void *table); + +/** + * @brief Retrieve the function pointers corresponding to a given major version + * of an extension. 
Portable applications are expected to invoke the extension + * API using the returned function pointers. + * + * @details The application is responsible for verifying that the given major + * version of the extension is supported by the HSA implementation (see + * ::hsa_system_major_extension_supported). If the given combination of extension + * and major version is not supported by the implementation, the behavior is + * undefined. Additionally if the length doesn't allow space for a full minor + * version, it is implementation defined if only some of the function pointers for + * that minor version get written. + * + * @param[in] extension Extension identifier. + * + * @param[in] version_major Major version number for which to retrieve the + * function pointer table. + * + * @param[in] table_length Size in bytes of the function pointer table to be + * populated. The implementation will not write more than this many bytes to the + * table. + * + * @param[out] table Pointer to an application-allocated function pointer table + * that is populated by the HSA runtime. Must not be NULL. The memory associated + * with table can be reused or freed after the function returns. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p table is NULL. + */ +hsa_status_t HSA_API hsa_system_get_major_extension_table( + uint16_t extension, + uint16_t version_major, + size_t table_length, + void *table); + +/** + * @brief Struct containing an opaque handle to an agent, a device that participates in + * the HSA memory model. An agent can submit AQL packets for execution, and + * may also accept AQL packets for execution (agent dispatch packets or kernel + * dispatch packets launching HSAIL-derived binaries). 
+ */ +typedef struct hsa_agent_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_agent_t; + +/** + * @brief Agent features. + */ +typedef enum { + /** + * The agent supports AQL packets of kernel dispatch type. If this + * feature is enabled, the agent is also a kernel agent. + */ + HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1, + /** + * The agent supports AQL packets of agent dispatch type. + */ + HSA_AGENT_FEATURE_AGENT_DISPATCH = 2 +} hsa_agent_feature_t; + +/** + * @brief Hardware device type. + */ +typedef enum { + /** + * CPU device. + */ + HSA_DEVICE_TYPE_CPU = 0, + /** + * GPU device. + */ + HSA_DEVICE_TYPE_GPU = 1, + /** + * DSP device. + */ + HSA_DEVICE_TYPE_DSP = 2, + /** + * AI Engine (AIE) device. + */ + HSA_DEVICE_TYPE_AIE = 3 +} hsa_device_type_t; + +/** + * @brief Default floating-point rounding mode. + */ +typedef enum { + /** + * Use a default floating-point rounding mode specified elsewhere. + */ + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0, + /** + * Operations that specify the default floating-point mode are rounded to zero + * by default. + */ + HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1, + /** + * Operations that specify the default floating-point mode are rounded to the + * nearest representable number and that ties should be broken by selecting + * the value with an even least significant bit. + */ + HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2 +} hsa_default_float_rounding_mode_t; + +/** + * @brief Agent attributes. + */ +typedef enum { + /** + * Agent name. The type of this attribute is a NUL-terminated char[64]. The + * name must be at most 63 characters long (not including the NUL terminator) + * and all array elements not used for the name must be NUL. + */ + HSA_AGENT_INFO_NAME = 0, + /** + * Name of vendor. The type of this attribute is a NUL-terminated char[64]. 
+ * The name must be at most 63 characters long (not including the NUL + * terminator) and all array elements not used for the name must be NUL. + */ + HSA_AGENT_INFO_VENDOR_NAME = 1, + /** + * Agent capability. The type of this attribute is ::hsa_agent_feature_t. + */ + HSA_AGENT_INFO_FEATURE = 2, + /** + * @deprecated Query ::HSA_ISA_INFO_MACHINE_MODELS for a given instruction set + * architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Machine model supported by the agent. The type of this attribute is + * ::hsa_machine_model_t. + */ + HSA_AGENT_INFO_MACHINE_MODEL = 3, + /** + * @deprecated Query ::HSA_ISA_INFO_PROFILES for a given instruction set + * architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Profile supported by the agent. The type of this attribute is + * ::hsa_profile_t. + */ + HSA_AGENT_INFO_PROFILE = 4, + /** + * @deprecated Query ::HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES for a given + * instruction set architecture supported by the agent instead. If more than + * one ISA is supported by the agent, the returned value corresponds to the + * first ISA enumerated by ::hsa_agent_iterate_isas. + * + * Default floating-point rounding mode. The type of this attribute is + * ::hsa_default_float_rounding_mode_t, but the value + * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed. + */ + HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5, + /** + * @deprecated Query ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES + * for a given instruction set architecture supported by the agent instead. If + * more than one ISA is supported by the agent, the returned value corresponds + * to the first ISA enumerated by ::hsa_agent_iterate_isas.
+ * + * A bit-mask of ::hsa_default_float_rounding_mode_t values, representing the + * default floating-point rounding modes supported by the agent in the Base + * profile. The type of this attribute is uint32_t. The default floating-point + * rounding mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not + * be set. + */ + HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23, + /** + * @deprecated Query ::HSA_ISA_INFO_FAST_F16_OPERATION for a given instruction + * set architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Flag indicating that the f16 HSAIL operation is at least as fast as the + * f32 operation in the current agent. The value of this attribute is + * undefined if the agent is not a kernel agent. The type of this + * attribute is bool. + */ + HSA_AGENT_INFO_FAST_F16_OPERATION = 24, + /** + * @deprecated Query ::HSA_WAVEFRONT_INFO_SIZE for a given wavefront and + * instruction set architecture supported by the agent instead. If more than + * one ISA is supported by the agent, the returned value corresponds to the + * first ISA enumerated by ::hsa_agent_iterate_isas and the first wavefront + * enumerated by ::hsa_isa_iterate_wavefronts for that ISA. + * + * Number of work-items in a wavefront. Must be a power of 2 in the range + * [1,256]. The value of this attribute is undefined if the agent is not + * a kernel agent. The type of this attribute is uint32_t. + */ + HSA_AGENT_INFO_WAVEFRONT_SIZE = 6, + /** + * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_DIM for a given instruction + * set architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Maximum number of work-items of each dimension of a work-group. Each + * maximum must be greater than 0.
No maximum can exceed the value of + * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is + * undefined if the agent is not a kernel agent. The type of this + * attribute is uint16_t[3]. + */ + HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7, + /** + * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE for a given instruction + * set architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Maximum total number of work-items in a work-group. The value of this + * attribute is undefined if the agent is not a kernel agent. The type + * of this attribute is uint32_t. + */ + HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8, + /** + * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_DIM for a given instruction set + * architecture supported by the agent instead. + * + * Maximum number of work-items of each dimension of a grid. Each maximum must + * be greater than 0, and must not be smaller than the corresponding value in + * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of + * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined + * if the agent is not a kernel agent. The type of this attribute is + * ::hsa_dim3_t. + */ + HSA_AGENT_INFO_GRID_MAX_DIM = 9, + /** + * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_SIZE for a given instruction set + * architecture supported by the agent instead. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Maximum total number of work-items in a grid. The value of this attribute + * is undefined if the agent is not a kernel agent. The type of this + * attribute is uint32_t. + */ + HSA_AGENT_INFO_GRID_MAX_SIZE = 10, + /** + * @deprecated Query ::HSA_ISA_INFO_FBARRIER_MAX_SIZE for a given instruction + * set architecture supported by the agent instead.
If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Maximum number of fbarriers per work-group. Must be at least 32. The value + * of this attribute is undefined if the agent is not a kernel agent. The + * type of this attribute is uint32_t. + */ + HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11, + /** + * @deprecated The maximum number of queues is not statically determined. + * + * Maximum number of queues that can be active (created but not destroyed) at + * one time in the agent. The type of this attribute is uint32_t. + */ + HSA_AGENT_INFO_QUEUES_MAX = 12, + /** + * Minimum number of packets that a queue created in the agent + * can hold. Must be a power of 2 greater than 0. Must not exceed + * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this + * attribute is uint32_t. + */ + HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13, + /** + * Maximum number of packets that a queue created in the agent can + * hold. Must be a power of 2 greater than 0. The type of this attribute + * is uint32_t. + */ + HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14, + /** + * Type of a queue created in the agent. The type of this attribute is + * ::hsa_queue_type32_t. + */ + HSA_AGENT_INFO_QUEUE_TYPE = 15, + /** + * @deprecated NUMA information is not exposed anywhere else in the API. + * + * Identifier of the NUMA node associated with the agent. The type of this + * attribute is uint32_t. + */ + HSA_AGENT_INFO_NODE = 16, + /** + * Type of hardware device associated with the agent. The type of this + * attribute is ::hsa_device_type_t. + */ + HSA_AGENT_INFO_DEVICE = 17, + /** + * @deprecated Query ::hsa_agent_iterate_caches to retrieve information about + * the caches present in a given agent. + * + * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size + * of 0 for a particular level indicates that there is no cache information + * for that level. 
The type of this attribute is uint32_t[4]. + */ + HSA_AGENT_INFO_CACHE_SIZE = 18, + /** + * @deprecated An agent may support multiple instruction set + * architectures. See ::hsa_agent_iterate_isas. If more than one ISA is + * supported by the agent, the returned value corresponds to the first ISA + * enumerated by ::hsa_agent_iterate_isas. + * + * Instruction set architecture of the agent. The type of this attribute + * is ::hsa_isa_t. + */ + HSA_AGENT_INFO_ISA = 19, + /** + * Bit-mask indicating which extensions are supported by the agent. An + * extension with an ID of @p i is supported if the bit at position @p i is + * set. The type of this attribute is uint8_t[128]. + */ + HSA_AGENT_INFO_EXTENSIONS = 20, + /** + * Major version of the HSA runtime specification supported by the + * agent. The type of this attribute is uint16_t. + */ + HSA_AGENT_INFO_VERSION_MAJOR = 21, + /** + * Minor version of the HSA runtime specification supported by the + * agent. The type of this attribute is uint16_t. + */ + HSA_AGENT_INFO_VERSION_MINOR = 22, + /** + * This enum does not have a fixed underlying type, thus in C++ post D2338: + * If the enumeration type does not have a fixed underlying type, the value is + * unchanged if the original value is within the range of the enumeration + * values (9.7.1 [dcl.enum]), and otherwise, the behavior is + * undefined. + * Thus increase the range of this enum to encompass vendor extensions. + */ + HSA_AGENT_INFO_LAST = INT32_MAX +} hsa_agent_info_t; + +/** + * @brief Get the current value of an attribute for a given agent. + * + * @param[in] agent A valid agent. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
+ * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * agent attribute, or @p value is NULL. + */ +hsa_status_t HSA_API hsa_agent_get_info( + hsa_agent_t agent, + hsa_agent_info_t attribute, + void* value); + +/** + * @brief Iterate over the available agents, and invoke an + * application-defined callback on every iteration. + * + * @param[in] callback Callback to be invoked once per agent. The HSA + * runtime passes two arguments to the callback: the agent and the + * application data. If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * ::hsa_iterate_agents returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. +*/ +hsa_status_t HSA_API hsa_iterate_agents( + hsa_status_t (*callback)(hsa_agent_t agent, void* data), + void* data); + +/* + +// If we do not know the size of an attribute, we need to query it first +// Note: this API will not be in the spec unless needed +hsa_status_t HSA_API hsa_agent_get_info_size( + hsa_agent_t agent, + hsa_agent_info_t attribute, + size_t* size); + +// Set the value of an agents attribute +// Note: this API will not be in the spec unless needed +hsa_status_t HSA_API hsa_agent_set_info( + hsa_agent_t agent, + hsa_agent_info_t attribute, + void* value); + +*/ + +/** + * @brief Exception policies applied in the presence of hardware exceptions. + */ +typedef enum { + /** + * If a hardware exception is detected, a work-item signals an exception. 
+ */ + HSA_EXCEPTION_POLICY_BREAK = 1, + /** + * If a hardware exception is detected, a hardware status bit is set. + */ + HSA_EXCEPTION_POLICY_DETECT = 2 +} hsa_exception_policy_t; + +/** + * @deprecated Use ::hsa_isa_get_exception_policies for a given instruction set + * architecture supported by the agent instead. If more than one ISA is + * supported by the agent, this function uses the first value returned by + * ::hsa_agent_iterate_isas. + * + * @brief Retrieve the exception policy support for a given combination of + * agent and profile. + * + * @param[in] agent Agent. + * + * @param[in] profile Profile. + * + * @param[out] mask Pointer to a memory location where the HSA runtime stores a + * mask of ::hsa_exception_policy_t values. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid + * profile, or @p mask is NULL. + * + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_get_exception_policies( + hsa_agent_t agent, + hsa_profile_t profile, + uint16_t *mask); + +/** + * @brief Cache handle. + */ +typedef struct hsa_cache_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_cache_t; + +/** + * @brief Cache attributes. + */ +typedef enum { + /** + * The length of the cache name in bytes, not including the NUL terminator. + * The type of this attribute is uint32_t. + */ + HSA_CACHE_INFO_NAME_LENGTH = 0, + /** + * Human-readable description. The type of this attribute is a NUL-terminated + * character array with the length equal to the value of + * ::HSA_CACHE_INFO_NAME_LENGTH attribute. + */ + HSA_CACHE_INFO_NAME = 1, + /** + * Cache level.
An L1 cache must return a value of 1, an L2 must return a value + * of 2, and so on. The type of this attribute is uint8_t. + */ + HSA_CACHE_INFO_LEVEL = 2, + /** + * Cache size, in bytes. A value of 0 indicates that there is no size + * information available. The type of this attribute is uint32_t. + */ + HSA_CACHE_INFO_SIZE = 3 +} hsa_cache_info_t; + +/** + * @brief Get the current value of an attribute for a given cache object. + * + * @param[in] cache Cache. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CACHE The cache is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * cache attribute, or @p value is + * NULL. + */ +hsa_status_t HSA_API hsa_cache_get_info( + hsa_cache_t cache, + hsa_cache_info_t attribute, + void* value); + +/** + * @brief Iterate over the memory caches of a given agent, and + * invoke an application-defined callback on every iteration. + * + * @details Caches are visited in ascending order according to the value of the + * ::HSA_CACHE_INFO_LEVEL attribute. + * + * @param[in] agent A valid agent. + * + * @param[in] callback Callback to be invoked once per cache that is present in + * the agent. The HSA runtime passes two arguments to the callback: the cache + * and the application data. If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * that value is returned.
+ * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API hsa_agent_iterate_caches( + hsa_agent_t agent, + hsa_status_t (*callback)(hsa_cache_t cache, void* data), + void* data); + +/** + * @deprecated + * + * @brief Query if a given version of an extension is supported by an agent + * + * @param[in] extension Extension identifier. + * + * @param[in] agent Agent. + * + * @param[in] version_major Major version number. + * + * @param[in] version_minor Minor version number. + * + * @param[out] result Pointer to a memory location where the HSA runtime stores + * the result of the check. The result is true if the specified version of the + * extension is supported, and false otherwise. The result must be false if + * ::hsa_system_extension_supported returns false for the same extension + * version. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p result is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_extension_supported( + uint16_t extension, + hsa_agent_t agent, + uint16_t version_major, + uint16_t version_minor, + bool* result); + +/** + * @brief Query if a given version of an extension is supported by an agent. All + * minor versions from 0 up to the returned @p version_minor must be supported. + * + * @param[in] extension Extension identifier. 
+ * + * @param[in] agent Agent. + * + * @param[in] version_major Major version number. + * + * @param[out] version_minor Minor version number. + * + * @param[out] result Pointer to a memory location where the HSA runtime stores + * the result of the check. The result is true if the specified version of the + * extension is supported, and false otherwise. The result must be false if + * ::hsa_system_extension_supported returns false for the same extension + * version. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid + * extension, or @p version_minor is NULL, or @p result is NULL. + */ +hsa_status_t HSA_API hsa_agent_major_extension_supported( + uint16_t extension, + hsa_agent_t agent, + uint16_t version_major, + uint16_t *version_minor, + bool* result); + + +/** @} */ + + +/** \defgroup signals Signals + * @{ + */ + +/** + * @brief Signal handle. + */ +typedef struct hsa_signal_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. The value 0 is reserved. + */ + uint64_t handle; +} hsa_signal_t; + +/** + * @brief Signal value. The value occupies 32 bits in small machine mode, and 64 + * bits in large machine mode. + */ +#ifdef HSA_LARGE_MODEL + typedef int64_t hsa_signal_value_t; +#else + typedef int32_t hsa_signal_value_t; +#endif + +/** + * @brief Create a signal. + * + * @param[in] initial_value Initial value of the signal. + * + * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that + * any agent might wait on the signal. + * + * @param[in] consumers List of agents that might consume (wait on) the + * signal. 
If @p num_consumers is 0, this argument is ignored; otherwise, the + * HSA runtime might use the list to optimize the handling of the signal + * object. If an agent not listed in @p consumers waits on the returned + * signal, the behavior is undefined. The memory associated with @p consumers + * can be reused or freed after the function returns. + * + * @param[out] signal Pointer to a memory location where the HSA runtime will + * store the newly created signal handle. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p + * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers + * contains duplicates. + */ +hsa_status_t HSA_API hsa_signal_create( + hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal); + +/** + * @brief Destroy a signal previously created by ::hsa_signal_create. + * + * @param[in] signal Signal. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0. + */ +hsa_status_t HSA_API hsa_signal_destroy( + hsa_signal_t signal); + +/** + * @brief Atomically read the current value of a signal. + * + * @param[in] signal Signal. + * + * @return Value of the signal.
+*/ +hsa_signal_value_t HSA_API hsa_signal_load_scacquire( + hsa_signal_t signal); + +/** + * @copydoc hsa_signal_load_scacquire + */ +hsa_signal_value_t HSA_API hsa_signal_load_relaxed( + hsa_signal_t signal); + +/** + * @deprecated Renamed as ::hsa_signal_load_scacquire. + * + * @copydoc hsa_signal_load_scacquire +*/ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_load_acquire( + hsa_signal_t signal); + +/** + * @brief Atomically set the value of a signal. + * + * @details If the value of the signal is changed, all the agents waiting + * on @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. + * + * @param[in] value New signal value. + */ +void HSA_API hsa_signal_store_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_store_relaxed + */ +void HSA_API hsa_signal_store_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_store_screlease. + * + * @copydoc hsa_signal_store_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_store_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically set the value of a signal without necessarily notifying the + * agents waiting on it. + * + * @details The agents waiting on @p signal may not wake up even when the new + * value satisfies their wait condition. If the application wants to update the + * signal and there is no need to notify any agent, invoking this function can + * be more efficient than calling the non-silent counterpart. + * + * @param[in] signal Signal. + * + * @param[in] value New signal value.
+ */ +void HSA_API hsa_signal_silent_store_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_silent_store_relaxed + */ +void HSA_API hsa_signal_silent_store_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically set the value of a signal and return its previous value. + * + * @details If the value of the signal is changed, all the agents waiting + * on @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value New value. + * + * @return Value of the signal prior to the exchange. + * + */ +hsa_signal_value_t HSA_API hsa_signal_exchange_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_exchange_scacq_screl. + * + * @copydoc hsa_signal_exchange_scacq_screl + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_exchange_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_exchange_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_exchange_scacquire. + * + * @copydoc hsa_signal_exchange_scacquire + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_exchange_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_exchange_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); +/** + * @copydoc hsa_signal_exchange_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_exchange_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_exchange_screlease. 
+ * + * @copydoc hsa_signal_exchange_screlease + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically set the value of a signal if the observed value is equal to + * the expected value. The observed value is returned regardless of whether the + * replacement was done. + * + * @details If the value of the signal is changed, all the agents waiting + * on @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue + * doorbell signal, the behavior is undefined. + * + * @param[in] expected Value to compare with. + * + * @param[in] value New value. + * + * @return Observed value of the signal. + * + */ +hsa_signal_value_t HSA_API hsa_signal_cas_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_cas_scacq_screl. + * + * @copydoc hsa_signal_cas_scacq_screl + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_cas_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_cas_scacquire( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_cas_scacquire. 
+ * + * @copydoc hsa_signal_cas_scacquire + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acquire( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_cas_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_cas_relaxed( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_cas_scacq_screl + */ +hsa_signal_value_t HSA_API hsa_signal_cas_screlease( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_cas_screlease. + * + * @copydoc hsa_signal_cas_screlease + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_release( + hsa_signal_t signal, + hsa_signal_value_t expected, + hsa_signal_value_t value); + +/** + * @brief Atomically increment the value of a signal by a given amount. + * + * @details If the value of the signal is changed, all the agents waiting on + * @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value Value to add to the value of the signal. + * + */ +void HSA_API hsa_signal_add_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_add_scacq_screl. + * + * @copydoc hsa_signal_add_scacq_screl + */ +void HSA_API HSA_DEPRECATED hsa_signal_add_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_add_scacq_screl + */ +void HSA_API hsa_signal_add_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_add_scacquire. 
+ * + * @copydoc hsa_signal_add_scacquire + */ +void HSA_API HSA_DEPRECATED hsa_signal_add_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_add_scacq_screl + */ +void HSA_API hsa_signal_add_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_add_scacq_screl + */ +void HSA_API hsa_signal_add_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_add_screlease. + * + * @copydoc hsa_signal_add_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_add_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically decrement the value of a signal by a given amount. + * + * @details If the value of the signal is changed, all the agents waiting on + * @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value Value to subtract from the value of the signal. + * + */ +void HSA_API hsa_signal_subtract_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_subtract_scacq_screl. + * + * @copydoc hsa_signal_subtract_scacq_screl + */ +void HSA_API HSA_DEPRECATED hsa_signal_subtract_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_subtract_scacq_screl + */ +void HSA_API hsa_signal_subtract_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_subtract_scacquire. 
+ * + * @copydoc hsa_signal_subtract_scacquire + */ +void HSA_API HSA_DEPRECATED hsa_signal_subtract_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_subtract_scacq_screl + */ +void HSA_API hsa_signal_subtract_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_subtract_scacq_screl + */ +void HSA_API hsa_signal_subtract_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_subtract_screlease. + * + * @copydoc hsa_signal_subtract_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_subtract_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically perform a bitwise AND operation between the value of a + * signal and a given value. + * + * @details If the value of the signal is changed, all the agents waiting on + * @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value Value to AND with the value of the signal. + * + */ +void HSA_API hsa_signal_and_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_and_scacq_screl. + * + * @copydoc hsa_signal_and_scacq_screl + */ +void HSA_API HSA_DEPRECATED hsa_signal_and_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_and_scacq_screl + */ +void HSA_API hsa_signal_and_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_and_scacquire. 
+ * + * @copydoc hsa_signal_and_scacquire + */ +void HSA_API HSA_DEPRECATED hsa_signal_and_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_and_scacq_screl + */ +void HSA_API hsa_signal_and_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_and_scacq_screl + */ +void HSA_API hsa_signal_and_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_and_screlease. + * + * @copydoc hsa_signal_and_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_and_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically perform a bitwise OR operation between the value of a + * signal and a given value. + * + * @details If the value of the signal is changed, all the agents waiting on + * @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value Value to OR with the value of the signal. + */ +void HSA_API hsa_signal_or_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_or_scacq_screl. + * + * @copydoc hsa_signal_or_scacq_screl + */ +void HSA_API HSA_DEPRECATED hsa_signal_or_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_or_scacq_screl + */ +void HSA_API hsa_signal_or_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_or_scacquire. 
+ * + * @copydoc hsa_signal_or_scacquire + */ +void HSA_API HSA_DEPRECATED hsa_signal_or_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_or_scacq_screl + */ +void HSA_API hsa_signal_or_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_or_scacq_screl + */ +void HSA_API hsa_signal_or_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_or_screlease. + * + * @copydoc hsa_signal_or_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_or_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Atomically perform a bitwise XOR operation between the value of a + * signal and a given value. + * + * @details If the value of the signal is changed, all the agents waiting on + * @p signal for which @p value satisfies their wait condition are awakened. + * + * @param[in] signal Signal. If @p signal is a queue doorbell signal, the + * behavior is undefined. + * + * @param[in] value Value to XOR with the value of the signal. + * + */ +void HSA_API hsa_signal_xor_scacq_screl( + hsa_signal_t signal, + hsa_signal_value_t value); + + +/** + * @deprecated Renamed as ::hsa_signal_xor_scacq_screl. + * + * @copydoc hsa_signal_xor_scacq_screl + */ +void HSA_API HSA_DEPRECATED hsa_signal_xor_acq_rel( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_xor_scacq_screl + */ +void HSA_API hsa_signal_xor_scacquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_xor_scacquire. 
+ * + * @copydoc hsa_signal_xor_scacquire + */ +void HSA_API HSA_DEPRECATED hsa_signal_xor_acquire( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_xor_scacq_screl + */ +void HSA_API hsa_signal_xor_relaxed( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @copydoc hsa_signal_xor_scacq_screl + */ +void HSA_API hsa_signal_xor_screlease( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @deprecated Renamed as ::hsa_signal_xor_screlease. + * + * @copydoc hsa_signal_xor_screlease + */ +void HSA_API HSA_DEPRECATED hsa_signal_xor_release( + hsa_signal_t signal, + hsa_signal_value_t value); + +/** + * @brief Wait condition operator. + */ +typedef enum { + /** + * The two operands are equal. + */ + HSA_SIGNAL_CONDITION_EQ = 0, + /** + * The two operands are not equal. + */ + HSA_SIGNAL_CONDITION_NE = 1, + /** + * The first operand is less than the second operand. + */ + HSA_SIGNAL_CONDITION_LT = 2, + /** + * The first operand is greater than or equal to the second operand. + */ + HSA_SIGNAL_CONDITION_GTE = 3 +} hsa_signal_condition_t; + +/** + * @brief State of the application thread during a signal wait. + */ +typedef enum { + /** + * The application thread may be rescheduled while waiting on the signal. + */ + HSA_WAIT_STATE_BLOCKED = 0, + /** + * The application thread stays active while waiting on a signal. + */ + HSA_WAIT_STATE_ACTIVE = 1 +} hsa_wait_state_t; + + +/** + * @brief Wait until a signal value satisfies a specified condition, or a + * certain amount of time has elapsed. + * + * @details A wait operation can spuriously resume at any time sooner than the + * timeout (for example, due to system or other external factors) even when the + * condition has not been met. + * + * The function is guaranteed to return if the signal value satisfies the + * condition at some point in time during the wait, but the value returned to + * the application might not satisfy the condition. 
The application must ensure + * that signals are used in such way that wait wakeup conditions are not + * invalidated before dependent threads have woken up. + * + * When the wait operation internally loads the value of the passed signal, it + * uses the memory order indicated in the function name. + * + * @param[in] signal Signal. + * + * @param[in] condition Condition used to compare the signal value with @p + * compare_value. + * + * @param[in] compare_value Value to compare with. + * + * @param[in] timeout_hint Maximum duration of the wait. Specified in the same + * unit as the system timestamp. The operation might block for a shorter or + * longer time even if the condition is not met. A value of UINT64_MAX indicates + * no maximum. + * + * @param[in] wait_state_hint Hint used by the application to indicate the + * preferred waiting state. The actual waiting state is ultimately decided by + * HSA runtime and may not match the provided hint. A value of + * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal + * update by avoiding rescheduling overhead. + * + * @return Observed value of the signal, which might not satisfy the specified + * condition. + * +*/ +hsa_signal_value_t HSA_API hsa_signal_wait_scacquire( + hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + +/** + * @copydoc hsa_signal_wait_scacquire + */ +hsa_signal_value_t HSA_API hsa_signal_wait_relaxed( + hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + +/** + * @deprecated Renamed as ::hsa_signal_wait_scacquire. 
+ * + * @copydoc hsa_signal_wait_scacquire + */ +hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_wait_acquire( + hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + +/** + * @brief Group of signals. + */ +typedef struct hsa_signal_group_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_signal_group_t; + +/** + * @brief Create a signal group. + * + * @param[in] num_signals Number of elements in @p signals. Must not be 0. + * + * @param[in] signals List of signals in the group. The list must not contain + * any repeated elements. Must not be NULL. + * + * @param[in] num_consumers Number of elements in @p consumers. Must not be 0. + * + * @param[in] consumers List of agents that might consume (wait on) the signal + * group. The list must not contain repeated elements, and must be a subset of + * the set of agents that are allowed to wait on all the signals in the + * group. If an agent not listed in @p consumers waits on the returned group, + * the behavior is undefined. The memory associated with @p consumers can be + * reused or freed after the function returns. Must not be NULL. + * + * @param[out] signal_group Pointer to newly created signal group. Must not be + * NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_signals is 0, @p signals + * is NULL, @p num_consumers is 0, @p consumers is NULL, or @p signal_group is + * NULL. 
+ */ +hsa_status_t HSA_API hsa_signal_group_create( + uint32_t num_signals, + const hsa_signal_t *signals, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_group_t *signal_group); + +/** + * @brief Destroy a signal group previously created by ::hsa_signal_group_create. + * + * @param[in] signal_group Signal group. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid. + */ +hsa_status_t HSA_API hsa_signal_group_destroy( + hsa_signal_group_t signal_group); + +/** + * @brief Wait until the value of at least one of the signals in a signal group + * satisfies its associated condition. + * + * @details The function is guaranteed to return if the value of at least one of + * the signals in the group satisfies its associated condition at some point in + * time during the wait, but the signal value returned to the application may no + * longer satisfy the condition. The application must ensure that signals in the + * group are used in such a way that wait wakeup conditions are not invalidated + * before dependent threads have woken up. + * + * When this operation internally loads the value of the passed signal, it uses + * the memory order indicated in the function name. + * + * @param[in] signal_group Signal group. + * + * @param[in] conditions List of conditions. Each condition, and the value at + * the same index in @p compare_values, is used to compare the value of the + * signal at that index in @p signal_group (the signal passed by the application + * to ::hsa_signal_group_create at that particular index). The size of @p + * conditions must not be smaller than the number of signals in @p signal_group; + * any extra elements are ignored. Must not be NULL. + * + * @param[in] compare_values List of comparison values.
The size of @p + * compare_values must not be smaller than the number of signals in @p + * signal_group; any extra elements are ignored. Must not be NULL. + * + * @param[in] wait_state_hint Hint used by the application to indicate the + * preferred waiting state. The actual waiting state is decided by the HSA runtime + * and may not match the provided hint. A value of ::HSA_WAIT_STATE_ACTIVE may + * improve the latency of response to a signal update by avoiding rescheduling + * overhead. + * + * @param[out] signal Signal in the group that satisfied the associated + * condition. If several signals satisfied their condition, the function can + * return any of those signals. Must not be NULL. + * + * @param[out] value Observed value for @p signal, which might no longer satisfy + * the specified condition. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p conditions is NULL, @p + * compare_values is NULL, @p signal is NULL, or @p value is NULL. + */ +hsa_status_t HSA_API hsa_signal_group_wait_any_scacquire( + hsa_signal_group_t signal_group, + const hsa_signal_condition_t *conditions, + const hsa_signal_value_t *compare_values, + hsa_wait_state_t wait_state_hint, + hsa_signal_t *signal, + hsa_signal_value_t *value); + +/** + * @copydoc hsa_signal_group_wait_any_scacquire + */ +hsa_status_t HSA_API hsa_signal_group_wait_any_relaxed( + hsa_signal_group_t signal_group, + const hsa_signal_condition_t *conditions, + const hsa_signal_value_t *compare_values, + hsa_wait_state_t wait_state_hint, + hsa_signal_t *signal, + hsa_signal_value_t *value); + +/** @} */ + +/** \defgroup memory Memory + * @{ + */ + +/** + * @brief A memory region represents a block of virtual memory with certain + * properties. 
For example, the HSA runtime represents fine-grained memory in + * the global segment using a region. A region might be associated with more + * than one agent. + */ +typedef struct hsa_region_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_region_t; + +/** @} */ + + +/** \defgroup queue Queues + * @{ + */ + +/** + * @brief Queue type. Intended to be used for dynamic queue protocol + * determination. + */ +typedef enum { + /** + * Queue supports multiple producers. Use of multiproducer queue mechanics is + * required. + */ + HSA_QUEUE_TYPE_MULTI = 0, + /** + * Queue only supports a single producer. In some scenarios, the application + * may want to limit the submission of AQL packets to a single agent. Queues + * that support a single producer may be more efficient than queues supporting + * multiple producers. Use of multiproducer queue mechanics is not supported. + */ + HSA_QUEUE_TYPE_SINGLE = 1, + /** + * Queue supports multiple producers and cooperative dispatches. Cooperative + * dispatches are able to use GWS synchronization. Queues of this type may be + * limited in number. The runtime may return the same queue to serve multiple + * ::hsa_queue_create calls when this type is given. Callers must inspect the + * returned queue to discover queue size. Queues of this type are reference + * counted and require a matching number of ::hsa_queue_destroy calls to + * release. Use of multiproducer queue mechanics is required. See + * ::HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES to query agent support for this + * type. + */ + HSA_QUEUE_TYPE_COOPERATIVE = 2 +} hsa_queue_type_t; + +/** + * @brief A fixed-size type used to represent ::hsa_queue_type_t constants. + */ +typedef uint32_t hsa_queue_type32_t; + +/** + * @brief Queue features. + */ +typedef enum { + /** + * Queue supports kernel dispatch packets. 
+ */ + HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1, + + /** + * Queue supports agent dispatch packets. + */ + HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2 +} hsa_queue_feature_t; + +/** + * @brief User mode queue. + * + * @details The queue structure is read-only and allocated by the HSA runtime, + * but agents can directly modify the contents of the buffer pointed to by @a + * base_address, or use HSA runtime APIs to access the doorbell signal. + * + */ +typedef struct hsa_queue_s { + /** + * Queue type. + */ + hsa_queue_type32_t type; + + /** + * Queue features mask. This is a bit-field of ::hsa_queue_feature_t + * values. Applications should ignore any unknown set bits. + */ + uint32_t features; + +#ifdef HSA_LARGE_MODEL + void* base_address; +#elif defined HSA_LITTLE_ENDIAN + /** + * Starting address of the HSA runtime-allocated buffer used to store the AQL + * packets. Must be aligned to the size of an AQL packet. + */ + void* base_address; + /** + * Reserved. Must be 0. + */ + uint32_t reserved0; +#else + uint32_t reserved0; + void* base_address; +#endif + + /** + * Signal object used by the application to indicate the ID of a packet that + * is ready to be processed. The HSA runtime manages the doorbell signal. If + * the application tries to replace or destroy this signal, the behavior is + * undefined. + * + * If @a type is ::HSA_QUEUE_TYPE_SINGLE, the doorbell signal value must be + * updated in a monotonically increasing fashion. If @a type is + * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any + * value. + */ + hsa_signal_t doorbell_signal; + + /** + * Maximum number of packets the queue can hold. Must be a power of 2. + */ + uint32_t size; + /** + * Reserved. Must be 0. + */ + uint32_t reserved1; + /** + * Queue identifier, which is unique over the lifetime of the application. + */ + uint64_t id; + +} hsa_queue_t; + +/** + * @brief Create a user mode queue.
+ * + * @details The HSA runtime creates the queue structure, the underlying packet + * buffer, the completion signal, and the write and read indexes. The initial + * value of the write and read indexes is 0. The type of every packet in the + * buffer is initialized to ::HSA_PACKET_TYPE_INVALID. + * + * The application should only rely on the error code returned to determine if + * the queue is valid. + * + * @param[in] agent Agent where to create the queue. + * + * @param[in] size Number of packets the queue is expected to + * hold. Must be a power of 2 between 1 and the value of + * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly + * created queue is the maximum of @p size and the value of + * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent. + * + * @param[in] type Type of the queue, a bitwise OR of ::hsa_queue_type_t values. + * If the value of ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE, + * then @p type must also be ::HSA_QUEUE_TYPE_SINGLE. + * + * @param[in] callback Callback invoked by the HSA runtime for every + * asynchronous event related to the newly created queue. May be NULL. The HSA + * runtime passes three arguments to the callback: a code identifying the event + * that triggered the invocation, a pointer to the queue where the event + * originated, and the application data. + * + * @param[in] data Application data that is passed to @p callback on every + * invocation. May be NULL. + * + * @param[in] private_segment_size Hint indicating the maximum + * expected private segment usage per work-item, in bytes. There may + * be performance degradation if the application places a kernel + * dispatch packet in the queue and the corresponding private segment + * usage exceeds @p private_segment_size. If the application does not + * want to specify any particular value for this argument, @p + * private_segment_size must be UINT32_MAX. If the queue does not + * support kernel dispatch packets, this argument is ignored.
+ * + * @param[in] group_segment_size Hint indicating the maximum expected + * group segment usage per work-group, in bytes. There may be + * performance degradation if the application places a kernel dispatch + * packet in the queue and the corresponding group segment usage + * exceeds @p group_segment_size. If the application does not want to + * specify any particular value for this argument, @p + * group_segment_size must be UINT32_MAX. If the queue does not + * support kernel dispatch packets, this argument is ignored. + * + * @param[out] queue Memory location where the HSA runtime stores a pointer to + * the newly created queue. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not + * support queues of the given type. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, + * @p size is 0, @p type is an invalid queue type, or @p queue is NULL. + * + */ +hsa_status_t HSA_API hsa_queue_create( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue); + +/** + * @brief Create a queue for which the application or a kernel is responsible + * for processing the AQL packets. + * + * @details The application can use this function to create queues where AQL + * packets are not parsed by the packet processor associated with an agent, + * but rather by a unit of execution running on that agent (for example, a + * thread in the host application). 
+ * + * The application is responsible for ensuring that all the producers and + * consumers of the resulting queue can access the provided doorbell signal + * and memory region. The application is also responsible for ensuring that the + * unit of execution processing the queue packets supports the indicated + * features (AQL packet types). + * + * When the queue is created, the HSA runtime allocates the packet buffer using + * @p region, and the write and read indexes. The initial value of the write and + * read indexes is 0, and the type of every packet in the buffer is initialized + * to ::HSA_PACKET_TYPE_INVALID. The values of the @e size, @e type, @e features, + * and @e doorbell_signal fields in the returned queue match the values passed + * by the application. + * + * @param[in] region Memory region that the HSA runtime should use to allocate + * the AQL packet buffer and any other queue metadata. + * + * @param[in] size Number of packets the queue is expected to hold. Must be a + * power of 2 greater than 0. + * + * @param[in] type Queue type. + * + * @param[in] features Supported queue features. This is a bit-field of + * ::hsa_queue_feature_t values. + * + * @param[in] doorbell_signal Doorbell signal that the HSA runtime must + * associate with the returned queue. The signal handle must not be 0. + * + * @param[out] queue Memory location where the HSA runtime stores a pointer to + * the newly created queue. The application should not rely on the value + * returned for this argument but only on the status code to determine if the + * queue is valid. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources.
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p + * size is 0, @p type is an invalid queue type, the doorbell signal handle is + * 0, or @p queue is NULL. + * + */ +hsa_status_t HSA_API hsa_soft_queue_create( + hsa_region_t region, + uint32_t size, + hsa_queue_type32_t type, + uint32_t features, + hsa_signal_t doorbell_signal, + hsa_queue_t **queue); + +/** + * @brief Destroy a user mode queue. + * + * @details When a queue is destroyed, the state of the AQL packets that have + * not yet been fully processed (their completion phase has not finished) + * becomes undefined. It is the responsibility of the application to ensure that + * all pending queue operations are finished if their results are required. + * + * The resources allocated by the HSA runtime during queue creation (queue + * structure, ring buffer, doorbell signal) are released. The queue should not + * be accessed after being destroyed. + * + * @param[in] queue Pointer to a queue created using ::hsa_queue_create. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL. + */ +hsa_status_t HSA_API hsa_queue_destroy( + hsa_queue_t *queue); + +/** + * @brief Inactivate a queue. + * + * @details Inactivating the queue aborts any pending executions and prevents any + * new packets from being processed. Any more packets written to the queue once + * it is inactivated will be ignored by the packet processor. + * + * @param[in] queue Pointer to a queue. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL. + */ +hsa_status_t HSA_API hsa_queue_inactivate( + hsa_queue_t *queue); + +/** + * @deprecated Renamed as ::hsa_queue_load_read_index_scacquire. + * + * @copydoc hsa_queue_load_read_index_scacquire + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_read_index_acquire( + const hsa_queue_t *queue); + +/** + * @brief Atomically load the read index of a queue. + * + * @param[in] queue Pointer to a queue. + * + * @return Read index of the queue pointed to by @p queue. + */ +uint64_t HSA_API hsa_queue_load_read_index_scacquire( + const hsa_queue_t *queue); + +/** + * @copydoc hsa_queue_load_read_index_scacquire + */ +uint64_t HSA_API hsa_queue_load_read_index_relaxed( + const hsa_queue_t *queue); + +/** + * @deprecated Renamed as ::hsa_queue_load_write_index_scacquire. + * + * @copydoc hsa_queue_load_write_index_scacquire + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_write_index_acquire( + const hsa_queue_t *queue); + +/** + * @brief Atomically load the write index of a queue. + * + * @param[in] queue Pointer to a queue. + * + * @return Write index of the queue pointed to by @p queue. + */ +uint64_t HSA_API hsa_queue_load_write_index_scacquire( + const hsa_queue_t *queue); + +/** + * @copydoc hsa_queue_load_write_index_scacquire + */ +uint64_t HSA_API hsa_queue_load_write_index_relaxed( + const hsa_queue_t *queue); + +/** + * @brief Atomically set the write index of a queue. + * + * @details It is recommended that the application uses this function to update + * the write index when there is a single agent submitting work to the queue + * (the queue type is ::HSA_QUEUE_TYPE_SINGLE). + * + * @param[in] queue Pointer to a queue. + * + * @param[in] value Value to assign to the write index. + * + */ +void HSA_API hsa_queue_store_write_index_relaxed( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_store_write_index_screlease.
+ * + * @copydoc hsa_queue_store_write_index_screlease + */ +void HSA_API HSA_DEPRECATED hsa_queue_store_write_index_release( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @copydoc hsa_queue_store_write_index_relaxed + */ +void HSA_API hsa_queue_store_write_index_screlease( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_cas_write_index_scacq_screl. + * + * @copydoc hsa_queue_cas_write_index_scacq_screl + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acq_rel( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @brief Atomically set the write index of a queue if the observed value is + * equal to the expected value. The application can inspect the returned value + * to determine if the replacement was done. + * + * @param[in] queue Pointer to a queue. + * + * @param[in] expected Expected value. + * + * @param[in] value Value to assign to the write index if @p expected matches + * the observed write index. Must be greater than @p expected. + * + * @return Previous value of the write index. + */ +uint64_t HSA_API hsa_queue_cas_write_index_scacq_screl( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_cas_write_index_scacquire. + * + * @copydoc hsa_queue_cas_write_index_scacquire + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acquire( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @copydoc hsa_queue_cas_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_cas_write_index_scacquire( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @copydoc hsa_queue_cas_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_cas_write_index_relaxed( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_cas_write_index_screlease. 
+ * + * @copydoc hsa_queue_cas_write_index_screlease + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_release( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @copydoc hsa_queue_cas_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_cas_write_index_screlease( + const hsa_queue_t *queue, + uint64_t expected, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_add_write_index_scacq_screl. + * + * @copydoc hsa_queue_add_write_index_scacq_screl + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acq_rel( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @brief Atomically increment the write index of a queue by an offset. + * + * @param[in] queue Pointer to a queue. + * + * @param[in] value Value to add to the write index. + * + * @return Previous value of the write index. + */ +uint64_t HSA_API hsa_queue_add_write_index_scacq_screl( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_add_write_index_scacquire. + * + * @copydoc hsa_queue_add_write_index_scacquire + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acquire( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @copydoc hsa_queue_add_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_add_write_index_scacquire( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @copydoc hsa_queue_add_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_add_write_index_relaxed( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_add_write_index_screlease. 
+ * + * @copydoc hsa_queue_add_write_index_screlease + */ +uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_release( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @copydoc hsa_queue_add_write_index_scacq_screl + */ +uint64_t HSA_API hsa_queue_add_write_index_screlease( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @brief Atomically set the read index of a queue. + * + * @details Modifications of the read index are not allowed and result in + * undefined behavior if the queue is associated with an agent for which + * only the corresponding packet processor is permitted to update the read + * index. + * + * @param[in] queue Pointer to a queue. + * + * @param[in] value Value to assign to the read index. + * + */ +void HSA_API hsa_queue_store_read_index_relaxed( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @deprecated Renamed as ::hsa_queue_store_read_index_screlease. + * + * @copydoc hsa_queue_store_read_index_screlease + */ +void HSA_API HSA_DEPRECATED hsa_queue_store_read_index_release( + const hsa_queue_t *queue, + uint64_t value); + +/** + * @copydoc hsa_queue_store_read_index_relaxed + */ +void HSA_API hsa_queue_store_read_index_screlease( + const hsa_queue_t *queue, + uint64_t value); +/** @} */ + + +/** \defgroup aql Architected Queuing Language + * @{ + */ + +/** + * @brief Packet type. + */ +typedef enum { + /** + * Vendor-specific packet. + */ + HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0, + /** + * The packet has been processed in the past, but has not been reassigned to + * the packet processor. A packet processor must not process a packet of this + * type. All queues support this packet type. + */ + HSA_PACKET_TYPE_INVALID = 1, + /** + * Packet used by agents for dispatching jobs to kernel agents. Not all + * queues support packets of this type (see ::hsa_queue_feature_t). 
+ */ + HSA_PACKET_TYPE_KERNEL_DISPATCH = 2, + /** + * Packet used by agents to delay processing of subsequent packets, and to + * express complex dependencies between multiple packets. All queues support + * this packet type. + */ + HSA_PACKET_TYPE_BARRIER_AND = 3, + /** + * Packet used by agents for dispatching jobs to agents. Not all + * queues support packets of this type (see ::hsa_queue_feature_t). + */ + HSA_PACKET_TYPE_AGENT_DISPATCH = 4, + /** + * Packet used by agents to delay processing of subsequent packets, and to + * express complex dependencies between multiple packets. All queues support + * this packet type. + */ + HSA_PACKET_TYPE_BARRIER_OR = 5 +} hsa_packet_type_t; + +/** + * @brief Scope of the memory fence operation associated with a packet. + */ +typedef enum { + /** + * No scope (no fence is applied). The packet relies on external fences to + * ensure visibility of memory updates. + */ + HSA_FENCE_SCOPE_NONE = 0, + /** + * The fence is applied with agent scope for the global segment. + */ + HSA_FENCE_SCOPE_AGENT = 1, + /** + * The fence is applied across both agent and system scope for the global + * segment. + */ + HSA_FENCE_SCOPE_SYSTEM = 2 +} hsa_fence_scope_t; + +/** + * @brief Sub-fields of the @a header field that is present in any AQL + * packet. The offset (with respect to the address of @a header) of a sub-field + * is identical to its enumeration constant. The width of each sub-field is + * determined by the corresponding value in ::hsa_packet_header_width_t. The + * offset and the width are expressed in bits. + */ + typedef enum { + /** + * Packet type. The value of this sub-field must be one of + * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the + * packet layout is vendor-specific. + */ + HSA_PACKET_HEADER_TYPE = 0, + /** + * Barrier bit. If the barrier bit is set, the processing of the current + * packet only launches when all preceding packets (within the same queue) are + * complete. 
+ */ + HSA_PACKET_HEADER_BARRIER = 8, + /** + * Acquire fence scope. The value of this sub-field determines the scope and + * type of the memory fence operation applied before the packet enters the + * active phase. An acquire fence ensures that any subsequent global segment + * or image loads by any unit of execution that belongs to a dispatch that has + * not yet entered the active phase on any queue of the same kernel agent, + * sees any data previously released at the scopes specified by the acquire + * fence. The value of this sub-field must be one of ::hsa_fence_scope_t. + */ + HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE = 9, + /** + * @deprecated Renamed as ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE. + */ + HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9, + /** + * Release fence scope. The value of this sub-field determines the scope and + * type of the memory fence operation applied after kernel completion but + * before the packet is completed. A release fence makes any global segment or + * image data that was stored by any unit of execution that belonged to a + * dispatch that has completed the active phase on any queue of the same + * kernel agent visible in all the scopes specified by the release fence. The + * value of this sub-field must be one of ::hsa_fence_scope_t. + */ + HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE = 11, + /** + * @deprecated Renamed as ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE. + */ + HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11 + } hsa_packet_header_t; + +/** + * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t. + */ + typedef enum { + HSA_PACKET_HEADER_WIDTH_TYPE = 8, + HSA_PACKET_HEADER_WIDTH_BARRIER = 1, + HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE = 2, + /** + * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE. + */ + HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2, + HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE = 2, + /** + * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE. 
+ */ + HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2 + } hsa_packet_header_width_t; + +/** + * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset + * (with respect to the address of @a setup) of a sub-field is identical to its + * enumeration constant. The width of each sub-field is determined by the + * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The + * offset and the width are expressed in bits. + */ + typedef enum { + /** + * Number of dimensions of the grid. Valid values are 1, 2, or 3. + * + */ + HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0 + } hsa_kernel_dispatch_packet_setup_t; + +/** + * @brief Width (in bits) of the sub-fields in + * ::hsa_kernel_dispatch_packet_setup_t. + */ + typedef enum { + HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2 + } hsa_kernel_dispatch_packet_setup_width_t; + +/** + * @brief AQL kernel dispatch packet + */ +typedef struct hsa_kernel_dispatch_packet_s { + union { + struct { + /** + * Packet header. Used to configure multiple packet parameters such as the + * packet type. The parameters are described by ::hsa_packet_header_t. + */ + uint16_t header; + + /** + * Dispatch setup parameters. Used to configure kernel dispatch parameters + * such as the number of dimensions in the grid. The parameters are described + * by ::hsa_kernel_dispatch_packet_setup_t. + */ + uint16_t setup; + }; + uint32_t full_header; + }; + + /** + * X dimension of work-group, in work-items. Must be greater than 0. + */ + uint16_t workgroup_size_x; + + /** + * Y dimension of work-group, in work-items. Must be greater than + * 0. If the grid has 1 dimension, the only valid value is 1. + */ + uint16_t workgroup_size_y; + + /** + * Z dimension of work-group, in work-items. Must be greater than + * 0. If the grid has 1 or 2 dimensions, the only valid value is 1. + */ + uint16_t workgroup_size_z; + + /** + * Reserved. Must be 0. + */ + uint16_t reserved0; + + /** + * X dimension of grid, in work-items. 
Must be greater than 0. Must + * not be smaller than @a workgroup_size_x. + */ + uint32_t grid_size_x; + + /** + * Y dimension of grid, in work-items. Must be greater than 0. If the grid has + * 1 dimension, the only valid value is 1. Must not be smaller than @a + * workgroup_size_y. + */ + uint32_t grid_size_y; + + /** + * Z dimension of grid, in work-items. Must be greater than 0. If the grid has + * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a + * workgroup_size_z. + */ + uint32_t grid_size_z; + + /** + * Size in bytes of private memory allocation request (per work-item). + */ + uint32_t private_segment_size; + + /** + * Size in bytes of group memory allocation request (per work-group). Must not + * be less than the sum of the group memory used by the kernel (and the + * functions it calls directly or indirectly) and the dynamically allocated + * group segment variables. + */ + uint32_t group_segment_size; + + /** + * Opaque handle to a code object that includes an implementation-defined + * executable code for the kernel. + */ + uint64_t kernel_object; + +#ifdef HSA_LARGE_MODEL + void* kernarg_address; +#elif defined HSA_LITTLE_ENDIAN + /** + * Pointer to a buffer containing the kernel arguments. May be NULL. + * + * The buffer must be allocated using ::hsa_memory_allocate, and must not be + * modified once the kernel dispatch packet is enqueued until the dispatch has + * completed execution. + */ + void* kernarg_address; + /** + * Reserved. Must be 0. + */ + uint32_t reserved1; +#else + uint32_t reserved1; + void* kernarg_address; +#endif + + /** + * Reserved. Must be 0. + */ + uint64_t reserved2; + + /** + * Signal used to indicate completion of the job. The application can use the + * special signal handle 0 to indicate that no signal is used. + */ + hsa_signal_t completion_signal; + +} hsa_kernel_dispatch_packet_t; + +/** + * @brief Agent dispatch packet. 
+ */ +typedef struct hsa_agent_dispatch_packet_s { + /** + * Packet header. Used to configure multiple packet parameters such as the + * packet type. The parameters are described by ::hsa_packet_header_t. + */ + uint16_t header; + + /** + * Application-defined function to be performed by the destination agent. + */ + uint16_t type; + + /** + * Reserved. Must be 0. + */ + uint32_t reserved0; + +#ifdef HSA_LARGE_MODEL + void* return_address; +#elif defined HSA_LITTLE_ENDIAN + /** + * Address where to store the function return values, if any. + */ + void* return_address; + /** + * Reserved. Must be 0. + */ + uint32_t reserved1; +#else + uint32_t reserved1; + void* return_address; +#endif + + /** + * Function arguments. + */ + uint64_t arg[4]; + + /** + * Reserved. Must be 0. + */ + uint64_t reserved2; + + /** + * Signal used to indicate completion of the job. The application can use the + * special signal handle 0 to indicate that no signal is used. + */ + hsa_signal_t completion_signal; + +} hsa_agent_dispatch_packet_t; + +/** + * @brief Barrier-AND packet. + */ +typedef struct hsa_barrier_and_packet_s { + /** + * Packet header. Used to configure multiple packet parameters such as the + * packet type. The parameters are described by ::hsa_packet_header_t. + */ + uint16_t header; + + /** + * Reserved. Must be 0. + */ + uint16_t reserved0; + + /** + * Reserved. Must be 0. + */ + uint32_t reserved1; + + /** + * Array of dependent signal objects. Signals with a handle value of 0 are + * allowed and are interpreted by the packet processor as satisfied + * dependencies. + */ + hsa_signal_t dep_signal[5]; + + /** + * Reserved. Must be 0. + */ + uint64_t reserved2; + + /** + * Signal used to indicate completion of the job. The application can use the + * special signal handle 0 to indicate that no signal is used. + */ + hsa_signal_t completion_signal; + +} hsa_barrier_and_packet_t; + +/** + * @brief Barrier-OR packet. 
+ */ +typedef struct hsa_barrier_or_packet_s { + /** + * Packet header. Used to configure multiple packet parameters such as the + * packet type. The parameters are described by ::hsa_packet_header_t. + */ + uint16_t header; + + /** + * Reserved. Must be 0. + */ + uint16_t reserved0; + + /** + * Reserved. Must be 0. + */ + uint32_t reserved1; + + /** + * Array of dependent signal objects. Signals with a handle value of 0 are + * allowed and are interpreted by the packet processor as dependencies not + * satisfied. + */ + hsa_signal_t dep_signal[5]; + + /** + * Reserved. Must be 0. + */ + uint64_t reserved2; + + /** + * Signal used to indicate completion of the job. The application can use the + * special signal handle 0 to indicate that no signal is used. + */ + hsa_signal_t completion_signal; + +} hsa_barrier_or_packet_t; + +/** @} */ + +/** \addtogroup memory Memory + * @{ + */ + +/** + * @brief Memory segments associated with a region. + */ +typedef enum { + /** + * Global segment. Used to hold data that is shared by all agents. + */ + HSA_REGION_SEGMENT_GLOBAL = 0, + /** + * Read-only segment. Used to hold data that remains constant during the + * execution of a kernel. + */ + HSA_REGION_SEGMENT_READONLY = 1, + /** + * Private segment. Used to hold data that is local to a single work-item. + */ + HSA_REGION_SEGMENT_PRIVATE = 2, + /** + * Group segment. Used to hold data that is shared by the work-items of a + * work-group. + */ + HSA_REGION_SEGMENT_GROUP = 3, + /** + * Kernarg segment. Used to store kernel arguments. + */ + HSA_REGION_SEGMENT_KERNARG = 4 +} hsa_region_segment_t; + +/** + * @brief Global region flags. + */ +typedef enum { + /** + * The application can use memory in the region to store kernel arguments, and + * provide the values for the kernarg segment of a kernel dispatch. If this + * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set. 
+ */ + HSA_REGION_GLOBAL_FLAG_KERNARG = 1, + /** + * Updates to memory in this region are immediately visible to all the + * agents under the terms of the HSA memory model. If this + * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set. + */ + HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2, + /** + * Updates to memory in this region can be performed by a single agent at + * a time. If a different agent in the system is allowed to access the + * region, the application must explicitly invoke ::hsa_memory_assign_agent + * in order to transfer ownership to that agent for a particular buffer. + */ + HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4, + + /** + * Updates to memory in this region have extended scope, where the device-scope atomics + * to this memory type act as system-scope with respect to all variables located in + * memory regions of this type. + * Note: On non-compliant systems, the application may still be responsible for performing + * device-specific actions necessary to achieve system-scope coherence. + */ + HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED = 8 +} hsa_region_global_flag_t; + +/** + * @brief Attributes of a memory region. + */ + +#ifdef __cplusplus +typedef enum : int { +#else +typedef enum { +#endif + /** + * Segment where memory in the region can be used. The type of this + * attribute is ::hsa_region_segment_t. + */ + HSA_REGION_INFO_SEGMENT = 0, + /** + * Flag mask. The value of this attribute is undefined if the value of + * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of + * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t + * values. + */ + HSA_REGION_INFO_GLOBAL_FLAGS = 1, + /** + * Size of this region, in bytes. The type of this attribute is size_t. + */ + HSA_REGION_INFO_SIZE = 2, + /** + * Maximum allocation size in this region, in bytes. Must not exceed the value + * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t. 
+ * + * If the region is in the global or readonly segments, this is the maximum + * size that the application can pass to ::hsa_memory_allocate. + * + * If the region is in the group segment, this is the maximum size (per + * work-group) that can be requested for a given kernel dispatch. If the + * region is in the private segment, this is the maximum size (per work-item) + * that can be requested for a specific kernel dispatch, and must be at least + * 256 bytes. + */ + HSA_REGION_INFO_ALLOC_MAX_SIZE = 4, + /** + * Maximum size (per work-group) of private memory that can be requested for a + * specific kernel dispatch. Must be at least 65536 bytes. The type of this + * attribute is uint32_t. The value of this attribute is undefined if the + * region is not in the private segment. + */ + HSA_REGION_INFO_ALLOC_MAX_PRIVATE_WORKGROUP_SIZE = 8, + /** + * Indicates whether memory in this region can be allocated using + * ::hsa_memory_allocate. The type of this attribute is bool. + * + * The value of this flag is always false for regions in the group and private + * segments. + */ + HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5, + /** + * Allocation granularity of buffers allocated by ::hsa_memory_allocate in + * this region. The size of a buffer allocated in this region is a multiple of + * the value of this attribute. The value of this attribute is only defined if + * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type + * of this attribute is size_t. + */ + HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6, + /** + * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The + * value of this attribute is only defined if + * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must be + * a power of 2. The type of this attribute is size_t. + */ + HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7 +} hsa_region_info_t; + +/** + * @brief Get the current value of an attribute of a region. + * + * @param[in] region A valid region. 
+ * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * region attribute, or @p value is NULL. + */ +hsa_status_t HSA_API hsa_region_get_info( + hsa_region_t region, + hsa_region_info_t attribute, + void* value); + +/** + * @brief Iterate over the memory regions associated with a given agent, and + * invoke an application-defined callback on every iteration. + * + * @param[in] agent A valid agent. + * + * @param[in] callback Callback to be invoked once per region that is + * accessible from the agent. The HSA runtime passes two arguments to the + * callback, the region and the application data. If @p callback returns a + * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the + * traversal stops and ::hsa_agent_iterate_regions returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API hsa_agent_iterate_regions( + hsa_agent_t agent, + hsa_status_t (*callback)(hsa_region_t region, void* data), + void* data); + +/** + * @brief Allocate a block of memory in a given region. 
+ * + * @param[in] region Region where to allocate memory from. The region must have + * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set. + * + * @param[in] size Allocation size, in bytes. Must not be zero. This value is + * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE + * in @p region. + * + * @param[out] ptr Pointer to the location where to store the base address of + * the allocated block. The returned base address is aligned to the value of + * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation + * fails, the returned value is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to + * allocate memory in @p region, or @p size is greater than the value of + * HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0. + */ +hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region, + size_t size, + void** ptr); + +/** + * @brief Deallocate a block of memory previously allocated using + * ::hsa_memory_allocate. + * + * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value + * previously returned by ::hsa_memory_allocate, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + */ +hsa_status_t HSA_API hsa_memory_free(void* ptr); + +/** + * @brief Copy a block of memory from the location pointed to by @p src to the + * memory block pointed to by @p dst. 
+ * + * @param[out] dst Buffer where the content is to be copied. If @p dst is in + * coarse-grained memory, the copied data is only visible to the agent currently + * assigned (::hsa_memory_assign_agent) to @p dst. + * + * @param[in] src A valid pointer to the source of data to be copied. The source + * buffer must not overlap with the destination buffer. If the source buffer is + * in coarse-grained memory then it must be assigned to an agent, from which the + * data will be retrieved. + * + * @param[in] size Number of bytes to copy. If @p size is 0, no copy is + * performed and the function returns success. Copying a number of bytes larger + * than the size of the buffers pointed by @p dst or @p src results in undefined + * behavior. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination + * pointers are NULL. + */ +hsa_status_t HSA_API hsa_memory_copy( + void *dst, + const void *src, + size_t size); + +/** + * @brief Change the ownership of a global, coarse-grained buffer. + * + * @details The contents of a coarse-grained buffer are visible to an agent + * only after ownership has been explicitly transferred to that agent. Once the + * operation completes, the previous owner can no longer access the data in the + * buffer. + * + * An implementation of the HSA runtime is allowed, but not required, to change + * the physical location of the buffer when ownership is transferred to a + * different agent. In general the application must not assume this + * behavior. The virtual location (address) of the passed buffer is never + * modified. + * + * @param[in] ptr Base address of a global buffer. The pointer must match an + * address previously returned by ::hsa_memory_allocate. 
The size of the buffer + * affected by the ownership change is identical to the size of that previous + * allocation. If @p ptr points to a fine-grained global buffer, no operation is + * performed and the function returns success. If @p ptr does not point to + * global memory, the behavior is undefined. + * + * @param[in] agent Agent that becomes the owner of the buffer. The + * application is responsible for ensuring that @p agent has access to the + * region that contains the buffer. It is allowed to change ownership to an + * agent that is already the owner of the buffer, with the same or different + * access permissions. + * + * @param[in] access Access permissions requested for the new owner. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is + * not a valid access value. + */ +hsa_status_t HSA_API hsa_memory_assign_agent( + void *ptr, + hsa_agent_t agent, + hsa_access_permission_t access); + +/** + * + * @brief Register a global, fine-grained buffer. + * + * @details Registering a buffer serves as an indication to the HSA runtime that + * the memory might be accessed from a kernel agent other than the + * host. Registration is a performance hint that allows the HSA runtime + * implementation to know which buffers will be accessed by some of the kernel + * agents ahead of time. + * + * Registration is only recommended for buffers in the global segment that have + * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS + * allocator instead. Registering an OS-allocated buffer in the base profile is + * equivalent to a no-op. 
+ * + * Registrations should not overlap. + * + * @param[in] ptr A buffer in global, fine-grained memory. If a NULL pointer is + * passed, no operation is performed. If the buffer has been allocated using + * ::hsa_memory_allocate, or has already been registered, no operation is + * performed. + * + * @param[in] size Requested registration size in bytes. A size of 0 is + * only allowed if @p ptr is NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr + * is not NULL. + */ +hsa_status_t HSA_API hsa_memory_register( + void *ptr, + size_t size); + +/** + * + * @brief Deregister memory previously registered using ::hsa_memory_register. + * + * @details If the memory interval being deregistered does not match a previous + * registration (start and end addresses), the behavior is undefined. + * + * @param[in] ptr A pointer to the base of the buffer to be deregistered. If + * a NULL pointer is passed, no operation is performed. + * + * @param[in] size Size of the buffer to be deregistered. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + */ +hsa_status_t HSA_API hsa_memory_deregister( + void *ptr, + size_t size); + +/** @} */ + + +/** \defgroup instruction-set-architecture Instruction Set Architecture. + * @{ + */ + +/** + * @brief Instruction set architecture. + */ +typedef struct hsa_isa_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. 
+ */ + uint64_t handle; +} hsa_isa_t; + +/** + * @brief Retrieve a reference to an instruction set architecture handle out of + * a symbolic name. + * + * @param[in] name Vendor-specific name associated with a particular + * instruction set architecture. @p name must start with the vendor name and a + * colon (for example, "AMD:"). The rest of the name is vendor-specific. Must be + * a NUL-terminated string. + * + * @param[out] isa Memory location where the HSA runtime stores the ISA handle + * corresponding to the given name. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not + * correspond to any instruction set architecture. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is + * NULL. + */ +hsa_status_t HSA_API hsa_isa_from_name( + const char *name, + hsa_isa_t *isa); + +/** + * @brief Iterate over the instruction sets supported by the given agent, and + * invoke an application-defined callback on every iteration. The iterator is + * deterministic: if an agent supports several instruction set architectures, + * they are traversed in the same order in every invocation of this function. + * + * @param[in] agent A valid agent. + * + * @param[in] callback Callback to be invoked once per instruction set + * architecture. The HSA runtime passes two arguments to the callback: the + * ISA and the application data. If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * that status value is returned. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. 
+ * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API hsa_agent_iterate_isas( + hsa_agent_t agent, + hsa_status_t (*callback)(hsa_isa_t isa, void *data), + void *data); + +/** + * @brief Instruction set architecture attributes. + */ +typedef enum { + /** + * The length of the ISA name in bytes, not including the NUL terminator. The + * type of this attribute is uint32_t. + */ + HSA_ISA_INFO_NAME_LENGTH = 0, + /** + * Human-readable description. The type of this attribute is character array + * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute. + */ + HSA_ISA_INFO_NAME = 1, + /** + * @deprecated + * + * Number of call conventions supported by the instruction set architecture. + * Must be greater than zero. The type of this attribute is uint32_t. + */ + HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2, + /** + * @deprecated + * + * Number of work-items in a wavefront for a given call convention. Must be a + * power of 2 in the range [1,256]. The type of this attribute is uint32_t. + */ + HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3, + /** + * @deprecated + * + * Number of wavefronts per compute unit for a given call convention. In + * practice, other factors (for example, the amount of group memory used by a + * work-group) may further limit the number of wavefronts per compute + * unit. The type of this attribute is uint32_t. + */ + HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4, + /** + * Machine models supported by the instruction set architecture. The type of + * this attribute is a bool[2]. If the ISA supports the small machine model, + * the element at index ::HSA_MACHINE_MODEL_SMALL is true. 
If the ISA supports + * the large model, the element at index ::HSA_MACHINE_MODEL_LARGE is true. + */ + HSA_ISA_INFO_MACHINE_MODELS = 5, + /** + * Profiles supported by the instruction set architecture. The type of this + * attribute is a bool[2]. If the ISA supports the base profile, the element + * at index ::HSA_PROFILE_BASE is true. If the ISA supports the full profile, + * the element at index ::HSA_PROFILE_FULL is true. + */ + HSA_ISA_INFO_PROFILES = 6, + /** + * Default floating-point rounding modes supported by the instruction set + * architecture. The type of this attribute is a bool[3]. The value at a given + * index is true if the corresponding rounding mode in + * ::hsa_default_float_rounding_mode_t is supported. At least one default mode + * has to be supported. + * + * If the default mode is supported, then + * ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES must report that + * both the zero and the near rounding modes are supported. + */ + HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES = 7, + /** + * Default floating-point rounding modes supported by the instruction set + * architecture in the Base profile. The type of this attribute is a + * bool[3]. The value at a given index is true if the corresponding rounding + * mode in ::hsa_default_float_rounding_mode_t is supported. The value at + * index HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT must be false. At least one + * of the values at indexes ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO or + * HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR must be true. + */ + HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 8, + /** + * Flag indicating that the f16 HSAIL operation is at least as fast as the + * f32 operation in the instruction set architecture. The type of this + * attribute is bool. + */ + HSA_ISA_INFO_FAST_F16_OPERATION = 9, + /** + * Maximum number of work-items of each dimension of a work-group. Each + * maximum must be greater than 0. 
No maximum can exceed the value of + * ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE. The type of this attribute is + * uint16_t[3]. + */ + HSA_ISA_INFO_WORKGROUP_MAX_DIM = 12, + /** + * Maximum total number of work-items in a work-group. The type + * of this attribute is uint32_t. + */ + HSA_ISA_INFO_WORKGROUP_MAX_SIZE = 13, + /** + * Maximum number of work-items of each dimension of a grid. Each maximum must + * be greater than 0, and must not be smaller than the corresponding value in + * ::HSA_ISA_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of + * ::HSA_ISA_INFO_GRID_MAX_SIZE. The type of this attribute is + * ::hsa_dim3_t. + */ + HSA_ISA_INFO_GRID_MAX_DIM = 14, + /** + * Maximum total number of work-items in a grid. The type of this + * attribute is uint64_t. + */ + HSA_ISA_INFO_GRID_MAX_SIZE = 16, + /** + * Maximum number of fbarriers per work-group. Must be at least 32. The + * type of this attribute is uint32_t. + */ + HSA_ISA_INFO_FBARRIER_MAX_SIZE = 17 +} hsa_isa_info_t; + +/** + * @deprecated The concept of call convention has been deprecated. If the + * application wants to query the value of an attribute for a given instruction + * set architecture, use ::hsa_isa_get_info_alt instead. If the application + * wants to query an attribute that is specific to a given combination of ISA + * and wavefront, use ::hsa_wavefront_get_info. + * + * @brief Get the current value of an attribute for a given instruction set + * architecture (ISA). + * + * @param[in] isa A valid instruction set architecture. + * + * @param[in] attribute Attribute to query. + * + * @param[in] index Call convention index. Used only for call convention + * attributes, otherwise ignored. Must have a value between 0 (inclusive) and + * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not + * inclusive) in @p isa. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. 
If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_INDEX The index is out of range. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * instruction set architecture attribute, or @p value is + * NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_get_info( + hsa_isa_t isa, + hsa_isa_info_t attribute, + uint32_t index, + void *value); + +/** + * @brief Get the current value of an attribute for a given instruction set + * architecture (ISA). + * + * @param[in] isa A valid instruction set architecture. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * instruction set architecture attribute, or @p value is + * NULL. + */ +hsa_status_t HSA_API hsa_isa_get_info_alt( + hsa_isa_t isa, + hsa_isa_info_t attribute, + void *value); + +/** + * @brief Retrieve the exception policy support for a given combination of + * instruction set architecture and profile. + * + * @param[in] isa A valid instruction set architecture. + * + * @param[in] profile Profile. 
+ * + * @param[out] mask Pointer to a memory location where the HSA runtime stores a + * mask of ::hsa_exception_policy_t values. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid + * profile, or @p mask is NULL. + */ +hsa_status_t HSA_API hsa_isa_get_exception_policies( + hsa_isa_t isa, + hsa_profile_t profile, + uint16_t *mask); + +/** + * @brief Floating-point types. + */ +typedef enum { + /** + * 16-bit floating-point type. + */ + HSA_FP_TYPE_16 = 1, + /** + * 32-bit floating-point type. + */ + HSA_FP_TYPE_32 = 2, + /** + * 64-bit floating-point type. + */ + HSA_FP_TYPE_64 = 4 +} hsa_fp_type_t; + +/** + * @brief Flush to zero modes. + */ +typedef enum { + /** + * Flush to zero. + */ + HSA_FLUSH_MODE_FTZ = 1, + /** + * Do not flush to zero. + */ + HSA_FLUSH_MODE_NON_FTZ = 2 +} hsa_flush_mode_t; + +/** + * @brief Round methods. + */ +typedef enum { + /** + * Single round method. + */ + HSA_ROUND_METHOD_SINGLE = 1, + /** + * Double round method. + */ + HSA_ROUND_METHOD_DOUBLE = 2 +} hsa_round_method_t; + +/** + * @brief Retrieve the round method (single or double) used to implement the + * floating-point multiply add instruction (mad) for a given combination of + * instruction set architecture, floating-point type, and flush to zero + * modifier. + * + * @param[in] isa Instruction set architecture. + * + * @param[in] fp_type Floating-point type. + * + * @param[in] flush_mode Flush to zero modifier. + * + * @param[out] round_method Pointer to a memory location where the HSA + * runtime stores the round method used by the implementation. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. 
+ * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p fp_type is not a valid + * floating-point type, or @p flush_mode is not a valid flush to zero modifier, + * or @p round_method is NULL. + */ +hsa_status_t HSA_API hsa_isa_get_round_method( + hsa_isa_t isa, + hsa_fp_type_t fp_type, + hsa_flush_mode_t flush_mode, + hsa_round_method_t *round_method); + +/** + * @brief Wavefront handle + */ +typedef struct hsa_wavefront_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_wavefront_t; + +/** + * @brief Wavefront attributes. + */ +typedef enum { + /** + * Number of work-items in the wavefront. Must be a power of 2 in the range + * [1,256]. The type of this attribute is uint32_t. + */ + HSA_WAVEFRONT_INFO_SIZE = 0 +} hsa_wavefront_info_t; + +/** + * @brief Get the current value of a wavefront attribute. + * + * @param[in] wavefront A wavefront. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_WAVEFRONT The wavefront is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * wavefront attribute, or @p value is NULL. 
+ */ +hsa_status_t HSA_API hsa_wavefront_get_info( + hsa_wavefront_t wavefront, + hsa_wavefront_info_t attribute, + void *value); + +/** + * @brief Iterate over the different wavefronts supported by an instruction set + * architecture, and invoke an application-defined callback on every iteration. + * + * @param[in] isa Instruction set architecture. + * + * @param[in] callback Callback to be invoked once per wavefront that is + * supported by @p isa. The HSA runtime passes two arguments to the callback: + * the wavefront handle and the application data. If @p callback returns a + * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the + * traversal stops and that value is returned. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API hsa_isa_iterate_wavefronts( + hsa_isa_t isa, + hsa_status_t (*callback)(hsa_wavefront_t wavefront, void *data), + void *data); + +/** + * @deprecated Use ::hsa_agent_iterate_isas to query which instruction set + * architectures are supported by a given agent. + * + * @brief Check if the instruction set architecture of a code object can be + * executed on an agent associated with another architecture. + * + * @param[in] code_object_isa Instruction set architecture associated with a + * code object. + * + * @param[in] agent_isa Instruction set architecture associated with an agent. + * + * @param[out] result Pointer to a memory location where the HSA runtime stores + * the result of the check.
If the two architectures are compatible, the result + * is true; if they are incompatible, the result is false. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa are + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_compatible( + hsa_isa_t code_object_isa, + hsa_isa_t agent_isa, + bool *result); + +/** @} */ + + +/** \defgroup executable Executable + * @{ + */ + +/** + * @brief Code object reader handle. A code object reader is used to + * load a code object from file (when created using + * ::hsa_code_object_reader_create_from_file), or from memory (if created using + * ::hsa_code_object_reader_create_from_memory). + */ +typedef struct hsa_code_object_reader_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_code_object_reader_t; + +/** + * @brief Create a code object reader to operate on a file. + * + * @param[in] file File descriptor. The file must have been opened by the + * application with at least read permissions prior to calling this function. + * The file must contain a vendor-specific code object. + * + * The file is owned and managed by the application; the lifetime of the file + * descriptor must exceed that of any associated code object reader. + * + * @param[out] code_object_reader Memory location to store the newly created + * code object reader handle. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is invalid.
+ * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL. + */ +hsa_status_t HSA_API hsa_code_object_reader_create_from_file( + hsa_file_t file, + hsa_code_object_reader_t *code_object_reader); + +/** + * @brief Create a code object reader to operate on memory. + * + * @param[in] code_object Memory buffer that contains a vendor-specific code + * object. The buffer is owned and managed by the application; the lifetime of + * the buffer must exceed that of any associated code object reader. + * + * @param[in] size Size of the buffer pointed to by @p code_object. Must not be + * 0. + * + * @param[out] code_object_reader Memory location to store the newly created + * code object reader handle. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object is NULL, @p size + * is zero, or @p code_object_reader is NULL. + */ +hsa_status_t HSA_API hsa_code_object_reader_create_from_memory( + const void *code_object, + size_t size, + hsa_code_object_reader_t *code_object_reader); + +/** + * @brief Destroy a code object reader. + * + * @details The code object reader handle becomes invalid after completion of + * this function. Any file or memory used to create the code object reader is + * not closed, removed, or deallocated by this function. + * + * @param[in] code_object_reader Code object reader to destroy. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized.
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader + * is invalid. + */ +hsa_status_t HSA_API hsa_code_object_reader_destroy( + hsa_code_object_reader_t code_object_reader); + +/** + * @brief Struct containing an opaque handle to an executable, which contains + * ISA for finalized kernels and indirect functions together with the allocated + * global or readonly segment variables they reference. + */ +typedef struct hsa_executable_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_executable_t; + +/** + * @brief Executable state. + */ +typedef enum { + /** + * Executable state, which allows the user to load code objects and define + * external variables. Variable addresses, kernel code handles, and + * indirect function code handles are not available in query operations until + * the executable is frozen (zero always returned). + */ + HSA_EXECUTABLE_STATE_UNFROZEN = 0, + /** + * Executable state, which allows the user to query variable addresses, + * kernel code handles, and indirect function code handles using query + * operations. Loading new code objects, as well as defining external + * variables, is not allowed in this state. + */ + HSA_EXECUTABLE_STATE_FROZEN = 1 +} hsa_executable_state_t; + +/** + * @deprecated Use ::hsa_executable_create_alt instead, which allows the + * application to specify the default floating-point rounding mode of the + * executable and assumes an unfrozen initial state. + * + * @brief Create an empty executable. + * + * @param[in] profile Profile used in the executable. + * + * @param[in] executable_state Executable state. If the state is + * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no + * code objects can be loaded, and no variables can be defined. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. 
A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] executable Memory location where the HSA runtime stores the newly + * created executable handle. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or + * @p executable is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_create( + hsa_profile_t profile, + hsa_executable_state_t executable_state, + const char *options, + hsa_executable_t *executable); + +/** + * @brief Create an empty executable. + * + * @param[in] profile Profile used in the executable. + * + * @param[in] default_float_rounding_mode Default floating-point rounding mode + * used in the executable. Allowed rounding modes are near and zero (default is + * not allowed). + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] executable Memory location where the HSA runtime stores the newly + * created executable handle. The initial state of the executable is + * ::HSA_EXECUTABLE_STATE_UNFROZEN. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized.
+ * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or + * @p executable is NULL. + */ +hsa_status_t HSA_API hsa_executable_create_alt( + hsa_profile_t profile, + hsa_default_float_rounding_mode_t default_float_rounding_mode, + const char *options, + hsa_executable_t *executable); + +/** + * @brief Destroy an executable. + * + * @details An executable handle becomes invalid after the executable has been + * destroyed. Code object handles that were loaded into this executable are + * still valid after the executable has been destroyed, and can be used as + * intended. Resources allocated outside and associated with this executable + * (such as external global or readonly variables) can be released after the + * executable has been destroyed. + * + * Executable should not be destroyed while kernels are in flight. + * + * @param[in] executable Executable. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + */ +hsa_status_t HSA_API hsa_executable_destroy( + hsa_executable_t executable); + +/** + * @brief Loaded code object handle. + */ +typedef struct hsa_loaded_code_object_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_loaded_code_object_t; + +/** + * @brief Load a program code object into an executable. + * + * @details A program code object contains information about resources that are + * accessible by all kernel agents that run the executable, and can be loaded + * at most once into an executable. 
+ * + * If the program code object uses extensions, the implementation must support + * them for this operation to return successfully. + * + * @param[in] executable Executable. + * + * @param[in] code_object_reader A code object reader that holds the program + * code object to load. If a code object reader is destroyed before all the + * associated executables are destroyed, the behavior is undefined. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] loaded_code_object Pointer to a memory location where the HSA + * runtime stores the loaded code object handle. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader + * is invalid. + * + * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The program code object is + * not compatible with the executable or the implementation (for example, the + * code object uses an extension that is not supported by the implementation). + */ +hsa_status_t HSA_API hsa_executable_load_program_code_object( + hsa_executable_t executable, + hsa_code_object_reader_t code_object_reader, + const char *options, + hsa_loaded_code_object_t *loaded_code_object); + +/** + * @brief Load an agent code object into an executable.
+ * + * @details The agent code object contains all defined agent + * allocation variables, functions, indirect functions, and kernels in a given + * program for a given instruction set architecture. + * + * Any module linkage declaration must have been defined either by a define + * variable or by loading a code object that has a symbol with module linkage + * definition. + * + * The default floating-point rounding mode of the code object associated with + * @p code_object_reader must match that of the executable + * (::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE), or be default (in which + * case the value of ::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE is used). + * If the agent code object uses extensions, the implementation and the agent + * must support them for this operation to return successfully. + * + * @param[in] executable Executable. + * + * @param[in] agent Agent to load code object for. A code object can be loaded + * into an executable at most once for a given agent. The instruction set + * architecture of the code object must be supported by the agent. + * + * @param[in] code_object_reader A code object reader that holds the code object + * to load. If a code object reader is destroyed before all the associated + * executables are destroyed, the behavior is undefined. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] loaded_code_object Pointer to a memory location where the HSA + * runtime stores the loaded code object handle. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized.
+ * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader + * is invalid. + * + * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The code object read by @p + * code_object_reader is not compatible with the agent (for example, the agent + * does not support the instruction set architecture of the code object), the + * executable (for example, there is a default floating-point mode mismatch + * between the two), or the implementation. + */ +hsa_status_t HSA_API hsa_executable_load_agent_code_object( + hsa_executable_t executable, + hsa_agent_t agent, + hsa_code_object_reader_t code_object_reader, + const char *options, + hsa_loaded_code_object_t *loaded_code_object); + +/** + * @brief Freeze the executable. + * + * @details No modifications to executable can be made after freezing: no code + * objects can be loaded to the executable, and no external variables can be + * defined. Freezing the executable does not prevent querying the executable's + * attributes. The application must define all the external variables in an + * executable before freezing it. + * + * @param[in] executable Executable. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are + * undefined in the executable. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen. + */ +hsa_status_t HSA_API hsa_executable_freeze( + hsa_executable_t executable, + const char *options); + +/** + * @brief Executable attributes. + */ +typedef enum { + /** + * Profile this executable is created for. The type of this attribute is + * ::hsa_profile_t. + */ + HSA_EXECUTABLE_INFO_PROFILE = 1, + /** + * Executable state. The type of this attribute is ::hsa_executable_state_t. + */ + HSA_EXECUTABLE_INFO_STATE = 2, + /** + * Default floating-point rounding mode specified when executable was created. + * The type of this attribute is ::hsa_default_float_rounding_mode_t. + */ + HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 3 +} hsa_executable_info_t; + +/** + * @brief Get the current value of an attribute for a given executable. + * + * @param[in] executable Executable. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * executable attribute, or @p value is NULL. 
+ */ +hsa_status_t HSA_API hsa_executable_get_info( + hsa_executable_t executable, + hsa_executable_info_t attribute, + void *value); + +/** + * @brief Define an external global variable with program allocation. + * + * @details This function allows the application to provide the definition + * of a variable in the global segment memory with program allocation. The + * variable must be defined before loading a code object into an executable. + * In addition, code objects loaded must not define the variable. + * + * @param[in] executable Executable. Must not be in frozen state. + * + * @param[in] variable_name Name of the variable. The Programmer's Reference + * Manual describes the standard name mangling scheme. + * + * @param[in] address Address where the variable is defined. This address must + * be in global memory and can be read and written by any agent in the + * system. The application cannot deallocate the buffer pointed by @p address + * before @p executable is destroyed. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is + * already defined. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the + * @p variable_name. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL. + */ +hsa_status_t HSA_API hsa_executable_global_variable_define( + hsa_executable_t executable, + const char *variable_name, + void *address); + +/** + * @brief Define an external global variable with agent allocation. 
+ * + * @details This function allows the application to provide the definition + * of a variable in the global segment memory with agent allocation. The + * variable must be defined before loading a code object into an executable. + * In addition, code objects loaded must not define the variable. + * + * @param[in] executable Executable. Must not be in frozen state. + * + * @param[in] agent Agent for which the variable is being defined. + * + * @param[in] variable_name Name of the variable. The Programmer's Reference + * Manual describes the standard name mangling scheme. + * + * @param[in] address Address where the variable is defined. This address must + * have been previously allocated using ::hsa_memory_allocate in a global region + * that is only visible to @p agent. The application cannot deallocate the + * buffer pointed by @p address before @p executable is destroyed. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is + * already defined. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the + * @p variable_name. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL. + */ +hsa_status_t HSA_API hsa_executable_agent_global_variable_define( + hsa_executable_t executable, + hsa_agent_t agent, + const char *variable_name, + void *address); + +/** + * @brief Define an external readonly variable. 
+ * + * @details This function allows the application to provide the definition + * of a variable in the readonly segment memory. The variable must be defined + * before loading a code object into an executable. In addition, code objects + * loaded must not define the variable. + * + * @param[in] executable Executable. Must not be in frozen state. + * + * @param[in] agent Agent for which the variable is being defined. + * + * @param[in] variable_name Name of the variable. The Programmer's Reference + * Manual describes the standard name mangling scheme. + * + * @param[in] address Address where the variable is defined. This address must + * have been previously allocated using ::hsa_memory_allocate in a readonly + * region associated with @p agent. The buffer pointed to by @p address is owned + * by the application, and cannot be deallocated before @p executable is + * destroyed. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE Executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is + * already defined. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the + * @p variable_name. + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ */ +hsa_status_t HSA_API hsa_executable_readonly_variable_define( + hsa_executable_t executable, + hsa_agent_t agent, + const char *variable_name, + void *address); + +/** + * @brief Validate an executable. Checks that all code objects have matching + * machine model, profile, and default floating-point rounding mode. Checks that + * all declarations have definitions. Checks declaration-definition + * compatibility (see the HSA Programming Reference Manual for compatibility + * rules). Invoking this function is equivalent to invoking + * ::hsa_executable_validate_alt with no options. + * + * @param[in] executable Executable. Must be in frozen state. + * + * @param[out] result Memory location where the HSA runtime stores the + * validation result. If the executable passes validation, the result is 0. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. + */ +hsa_status_t HSA_API hsa_executable_validate( + hsa_executable_t executable, + uint32_t *result); + +/** + * @brief Validate an executable. Checks that all code objects have matching + * machine model, profile, and default floating-point rounding mode. Checks that + * all declarations have definitions. Checks declaration-definition + * compatibility (see the HSA Programming Reference Manual for compatibility + * rules). + * + * @param[in] executable Executable. Must be in frozen state. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL.
+ * + * @param[out] result Memory location where the HSA runtime stores the + * validation result. If the executable passes validation, the result is 0. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL. + */ +hsa_status_t HSA_API hsa_executable_validate_alt( + hsa_executable_t executable, + const char *options, + uint32_t *result); + +/** + * @brief Executable symbol handle. + * + * The lifetime of an executable object symbol matches that of the executable + * associated with it. An operation on a symbol whose associated executable has + * been destroyed results in undefined behavior. + */ +typedef struct hsa_executable_symbol_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_executable_symbol_t; + +/** + * @deprecated Use ::hsa_executable_get_symbol_by_name instead. + * + * @brief Get the symbol handle for a given symbol name. + * + * @param[in] executable Executable. + * + * @param[in] module_name Module name. Must be NULL if the symbol has + * program linkage. + * + * @param[in] symbol_name Symbol name. + * + * @param[in] agent Agent associated with the symbol. If the symbol is + * independent of any agent (for example, a variable with program + * allocation), this argument is ignored. + * + * @param[in] call_convention Call convention associated with the symbol. If the + * symbol does not correspond to an indirect function, this argument is ignored. + * + * @param[out] symbol Memory location where the HSA runtime stores the symbol + * handle. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name + * that matches @p symbol_name. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or + * @p symbol is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_get_symbol( + hsa_executable_t executable, + const char *module_name, + const char *symbol_name, + hsa_agent_t agent, + int32_t call_convention, + hsa_executable_symbol_t *symbol); + +/** + * @brief Retrieve the symbol handle corresponding to a given symbol name. + * + * @param[in] executable Executable. + * + * @param[in] symbol_name Symbol name. Must be a NUL-terminated character + * array. The Programmer's Reference Manual describes the standard name mangling + * scheme. + * + * @param[in] agent Pointer to the agent for which the symbol with the given + * name is defined. If the symbol corresponding to the given name has program + * allocation, @p agent must be NULL. + * + * @param[out] symbol Memory location where the HSA runtime stores the symbol + * handle. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name + * that matches @p symbol_name. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or @p + * symbol is NULL. + */ +hsa_status_t HSA_API hsa_executable_get_symbol_by_name( + hsa_executable_t executable, + const char *symbol_name, + const hsa_agent_t *agent, + hsa_executable_symbol_t *symbol); + +/** + * @brief Symbol type. + */ +typedef enum { + /** + * Variable.
+ */ + HSA_SYMBOL_KIND_VARIABLE = 0, + /** + * Kernel. + */ + HSA_SYMBOL_KIND_KERNEL = 1, + /** + * Indirect function. + */ + HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2 +} hsa_symbol_kind_t; + +/** + * @brief Linkage type of a symbol. + */ +typedef enum { + /** + * Module linkage. + */ + HSA_SYMBOL_LINKAGE_MODULE = 0, + /** + * Program linkage. + */ + HSA_SYMBOL_LINKAGE_PROGRAM = 1 +} hsa_symbol_linkage_t; + +/** + * @brief Allocation type of a variable. + */ +typedef enum { + /** + * Agent allocation. + */ + HSA_VARIABLE_ALLOCATION_AGENT = 0, + /** + * Program allocation. + */ + HSA_VARIABLE_ALLOCATION_PROGRAM = 1 +} hsa_variable_allocation_t; + +/** + * @brief Memory segment associated with a variable. + */ +typedef enum { + /** + * Global memory segment. + */ + HSA_VARIABLE_SEGMENT_GLOBAL = 0, + /** + * Readonly memory segment. + */ + HSA_VARIABLE_SEGMENT_READONLY = 1 +} hsa_variable_segment_t; + +/** + * @brief Executable symbol attributes. + */ +typedef enum { + /** + * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0, + /** + * The length of the symbol name in bytes, not including the NUL terminator. + * The type of this attribute is uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1, + /** + * The name of the symbol. The type of this attribute is character array with + * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH + * attribute. + */ + HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2, + /** + * @deprecated + * + * The length of the module name in bytes (not including the NUL terminator) + * to which this symbol belongs if this symbol has module linkage, otherwise 0 + * is returned. The type of this attribute is uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3, + /** + * @deprecated + * + * The module name to which this symbol belongs if this symbol has module + * linkage, otherwise an empty string is returned. 
The type of this attribute + * is character array with the length equal to the value of + * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute. + */ + HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4, + /** + * @deprecated + * + * Agent associated with this symbol. If the symbol is a variable, the + * value of this attribute is only defined if + * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is + * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20, + /** + * The address of the variable. The value of this attribute is undefined if + * the symbol is not a variable. The type of this attribute is uint64_t. + * + * If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is + * returned. + */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21, + /** + * The linkage kind of the symbol. The type of this attribute is + * ::hsa_symbol_linkage_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5, + /** + * Indicates whether the symbol corresponds to a definition. The type of this + * attribute is bool. + */ + HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17, + /** + * @deprecated + * + * The allocation kind of the variable. The value of this attribute is + * undefined if the symbol is not a variable. The type of this attribute is + * ::hsa_variable_allocation_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6, + /** + * @deprecated + * + * The segment kind of the variable. The value of this attribute is undefined + * if the symbol is not a variable. The type of this attribute is + * ::hsa_variable_segment_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7, + /** + * @deprecated + * + * Alignment of the symbol in memory. The value of this attribute is undefined + * if the symbol is not a variable. The type of this attribute is uint32_t. + * + * The current alignment of the variable in memory may be greater than the + * value specified in the source program variable declaration. 
+ */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8, + /** + * @deprecated + * + * Size of the variable. The value of this attribute is undefined if + * the symbol is not a variable. The type of this attribute is uint32_t. + * + * A value of 0 is returned if the variable is an external variable and has an + * unknown dimension. + */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9, + /** + * @deprecated + * + * Indicates whether the variable is constant. The value of this attribute is + * undefined if the symbol is not a variable. The type of this attribute is + * bool. + */ + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10, + /** + * Kernel object handle, used in the kernel dispatch packet. The value of this + * attribute is undefined if the symbol is not a kernel. The type of this + * attribute is uint64_t. + * + * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 + * is returned. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22, + /** + * Size of kernarg segment memory that is required to hold the values of the + * kernel arguments, in bytes. Must be a multiple of 16. The value of this + * attribute is undefined if the symbol is not a kernel. The type of this + * attribute is uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11, + /** + * Alignment (in bytes) of the buffer used to pass arguments to the kernel, + * which is the maximum of 16 and the maximum alignment of any of the kernel + * arguments. The value of this attribute is undefined if the symbol is not a + * kernel. The type of this attribute is uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12, + /** + * Size of static group segment memory required by the kernel (per + * work-group), in bytes. The value of this attribute is undefined + * if the symbol is not a kernel. The type of this attribute is uint32_t. 
+ * + * The reported amount does not include any dynamically allocated group + * segment memory that may be requested by the application when a kernel is + * dispatched. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13, + /** + * Size of static private, spill, and arg segment memory required by + * this kernel (per work-item), in bytes. The value of this attribute is + * undefined if the symbol is not a kernel. The type of this attribute is + * uint32_t. + * + * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is + * true, the kernel may use more private memory than the reported value, and + * the application must add the dynamic call stack usage to @a + * private_segment_size when populating a kernel dispatch packet. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14, + /** + * Dynamic callstack flag. The value of this attribute is undefined if the + * symbol is not a kernel. The type of this attribute is bool. + * + * If this flag is set (the value is true), the kernel uses a dynamically + * sized call stack. This can happen if recursive calls, calls to indirect + * functions, or the HSAIL alloca instruction are present in the kernel. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15, + /** + * @deprecated + * + * Call convention of the kernel. The value of this attribute is undefined if + * the symbol is not a kernel. The type of this attribute is uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18, + /** + * Indirect function object handle. The value of this attribute is undefined + * if the symbol is not an indirect function, or the associated agent does + * not support the Full Profile. The type of this attribute depends on the + * machine model: the type is uint32_t for small machine model, and uint64_t + * for large model. + * + * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 + * is returned. 
+ */ + HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23, + /** + * @deprecated + * + * Call convention of the indirect function. The value of this attribute is + * undefined if the symbol is not an indirect function, or the associated + * agent does not support the Full Profile. The type of this attribute is + * uint32_t. + */ + HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16 +} hsa_executable_symbol_info_t; + +/** + * @brief Get the current value of an attribute for a given executable symbol. + * + * @param[in] executable_symbol Executable symbol. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL The executable symbol is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * executable symbol attribute, or @p value is NULL. + */ +hsa_status_t HSA_API hsa_executable_symbol_get_info( + hsa_executable_symbol_t executable_symbol, + hsa_executable_symbol_info_t attribute, + void *value); + +/** + * @deprecated + * + * @brief Iterate over the symbols in an executable, and invoke an + * application-defined callback on every iteration. + * + * @param[in] executable Executable. + * + * @param[in] callback Callback to be invoked once per executable symbol. The + * HSA runtime passes three arguments to the callback: the executable, a symbol, + * and the application data.
If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * ::hsa_executable_iterate_symbols returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_iterate_symbols( + hsa_executable_t executable, + hsa_status_t (*callback)(hsa_executable_t exec, + hsa_executable_symbol_t symbol, + void *data), + void *data); + +/** + * @brief Iterate over the kernels, indirect functions, and agent allocation + * variables in an executable for a given agent, and invoke an application- + * defined callback on every iteration. + * + * @param[in] executable Executable. + * + * @param[in] agent Agent. + * + * @param[in] callback Callback to be invoked once per executable symbol. The + * HSA runtime passes four arguments to the callback: the executable, the + * agent, a symbol, and the application data. If @p callback returns a status + * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal + * stops and ::hsa_executable_iterate_agent_symbols returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */ +hsa_status_t HSA_API hsa_executable_iterate_agent_symbols( + hsa_executable_t executable, + hsa_agent_t agent, + hsa_status_t (*callback)(hsa_executable_t exec, + hsa_agent_t agent, + hsa_executable_symbol_t symbol, + void *data), + void *data); + +/** + * @brief Iterate over the program allocation variables in an executable, and + * invoke an application-defined callback on every iteration. + * + * @param[in] executable Executable. + * + * @param[in] callback Callback to be invoked once per executable symbol. The + * HSA runtime passes three arguments to the callback: the executable, a symbol, + * and the application data. If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * ::hsa_executable_iterate_program_symbols returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API hsa_executable_iterate_program_symbols( + hsa_executable_t executable, + hsa_status_t (*callback)(hsa_executable_t exec, + hsa_executable_symbol_t symbol, + void *data), + void *data); + +/** @} */ + + +/** \defgroup code-object Code Objects (deprecated). + * @{ + */ + +/** + * @deprecated + * + * @brief Struct containing an opaque handle to a code object, which contains + * ISA for finalized kernels and indirect functions together with information + * about the global or readonly segment variables they reference. + */ +typedef struct hsa_code_object_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal.
+ */ + uint64_t handle; +} hsa_code_object_t; + +/** + * @deprecated + * + * @brief Application data handle that is passed to the serialization + * and deserialization functions. + */ +typedef struct hsa_callback_data_s { + /** + * Opaque handle. + */ + uint64_t handle; +} hsa_callback_data_t; + +/** + * @deprecated + * + * @brief Serialize a code object. Can be used for offline finalization, + * install-time finalization, disk code caching, etc. + * + * @param[in] code_object Code object. + * + * @param[in] alloc_callback Callback function for memory allocation. Must not + * be NULL. The HSA runtime passes three arguments to the callback: the + * allocation size, the application data, and a pointer to a memory location + * where the application stores the allocation result. The HSA runtime invokes + * @p alloc_callback once to allocate a buffer that contains the serialized + * version of @p code_object. If the callback returns a status code other than + * ::HSA_STATUS_SUCCESS, this function returns the same code. + * + * @param[in] callback_data Application data that is passed to @p + * alloc_callback. May be NULL. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] serialized_code_object Memory location where the HSA runtime + * stores a pointer to the serialized code object. Must not be NULL. + * + * @param[out] serialized_code_object_size Memory location where the HSA runtime + * stores the size (in bytes) of @p serialized_code_object. The returned value + * matches the allocation size passed by the HSA runtime to @p + * alloc_callback. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p + * serialized_code_object, or @p serialized_code_object_size are NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_serialize( + hsa_code_object_t code_object, + hsa_status_t (*alloc_callback)(size_t size, + hsa_callback_data_t data, + void **address), + hsa_callback_data_t callback_data, + const char *options, + void **serialized_code_object, + size_t *serialized_code_object_size); + +/** + * @deprecated + * + * @brief Deserialize a code object. + * + * @param[in] serialized_code_object A serialized code object. Must not be NULL. + * + * @param[in] serialized_code_object_size The size (in bytes) of @p + * serialized_code_object. Must not be 0. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @param[out] code_object Memory location where the HSA runtime stores the + * deserialized code object. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object, or @p + * code_object are NULL, or @p serialized_code_object_size is 0.
+ */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_deserialize( + void *serialized_code_object, + size_t serialized_code_object_size, + const char *options, + hsa_code_object_t *code_object); + +/** + * @deprecated + * + * @brief Destroy a code object. + * + * @details The lifetime of a code object must exceed that of any executable + * where it has been loaded. If an executable that loaded @p code_object has not + * been destroyed, the behavior is undefined. + * + * @param[in] code_object Code object. The handle becomes invalid after it has + * been destroyed. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_destroy( + hsa_code_object_t code_object); + +/** + * @deprecated + * + * @brief Code object type. + */ +typedef enum { + /** + * Produces code object that contains ISA for all kernels and indirect + * functions in HSA source. + */ + HSA_CODE_OBJECT_TYPE_PROGRAM = 0 +} hsa_code_object_type_t; + +/** + * @deprecated + * + * @brief Code object attributes. + */ +typedef enum { + /** + * The version of the code object. The type of this attribute is a + * NUL-terminated char[64]. The name must be at most 63 characters long (not + * including the NUL terminator) and all array elements not used for the name + * must be NUL. + */ + HSA_CODE_OBJECT_INFO_VERSION = 0, + /** + * Type of code object. The type of this attribute is + * ::hsa_code_object_type_t. + */ + HSA_CODE_OBJECT_INFO_TYPE = 1, + /** + * Instruction set architecture this code object is produced for. The type of + * this attribute is ::hsa_isa_t. + */ + HSA_CODE_OBJECT_INFO_ISA = 2, + /** + * Machine model this code object is produced for. The type of this attribute + * is ::hsa_machine_model_t. 
+ */ + HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3, + /** + * Profile this code object is produced for. The type of this attribute is + * ::hsa_profile_t. + */ + HSA_CODE_OBJECT_INFO_PROFILE = 4, + /** + * Default floating-point rounding mode used when the code object is + * produced. The type of this attribute is + * ::hsa_default_float_rounding_mode_t. + */ + HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5 +} hsa_code_object_info_t; + +/** + * @deprecated + * + * @brief Get the current value of an attribute for a given code object. + * + * @param[in] code_object Code object. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * code object attribute, or @p value is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_info( + hsa_code_object_t code_object, + hsa_code_object_info_t attribute, + void *value); + +/** + * @deprecated + * + * @brief Load code object into the executable. + * + * @details Every global or readonly variable that is external must be defined + * before loading the code object. An internal global or readonly variable is + * allocated once the code object, that is being loaded, references this + * variable and this variable is not allocated. + * + * Any module linkage declaration must have been defined either by a define + * variable or by loading a code object that has a symbol with module linkage + * definition. + * + * @param[in] executable Executable. 
+ * + * @param[in] agent Agent to load code object for. The agent must support the + * default floating-point rounding mode used by @p code_object. + * + * @param[in] code_object Code object to load. The lifetime of the code object + * must exceed that of the executable: if @p code_object is destroyed before @p + * executable, the behavior is undefined. + * + * @param[in] options Standard and vendor-specific options. Unknown options are + * ignored. A standard option begins with the "-hsa_" prefix. Options beginning + * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A + * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a + * NUL-terminated string. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to + * allocate the required resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + * + * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible + * with @p code_object (for example, @p agent does not support the default + * floating-point rounding mode specified by @p code_object), or @p code_object + * is not compatible with @p executable (for example, @p code_object and @p + * executable have different machine models or profiles). + * + * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_load_code_object( + hsa_executable_t executable, + hsa_agent_t agent, + hsa_code_object_t code_object, + const char *options); + +/** + * @deprecated + * + * @brief Code object symbol handle.
+ * + * The lifetime of a code object symbol matches that of the code object + * associated with it. An operation on a symbol whose associated code object has + * been destroyed results in undefined behavior. + */ +typedef struct hsa_code_symbol_s { + /** + * Opaque handle. Two handles reference the same object of the enclosing type + * if and only if they are equal. + */ + uint64_t handle; +} hsa_code_symbol_t; + +/** + * @deprecated + * + * @brief Get the symbol handle within a code object for a given symbol name. + * + * @param[in] code_object Code object. + * + * @param[in] symbol_name Symbol name. + * + * @param[out] symbol Memory location where the HSA runtime stores the symbol + * handle. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name + * that matches @p symbol_name. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or + * @p symbol is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol( + hsa_code_object_t code_object, + const char *symbol_name, + hsa_code_symbol_t *symbol); + +/** + * @deprecated + * + * @brief Get the symbol handle within a code object for a given symbol name. + * + * @param[in] code_object Code object. + * + * @param[in] module_name Module name. Must be NULL if the symbol has + * program linkage. + * + * @param[in] symbol_name Symbol name. + * + * @param[out] symbol Memory location where the HSA runtime stores the symbol + * handle. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name + * that matches @p symbol_name. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or + * @p symbol is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol_from_name( + hsa_code_object_t code_object, + const char *module_name, + const char *symbol_name, + hsa_code_symbol_t *symbol); + +/** + * @deprecated + * + * @brief Code object symbol attributes. + */ +typedef enum { + /** + * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t. + */ + HSA_CODE_SYMBOL_INFO_TYPE = 0, + /** + * The length of the symbol name in bytes, not including the NUL terminator. + * The type of this attribute is uint32_t. + */ + HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1, + /** + * The name of the symbol. The type of this attribute is character array with + * the length equal to the value of ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH + * attribute. + */ + HSA_CODE_SYMBOL_INFO_NAME = 2, + /** + * The length of the module name in bytes (not including the NUL terminator) + * to which this symbol belongs if this symbol has module linkage, otherwise 0 + * is returned. The type of this attribute is uint32_t. + */ + HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3, + /** + * The module name to which this symbol belongs if this symbol has module + * linkage, otherwise an empty string is returned. The type of this attribute + * is character array with the length equal to the value of + * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute. + */ + HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4, + /** + * The linkage kind of the symbol. The type of this attribute is + * ::hsa_symbol_linkage_t. + */ + HSA_CODE_SYMBOL_INFO_LINKAGE = 5, + /** + * Indicates whether the symbol corresponds to a definition. The type of this + * attribute is bool. + */ + HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17, + /** + * The allocation kind of the variable. 
The value of this attribute is + * undefined if the symbol is not a variable. The type of this attribute is + * ::hsa_variable_allocation_t. + */ + HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6, + /** + * The segment kind of the variable. The value of this attribute is + * undefined if the symbol is not a variable. The type of this attribute is + * ::hsa_variable_segment_t. + */ + HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7, + /** + * Alignment of the symbol in memory. The value of this attribute is undefined + * if the symbol is not a variable. The type of this attribute is uint32_t. + * + * The current alignment of the variable in memory may be greater than the + * value specified in the source program variable declaration. + */ + HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8, + /** + * Size of the variable. The value of this attribute is undefined if the + * symbol is not a variable. The type of this attribute is uint32_t. + * + * A size of 0 is returned if the variable is an external variable and has an + * unknown dimension. + */ + HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9, + /** + * Indicates whether the variable is constant. The value of this attribute is + * undefined if the symbol is not a variable. The type of this attribute is + * bool. + */ + HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10, + /** + * Size of kernarg segment memory that is required to hold the values of the + * kernel arguments, in bytes. Must be a multiple of 16. The value of this + * attribute is undefined if the symbol is not a kernel. The type of this + * attribute is uint32_t. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11, + /** + * Alignment (in bytes) of the buffer used to pass arguments to the kernel, + * which is the maximum of 16 and the maximum alignment of any of the kernel + * arguments. The value of this attribute is undefined if the symbol is not a + * kernel. The type of this attribute is uint32_t. 
+ */ + HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12, + /** + * Size of static group segment memory required by the kernel (per + * work-group), in bytes. The value of this attribute is undefined + * if the symbol is not a kernel. The type of this attribute is uint32_t. + * + * The reported amount does not include any dynamically allocated group + * segment memory that may be requested by the application when a kernel is + * dispatched. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13, + /** + * Size of static private, spill, and arg segment memory required by + * this kernel (per work-item), in bytes. The value of this attribute is + * undefined if the symbol is not a kernel. The type of this attribute is + * uint32_t. + * + * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true, + * the kernel may use more private memory than the reported value, and the + * application must add the dynamic call stack usage to @a + * private_segment_size when populating a kernel dispatch packet. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14, + /** + * Dynamic callstack flag. The value of this attribute is undefined if the + * symbol is not a kernel. The type of this attribute is bool. + * + * If this flag is set (the value is true), the kernel uses a dynamically + * sized call stack. This can happen if recursive calls, calls to indirect + * functions, or the HSAIL alloca instruction are present in the kernel. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15, + /** + * Call convention of the kernel. The value of this attribute is undefined if + * the symbol is not a kernel. The type of this attribute is uint32_t. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18, + /** + * Call convention of the indirect function. The value of this attribute is + * undefined if the symbol is not an indirect function. The type of this + * attribute is uint32_t. 
+ */ + HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16, + /** + * Wavefront size used by the kernel. The value of this attribute is either + * 32 or 64. The type of this attribute is uint32_t. + */ + HSA_CODE_SYMBOL_INFO_KERNEL_WAVEFRONT_SIZE = 19 +} hsa_code_symbol_info_t; + +/** + * @deprecated + * + * @brief Get the current value of an attribute for a given code symbol. + * + * @param[in] code_symbol Code symbol. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_SYMBOL The code symbol is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * code symbol attribute, or @p value is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_symbol_get_info( + hsa_code_symbol_t code_symbol, + hsa_code_symbol_info_t attribute, + void *value); + +/** + * @deprecated + * + * @brief Iterate over the symbols in a code object, and invoke an + * application-defined callback on every iteration. + * + * @param[in] code_object Code object. + * + * @param[in] callback Callback to be invoked once per code object symbol. The + * HSA runtime passes three arguments to the callback: the code object, a + * symbol, and the application data. If @p callback returns a status other than + * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and + * ::hsa_code_object_iterate_symbols returns that status value. + * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. 
+ * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_iterate_symbols( + hsa_code_object_t code_object, + hsa_status_t (*callback)(hsa_code_object_t code_object, + hsa_code_symbol_t symbol, + void *data), + void *data); + +/** @} */ + +#ifdef __cplusplus +} // end extern "C" block +#endif + +#endif // header guard diff --git a/CMakeLists.txt b/CMakeLists.txt index 1137d2af26..b9cecd9ce7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -321,6 +321,14 @@ option(TILELANG_USE_HIP_STUBS "Use POSIX dlopen-based HIP stub libraries (hip/hiprtc) for portable wheels" ${_TILELANG_USE_HIP_STUBS_DEFAULT}) unset(_TILELANG_USE_HIP_STUBS_DEFAULT) + +# Optional path to HIP headers, used together with TILELANG_USE_HIP_STUBS=ON +# when the build host has no ROCm runtime installed (e.g. an NV-only machine +# producing a cross-target wheel). The directory must contain a `hip/` subtree. +# When unset, the resolution order is: $ENV{TILELANG_HIP_INCLUDE_DIR} -> +# /opt/rocm/include -> 3rdparty/hip-headers/include (vendored default). +set(TILELANG_HIP_INCLUDE_DIR "" CACHE PATH + "Path to HIP headers when building with USE_ROCM=ON on a host without a ROCm runtime") # Configs end include(cmake/load_tvm.cmake) @@ -386,19 +394,39 @@ endforeach() # Only auto-select a backend when the user didn't specify one explicitly. 
if(NOT TILELANG_BACKEND_USER_SELECTED) - if($ENV{USE_METAL}) - set(USE_METAL ON) - elseif(APPLE) - message(STATUS "Enable Metal support by default.") - set(USE_METAL ON) - elseif($ENV{USE_ROCM}) - set(USE_ROCM ON) - else() + set(_tilelang_backend_env_selected OFF) + + if(DEFINED ENV{USE_CUDA}) + set(_tilelang_backend_env_selected ON) if($ENV{USE_CUDA}) set(USE_CUDA ON) - elseif(DEFINED ENV{USE_CUDA} AND NOT $ENV{USE_CUDA}) - # Build CPU-only when we explicitly disable CUDA + else() set(USE_CUDA OFF) + endif() + endif() + + if(DEFINED ENV{USE_ROCM}) + set(_tilelang_backend_env_selected ON) + if($ENV{USE_ROCM}) + set(USE_ROCM ON) + else() + set(USE_ROCM OFF) + endif() + endif() + + if(DEFINED ENV{USE_METAL}) + set(_tilelang_backend_env_selected ON) + if($ENV{USE_METAL}) + set(USE_METAL ON) + else() + set(USE_METAL OFF) + endif() + endif() + + if(NOT _tilelang_backend_env_selected) + if(APPLE) + message(STATUS "Enable Metal support by default.") + set(USE_METAL ON) elseif(TILELANG_CUDA_TOOLKIT_AVAILABLE) message(STATUS "Enable CUDA support by default.") set(USE_CUDA ON) @@ -407,6 +435,7 @@ if(NOT TILELANG_BACKEND_USER_SELECTED) set(USE_CUDA OFF) endif() endif() + unset(_tilelang_backend_env_selected) endif() # Backend-local CMake files own native source lists, stubs, include paths, and diff --git a/pyproject.toml b/pyproject.toml index 10ea1823f9..1daaae99e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,6 +139,9 @@ include = [ # Composable Kernel "3rdparty/composable_kernel/include", "3rdparty/composable_kernel/library", + # Vendored HIP headers (build-time only) so source builds work on hosts + # without a ROCm install. Not mapped into the wheel at runtime. 
+ "3rdparty/hip-headers/include", "testing/**", "examples/**", ] @@ -278,6 +281,13 @@ environment.PYTHONDEVMODE = "1" environment.PYTHONUNBUFFERED = "1" environment.PATH = "/usr/local/cuda/bin:$PATH" environment.LD_LIBRARY_PATH = "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" +# Build a fat wheel that supports both CUDA and ROCm at runtime. ROCm sources +# compile against the vendored HIP headers under 3rdparty/hip-headers/include +# and link to libhip_stub (dlopen-based), so no ROCm runtime is required in +# the cibuildwheel manylinux container. The resulting wheel still requires +# a real ROCm runtime to be present at execution time on AMD hosts. +environment.USE_CUDA = "ON" +environment.USE_ROCM = "ON" manylinux-x86_64-image = "manylinux_2_28" # AlmaLinux 8 manylinux-aarch64-image = "manylinux_2_34" # Z3 requires # Install CUDA runtime and stub driver library diff --git a/src/backend/rocm/CMakeLists.txt b/src/backend/rocm/CMakeLists.txt index 24b2a83245..56d7b1992e 100644 --- a/src/backend/rocm/CMakeLists.txt +++ b/src/backend/rocm/CMakeLists.txt @@ -10,6 +10,57 @@ include(${TVM_SOURCE}/cmake/utils/FindROCM.cmake) find_rocm(${USE_ROCM}) add_compile_definitions(__HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__=1) +# Fallback for build hosts without a ROCm runtime install (e.g. an NV-only +# machine producing a cross-target wheel). TVM's find_rocm() only succeeds +# when libamdhip64 is present, but with TILELANG_USE_HIP_STUBS=ON we don't +# need the runtime library at build time -- only the public HIP headers. +# +# Resolution order (first match wins): +# 1. -DTILELANG_HIP_INCLUDE_DIR=<path> (explicit override) +# 2. $ENV{TILELANG_HIP_INCLUDE_DIR} +# 3. /opt/rocm/include (if a system ROCm install exists) +# 4. 3rdparty/hip-headers/include (vendored, default fallback) +# +# A minimal hsa/hsa.h is vendored alongside the HIP headers because TVM's +# src/runtime/rocm/rocm_device_api.cc unconditionally #includes <hsa/hsa.h>.
+# Only two HSA symbols (hsa_init / hsa_shut_down) are actually referenced; +# both are exported by hip_stub and lazy-loaded from libhsa-runtime64 at run +# time, so no HSA library is linked. +if(NOT ROCM_FOUND AND TILELANG_USE_HIP_STUBS) + set(_TL_VENDORED_HIP_INC "${CMAKE_SOURCE_DIR}/3rdparty/hip-headers/include") + set(_TL_HIP_INC "") + if(TILELANG_HIP_INCLUDE_DIR) + set(_TL_HIP_INC "${TILELANG_HIP_INCLUDE_DIR}") + elseif(DEFINED ENV{TILELANG_HIP_INCLUDE_DIR}) + set(_TL_HIP_INC "$ENV{TILELANG_HIP_INCLUDE_DIR}") + elseif(IS_DIRECTORY "/opt/rocm/include/hip") + set(_TL_HIP_INC "/opt/rocm/include") + elseif(IS_DIRECTORY "${_TL_VENDORED_HIP_INC}/hip") + set(_TL_HIP_INC "${_TL_VENDORED_HIP_INC}") + endif() + + if(_TL_HIP_INC AND IS_DIRECTORY "${_TL_HIP_INC}/hip") + message(STATUS + "ROCm runtime library not found on host; using HIP headers from " + "${_TL_HIP_INC} with hip_stub for linking. The resulting wheel still " + "requires a real ROCm runtime to be present at execution time.") + set(ROCM_FOUND TRUE) + set(ROCM_INCLUDE_DIRS "${_TL_HIP_INC}") + set(ROCM_HIPHCC_LIBRARY hip_stub CACHE STRING + "HIP runtime library to link against" FORCE) + set(ROCM_HSA_LIBRARY ROCM_HSA_LIBRARY-NOTFOUND CACHE STRING + "HSA runtime library to link against" FORCE) + else() + message(STATUS + "ROCm runtime library not found on host and no HIP headers could be " + "located. Set -DTILELANG_HIP_INCLUDE_DIR= to a directory " + "containing a `hip/` subtree, or restore the vendored headers under " + "3rdparty/hip-headers/include/hip.") + endif() + unset(_TL_HIP_INC) + unset(_TL_VENDORED_HIP_INC) +endif() + if(TILELANG_USE_HIP_STUBS) if(WIN32 AND NOT CYGWIN) message(FATAL_ERROR "TILELANG_USE_HIP_STUBS=ON is not supported on Windows. 
" diff --git a/src/backend/rocm/codegen/rt_mod_hip.cc b/src/backend/rocm/codegen/rt_mod_hip.cc index e4b45b5ddb..038ec281e4 100644 --- a/src/backend/rocm/codegen/rt_mod_hip.cc +++ b/src/backend/rocm/codegen/rt_mod_hip.cc @@ -4,7 +4,6 @@ #endif #include -#include #include "codegen_hip.h" #include "runtime/rocm/rocm_module.h" diff --git a/tilelang/autotuner/grouped_compile.py b/tilelang/autotuner/grouped_compile.py index 73e02521b3..bb9cbd6f1d 100644 --- a/tilelang/autotuner/grouped_compile.py +++ b/tilelang/autotuner/grouped_compile.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any, Callable +from typing import Any, Callable, Optional from tilelang import tvm from tvm.tir import PrimFunc @@ -18,7 +18,7 @@ from tilelang.jit.kernel import JITKernel from tilelang.transform import PassConfigKey -CompileUnitResult = tuple[int, dict[str, Any], JITKernel | None, Exception | None] +CompileUnitResult = tuple[int, dict[str, Any], Optional[JITKernel], Optional[Exception]] def compile_grouped_unit_tvm_ffi( diff --git a/version_provider.py b/version_provider.py index 9b7f258c3e..b4040d62c4 100644 --- a/version_provider.py +++ b/version_provider.py @@ -56,7 +56,8 @@ def dynamic_metadata(field: str, settings: dict[str, object] | None = None) -> s # only on macosx_11_0_arm64, not necessary # backend = 'metal' pass - elif _read_cmake_bool(os.environ.get("USE_ROCM", "")): + elif _read_cmake_bool(os.environ.get("USE_ROCM", "")) and not _read_cmake_bool(os.environ.get("USE_CUDA", "")): + # ROCm-only build. When USE_CUDA is also on (fat wheel), fall through and label as the CUDA backend so the wheel keeps using cuda naming backend = "rocm" elif "USE_CUDA" in os.environ and not _read_cmake_bool(os.environ.get("USE_CUDA")): backend = "cpu"