Skip to content

Commit 6aff055

Browse files
committed
feat: add new functions of CUDA 11.5.1
Signed-off-by: thomassong <[email protected]>
1 parent 1507803 commit 6aff055

File tree

8 files changed

+815
-309
lines changed

8 files changed

+815
-309
lines changed

find_new_lib.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# find_new_lib.sh - list exported library symbols that have no entry in the
# helper headers yet.
#
# Usage: find_new_lib.sh <cuda-library> <nvml-library>
#
# For every symbol that `nm -D` lists with type " T " in a library, the script
# builds the matching enum line (CUDA_ENTRY_ENUM(<sym>) for the first library,
# NVML_ENTRY_ENUM(<sym>) for the second) and prints it, followed by a comma,
# when that exact text does not occur in include/cuda-helper.h or
# include/nvml-helper.h respectively. The header paths are relative to the
# current working directory.

set -o errexit
set -o nounset
set -o pipefail

if [[ $# -lt 2 ]]; then
  echo "usage: $0 <cuda-library> <nvml-library>" >&2
  exit 1
fi

CUDA_LIBRARY=$1
ML_LIBRARY=$2

# print_missing_entries LIBRARY MACRO HEADER
# Prints "MACRO(symbol)," for each " T " symbol of LIBRARY whose MACRO(symbol)
# text is not present in HEADER.
print_missing_entries() {
  local library=$1 macro=$2 header=$3 item

  # Without this check a missing header makes grep fail for every symbol, so
  # the whole export table would be reported as new.
  if [[ ! -r "${header}" ]]; then
    echo "cannot read ${header}" >&2
    return 1
  fi

  while IFS= read -r item; do
    # -F: the entry contains '(' and ')' and must be matched literally;
    # quoting keeps the entry a single argument.
    grep -qF -- "${item}" "${header}" || echo "${item},"
  done < <(nm -D "${library}" | grep " T " | awk -v macro="${macro}" '{print macro "(" $3 ")"}')
}

echo "find new library"

print_missing_entries "${CUDA_LIBRARY}" CUDA_ENTRY_ENUM include/cuda-helper.h

echo ""

print_missing_entries "${ML_LIBRARY}" NVML_ENTRY_ENUM include/nvml-helper.h

include/cuda-helper.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,30 @@ typedef enum {
10781078
CUDA_ENTRY_ENUM(cuMemPoolTrimTo),
10791079
/** cuMipmappedArrayGetSparseProperties */
10801080
CUDA_ENTRY_ENUM(cuMipmappedArrayGetSparseProperties),
1081+
CUDA_ENTRY_ENUM(cuCtxCreate_v3),
1082+
CUDA_ENTRY_ENUM(cuCtxGetExecAffinity),
1083+
CUDA_ENTRY_ENUM(cuDeviceGetExecAffinitySupport),
1084+
CUDA_ENTRY_ENUM(cuDeviceGetGraphMemAttribute),
1085+
CUDA_ENTRY_ENUM(cuDeviceGetUuid_v2),
1086+
CUDA_ENTRY_ENUM(cuDeviceGraphMemTrim),
1087+
CUDA_ENTRY_ENUM(cuDeviceSetGraphMemAttribute),
1088+
CUDA_ENTRY_ENUM(cuFlushGPUDirectRDMAWrites),
1089+
CUDA_ENTRY_ENUM(cuGetProcAddress),
1090+
CUDA_ENTRY_ENUM(cuGraphAddMemAllocNode),
1091+
CUDA_ENTRY_ENUM(cuGraphAddMemFreeNode),
1092+
CUDA_ENTRY_ENUM(cuGraphDebugDotPrint),
1093+
CUDA_ENTRY_ENUM(cuGraphInstantiateWithFlags),
1094+
CUDA_ENTRY_ENUM(cuGraphMemAllocNodeGetParams),
1095+
CUDA_ENTRY_ENUM(cuGraphMemFreeNodeGetParams),
1096+
CUDA_ENTRY_ENUM(cuGraphReleaseUserObject),
1097+
CUDA_ENTRY_ENUM(cuGraphRetainUserObject),
1098+
CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2),
1099+
CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2_ptsz),
1100+
CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies),
1101+
CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies_ptsz),
1102+
CUDA_ENTRY_ENUM(cuUserObjectCreate),
1103+
CUDA_ENTRY_ENUM(cuUserObjectRelease),
1104+
CUDA_ENTRY_ENUM(cuUserObjectRetain),
10811105
CUDA_ENTRY_END
10821106
} cuda_entry_enum_t;
10831107

include/cuda-subset.h

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,105 @@ typedef enum CUmemPool_attribute_enum {
22622262
CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
22632263
} CUmemPool_attribute;
22642264

/**
 * Execution affinity types.
 */
typedef enum CUexecAffinityType_enum {
  /** Create a context with limited SMs. */
  CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0,
  /** Takes the value following the last affinity type listed above. */
  CU_EXEC_AFFINITY_TYPE_MAX
} CUexecAffinityType;
2272+
/**
 * Payload used with ::CU_EXEC_AFFINITY_TYPE_SM_COUNT.
 *
 * Both ::CUexecAffinitySmCount_v1 and ::CUexecAffinitySmCount name this
 * same structure.
 */
typedef struct CUexecAffinitySmCount_st {
  /** The number of SMs the context is limited to use. */
  unsigned int val;
} CUexecAffinitySmCount_v1, CUexecAffinitySmCount;
2280+
2281+
/**
2282+
* Execution Affinity Parameters
2283+
*/
2284+
typedef struct CUexecAffinityParam_st {
2285+
CUexecAffinityType type;
2286+
union {
2287+
CUexecAffinitySmCount
2288+
smCount; /** Value for ::CU_EXEC_AFFINITY_TYPE_SM_COUNT */
2289+
} param;
2290+
} CUexecAffinityParam_v1;
2291+
typedef CUexecAffinityParam_v1 CUexecAffinityParam;
2292+
/**
 * Graph memory attributes. Every attribute below has value type cuuint64_t.
 */
typedef enum CUgraphMem_attribute_enum {
  /** Amount of memory, in bytes, currently associated with graphs. */
  CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0,

  /**
   * High watermark of memory, in bytes, associated with graphs since the
   * last time it was reset. High watermark can only be reset to zero.
   */
  CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1,

  /**
   * Amount of memory, in bytes, currently allocated for use by the CUDA
   * graphs asynchronous allocator.
   */
  CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2,

  /**
   * High watermark of memory, in bytes, currently allocated for use by the
   * CUDA graphs asynchronous allocator.
   */
  CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3
} CUgraphMem_attribute;
2321+
2322+
/**
2323+
* Memory allocation node parameters
2324+
*/
2325+
typedef struct CUDA_MEM_ALLOC_NODE_PARAMS_st {
2326+
/**
2327+
* in: location where the allocation should reside (specified in ::location).
2328+
* ::handleTypes must be ::CU_MEM_HANDLE_TYPE_NONE. IPC is not supported.
2329+
*/
2330+
CUmemPoolProps poolProps;
2331+
const CUmemAccessDesc
2332+
*accessDescs; /**< in: array of memory access descriptors. Used to
2333+
describe peer GPU access */
2334+
size_t accessDescCount; /**< in: number of memory access descriptors. Must
2335+
not exceed the number of GPUs. */
2336+
size_t bytesize; /**< in: size in bytes of the requested allocation */
2337+
CUdeviceptr dptr; /**< out: address of the allocation returned by CUDA */
2338+
} CUDA_MEM_ALLOC_NODE_PARAMS;
2339+
/** CUDA user object for graphs (pointer to struct CUuserObject_st). */
typedef struct CUuserObject_st *CUuserObject;
2342+
/**
 * Targets accepted by ::cuFlushGPUDirectRDMAWrites.
 */
typedef enum CUflushGPUDirectRDMAWritesTarget_enum {
  /**
   * Sets the target for ::cuFlushGPUDirectRDMAWrites() to the currently
   * active CUDA device context.
   */
  CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0
} CUflushGPUDirectRDMAWritesTarget;
2351+
/**
 * Scopes accepted by ::cuFlushGPUDirectRDMAWrites.
 */
typedef enum CUflushGPUDirectRDMAWritesScope_enum {
  /**
   * Blocks until remote writes are visible to the CUDA device context
   * owning the data.
   */
  CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100,

  /**
   * Blocks until remote writes are visible to all CUDA device contexts.
   */
  CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200
} CUflushGPUDirectRDMAWritesScope;
2363+
22652364
#ifdef __cplusplus
22662365
}
22672366
#endif

include/nvml-helper.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,19 @@ typedef enum {
540540
NVML_ENTRY_ENUM(nvmlVgpuInstanceGetGpuInstanceId),
541541
/** nvmlVgpuTypeGetGpuInstanceProfileId */
542542
NVML_ENTRY_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId),
543+
NVML_ENTRY_ENUM(nvmlDeviceCreateGpuInstanceWithPlacement),
544+
NVML_ENTRY_ENUM(nvmlDeviceGetBusType),
545+
NVML_ENTRY_ENUM(nvmlDeviceGetClkMonStatus),
546+
NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements_v2),
547+
NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v4),
548+
NVML_ENTRY_ENUM(nvmlDeviceGetIrqNum),
549+
NVML_ENTRY_ENUM(nvmlDeviceGetMPSComputeRunningProcesses_v2),
550+
NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemoteDeviceType),
551+
NVML_ENTRY_ENUM(nvmlDeviceResetMemoryLockedClocks),
552+
NVML_ENTRY_ENUM(nvmlDeviceSetMemoryLockedClocks),
553+
NVML_ENTRY_ENUM(nvmlGetExcludedDeviceCount),
554+
NVML_ENTRY_ENUM(nvmlGetExcludedDeviceInfoByIndex),
555+
NVML_ENTRY_ENUM(nvmlVgpuInstanceGetLicenseInfo),
543556
NVML_ENTRY_END
544557
} nvml_entry_enum_t;
545558

0 commit comments

Comments
 (0)