-
Notifications
You must be signed in to change notification settings - Fork 431
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
UCT/CUDA: Runtime CUDA >= 12.3 to enable VMM #10396
base: master
Are you sure you want to change the base?
Changes from 2 commits
68a5f51
9fc4430
e8c9f99
3b43d29
2161adf
6563253
ff4313c
2f5e5a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,6 +81,10 @@ static ucs_config_field_t uct_cuda_copy_md_config_table[] = { | |
{NULL} | ||
}; | ||
|
||
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; | ||
|
||
static CUresult (*ctx_set_flags_func)(unsigned); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. uct_cuda_cuCtxSetFlags_func There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
static int uct_cuda_copy_md_is_dmabuf_supported() | ||
{ | ||
int dmabuf_supported = 0; | ||
|
@@ -479,7 +483,6 @@ static void uct_cuda_copy_md_close(uct_md_h uct_md) { | |
|
||
static size_t uct_cuda_copy_md_get_total_device_mem(CUdevice cuda_device) | ||
{ | ||
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; | ||
static size_t total_bytes[UCT_CUDA_MAX_DEVICES]; | ||
char dev_name[UCT_CUDA_DEV_NAME_MAX_LEN]; | ||
|
||
|
@@ -515,22 +518,28 @@ static size_t uct_cuda_copy_md_get_total_device_mem(CUdevice cuda_device) | |
static void | ||
uct_cuda_copy_sync_memops(uct_cuda_copy_md_t *md, const void *address) | ||
{ | ||
unsigned value = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's rename to sync_memops_value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
#if HAVE_CUDA_FABRIC | ||
ucs_status_t status; | ||
if (!md->sync_memops_set) { | ||
/* Synchronize future DMA operations for all memory types */ | ||
status = UCT_CUDADRV_FUNC_LOG_WARN(cuCtxSetFlags(CU_CTX_SYNC_MEMOPS)); | ||
if (status == UCS_OK) { | ||
md->sync_memops_set = 1; | ||
if (ctx_set_flags_func != NULL) { | ||
if (!md->sync_memops_set) { | ||
/* Synchronize future DMA operations for all memory types */ | ||
status = UCT_CUDADRV_FUNC_LOG_ERR( | ||
rakhmets marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ctx_set_flags_func(CU_CTX_SYNC_MEMOPS)); | ||
if (status == UCS_OK) { | ||
md->sync_memops_set = 1; | ||
} | ||
} | ||
|
||
return; | ||
} | ||
#else | ||
unsigned value = 1; | ||
#endif | ||
|
||
/* Synchronize for DMA for legacy memory types*/ | ||
UCT_CUDADRV_FUNC_LOG_WARN( | ||
cuPointerSetAttribute(&value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, | ||
(CUdeviceptr)address)); | ||
#endif | ||
} | ||
|
||
static ucs_status_t | ||
|
@@ -823,6 +832,35 @@ static uct_md_ops_t md_ops = { | |
.detect_memory_type = uct_cuda_copy_md_detect_memory_type | ||
}; | ||
|
||
static ucs_status_t uct_cuda_copy_md_check_is_ctx_set_flags_supported(void) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To simplify the code, we could have this function call the needed function pointer, and move the global var inside it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about it but went for two step approach as we need:
|
||
{ | ||
static ucs_status_t status = UCS_ERR_INVALID_ADDR; | ||
rakhmets marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
#if CUDA_VERSION >= 12000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
CUdriverProcAddressQueryResult sym_status; | ||
CUresult cu_err; | ||
|
||
if (status == UCS_ERR_INVALID_ADDR) { | ||
pthread_mutex_lock(&lock); | ||
rakhmets marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cu_err = cuGetProcAddress("cuCtxSetFlags", (void**)&ctx_set_flags_func, | ||
12010, CU_GET_PROC_ADDRESS_DEFAULT, | ||
&sym_status); | ||
|
||
if ((cu_err == CUDA_SUCCESS) && | ||
(sym_status == CU_GET_PROC_ADDRESS_SUCCESS)) { | ||
status = UCS_OK; | ||
} else { | ||
ctx_set_flags_func = NULL; | ||
status = UCS_ERR_UNSUPPORTED; | ||
} | ||
|
||
pthread_mutex_unlock(&lock); | ||
} | ||
#endif | ||
|
||
return status; | ||
} | ||
|
||
static ucs_status_t | ||
uct_cuda_copy_md_open(uct_component_t *component, const char *md_name, | ||
const uct_md_config_t *md_config, uct_md_h *md_p) | ||
|
@@ -850,6 +888,13 @@ uct_cuda_copy_md_open(uct_component_t *component, const char *md_name, | |
md->sync_memops_set = 0; | ||
md->granularity = SIZE_MAX; | ||
|
||
status = uct_cuda_copy_md_check_is_ctx_set_flags_supported(); | ||
if ((status != UCS_OK) && (md->config.enable_fabric != UCS_NO)) { | ||
ucs_warn("disabled fabric memory allocations as cuda driver " | ||
"library does not support cuCtxSetFlags()"); | ||
md->config.enable_fabric = UCS_NO; | ||
} | ||
rakhmets marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if ((config->cuda_async_mem_type != UCS_MEMORY_TYPE_CUDA) && | ||
(config->cuda_async_mem_type != UCS_MEMORY_TYPE_CUDA_MANAGED)) { | ||
ucs_warn("wrong memory type for async memory allocations: \"%s\";" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i don't think we need a lock, and if we did, i would use a better name like uct_cuda_...._lock ..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed