Skip to content

Commit 58f134c

Browse files
authored
Merge pull request #739 from bratpiorka/rrudnick_ipc_win
enable GPU IPC tests on Windows
2 parents 44aa65a + 8d79f8f commit 58f134c

File tree

10 files changed

+640
-104
lines changed

10 files changed

+640
-104
lines changed

docs/config/ctl.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,25 @@ The CUDA provider currently exposes only the common statistics nodes.
314314
Level Zero memory provider (``LEVEL_ZERO``)
315315
-----------------------------------------------
316316

317-
The Level Zero provider implements the same statistics nodes as the other providers.
317+
The Level Zero provider supports the common statistics nodes described above and
318+
adds the following parameter entry.
319+
320+
.. py:function:: .params.use_import_export_for_IPC(policy)
321+
322+
:param policy: Receives or supplies ``0`` to use the IPC API for memory sharing
323+
and ``1`` to use the import/export mechanism for memory sharing.
324+
:type policy: ``int``
325+
326+
**Access:** read-write.
327+
**Defaults / Env:** Supported.
328+
329+
Controls the memory exchange policy for inter-process communication
330+
operations. When set to ``0`` (default), the provider uses the IPC API
331+
for memory sharing between processes. When set to ``1``, the provider uses
332+
the import/export mechanism for memory sharing. This option is supported
333+
only on Windows with the Level Zero provider, where the default IPC mechanism
334+
does not work. Note that enabling import/export adds overhead during
335+
allocation and deallocation for all allocations on the current provider.
318336

319337
Pool nodes
320338
==========

src/provider/provider_level_zero.c

Lines changed: 162 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,26 @@ void fini_ze_global_state(void) {
4444

4545
// Level Zero Memory Provider settings struct
4646
typedef struct umf_level_zero_memory_provider_params_t {
47-
ze_context_handle_t
48-
level_zero_context_handle; ///< Handle to the Level Zero context
49-
ze_device_handle_t
50-
level_zero_device_handle; ///< Handle to the Level Zero device
47+
// Handle to the Level Zero context
48+
ze_context_handle_t level_zero_context_handle;
5149

52-
umf_usm_memory_type_t memory_type; ///< Allocation memory type
50+
// Handle to the Level Zero device
51+
ze_device_handle_t level_zero_device_handle;
5352

54-
ze_device_handle_t *
55-
resident_device_handles; ///< Array of devices for which the memory should be made resident
56-
uint32_t
57-
resident_device_count; ///< Number of devices for which the memory should be made resident
53+
// Allocation memory type
54+
umf_usm_memory_type_t memory_type;
5855

59-
umf_level_zero_memory_provider_free_policy_t
60-
freePolicy; ///< Memory free policy
56+
// Array of devices for which the memory should be made resident
57+
ze_device_handle_t *resident_device_handles;
58+
59+
// Number of devices for which the memory should be made resident
60+
uint32_t resident_device_count;
61+
62+
// Memory free policy
63+
umf_level_zero_memory_provider_free_policy_t freePolicy;
64+
65+
// Memory exchange policy 0 = IPC (default), 1 = import/export
66+
int use_import_export_for_IPC;
6167

6268
uint32_t device_ordinal;
6369
char name[64];
@@ -77,6 +83,9 @@ typedef struct ze_memory_provider_t {
7783

7884
ze_driver_memory_free_policy_ext_flags_t freePolicyFlags;
7985

86+
// Memory exchange policy 0 = IPC (default), 1 = import/export
87+
int use_import_export_for_IPC;
88+
8089
size_t min_page_size;
8190

8291
uint32_t device_ordinal;
@@ -134,7 +143,56 @@ static void store_last_native_error(int32_t native_error) {
134143
struct ctl ze_memory_ctl_root;
135144
static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT;
136145

146+
// Template extension descriptor carrying the
// ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE flag. Its pNext is NULL here;
// the device allocation path copies it to a local before linking the copy
// into a pNext chain.
static ze_relaxed_allocation_limits_exp_desc_t relaxed_device_allocation_desc =
147+
{.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC,
148+
.pNext = NULL,
149+
.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE};
150+
151+
// Template extension descriptor requesting an exportable allocation with the
// ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32 handle type. The device allocation
// path copies it to a local and appends the copy to the pNext chain when
// use_import_export_for_IPC == 1.
static ze_external_memory_export_desc_t memory_export_desc = {
152+
.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC,
153+
.pNext = NULL,
154+
.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32};
155+
156+
static umf_result_t CTL_READ_HANDLER(use_import_export_for_IPC)(
157+
void *ctx, umf_ctl_query_source_t source, void *arg, size_t size,
158+
umf_ctl_index_utlist_t *indexes) {
159+
(void)source, (void)indexes;
160+
161+
if (arg == NULL || size != sizeof(int)) {
162+
LOG_ERR("arg is NULL or size is not valid");
163+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
164+
}
165+
166+
int *arg_out = arg;
167+
ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)ctx;
168+
*arg_out = ze_provider->use_import_export_for_IPC;
169+
return UMF_RESULT_SUCCESS;
170+
}
171+
172+
static umf_result_t CTL_WRITE_HANDLER(use_import_export_for_IPC)(
173+
void *ctx, umf_ctl_query_source_t source, void *arg, size_t size,
174+
umf_ctl_index_utlist_t *indexes) {
175+
(void)source, (void)indexes;
176+
177+
if (arg == NULL || size != sizeof(int)) {
178+
LOG_ERR("arg is NULL or size is not valid");
179+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
180+
}
181+
182+
int arg_in = *(int *)arg;
183+
ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)ctx;
184+
ze_provider->use_import_export_for_IPC = arg_in;
185+
return UMF_RESULT_SUCCESS;
186+
}
187+
188+
// Argument descriptor for the use_import_export_for_IPC leaf, declared as
// CTL_ARG_INT (the handlers for this leaf require size == sizeof(int)).
static const struct ctl_argument
189+
CTL_ARG(use_import_export_for_IPC) = CTL_ARG_INT;
190+
191+
// Node table for the "params" CTL module; its only entry is the
// use_import_export_for_IPC leaf, followed by the CTL_NODE_END terminator.
// NOTE(review): CTL_LEAF_RW presumably wires both the read and the write
// handler defined for this leaf - confirm against the ctl macros.
static const umf_ctl_node_t CTL_NODE(params)[] = {
192+
CTL_LEAF_RW(use_import_export_for_IPC), CTL_NODE_END};
193+
137194
// Registers the "params" and "stats" modules on ze_memory_ctl_root.
// NOTE(review): presumably invoked once via the ctl_initialized once-flag
// declared next to ze_memory_ctl_root - the call site is not visible here.
static void initialize_ze_ctl(void) {
195+
CTL_REGISTER_MODULE(&ze_memory_ctl_root, params);
138196
CTL_REGISTER_MODULE(&ze_memory_ctl_root, stats);
139197
}
140198

@@ -268,6 +326,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsCreate(
268326
params->resident_device_handles = NULL;
269327
params->resident_device_count = 0;
270328
params->freePolicy = UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT;
329+
params->use_import_export_for_IPC = 0; // disabled by default - use IPC
271330
params->device_ordinal = 0;
272331
strncpy(params->name, DEFAULT_NAME, sizeof(params->name) - 1);
273332
params->name[sizeof(params->name) - 1] = '\0';
@@ -421,11 +480,6 @@ static bool use_relaxed_allocation(ze_memory_provider_t *ze_provider,
421480
return size > ze_provider->device_properties.maxMemAllocSize;
422481
}
423482

424-
static ze_relaxed_allocation_limits_exp_desc_t relaxed_device_allocation_desc =
425-
{.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC,
426-
.pNext = NULL,
427-
.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE};
428-
429483
static umf_result_t ze_memory_provider_free_helper(void *provider, void *ptr,
430484
size_t bytes,
431485
int update_stats) {
@@ -483,11 +537,29 @@ static umf_result_t ze_memory_provider_alloc_helper(void *provider, size_t size,
483537
case UMF_MEMORY_TYPE_DEVICE: {
484538
ze_device_mem_alloc_desc_t dev_desc = {
485539
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
486-
.pNext = use_relaxed_allocation(ze_provider, size)
487-
? &relaxed_device_allocation_desc
488-
: NULL,
540+
.pNext = NULL,
489541
.flags = 0,
490542
.ordinal = ze_provider->device_ordinal};
543+
void *lastNext = &dev_desc.pNext;
544+
545+
ze_relaxed_allocation_limits_exp_desc_t
546+
relaxed_device_allocation_desc_copy =
547+
relaxed_device_allocation_desc;
548+
if (use_relaxed_allocation(ze_provider, size)) {
549+
// add relaxed allocation desc to the pNext chain
550+
*(void **)lastNext = &relaxed_device_allocation_desc_copy;
551+
lastNext = &relaxed_device_allocation_desc_copy.pNext;
552+
}
553+
554+
// check if the allocation should use import / export mechanism
555+
ze_external_memory_export_desc_t memory_export_desc_copy =
556+
memory_export_desc;
557+
if (ze_provider->use_import_export_for_IPC == 1) {
558+
// add external memory export desc to the pNext chain
559+
*(void **)lastNext = &memory_export_desc_copy;
560+
lastNext = &memory_export_desc_copy.pNext;
561+
}
562+
491563
ze_result = g_ze_ops.zeMemAllocDevice(ze_provider->context, &dev_desc,
492564
size, alignment,
493565
ze_provider->device, resultPtr);
@@ -647,6 +719,8 @@ static umf_result_t ze_memory_provider_initialize(const void *params,
647719
ze_provider->memory_type = umf2ze_memory_type(ze_params->memory_type);
648720
ze_provider->freePolicyFlags =
649721
umfFreePolicyToZePolicy(ze_params->freePolicy);
722+
ze_provider->use_import_export_for_IPC =
723+
ze_params->use_import_export_for_IPC;
650724
ze_provider->min_page_size = 0;
651725
ze_provider->device_ordinal = ze_params->device_ordinal;
652726

@@ -812,6 +886,7 @@ static umf_result_t ze_memory_provider_allocation_split(void *provider,
812886

813887
// Provider-specific IPC payload filled in by
// ze_memory_provider_get_ipc_handle() and read back when the handle is opened.
typedef struct ze_ipc_data_t {
814888
// id of the exporting process (set from utils_getpid())
int pid;
889+
// size passed to get_ipc_handle; used as the allocation size on the
// import/export open path
size_t size;
815890
// either the handle from zeMemGetIpcHandle() or, on the import/export
// path, the exported fd value memcpy'd into the start of this field
ze_ipc_mem_handle_t ze_handle;
816891
} ze_ipc_data_t;
817892

@@ -827,20 +902,46 @@ static umf_result_t ze_memory_provider_get_ipc_handle(void *provider,
827902
const void *ptr,
828903
size_t size,
829904
void *providerIpcData) {
830-
(void)size;
831-
832905
ze_result_t ze_result;
833906
ze_ipc_data_t *ze_ipc_data = (ze_ipc_data_t *)providerIpcData;
834907
struct ze_memory_provider_t *ze_provider =
835908
(struct ze_memory_provider_t *)provider;
836909

837-
ze_result = g_ze_ops.zeMemGetIpcHandle(ze_provider->context, ptr,
838-
&ze_ipc_data->ze_handle);
839-
if (ze_result != ZE_RESULT_SUCCESS) {
840-
LOG_ERR("zeMemGetIpcHandle() failed.");
841-
return ze2umf_result(ze_result);
910+
if (ze_provider->use_import_export_for_IPC == 0) {
911+
// default - IPC API
912+
ze_result = g_ze_ops.zeMemGetIpcHandle(ze_provider->context, ptr,
913+
&ze_ipc_data->ze_handle);
914+
if (ze_result != ZE_RESULT_SUCCESS) {
915+
LOG_ERR("zeMemGetIpcHandle() failed.");
916+
return ze2umf_result(ze_result);
917+
}
918+
} else {
919+
// import / export API (NOTE this requires additional flags enabled
920+
// during the memory allocation)
921+
ze_external_memory_export_fd_t fd_desc = {
922+
.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD,
923+
.pNext = NULL,
924+
.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32,
925+
.fd = 0};
926+
927+
ze_memory_allocation_properties_t mem_alloc_props = {
928+
.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES,
929+
.pNext = &fd_desc,
930+
.type = 0,
931+
.id = 0,
932+
.pageSize = 0};
933+
934+
ze_result = g_ze_ops.zeMemGetAllocProperties(ze_provider->context, ptr,
935+
&mem_alloc_props, NULL);
936+
if (ze_result != ZE_RESULT_SUCCESS) {
937+
LOG_ERR("zeMemGetAllocProperties() failed.");
938+
return ze2umf_result(ze_result);
939+
}
940+
941+
memcpy(&ze_ipc_data->ze_handle, &fd_desc.fd, sizeof(fd_desc.fd));
842942
}
843943

944+
ze_ipc_data->size = size;
844945
ze_ipc_data->pid = utils_getpid();
845946

846947
return UMF_RESULT_SUCCESS;
@@ -891,14 +992,41 @@ static umf_result_t ze_memory_provider_open_ipc_handle(void *provider,
891992
memcpy(&ze_ipc_handle, &fd_local, sizeof(fd_local));
892993
}
893994

894-
ze_result = g_ze_ops.zeMemOpenIpcHandle(
895-
ze_provider->context, ze_provider->device, ze_ipc_handle, 0, ptr);
896-
if (fd_local != -1) {
897-
(void)utils_close_fd(fd_local);
898-
}
899-
if (ze_result != ZE_RESULT_SUCCESS) {
900-
LOG_ERR("zeMemOpenIpcHandle() failed.");
901-
return ze2umf_result(ze_result);
995+
if (ze_provider->use_import_export_for_IPC == 0) {
996+
// default - IPC API
997+
ze_result = g_ze_ops.zeMemOpenIpcHandle(
998+
ze_provider->context, ze_provider->device, ze_ipc_handle, 0, ptr);
999+
if (fd_local != -1) {
1000+
(void)utils_close_fd(fd_local);
1001+
}
1002+
if (ze_result != ZE_RESULT_SUCCESS) {
1003+
LOG_ERR("zeMemOpenIpcHandle() failed.");
1004+
return ze2umf_result(ze_result);
1005+
}
1006+
} else {
1007+
// import / export API
1008+
ze_external_memory_import_fd_t import_fd = {
1009+
.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD,
1010+
.pNext = NULL,
1011+
.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF,
1012+
.fd = fd_local};
1013+
1014+
ze_device_mem_alloc_desc_t alloc_desc = {
1015+
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
1016+
.pNext = &import_fd,
1017+
.flags = 0,
1018+
.ordinal = 0};
1019+
ze_result = g_ze_ops.zeMemAllocDevice(ze_provider->context, &alloc_desc,
1020+
ze_ipc_data->size, 0,
1021+
ze_provider->device, ptr);
1022+
if (fd_local != -1) {
1023+
(void)utils_close_fd(fd_local);
1024+
}
1025+
1026+
if (ze_result != ZE_RESULT_SUCCESS) {
1027+
LOG_ERR("zeMemAllocDevice() failed.");
1028+
return ze2umf_result(ze_result);
1029+
}
9021030
}
9031031

9041032
return UMF_RESULT_SUCCESS;

src/utils/utils_windows_common.c

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <windows.h>
1111

1212
#include <assert.h>
13+
#include <handleapi.h>
1314
#include <processenv.h>
1415
#include <processthreadsapi.h>
1516
#include <stdio.h>
@@ -47,21 +48,49 @@ int utils_getpid(void) { return GetCurrentProcessId(); }
4748

4849
// Returns the identifier of the calling thread (GetCurrentThreadId()).
int utils_gettid(void) { return GetCurrentThreadId(); }
4950

50-
int utils_close_fd(int fd) {
51-
(void)fd; // unused
52-
return -1;
53-
}
51+
int utils_close_fd(int fd) { return CloseHandle((HANDLE)(uintptr_t)fd); }
5452

5553
// Windows stub: the errno value is ignored and
// UMF_RESULT_ERROR_NOT_SUPPORTED is always returned.
umf_result_t utils_errno_to_umf_result(int err) {
5654
(void)err; // unused
5755
return UMF_RESULT_ERROR_NOT_SUPPORTED;
5856
}
5957

6058
umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) {
61-
(void)pid; // unused
62-
(void)fd_in; // unused
63-
(void)fd_out; // unused
64-
return UMF_RESULT_ERROR_NOT_SUPPORTED;
59+
umf_result_t ret = UMF_RESULT_SUCCESS;
60+
HANDLE current_process_handle = GetCurrentProcess();
61+
if (!current_process_handle) {
62+
LOG_ERR("GetCurrentProcess() failed.");
63+
return UMF_RESULT_ERROR_UNKNOWN;
64+
}
65+
66+
HANDLE source_process_handle = OpenProcess(PROCESS_DUP_HANDLE, FALSE, pid);
67+
if (!source_process_handle) {
68+
LOG_ERR("OpenProcess() failed for pid=%d.", pid);
69+
ret = UMF_RESULT_ERROR_UNKNOWN;
70+
goto release_current;
71+
}
72+
73+
HANDLE handle_in = (HANDLE)(uintptr_t)fd_in;
74+
HANDLE handle_out = NULL;
75+
BOOL result = DuplicateHandle(source_process_handle, handle_in,
76+
current_process_handle, &handle_out,
77+
GENERIC_READ | GENERIC_WRITE, FALSE, 0);
78+
if (!result) {
79+
LOG_ERR("DuplicateHandle() failed for pid=%d fd_in=%d handle_in=%p",
80+
pid, fd_in, handle_in);
81+
ret = UMF_RESULT_ERROR_UNKNOWN;
82+
goto release_source;
83+
}
84+
85+
*fd_out = (int)(uintptr_t)handle_out;
86+
87+
release_source:
88+
CloseHandle(source_process_handle);
89+
90+
release_current:
91+
CloseHandle(current_process_handle);
92+
93+
return ret;
6594
}
6695

6796
umf_result_t utils_translate_mem_protection_flags(unsigned in_protection,

0 commit comments

Comments
 (0)