Skip to content

Commit 62ecb84

Browse files
JackAKirknpmillerJackAKirksteffenlarsen
authored
[SYCL][CUDA] Implement sycl_ext_oneapi_peer_access extension (#8303)
This implements the current extension doc from #6104 in the CUDA backend only. Fixes #7543. Fixes #6749. --------- Signed-off-by: JackAKirk <[email protected]> Co-authored-by: Nicolas Miller <[email protected]> Co-authored-by: JackAKirk <[email protected]> Co-authored-by: Steffen Larsen <[email protected]>
1 parent 1dc77a7 commit 62ecb84

30 files changed

+782
-9
lines changed

sycl/doc/extensions/proposed/sycl_ext_oneapi_peer_access.asciidoc sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc

+7-6
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,19 @@ https://github.com/intel/llvm/issues
3636

3737
== Dependencies
3838

39-
This extension is written against the SYCL 2020 revision 6 specification. All
39+
This extension is written against the SYCL 2020 revision 7 specification. All
4040
references below to the "core SYCL specification" or to section numbers in the
4141
SYCL specification refer to that revision.
4242

4343
== Status
4444

45-
This is a proposed extension specification, intended to gather community
46-
feedback. Interfaces defined in this specification may not be implemented yet
47-
or may be in a preliminary state. The specification itself may also change in
48-
incompatible ways before it is finalized. *Shipping software products should
49-
not rely on APIs defined in this specification.*
45+
This extension is implemented and fully supported by DPC++.
5046

47+
== Backend support status
48+
49+
This extension is currently implemented in DPC++ for all GPU devices and
50+
backends, however, only the CUDA backend allows peer to peer memory access.
51+
Other backends report false from the `ext_oneapi_can_access_peer` query.
5152

5253
== Overview
5354

sycl/include/sycl/detail/pi.def

+4-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ _PI_API(piPluginGetLastError)
145145

146146
_PI_API(piTearDown)
147147

148-
149148
_PI_API(piextUSMEnqueueFill2D)
150149
_PI_API(piextUSMEnqueueMemset2D)
151150
_PI_API(piextUSMEnqueueMemcpy2D)
@@ -158,6 +157,10 @@ _PI_API(piextEnqueueDeviceGlobalVariableRead)
158157

159158
_PI_API(piPluginGetBackendOption)
160159

160+
_PI_API(piextEnablePeerAccess)
161+
_PI_API(piextDisablePeerAccess)
162+
_PI_API(piextPeerAccessGetInfo)
163+
161164
// command-buffer Extension
162165
_PI_API(piextCommandBufferCreate)
163166
_PI_API(piextCommandBufferRetain)

sycl/include/sycl/detail/pi.h

+21-1
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,11 @@
9797
// 14.33 Added new parameter (memory object properties) to
9898
// piextKernelSetArgMemObj
9999
// 14.34 Added command-buffer extension methods
100+
// 14.35 Added piextEnablePeerAccess, piextDisablePeerAccess,
101+
// piextPeerAccessGetInfo, and pi_peer_attr enum.
100102

101103
#define _PI_H_VERSION_MAJOR 14
102-
#define _PI_H_VERSION_MINOR 34
104+
#define _PI_H_VERSION_MINOR 35
103105

104106
#define _PI_STRING_HELPER(a) #a
105107
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -1030,7 +1032,17 @@ using pi_image_desc = _pi_image_desc;
10301032

10311033
typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info;
10321034

1035+
typedef enum {
1036+
PI_PEER_ACCESS_SUPPORTED =
1037+
0x0, ///< returns a uint32_t: 1 if P2P Access is supported
1038+
///< otherwise P2P Access is not supported.
1039+
PI_PEER_ATOMICS_SUPPORTED =
1040+
0x1 ///< returns a uint32_t: 1 if Atomic operations are supported over the
1041+
///< P2P link, otherwise such operations are not supported.
1042+
} _pi_peer_attr;
1043+
10331044
using pi_mem_info = _pi_mem_info;
1045+
using pi_peer_attr = _pi_peer_attr;
10341046

10351047
//
10361048
// Following section contains SYCL RT Plugin Interface (PI) functions.
@@ -1088,6 +1100,14 @@ __SYCL_EXPORT pi_result piDevicesGet(pi_platform platform,
10881100
pi_uint32 num_entries, pi_device *devices,
10891101
pi_uint32 *num_devices);
10901102

1103+
__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device,
1104+
pi_device peer_device);
1105+
__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device,
1106+
pi_device peer_device);
1107+
__SYCL_EXPORT pi_result piextPeerAccessGetInfo(
1108+
pi_device command_device, pi_device peer_device, pi_peer_attr attr,
1109+
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
1110+
10911111
/// Returns requested info for provided native device
10921112
/// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for
10931113
/// PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts

sycl/include/sycl/detail/pi.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ using PiKernelCacheConfig = ::pi_kernel_cache_config;
155155
using PiExtSyncPoint = ::pi_ext_sync_point;
156156
using PiExtCommandBuffer = ::pi_ext_command_buffer;
157157
using PiExtCommandBufferDesc = ::pi_ext_command_buffer_desc;
158+
using PiPeerAttr = ::pi_peer_attr;
158159

159160
__SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext,
160161
pi_context_extended_deleter func,

sycl/include/sycl/device.hpp

+13
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ enum class aspect;
4141
namespace ext::oneapi {
4242
// Forward declaration
4343
class filter_selector;
44+
45+
enum class peer_access {
46+
access_supported = 0x0,
47+
atomics_supported = 0x1,
48+
};
49+
4450
} // namespace ext::oneapi
4551

4652
/// The SYCL device class encapsulates a single SYCL device on which kernels
@@ -90,6 +96,13 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
9096

9197
device &operator=(device &&rhs) = default;
9298

99+
void ext_oneapi_enable_peer_access(const device &peer);
100+
void ext_oneapi_disable_peer_access(const device &peer);
101+
bool
102+
ext_oneapi_can_access_peer(const device &peer,
103+
ext::oneapi::peer_access value =
104+
ext::oneapi::peer_access::access_supported);
105+
93106
/// Get instance of device
94107
///
95108
/// \return a valid cl_device_id instance in accordance with the requirements

sycl/plugins/cuda/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ add_sycl_plugin(cuda
8181
"../unified_runtime/ur/adapters/cuda/usm.cpp"
8282
"../unified_runtime/ur/adapters/cuda/command_buffer.hpp"
8383
"../unified_runtime/ur/adapters/cuda/command_buffer.cpp"
84+
"../unified_runtime/ur/adapters/cuda/usm_p2p.cpp"
8485
# ---
8586
"${sycl_inc_dir}/sycl/detail/pi.h"
8687
"${sycl_inc_dir}/sycl/detail/pi.hpp"

sycl/plugins/cuda/pi_cuda.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
198198
_PI_CL(piextCommandBufferMemBufferCopyRect,
199199
pi2ur::piextCommandBufferMemBufferCopyRect)
200200
_PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer)
201-
201+
// Peer to Peer
202+
_PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess)
203+
_PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess)
204+
_PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo)
202205
#undef _PI_CL
203206

204207
return PI_SUCCESS;

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

+37
Original file line numberDiff line numberDiff line change
@@ -2241,6 +2241,43 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
22412241
return PI_SUCCESS;
22422242
}
22432243

2244+
pi_result piextEnablePeerAccess(pi_device command_device,
2245+
pi_device peer_device) {
2246+
2247+
std::ignore = command_device;
2248+
std::ignore = peer_device;
2249+
2250+
setErrorMessage("piextEnablePeerAccess not "
2251+
"implemented in esimd_emulator backend",
2252+
PI_ERROR_PLUGIN_SPECIFIC_ERROR);
2253+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
2254+
}
2255+
2256+
pi_result piextDisablePeerAccess(pi_device command_device,
2257+
pi_device peer_device) {
2258+
2259+
std::ignore = command_device;
2260+
std::ignore = peer_device;
2261+
2262+
setErrorMessage("piextDisablePeerAccess not "
2263+
"implemented in esimd_emulator backend",
2264+
PI_ERROR_PLUGIN_SPECIFIC_ERROR);
2265+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
2266+
}
2267+
2268+
pi_result piextPeerAccessGetInfo(pi_device command_device,
2269+
pi_device peer_device, pi_peer_attr attr,
2270+
size_t ParamValueSize, void *ParamValue,
2271+
size_t *ParamValueSizeRet) {
2272+
std::ignore = command_device;
2273+
std::ignore = peer_device;
2274+
std::ignore = attr;
2275+
2276+
ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
2277+
// Zero return value indicates that all of the queries currently return false.
2278+
return ReturnValue(pi_int32{0});
2279+
}
2280+
22442281
#ifdef _WIN32
22452282
#define __SYCL_PLUGIN_DLL_NAME "pi_esimd_emulator.dll"
22462283
#include "../common_win_pi_trace/common_win_pi_trace.hpp"

sycl/plugins/hip/pi_hip.cpp

+41
Original file line numberDiff line numberDiff line change
@@ -5857,6 +5857,42 @@ pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
58575857
return PI_SUCCESS;
58585858
}
58595859

5860+
pi_result hip_piextEnablePeerAccess(pi_device command_device,
5861+
pi_device peer_device) {
5862+
5863+
std::ignore = command_device;
5864+
std::ignore = peer_device;
5865+
5866+
setErrorMessage("piextEnablePeerAccess not "
5867+
"implemented in hip backend",
5868+
PI_ERROR_PLUGIN_SPECIFIC_ERROR);
5869+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
5870+
}
5871+
5872+
pi_result hip_piextDisablePeerAccess(pi_device command_device,
5873+
pi_device peer_device) {
5874+
5875+
std::ignore = command_device;
5876+
std::ignore = peer_device;
5877+
5878+
setErrorMessage("piextDisablePeerAccess not "
5879+
"implemented in hip backend",
5880+
PI_ERROR_PLUGIN_SPECIFIC_ERROR);
5881+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
5882+
}
5883+
5884+
pi_result hip_piextPeerAccessGetInfo(pi_device command_device,
5885+
pi_device peer_device, pi_peer_attr attr,
5886+
size_t param_value_size, void *param_value,
5887+
size_t *param_value_size_ret) {
5888+
std::ignore = command_device;
5889+
std::ignore = peer_device;
5890+
std::ignore = attr;
5891+
// Zero return value indicates that all of the queries currently return false.
5892+
return getInfo(param_value_size, param_value, param_value_size_ret,
5893+
pi_int32{0});
5894+
}
5895+
58605896
const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING;
58615897

58625898
pi_result piPluginInit(pi_plugin *PluginInit) {
@@ -6028,6 +6064,11 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
60286064
_PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer)
60296065
_PI_CL(piPluginGetBackendOption, hip_piPluginGetBackendOption)
60306066

6067+
// Peer to Peer
6068+
_PI_CL(piextEnablePeerAccess, hip_piextEnablePeerAccess)
6069+
_PI_CL(piextDisablePeerAccess, hip_piextDisablePeerAccess)
6070+
_PI_CL(piextPeerAccessGetInfo, hip_piextPeerAccessGetInfo)
6071+
60316072
#undef _PI_CL
60326073

60336074
return PI_SUCCESS;

sycl/plugins/level_zero/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ add_sycl_plugin(level_zero
127127
"../unified_runtime/ur/adapters/level_zero/queue.cpp"
128128
"../unified_runtime/ur/adapters/level_zero/sampler.cpp"
129129
"../unified_runtime/ur/adapters/level_zero/usm.cpp"
130+
"../unified_runtime/ur/adapters/level_zero/usm_p2p.cpp"
130131
# Following are the PI Level-Zero Plugin only codes.
131132
"pi_level_zero.cpp"
132133
"pi_level_zero.hpp"

sycl/plugins/level_zero/pi_level_zero.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,28 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
12261226
return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime);
12271227
}
12281228

1229+
pi_result piextEnablePeerAccess(pi_device command_device,
1230+
pi_device peer_device) {
1231+
1232+
return pi2ur::piextEnablePeerAccess(command_device, peer_device);
1233+
}
1234+
1235+
pi_result piextDisablePeerAccess(pi_device command_device,
1236+
pi_device peer_device) {
1237+
1238+
return pi2ur::piextDisablePeerAccess(command_device, peer_device);
1239+
}
1240+
1241+
pi_result piextPeerAccessGetInfo(pi_device command_device,
1242+
pi_device peer_device, pi_peer_attr attr,
1243+
size_t ParamValueSize, void *ParamValue,
1244+
size_t *ParamValueSizeRet) {
1245+
1246+
return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr,
1247+
ParamValueSize, ParamValue,
1248+
ParamValueSizeRet);
1249+
}
1250+
12291251
#ifdef _WIN32
12301252
#define __SYCL_PLUGIN_DLL_NAME "pi_level_zero.dll"
12311253
#include "../common_win_pi_trace/common_win_pi_trace.hpp"

sycl/plugins/unified_runtime/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ add_sycl_library("ur_adapter_level_zero" SHARED
110110
"ur/adapters/level_zero/queue.cpp"
111111
"ur/adapters/level_zero/sampler.cpp"
112112
"ur/adapters/level_zero/usm.cpp"
113+
"ur/adapters/level_zero/usm_p2p.cpp"
113114
INCLUDE_DIRS
114115
${sycl_inc_dir}
115116
LIBRARIES
@@ -158,6 +159,7 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
158159
"ur/adapters/cuda/usm.cpp"
159160
"ur/adapters/cuda/command_buffer.hpp"
160161
"ur/adapters/cuda/command_buffer.cpp"
162+
"ur/adapters/cuda/usm_p2p.cpp"
161163
INCLUDE_DIRS
162164
${sycl_inc_dir}
163165
LIBRARIES

sycl/plugins/unified_runtime/pi2ur.hpp

+55
Original file line numberDiff line numberDiff line change
@@ -4322,4 +4322,59 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
43224322
// Command-buffer extension
43234323
///////////////////////////////////////////////////////////////////////////////
43244324

4325+
///////////////////////////////////////////////////////////////////////////////
4326+
// usm-p2p
4327+
4328+
inline pi_result piextEnablePeerAccess(pi_device command_device,
4329+
pi_device peer_device) {
4330+
auto commandDevice = reinterpret_cast<ur_device_handle_t>(command_device);
4331+
auto peerDevice = reinterpret_cast<ur_device_handle_t>(peer_device);
4332+
4333+
HANDLE_ERRORS(urUsmP2PEnablePeerAccessExp(commandDevice, peerDevice));
4334+
4335+
return PI_SUCCESS;
4336+
}
4337+
4338+
inline pi_result piextDisablePeerAccess(pi_device command_device,
4339+
pi_device peer_device) {
4340+
auto commandDevice = reinterpret_cast<ur_device_handle_t>(command_device);
4341+
auto peerDevice = reinterpret_cast<ur_device_handle_t>(peer_device);
4342+
4343+
HANDLE_ERRORS(urUsmP2PDisablePeerAccessExp(commandDevice, peerDevice));
4344+
4345+
return PI_SUCCESS;
4346+
}
4347+
4348+
inline pi_result
4349+
piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device,
4350+
pi_peer_attr attr, size_t param_value_size,
4351+
void *param_value, size_t *param_value_size_ret) {
4352+
auto commandDevice = reinterpret_cast<ur_device_handle_t>(command_device);
4353+
auto peerDevice = reinterpret_cast<ur_device_handle_t>(peer_device);
4354+
4355+
ur_exp_peer_info_t propName;
4356+
switch (attr) {
4357+
case PI_PEER_ACCESS_SUPPORTED: {
4358+
propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED;
4359+
break;
4360+
}
4361+
case PI_PEER_ATOMICS_SUPPORTED: {
4362+
propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED;
4363+
break;
4364+
}
4365+
default: {
4366+
return PI_ERROR_INVALID_VALUE;
4367+
}
4368+
}
4369+
4370+
HANDLE_ERRORS(urUsmP2PPeerAccessGetInfoExp(
4371+
commandDevice, peerDevice, propName, param_value_size, param_value,
4372+
param_value_size_ret));
4373+
4374+
return PI_SUCCESS;
4375+
}
4376+
4377+
// usm-p2p
4378+
///////////////////////////////////////////////////////////////////////////////
4379+
43254380
} // namespace pi2ur

sycl/plugins/unified_runtime/pi_unified_runtime.cpp

+25
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,26 @@ __SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform,
11011101
backend_option);
11021102
}
11031103

1104+
__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device,
1105+
pi_device peer_device) {
1106+
1107+
return pi2ur::piextEnablePeerAccess(command_device, peer_device);
1108+
}
1109+
1110+
__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device,
1111+
pi_device peer_device) {
1112+
1113+
return pi2ur::piextDisablePeerAccess(command_device, peer_device);
1114+
}
1115+
1116+
__SYCL_EXPORT pi_result piextPeerAccessGetInfo(
1117+
pi_device command_device, pi_device peer_device, pi_peer_attr attr,
1118+
size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) {
1119+
return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr,
1120+
ParamValueSize, ParamValue,
1121+
ParamValueSizeRet);
1122+
}
1123+
11041124
// This interface is not in Unified Runtime currently
11051125
__SYCL_EXPORT pi_result piTearDown(void *PluginParameter) {
11061126
return pi2ur::piTearDown(PluginParameter);
@@ -1251,6 +1271,11 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) {
12511271
_PI_API(piSamplerRetain)
12521272
_PI_API(piSamplerRelease)
12531273

1274+
// Peer to Peer
1275+
_PI_API(piextEnablePeerAccess)
1276+
_PI_API(piextDisablePeerAccess)
1277+
_PI_API(piextPeerAccessGetInfo)
1278+
12541279
_PI_API(piextPluginGetOpaqueData)
12551280
_PI_API(piTearDown)
12561281

0 commit comments

Comments
 (0)