Skip to content

Commit 7eb40c4

Browse files
committed
Do not call urDeviceRetain() in case of subdevices
urDeviceRetain(MDevice) should not be called in the constructor of device_impl in case of subdevices, because their reference counter does not drop to zero and subdevices are not destroyed nor freed now, what causes memory leaks. It fixes URT-961. Signed-off-by: Lukasz Dorau <[email protected]>
1 parent d4f5ac2 commit 7eb40c4

File tree

4 files changed

+34
-5
lines changed

4 files changed

+34
-5
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,18 @@ namespace detail {
2222
/// Constructs a SYCL device instance using the provided
2323
/// UR device instance.
2424
device_impl::device_impl(ur_device_handle_t Device, platform_impl &Platform,
25-
device_impl::private_tag)
25+
device_impl::private_tag, bool IsSubDevice)
2626
: MDevice(Device), MPlatform(Platform),
2727
// No need to set MRootDevice when MAlwaysRootDevice is true
2828
MRootDevice(Platform.MAlwaysRootDevice
2929
? nullptr
3030
: get_info_impl<UR_DEVICE_INFO_PARENT_DEVICE>()),
3131
// TODO catch an exception and put it to list of asynchronous exceptions:
3232
MCache{*this} {
33+
if (IsSubDevice) {
34+
return;
35+
}
36+
3337
// Interoperability Constructor already calls DeviceRetain in
3438
// urDeviceCreateWithNativeHandle.
3539
getAdapter().call<UrApiKind::urDeviceRetain>(MDevice);
@@ -164,7 +168,7 @@ std::vector<device> device_impl::create_sub_devices(
164168
std::for_each(SubDevices.begin(), SubDevices.end(),
165169
[&res, this](const ur_device_handle_t &a_ur_device) {
166170
device sycl_device = detail::createSyclObjFromImpl<device>(
167-
MPlatform.getOrMakeDeviceImpl(a_ur_device));
171+
MPlatform.getOrMakeSubDeviceImpl(a_ur_device));
168172
res.push_back(sycl_device);
169173
});
170174
return res;

sycl/source/detail/device_impl.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,10 +429,11 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
429429
/// Constructs a SYCL device instance using the provided
430430
/// UR device instance.
431431
//
432-
// Must be called through `platform_impl::getOrMakeDeviceImpl` only.
432+
// Must be called through `platform_impl::getOrMakeDeviceImpl` or
433+
// `platform_impl::getOrMakeSubDeviceImpl` only.
433434
// `private_tag` ensures that is true.
434435
explicit device_impl(ur_device_handle_t Device, platform_impl &Platform,
435-
private_tag);
436+
private_tag, bool IsSubDevice);
436437

437438
~device_impl();
438439

sycl/source/detail/platform_impl.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,20 @@ device_impl &platform_impl::getOrMakeDeviceImpl(ur_device_handle_t UrDevice) {
306306

307307
// Otherwise make the impl
308308
MDevices.emplace_back(std::make_shared<device_impl>(
309-
UrDevice, *this, device_impl::private_tag{}));
309+
UrDevice, *this, device_impl::private_tag{}, false /*IsSubDevice*/));
310+
311+
return *MDevices.back();
312+
}
313+
314+
device_impl &platform_impl::getOrMakeSubDeviceImpl(ur_device_handle_t UrSubDevice) {
315+
const std::lock_guard<std::mutex> Guard(MDeviceMapMutex);
316+
// If we've already seen this device, return the impl
317+
if (device_impl *Result = getDeviceImplHelper(UrSubDevice))
318+
return *Result;
319+
320+
// Otherwise make the impl
321+
MDevices.emplace_back(std::make_shared<device_impl>(
322+
UrSubDevice, *this, device_impl::private_tag{}, true /*IsSubDevice*/));
310323

311324
return *MDevices.back();
312325
}

sycl/source/detail/platform_impl.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,17 @@ class platform_impl : public std::enable_shared_from_this<platform_impl> {
172172
/// \return a device_impl* corresponding to the device
173173
device_impl &getOrMakeDeviceImpl(ur_device_handle_t UrDevice);
174174

175+
/// Queries the device_impl cache to either return a shared_ptr
176+
/// for the device_impl corresponding to the UrSubDevice or add
177+
/// a new entry to the cache
178+
///
179+
/// \param UrSubDevice is the UrSubDevice whose impl is requested
180+
///
181+
/// \param PlatormImpl is the Platform for that Device
182+
///
183+
/// \return a device_impl* corresponding to the subdevice
184+
device_impl &getOrMakeSubDeviceImpl(ur_device_handle_t UrSubDevice);
185+
175186
/// Queries the cache to see if the specified UR platform has been seen
176187
/// before. If so, return the cached platform_impl, otherwise create a new
177188
/// one and cache it.

0 commit comments

Comments
 (0)