From a04e4b118bb0a01bbcd8dc17a727e48e81009b8d Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 24 Jun 2026 18:39:36 -0400 Subject: [PATCH] rocr: Close shareable dmabuf fds after use instead of holding them open Defer creating the shareable dmabuf_fd until VMemorySetAccessPerHandle needs it for peer import, then close it again via a scope guard. This avoids leaking open fds for the lifetime of a VMM shareable handle while keeping fabric-handle export/import on the existing BO-based path. Co-authored-by: Cursor --- .../core/driver/kfd/amd_kfd_driver.cpp | 31 ++++++++-------- .../driver/virtio/amd_kfd_virtio_driver.cpp | 2 +- .../core/driver/xdna/amd_xdna_driver.cpp | 2 +- .../runtime/hsa-runtime/core/inc/driver.h | 5 ++- .../hsa-runtime/core/runtime/runtime.cpp | 37 +++++++++++++++++-- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index 0d97e589825..5f3c3ce8f6a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -540,7 +540,7 @@ hsa_status_t KfdDriver::ImportMemoryHandle(const core::Agent& agent, core::Drive switch (type) { case core::ShareType::DMABUF_FD: { const auto& gpu_agent = static_cast(agent); - const int dmabuf_fd = *static_cast(import_handle); + const int dmabuf_fd = static_cast(import_handle)->dmabuf_fd; HsaHandleImportDesc desc = {}; desc.device_handle = gpu_agent.libThunkDev(); @@ -564,7 +564,7 @@ hsa_status_t KfdDriver::ImportMemoryHandle(const core::Agent& agent, core::Drive return HSA_STATUS_ERROR; #endif const auto& gpu_agent = static_cast(agent); - const auto fabric_handle = *static_cast(import_handle); + const auto fabric_handle = static_cast(import_handle)->fabric_handle; HsaHandleImportDesc desc = {}; desc.device_handle = gpu_agent.libThunkDev(); @@ -626,8 +626,9 @@ hsa_status_t KfdDriver::CreateShareableHandle(void* va, void* mem, size_t size, /* * On Linux, export via KFD first (EXPORT_MEMORY_FLAGS_KFD_DMABUF) so the KFD section of the - * AMDGPU driver has a BO entry, then import into DRM. Re-export from DRM for the shareable fd. - * On Windows, KFD export and DRM export are equivalent. + * AMDGPU driver has a BO entry, then import into DRM. The shareable dmabuf_fd itself is created + * lazily when access is set (see Runtime::VMemorySetAccessPerHandle), so it is not re-exported + * here. On Windows, KFD export and DRM export are equivalent. */ core::DriverMemoryHandle kfd_alloc = {}; @@ -639,9 +640,12 @@ hsa_status_t KfdDriver::CreateShareableHandle(void* va, void* mem, size_t size, return HSA_STATUS_ERROR; } + core::DriverMemoryHandle source_handle = {}; + source_handle.dmabuf_fd = source_fd; + core::DriverMemoryHandle targetHandle = {}; hsa_status_t ret = ImportMemoryHandle(agent, &targetHandle, core::ShareType::DMABUF_FD, - &source_fd, mem); + &source_handle, mem); #if defined(__linux__) rocr::os::DmaBufClose(source_fd); #endif @@ -649,22 +653,13 @@ hsa_status_t KfdDriver::CreateShareableHandle(void* va, void* mem, size_t size, return ret; assert(targetHandle.size == size); - int shareable_fd = source_fd; #if defined(__linux__) - // Re-export from DRM; the KFD fd was transient and is already closed. - ret = ExportMemoryHandle(agent, targetHandle, core::ShareType::DMABUF_FD, 0, - &shareable_fd); - if (ret != HSA_STATUS_SUCCESS) { - DestroyMemoryHandle(&targetHandle); - return ret; - } /* * We converted mem into a driver handle. The driver handle will keep the reference count * inside the KMD so we can free the original KFD allocation. */ if (HSAKMT_CALL(hsaKmtFreeMemory(mem, size)) != HSAKMT_STATUS_SUCCESS) { DestroyMemoryHandle(&targetHandle); - rocr::os::DmaBufClose(shareable_fd); return HSA_STATUS_ERROR; } #endif @@ -673,12 +668,15 @@ hsa_status_t KfdDriver::CreateShareableHandle(void* va, void* mem, size_t size, const auto memhandle = reinterpret_cast(targetHandle.handle); if (HSAKMT_CALL(hsaKmtMemoryGetCpuAddr(devhandle, memhandle, &handle->mmap_offset)) != HSAKMT_STATUS_SUCCESS) { DestroyMemoryHandle(&targetHandle); - rocr::os::DmaBufClose(shareable_fd); return HSA_STATUS_ERROR; } handle->handle = targetHandle.handle; - handle->dmabuf_fd = shareable_fd; + /* + * Do not hold a shareable dmabuf_fd open for the lifetime of the handle. It is created lazily + * (and closed again) when access is set in Runtime::VMemorySetAccessPerHandle. + */ + handle->dmabuf_fd = -1; handle->size = size; return HSA_STATUS_SUCCESS; } @@ -686,6 +684,7 @@ hsa_status_t KfdDriver::CreateShareableHandle(void* va, void* mem, size_t size, hsa_status_t KfdDriver::DestroyMemoryHandle(core::DriverMemoryHandle* handle) { if (handle->dmabuf_fd >= 0) { hsa_status_t status = rocr::os::DmaBufClose(handle->dmabuf_fd); + handle->dmabuf_fd = -1; if (status != HSA_STATUS_SUCCESS) return status; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp index 017bbd41ac1..d9aa4bff734 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp @@ -508,7 +508,7 @@ hsa_status_t KfdVirtioDriver::ImportMemoryHandle(const core::Agent& agent, core: switch (type) { case core::ShareType::DMABUF_FD: { - const int dmabuf_fd = *static_cast(import_handle); + const int dmabuf_fd = static_cast(import_handle)->dmabuf_fd; const auto& gpu_agent = static_cast(agent); amdgpu_bo_import_result res; auto ret = vamdgpu_bo_import( diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp index c9f3215c71b..22e076c0c20 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -860,7 +860,7 @@ hsa_status_t XdnaDriver::ImportMemoryHandle(const core::Agent& agent, core::Driv switch (type) { case core::ShareType::DMABUF_FD: { - const int dmabuf_fd = *static_cast(import_handle); + const int dmabuf_fd = static_cast(import_handle)->dmabuf_fd; drm_prime_handle import_params = {}; import_params.handle = AMDXDNA_INVALID_BO_HANDLE; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h index 9741b771c06..e3145a33499 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h @@ -246,8 +246,9 @@ class Driver { /// @param[out] handle handle to the imported memory; @p handle->size is set to the /// imported allocation size in bytes /// @param[in] type @ref ShareType to import - /// @param[in] import_handle input handle; @p int* for @p DMABUF_FD, - /// @p hsa_fabric_handle_t* for @p FABRIC_HANDLE + /// @param[in] import_handle input handle; @p DriverMemoryHandle* whose + /// @p dmabuf_fd field is read for @p DMABUF_FD and whose + /// @p fabric_handle field is read for @p FABRIC_HANDLE /// @param[in] mem address of existing buffer, used to bypass import virtual hsa_status_t ImportMemoryHandle(const core::Agent& agent, DriverMemoryHandle* handle, ShareType type, void* import_handle, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 63745b47527..9ba17fb55fa 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -4022,11 +4022,11 @@ Runtime::MappedHandleAllowedAgent::MappedHandleAllowedAgent( if (memHandle->imported && memHandle->is_fabric_handle) { status = targetAgent->driver().ImportMemoryHandle( *targetAgent, &driver_handle, ShareType::FABRIC_HANDLE, - &memHandle->driver_handle.fabric_handle); + &memHandle->driver_handle); } else { status = targetAgent->driver().ImportMemoryHandle( *targetAgent, &driver_handle, ShareType::DMABUF_FD, - &memHandle->driver_handle.dmabuf_fd); + &memHandle->driver_handle); } if (status != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(status, "Failed to import memory"); @@ -4056,7 +4056,7 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi #endif auto agentOwner = mappedHandle->mem_handle->agentOwner(); int mmap_fd = -1; - /* Do not check the return value of GetDeviceFd. We do not need mmap_fd so it is valid for mmap_fd to be -1*/ + /* Do not check the return value of GetDeviceFd. We do not need mmap_fd in some cases, so it is valid for mmap_fd to be -1*/ agentOwner->driver().GetDeviceFd(agentOwner->node_id(), &mmap_fd); if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mmap_fd, @@ -4153,6 +4153,11 @@ Runtime::MemoryHandle::MemoryHandle(hsa_fabric_handle_t fabric_handle) Runtime::MemoryHandle::~MemoryHandle() { if (driver_handle.handle != 0 && region != nullptr) agentOwner()->driver().DestroyMemoryHandle(&driver_handle); + + if (driver_handle.dmabuf_fd >= 0) { + os::DmaBufClose(driver_handle.dmabuf_fd); + driver_handle.dmabuf_fd = -1; + } } @@ -4161,6 +4166,32 @@ hsa_status_t Runtime::VMemorySetAccessPerHandle(void *va, MappedHandle &mappedHandle, const hsa_amd_memory_access_desc_t *desc, const size_t desc_cnt) { + MemoryHandle *memHandle = mappedHandle.mem_handle; + + /* + * For locally-created shareable handles CreateShareableHandle leaves dmabuf_fd as -1 to avoid + * holding an fd open for the lifetime of the handle. Export it lazily here so the target agents + * can import the memory below, then close it again before returning. + */ + bool created_dmabuf_fd = false; + if (!memHandle->imported && memHandle->driver_handle.dmabuf_fd == -1) { + Agent *ownerAgent = memHandle->agentOwner(); + int dmabuf_fd = -1; + hsa_status_t status = ownerAgent->driver().ExportMemoryHandle( + *ownerAgent, memHandle->driver_handle, ShareType::DMABUF_FD, 0, &dmabuf_fd); + if (status != HSA_STATUS_SUCCESS) + return status; + memHandle->driver_handle.dmabuf_fd = dmabuf_fd; + created_dmabuf_fd = true; + } + + MAKE_SCOPE_GUARD([&]() { + if (created_dmabuf_fd) { + os::DmaBufClose(memHandle->driver_handle.dmabuf_fd); + memHandle->driver_handle.dmabuf_fd = -1; + } + }); + for (int i = 0; i < desc_cnt; i++) { Agent *targetAgent = Agent::Convert(desc[i].agent_handle);