From 8d9b4207e75888aa5c24b6ae256a279e04cd3433 Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Thu, 8 Jan 2026 16:51:45 +0200 Subject: [PATCH] Fix multi-rail map for endpoints/rail. std::unordered_map does not allow duplicate keys, so device_to_rail_map kept only the last rail added for each device. As a result, a device always mapped to a single rail. Fix this by storing a list of rails per device, so that all rails for a device are added to device_to_rail_map. Signed-off-by: Jeeja KP --- .../libfabric/libfabric_rail_manager.cpp | 30 +++++++++++-------- src/utils/libfabric/libfabric_rail_manager.h | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/utils/libfabric/libfabric_rail_manager.cpp b/src/utils/libfabric/libfabric_rail_manager.cpp index e2d639c6f5..5fa4a28a06 100644 --- a/src/utils/libfabric/libfabric_rail_manager.cpp +++ b/src/utils/libfabric/libfabric_rail_manager.cpp @@ -91,7 +91,7 @@ nixlLibfabricRailManager::createDataRails(const std::vector &fabric fabric_devices[i], provider_name, static_cast(i))); // Initialize fabric device mapping - device_to_rail_map[fabric_devices[i]] = i; + device_to_rail_map[fabric_devices[i]].push_back(i); NIXL_DEBUG << "Created data rail " << i << " (device: " << fabric_devices[i] << ", provider: " << provider_name << ")"; @@ -328,19 +328,22 @@ nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, for (const std::string &device_name : gpu_nics) { auto it = device_to_rail_map.find(device_name); if (it != device_to_rail_map.end()) { - // Bounds check: ensure rail index is valid - if (it->second < data_rails_.size()) { - gpu_rails.push_back(it->second); - NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id - << " mapped to rail " << it->second << " (fabric device: " << device_name - << ")"; - } else { - NIXL_WARN << "Fabric device " << device_name << " maps to rail " << it->second - << " but only " << data_rails_.size() << " rails available"; + const std::vector &rails = it->second; + for (size_t rail_id : rails) { + // Bounds check: ensure rail 
index is valid + if (rail_id < data_rails_.size()) { + gpu_rails.push_back(rail_id); + NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id + << " mapped to rail " << rail_id + << " (fabric device: " << device_name << ")"; + } else { + NIXL_WARN << "Fabric device " << device_name << " maps to rail " << rail_id + << " but only " << data_rails_.size() << " rails available"; + } } } else { - NIXL_WARN << "Fabric device " << device_name << " not found in rail mapping for GPU " - << gpu_id; + NIXL_WARN << "Fabric device " << device_name + << " not found in rail mapping for GPU " << gpu_id; } } @@ -420,7 +423,8 @@ nixlLibfabricRailManager::registerMemory(void *buffer, struct fid_mr *mr; uint64_t key; - nixl_status_t status = data_rails_[rail_idx]->registerMemory(buffer, length, hmem_hint, gpu_id, &mr, &key); + nixl_status_t status = + data_rails_[rail_idx]->registerMemory(buffer, length, hmem_hint, gpu_id, &mr, &key); if (status != NIXL_SUCCESS) { NIXL_ERROR << "Failed to register memory on rail " << rail_idx; // Cleanup already registered MRs diff --git a/src/utils/libfabric/libfabric_rail_manager.h b/src/utils/libfabric/libfabric_rail_manager.h index fe68e962d0..c8cb290c7d 100644 --- a/src/utils/libfabric/libfabric_rail_manager.h +++ b/src/utils/libfabric/libfabric_rail_manager.h @@ -314,7 +314,7 @@ class nixlLibfabricRailManager { std::unique_ptr topology; // Fabric device to rail mapping - std::unordered_map device_to_rail_map; + std::unordered_map> device_to_rail_map; // Active Rail Tracking System std::unordered_set active_rails_;