From 861b6a0a78ef8b5ec4697c619cae5d86f601e7ec Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Dec 2024 12:12:00 +0000 Subject: [PATCH] UCP/GTEST: Workaround KSM ODP failure - 5 --- src/uct/ib/mlx5/dc/dc_mlx5.c | 4 ++++ src/uct/ib/mlx5/dc/dc_mlx5.h | 1 + src/uct/ib/mlx5/dc/dc_mlx5_ep.c | 22 +++++++++++++--------- src/uct/ib/mlx5/dc/dc_mlx5_ep.h | 5 ++++- src/uct/ib/mlx5/rc/rc_mlx5.h | 4 +++- src/uct/ib/mlx5/rc/rc_mlx5_ep.c | 22 ++++++++++++---------- src/uct/ib/mlx5/rc/rc_mlx5_iface.c | 2 ++ src/uct/ib/rc/verbs/rc_verbs.h | 3 ++- src/uct/ib/rc/verbs/rc_verbs_ep.c | 14 +++++++++++--- test/gtest/uct/ib/test_ib_md.cc | 20 ++++++++++++++------ 10 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/uct/ib/mlx5/dc/dc_mlx5.c b/src/uct/ib/mlx5/dc/dc_mlx5.c index 20db5709645..270f576d6c1 100644 --- a/src/uct/ib/mlx5/dc/dc_mlx5.c +++ b/src/uct/ib/mlx5/dc/dc_mlx5.c @@ -1067,6 +1067,10 @@ uct_dc_mlx5_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr addr->super.flags |= UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16; } + if (!md->config.enable_indirect_atomic) { + addr->super.flags |= UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET; + } + return UCS_OK; } diff --git a/src/uct/ib/mlx5/dc/dc_mlx5.h b/src/uct/ib/mlx5/dc/dc_mlx5.h index a2398832a32..3d5d2104866 100644 --- a/src/uct/ib/mlx5/dc/dc_mlx5.h +++ b/src/uct/ib/mlx5/dc/dc_mlx5.h @@ -73,6 +73,7 @@ typedef enum { UCT_DC_MLX5_IFACE_ADDR_DC_V2 = UCS_BIT(2), UCT_DC_MLX5_IFACE_ADDR_FLUSH_RKEY = UCS_BIT(3), UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16 = UCS_BIT(4), + UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET = UCS_BIT(5), UCT_DC_MLX5_IFACE_ADDR_DC_VERS = UCT_DC_MLX5_IFACE_ADDR_DC_V1 | UCT_DC_MLX5_IFACE_ADDR_DC_V2 } uct_dc_mlx5_iface_addr_flags_t; diff --git a/src/uct/ib/mlx5/dc/dc_mlx5_ep.c b/src/uct/ib/mlx5/dc/dc_mlx5_ep.c index 8a0a18d8e43..ff1b72a5513 100644 --- a/src/uct/ib/mlx5/dc/dc_mlx5_ep.c +++ b/src/uct/ib/mlx5/dc/dc_mlx5_ep.c @@ -24,13 +24,13 @@ uct_dc_mlx5_add_flush_remote(uct_dc_mlx5_ep_t *ep) } static UCS_F_ALWAYS_INLINE uint16_t -uct_dc_mlx5_atomic_offset(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +uct_dc_mlx5_atomic_offset(uct_dc_mlx5_ep_t *ep) { - if (uct_ib_iface_md(&iface->super.super.super)->config.enable_indirect_atomic) { - return uct_ib_md_atomic_offset(ep->atomic_mr_id); + if (ep->flags & UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET) { + return 0; } - return 0; + return uct_ib_md_atomic_offset(ep->atomic_mr_id); } static UCS_F_ALWAYS_INLINE size_t @@ -119,7 +119,7 @@ uct_dc_mlx5_iface_atomic_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep, uint64_t compare_mask, uint64_t compare, uint64_t swap_mask, uint64_t swap_add) { - uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep); + uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep); uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey, atomic_mr_offset, &remote_addr); @@ -475,7 +475,7 @@ uct_dc_mlx5_ep_put_short_inline(uct_ep_h tl_ep, const void *buffer, { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); - uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep); + uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep); size_t av_size; UCT_DC_MLX5_TXQP_DECL(txqp, txwq); @@ -505,7 +505,7 @@ ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload, #if HAVE_IBV_DM uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); - uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep); + uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep); ucs_status_t status; size_t av_size; UCT_DC_MLX5_TXQP_DECL(txqp, txwq); @@ -545,7 +545,7 @@ ssize_t uct_dc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); - uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep); + uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep); uct_rc_iface_send_desc_t *desc; size_t length; UCT_DC_MLX5_TXQP_DECL(txqp, txwq); @@ -569,7 +569,7 @@ ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); - uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep); + uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep); UCT_DC_MLX5_TXQP_DECL(txqp, txwq); UCT_CHECK_IOV_SIZE(iovcnt, UCT_RC_MLX5_RMA_MAX_IOV(UCT_IB_MLX5_AV_FULL_SIZE), @@ -1258,6 +1258,10 @@ UCS_CLASS_INIT_FUNC(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *iface, self->flush_rkey_hi = 0; } + if (if_addr->flags & UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET) { + self->flags |= UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET; + } + if ((iface->super.super.config.max_rd_atomic == 16) != !!(if_addr->flags & UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16)) { ucs_diag("max_rd_atomic values do not match on peers (local is %u), " diff --git a/src/uct/ib/mlx5/dc/dc_mlx5_ep.h b/src/uct/ib/mlx5/dc/dc_mlx5_ep.h index 05def9a2b75..6d1803ba8ce 100644 --- a/src/uct/ib/mlx5/dc/dc_mlx5_ep.h +++ b/src/uct/ib/mlx5/dc/dc_mlx5_ep.h @@ -49,9 +49,12 @@ enum uct_dc_mlx5_ep_flags { /* Flush remote operation should be invoked */ UCT_DC_MLX5_EP_FLAG_FLUSH_REMOTE = UCS_BIT(10), + /* No atomic offset on EP */ + UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET = UCS_BIT(11), + #if UCS_ENABLE_ASSERT /* EP was invalidated without DCI */ - UCT_DC_MLX5_EP_FLAG_INVALIDATED = UCS_BIT(11) + UCT_DC_MLX5_EP_FLAG_INVALIDATED = UCS_BIT(12) #else UCT_DC_MLX5_EP_FLAG_INVALIDATED = 0 #endif diff --git a/src/uct/ib/mlx5/rc/rc_mlx5.h b/src/uct/ib/mlx5/rc/rc_mlx5.h index c7bf4c89e11..5bead017a7b 100644 --- a/src/uct/ib/mlx5/rc/rc_mlx5.h +++ b/src/uct/ib/mlx5/rc/rc_mlx5.h @@ -27,7 +27,9 @@ enum { /* EP address includes flush_rkey value */ - UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY = UCS_BIT(0) + UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY = UCS_BIT(0), + /* atomic key has no offset */ + UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET = UCS_BIT(1) }; diff --git a/src/uct/ib/mlx5/rc/rc_mlx5_ep.c b/src/uct/ib/mlx5/rc/rc_mlx5_ep.c index 81d22401690..2752fb3c90e 100644 --- a/src/uct/ib/mlx5/rc/rc_mlx5_ep.c +++ b/src/uct/ib/mlx5/rc/rc_mlx5_ep.c @@ -667,14 +667,17 @@ ucs_status_t uct_rc_mlx5_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr) uct_ib_pack_uint24(rc_addr->tm_qp_num, ep->tm_qp.qp_num); } + ext_addr = ucs_derived_of(rc_addr, uct_rc_mlx5_ep_ext_address_t); if (uct_rc_iface_flush_rkey_enabled(&iface->super)) { - ext_addr = ucs_derived_of(rc_addr, - uct_rc_mlx5_ep_ext_address_t); ext_addr->flags = UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY; ptr = ext_addr + 1; *ucs_serialize_next(&ptr, uint16_t) = md->flush_rkey >> 16; } + if (!md->config.enable_indirect_atomic) { + ext_addr->flags |= UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET; + } + return UCS_OK; } @@ -810,14 +813,9 @@ uct_rc_mlx5_ep_connect_to_ep_v2(uct_ep_h tl_ep, return status; } - if (uct_ib_iface_md(&iface->super.super)->config.enable_indirect_atomic) { - ep->super.super.atomic_mr_offset = uct_ib_md_atomic_offset( - rc_addr->atomic_mr_id); - } else { - ep->super.super.atomic_mr_offset = 0; - } - - ep->super.super.flags |= UCT_RC_EP_FLAG_CONNECTED; + ep->super.super.atomic_mr_offset = uct_ib_md_atomic_offset( + rc_addr->atomic_mr_id); + ep->super.super.flags |= UCT_RC_EP_FLAG_CONNECTED; addr_length = UCS_PARAM_VALUE(UCT_EP_CONNECT_TO_EP_PARAM_FIELD, params, ep_addr_length, EP_ADDR_LENGTH, @@ -837,6 +835,10 @@ uct_rc_mlx5_ep_connect_to_ep_v2(uct_ep_h tl_ep, ep->super.super.flush_rkey = UCT_IB_MD_INVALID_FLUSH_RKEY; } + if (ext_addr->flags & UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET) { + ep->super.super.atomic_mr_offset = 0; + } + return UCS_OK; } diff --git a/src/uct/ib/mlx5/rc/rc_mlx5_iface.c b/src/uct/ib/mlx5/rc/rc_mlx5_iface.c index f8677511c29..20d15cf47bf 100644 --- a/src/uct/ib/mlx5/rc/rc_mlx5_iface.c +++ b/src/uct/ib/mlx5/rc/rc_mlx5_iface.c @@ -181,6 +181,8 @@ static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr if (uct_rc_iface_flush_rkey_enabled(&iface->super)) { ep_addr_len = sizeof(uct_rc_mlx5_ep_ext_address_t) + sizeof(uint16_t); + } else if (!uct_ib_iface_md(&rc_iface->super)->config.enable_indirect_atomic) { + ep_addr_len = sizeof(uct_rc_mlx5_ep_ext_address_t); } else { ep_addr_len = sizeof(uct_rc_mlx5_ep_address_t); } diff --git a/src/uct/ib/rc/verbs/rc_verbs.h b/src/uct/ib/rc/verbs/rc_verbs.h index 8b5c0e700b8..41e26129bc2 100644 --- a/src/uct/ib/rc/verbs/rc_verbs.h +++ b/src/uct/ib/rc/verbs/rc_verbs.h @@ -23,7 +23,8 @@ enum { - UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR = UCS_BIT(0) + UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR = UCS_BIT(0), + UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET = UCS_BIT(1) }; diff --git a/src/uct/ib/rc/verbs/rc_verbs_ep.c b/src/uct/ib/rc/verbs/rc_verbs_ep.c index 72ae7cbf7ea..1088b234fff 100644 --- a/src/uct/ib/rc/verbs/rc_verbs_ep.c +++ b/src/uct/ib/rc/verbs/rc_verbs_ep.c @@ -595,6 +595,9 @@ ucs_status_t uct_rc_verbs_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr) rc_addr->super.flags |= UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR; rc_addr->atomic_mr_id = uct_ib_md_get_atomic_mr_id(md); rc_addr->flush_rkey_hi = md->flush_rkey >> 16; + if (!md->config.enable_indirect_atomic) { + rc_addr->super.flags |= UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET; + } } return UCS_OK; } @@ -652,9 +655,14 @@ uct_rc_verbs_ep_connect_to_ep_v2(uct_ep_h tl_ep, } if (rc_addr->super.flags & UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR) { - ep->super.atomic_mr_offset = uct_ib_md_atomic_offset(rc_addr->atomic_mr_id); - ep->super.flush_rkey = ((uint32_t)rc_addr->flush_rkey_hi << 16) + - ((uint32_t)rc_addr->atomic_mr_id << 8); + if (rc_addr->super.flags & UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET) { + ep->super.atomic_mr_offset = 0; + } else { + ep->super.atomic_mr_offset = uct_ib_md_atomic_offset( + rc_addr->atomic_mr_id); + } + ep->super.flush_rkey = ((uint32_t)rc_addr->flush_rkey_hi << 16) + + ((uint32_t)rc_addr->atomic_mr_id << 8); } else { ep->super.atomic_mr_offset = 0; ep->super.flush_rkey = UCT_IB_MD_INVALID_FLUSH_RKEY; diff --git a/test/gtest/uct/ib/test_ib_md.cc b/test/gtest/uct/ib/test_ib_md.cc index 1138ba8a0bd..37636e7b770 100644 --- a/test/gtest/uct/ib/test_ib_md.cc +++ b/test/gtest/uct/ib/test_ib_md.cc @@ -35,7 +35,8 @@ class test_ib_md : public test_md #ifdef HAVE_MLX5_DV uint32_t m_mlx5_flags = 0; #endif - void check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected); + void check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected_to_have_atomic, + bool is_expected_to_have_auxiliary_key); }; void test_ib_md::init() { @@ -53,17 +54,23 @@ const uct_ib_md_t &test_ib_md::ib_md() const { return *ucs_derived_of(md(), uct_ib_md_t); } -void test_ib_md::check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected) +void test_ib_md::check_mlx5_mr(uct_ib_mem_t *ib_memh, + bool is_expected_to_have_atomic, + bool is_expected_to_have_auxiliary_key) { #if HAVE_DEVX uct_ib_mlx5_devx_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_devx_mem_t); - if (is_expected) { + if (is_expected_to_have_atomic) { EXPECT_NE(nullptr, memh->atomic_dvmr); EXPECT_NE(UCT_IB_INVALID_MKEY, memh->atomic_rkey); } else { EXPECT_EQ(nullptr, memh->atomic_dvmr); - EXPECT_EQ(UCT_IB_INVALID_MKEY, memh->atomic_rkey); + if (is_expected_to_have_auxiliary_key) { + EXPECT_NE(UCT_IB_INVALID_MKEY, memh->atomic_rkey); + } else { + EXPECT_EQ(UCT_IB_INVALID_MKEY, memh->atomic_rkey); + } } EXPECT_EQ(nullptr, memh->smkey_mr); @@ -119,14 +126,15 @@ void test_ib_md::ib_md_umr_check(void *rkey_buffer, bool amo_access, EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC); } - check_mlx5_mr(ib_memh, false); + check_mlx5_mr(ib_memh, false, ib_md().relaxed_order); status = uct_md_mkey_pack(md(), memh, rkey_buffer); EXPECT_UCS_OK(status); status = uct_md_mkey_pack(md(), memh, rkey_buffer); EXPECT_UCS_OK(status); - check_mlx5_mr(ib_memh, (amo_access && has_ksm()) || ib_md().relaxed_order); + check_mlx5_mr(ib_memh, (amo_access && has_ksm()) || ib_md().relaxed_order, + ib_md().relaxed_order); status = uct_md_mem_dereg(md(), memh); EXPECT_UCS_OK(status);