Skip to content

Commit

Permalink
UCP/GTEST: Workaround KSM ODP failure - 5
Browse files Browse the repository at this point in the history
  • Loading branch information
Artemy-Mellanox committed Dec 19, 2024
1 parent e0e2d75 commit dda4a34
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 31 deletions.
4 changes: 4 additions & 0 deletions src/uct/ib/mlx5/dc/dc_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,10 @@ uct_dc_mlx5_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr
addr->super.flags |= UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16;
}

if (!md->config.enable_indirect_atomic) {
addr->super.flags |= UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET;
}

return UCS_OK;
}

Expand Down
1 change: 1 addition & 0 deletions src/uct/ib/mlx5/dc/dc_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ typedef enum {
UCT_DC_MLX5_IFACE_ADDR_DC_V2 = UCS_BIT(2),
UCT_DC_MLX5_IFACE_ADDR_FLUSH_RKEY = UCS_BIT(3),
UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16 = UCS_BIT(4),
UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET = UCS_BIT(5),
UCT_DC_MLX5_IFACE_ADDR_DC_VERS = UCT_DC_MLX5_IFACE_ADDR_DC_V1 |
UCT_DC_MLX5_IFACE_ADDR_DC_V2
} uct_dc_mlx5_iface_addr_flags_t;
Expand Down
22 changes: 13 additions & 9 deletions src/uct/ib/mlx5/dc/dc_mlx5_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ uct_dc_mlx5_add_flush_remote(uct_dc_mlx5_ep_t *ep)
}

/*
 * Return the atomic MR offset to use for endpoint @a ep.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped, so two variants of the function are interleaved below - confirm
 * against the repository which lines are current:
 *  - the (iface, ep) variant consults the local MD: it returns
 *    uct_ib_md_atomic_offset(ep->atomic_mr_id) when
 *    config.enable_indirect_atomic is set and 0 otherwise;
 *  - the (ep) variant consults the endpoint only: it returns 0 when
 *    ep->flags has UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET and
 *    uct_ib_md_atomic_offset(ep->atomic_mr_id) otherwise.
 */
static UCS_F_ALWAYS_INLINE uint16_t
uct_dc_mlx5_atomic_offset(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
uct_dc_mlx5_atomic_offset(uct_dc_mlx5_ep_t *ep)
{
/* Variant keyed on the local MD configuration */
if (uct_ib_iface_md(&iface->super.super.super)->config.enable_indirect_atomic) {
return uct_ib_md_atomic_offset(ep->atomic_mr_id);
/* Variant keyed on the per-endpoint flag */
if (ep->flags & UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET) {
return 0;
}

return 0;
return uct_ib_md_atomic_offset(ep->atomic_mr_id);
}

static UCS_F_ALWAYS_INLINE size_t
Expand Down Expand Up @@ -119,7 +119,7 @@ uct_dc_mlx5_iface_atomic_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
uint64_t compare_mask, uint64_t compare,
uint64_t swap_mask, uint64_t swap_add)
{
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep);
uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey,
atomic_mr_offset,
&remote_addr);
Expand Down Expand Up @@ -475,7 +475,7 @@ uct_dc_mlx5_ep_put_short_inline(uct_ep_h tl_ep, const void *buffer,
{
uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep);
size_t av_size;
UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

Expand Down Expand Up @@ -505,7 +505,7 @@ ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload,
#if HAVE_IBV_DM
uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep);
ucs_status_t status;
size_t av_size;
UCT_DC_MLX5_TXQP_DECL(txqp, txwq);
Expand Down Expand Up @@ -545,7 +545,7 @@ ssize_t uct_dc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
{
uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep);
uct_rc_iface_send_desc_t *desc;
size_t length;
UCT_DC_MLX5_TXQP_DECL(txqp, txwq);
Expand All @@ -569,7 +569,7 @@ ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size
{
uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(iface, ep);
uint16_t atomic_mr_offset = uct_dc_mlx5_atomic_offset(ep);
UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

UCT_CHECK_IOV_SIZE(iovcnt, UCT_RC_MLX5_RMA_MAX_IOV(UCT_IB_MLX5_AV_FULL_SIZE),
Expand Down Expand Up @@ -1258,6 +1258,10 @@ UCS_CLASS_INIT_FUNC(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *iface,
self->flush_rkey_hi = 0;
}

if (if_addr->flags & UCT_DC_MLX5_IFACE_ADDR_NO_ATOMIC_OFFSET) {
self->flags |= UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET;
}

if ((iface->super.super.config.max_rd_atomic == 16) !=
!!(if_addr->flags & UCT_DC_MLX5_IFACE_ADDR_MAX_RD_ATOMIC_16)) {
ucs_diag("max_rd_atomic values do not match on peers (local is %u), "
Expand Down
5 changes: 4 additions & 1 deletion src/uct/ib/mlx5/dc/dc_mlx5_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,12 @@ enum uct_dc_mlx5_ep_flags {
/* Flush remote operation should be invoked */
UCT_DC_MLX5_EP_FLAG_FLUSH_REMOTE = UCS_BIT(10),

/* No atomic offset on EP */
UCT_DC_MLX5_EP_FLAG_NO_ATOMIC_OFFSET = UCS_BIT(11),

#if UCS_ENABLE_ASSERT
/* EP was invalidated without DCI */
UCT_DC_MLX5_EP_FLAG_INVALIDATED = UCS_BIT(11)
UCT_DC_MLX5_EP_FLAG_INVALIDATED = UCS_BIT(12)
#else
UCT_DC_MLX5_EP_FLAG_INVALIDATED = 0
#endif
Expand Down
4 changes: 3 additions & 1 deletion src/uct/ib/mlx5/rc/rc_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

/*
 * Flag bits stored in the 'flags' field of the extended RC/MLX5 endpoint
 * address (uct_rc_mlx5_ep_ext_address_t).
 *
 * NOTE(review): the FLUSH_RKEY enumerator appears twice (without and with a
 * trailing comma); this looks like the removed and added lines of a diff
 * shown together - confirm against the repository.
 */
enum {
/* EP address includes flush_rkey value */
UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY = UCS_BIT(0)
UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY = UCS_BIT(0),
/* atomic key has no offset */
UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET = UCS_BIT(1)
};


Expand Down
22 changes: 12 additions & 10 deletions src/uct/ib/mlx5/rc/rc_mlx5_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -667,14 +667,17 @@ ucs_status_t uct_rc_mlx5_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr)
uct_ib_pack_uint24(rc_addr->tm_qp_num, ep->tm_qp.qp_num);
}

ext_addr = ucs_derived_of(rc_addr, uct_rc_mlx5_ep_ext_address_t);
if (uct_rc_iface_flush_rkey_enabled(&iface->super)) {
ext_addr = ucs_derived_of(rc_addr,
uct_rc_mlx5_ep_ext_address_t);
ext_addr->flags = UCT_RC_MLX5_EP_ADDR_FLAG_FLUSH_RKEY;
ptr = ext_addr + 1;
*ucs_serialize_next(&ptr, uint16_t) = md->flush_rkey >> 16;
}

if (!md->config.enable_indirect_atomic) {
ext_addr->flags |= UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET;
}

return UCS_OK;
}

Expand Down Expand Up @@ -810,14 +813,9 @@ uct_rc_mlx5_ep_connect_to_ep_v2(uct_ep_h tl_ep,
return status;
}

if (uct_ib_iface_md(&iface->super.super)->config.enable_indirect_atomic) {
ep->super.super.atomic_mr_offset = uct_ib_md_atomic_offset(
rc_addr->atomic_mr_id);
} else {
ep->super.super.atomic_mr_offset = 0;
}

ep->super.super.flags |= UCT_RC_EP_FLAG_CONNECTED;
ep->super.super.atomic_mr_offset = uct_ib_md_atomic_offset(
rc_addr->atomic_mr_id);
ep->super.super.flags |= UCT_RC_EP_FLAG_CONNECTED;

addr_length = UCS_PARAM_VALUE(UCT_EP_CONNECT_TO_EP_PARAM_FIELD, params,
ep_addr_length, EP_ADDR_LENGTH,
Expand All @@ -837,6 +835,10 @@ uct_rc_mlx5_ep_connect_to_ep_v2(uct_ep_h tl_ep,
ep->super.super.flush_rkey = UCT_IB_MD_INVALID_FLUSH_RKEY;
}

if (ext_addr->flags & UCT_RC_MLX5_EP_ADDR_FLAG_NO_ATOMIC_OFFSET) {
ep->super.super.atomic_mr_offset = 0;
}

return UCS_OK;
}

Expand Down
2 changes: 2 additions & 0 deletions src/uct/ib/mlx5/rc/rc_mlx5_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr

if (uct_rc_iface_flush_rkey_enabled(&iface->super)) {
ep_addr_len = sizeof(uct_rc_mlx5_ep_ext_address_t) + sizeof(uint16_t);
} else if (!uct_ib_iface_md(&rc_iface->super)->config.enable_indirect_atomic) {
ep_addr_len = sizeof(uct_rc_mlx5_ep_ext_address_t);
} else {
ep_addr_len = sizeof(uct_rc_mlx5_ep_address_t);
}
Expand Down
3 changes: 2 additions & 1 deletion src/uct/ib/rc/verbs/rc_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@


/*
 * Flag bits stored in the 'flags' field of the RC/verbs endpoint address.
 *
 * NOTE(review): the HAS_ATOMIC_MR enumerator appears twice (without and with
 * a trailing comma); this looks like the removed and added lines of a diff
 * shown together - confirm against the repository.
 */
enum {
/* Address carries atomic_mr_id and flush_rkey_hi */
UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR = UCS_BIT(0)
UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR = UCS_BIT(0),
/* Atomic MR offset is not used; the connecting side uses offset 0 */
UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET = UCS_BIT(1)
};


Expand Down
14 changes: 11 additions & 3 deletions src/uct/ib/rc/verbs/rc_verbs_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,9 @@ ucs_status_t uct_rc_verbs_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr)
rc_addr->super.flags |= UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR;
rc_addr->atomic_mr_id = uct_ib_md_get_atomic_mr_id(md);
rc_addr->flush_rkey_hi = md->flush_rkey >> 16;
if (!md->config.enable_indirect_atomic) {
rc_addr->super.flags |= UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET;
}
}
return UCS_OK;
}
Expand Down Expand Up @@ -652,9 +655,14 @@ uct_rc_verbs_ep_connect_to_ep_v2(uct_ep_h tl_ep,
}

if (rc_addr->super.flags & UCT_RC_VERBS_ADDR_HAS_ATOMIC_MR) {
ep->super.atomic_mr_offset = uct_ib_md_atomic_offset(rc_addr->atomic_mr_id);
ep->super.flush_rkey = ((uint32_t)rc_addr->flush_rkey_hi << 16) +
((uint32_t)rc_addr->atomic_mr_id << 8);
if (rc_addr->super.flags & UCT_RC_VERBS_ADDR_NO_ATOMIC_OFFSET) {
ep->super.atomic_mr_offset = 0;
} else {
ep->super.atomic_mr_offset = uct_ib_md_atomic_offset(
rc_addr->atomic_mr_id);
}
ep->super.flush_rkey = ((uint32_t)rc_addr->flush_rkey_hi << 16) +
((uint32_t)rc_addr->atomic_mr_id << 8);
} else {
ep->super.atomic_mr_offset = 0;
ep->super.flush_rkey = UCT_IB_MD_INVALID_FLUSH_RKEY;
Expand Down
19 changes: 13 additions & 6 deletions test/gtest/uct/ib/test_ib_md.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class test_ib_md : public test_md
#ifdef HAVE_MLX5_DV
uint32_t m_mlx5_flags = 0;
#endif
void check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected);
void check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected_to_have_atomic,
bool is_expected_to_have_auxiliary_key);
};

void test_ib_md::init() {
Expand All @@ -53,16 +54,21 @@ const uct_ib_md_t &test_ib_md::ib_md() const {
return *ucs_derived_of(md(), uct_ib_md_t);
}

void test_ib_md::check_mlx5_mr(uct_ib_mem_t *ib_memh, bool is_expected)
void test_ib_md::check_mlx5_mr(uct_ib_mem_t *ib_memh,
bool is_expected_to_have_atomic,
bool is_expected_to_have_auxiliary_key)
{
#if HAVE_DEVX
uct_ib_mlx5_devx_mem_t *memh = ucs_derived_of(ib_memh,
uct_ib_mlx5_devx_mem_t);
if (is_expected) {
if (is_expected_to_have_atomic) {
EXPECT_NE(nullptr, memh->atomic_dvmr);
EXPECT_NE(UCT_IB_INVALID_MKEY, memh->atomic_rkey);
} else {
EXPECT_EQ(nullptr, memh->atomic_dvmr);
}
if (is_expected_to_have_auxiliary_key) {
EXPECT_NE(UCT_IB_INVALID_MKEY, memh->atomic_rkey);
} else {
EXPECT_EQ(UCT_IB_INVALID_MKEY, memh->atomic_rkey);
}

Expand Down Expand Up @@ -119,14 +125,15 @@ void test_ib_md::ib_md_umr_check(void *rkey_buffer, bool amo_access,
EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC);
}

check_mlx5_mr(ib_memh, false);
check_mlx5_mr(ib_memh, false, ib_md().relaxed_order);

status = uct_md_mkey_pack(md(), memh, rkey_buffer);
EXPECT_UCS_OK(status);

status = uct_md_mkey_pack(md(), memh, rkey_buffer);
EXPECT_UCS_OK(status);
check_mlx5_mr(ib_memh, (amo_access && has_ksm()) || ib_md().relaxed_order);
check_mlx5_mr(ib_memh, (amo_access && has_ksm()) || ib_md().relaxed_order,
ib_md().relaxed_order);

status = uct_md_mem_dereg(md(), memh);
EXPECT_UCS_OK(status);
Expand Down

0 comments on commit dda4a34

Please sign in to comment.