Skip to content

Commit b681b04

Browse files
committed
UCP/WIREUP: Support EP reconfiguration for non wired-up scenarios
1 parent c8ef020 commit b681b04

File tree

4 files changed

+176
-82
lines changed

4 files changed

+176
-82
lines changed

src/ucp/core/ucp_ep.c

+6-7
Original file line number | Diff line number | Diff line change
@@ -1893,10 +1893,9 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1,
18931893
config_lane2->dst_md_index);
18941894
}
18951895

1896-
static ucp_lane_index_t
1897-
ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1,
1898-
ucp_lane_index_t lane1,
1899-
const ucp_ep_config_key_t *key2)
1896+
ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1,
1897+
ucp_lane_index_t lane1,
1898+
const ucp_ep_config_key_t *key2)
19001899
{
19011900
ucp_lane_index_t lane_idx;
19021901

@@ -1970,9 +1969,9 @@ void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1,
19701969
}
19711970
}
19721971

1973-
static int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1,
1974-
const ucp_ep_config_key_t *key2,
1975-
ucp_lane_index_t lane)
1972+
int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1,
1973+
const ucp_ep_config_key_t *key2,
1974+
ucp_lane_index_t lane)
19761975
{
19771976
const ucp_ep_config_key_lane_t *config_lane1 = &key1->lanes[lane];
19781977
const ucp_ep_config_key_lane_t *config_lane2 = &key2->lanes[lane];

src/ucp/core/ucp_ep.h

+8
Original file line number | Diff line number | Diff line change
@@ -749,6 +749,10 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1,
749749
const ucp_ep_config_key_t *key2,
750750
ucp_lane_index_t lane2);
751751

752+
ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1,
753+
ucp_lane_index_t lane1,
754+
const ucp_ep_config_key_t *key2);
755+
752756
void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1,
753757
const ucp_ep_config_key_t *key2,
754758
const ucp_ep_h ep,
@@ -762,6 +766,10 @@ ucp_ep_find_non_reused_lane(ucp_ep_h ep, const ucp_ep_config_key_t *key,
762766

763767
ucp_lane_index_t ucp_ep_find_wireup_ep_lane(ucp_ep_h ep);
764768

769+
int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1,
770+
const ucp_ep_config_key_t *key2,
771+
ucp_lane_index_t lane);
772+
765773
int ucp_ep_config_is_equal(const ucp_ep_config_key_t *key1,
766774
const ucp_ep_config_key_t *key2);
767775

src/ucp/wireup/wireup.c

+88-22
Original file line number | Diff line number | Diff line change
@@ -1353,28 +1353,69 @@ static void ucp_wireup_discard_uct_eps(ucp_ep_h ep, uct_ep_h *uct_eps,
13531353
}
13541354
}
13551355

1356+
/*
 * Tell whether the endpoint's AM lane is marked as not reused.
 *
 * ep             - endpoint whose am_lane field is inspected.
 * reuse_lane_map - array indexed by lane index of the endpoint; only the
 *                  entry at ep->am_lane is read.
 *
 * Returns nonzero only when all three conditions hold: the endpoint has no
 * CM lane, ep->am_lane is not UCP_NULL_LANE, and reuse_lane_map[ep->am_lane]
 * equals UCP_NULL_LANE. The am_lane check is evaluated before the array
 * access, so the map is never indexed with UCP_NULL_LANE.
 *
 * NOTE(review): presumably a UCP_NULL_LANE entry means the old lane has no
 * counterpart in the new configuration - confirm against
 * ucp_ep_config_lanes_intersect, which fills the map.
 */
static int
1357+
ucp_wireup_is_am_lane_replaced(ucp_ep_h ep,
1358+
const ucp_lane_index_t *reuse_lane_map)
1359+
{
1360+
return !ucp_ep_has_cm_lane(ep) && (ep->am_lane != UCP_NULL_LANE) &&
1361+
(reuse_lane_map[ep->am_lane] == UCP_NULL_LANE);
1362+
}
1363+
13561364
static int
13571365
ucp_wireup_check_is_reconfigurable(ucp_ep_h ep,
13581366
const ucp_ep_config_key_t *new_key,
13591367
const ucp_unpacked_address_t *remote_address,
13601368
const unsigned *addr_indices)
13611369
{
1362-
ucp_lane_index_t lane;
1370+
ucp_lane_index_t lane, wireup_lane, reuse_lane_map[UCP_MAX_LANES];
1371+
const ucp_ep_config_key_t *old_key;
13631372

13641373
if ((ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) ||
13651374
ucp_ep_has_cm_lane(ep)) {
13661375
return 1;
13671376
}
13681377

1369-
/* TODO: Support reconfiguration when lanes are created without a wireup_ep
1370-
* wrapper */
1371-
for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
1372-
if (!ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) {
1378+
old_key = &ucp_ep_config(ep)->key;
1379+
1380+
/* TODO: 1) Support lanes which are connected to the same remote MD, but
1381+
* different remote sys_dev (eg. TCP). */
1382+
for (lane = 0; lane < old_key->num_lanes; ++lane) {
1383+
if ((ucp_ep_config_find_match_lane(old_key, lane, new_key) !=
1384+
UCP_NULL_LANE) &&
1385+
!ucp_ep_config_lane_is_equal(old_key, new_key, lane)) {
13731386
return 0;
13741387
}
13751388
}
13761389

1377-
return 1;
1390+
ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address,
1391+
addr_indices, reuse_lane_map);
1392+
wireup_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST);
1393+
1394+
/* TODO: 2) Support reconfiguration for separated wireup and AM lanes
1395+
* during wireup process (request sent). */
1396+
return !(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED) ||
1397+
(ep->am_lane == wireup_lane) ||
1398+
!ucp_wireup_is_am_lane_replaced(ep, reuse_lane_map);
1399+
}
1400+
1401+
/*
 * Decide whether the endpoint needs to go through reconfiguration.
 *
 * ep             - endpoint being examined.
 * reuse_lane_map - array with one entry per current lane of the endpoint;
 *                  entries are compared against UCP_NULL_LANE.
 * num_lanes      - lane count to compare against the endpoint's current
 *                  number of lanes (the visible caller passes
 *                  new_key->num_lanes).
 *
 * Returns 1 if the endpoint has a CM lane, or if any current lane has
 * UCP_NULL_LANE in reuse_lane_map. Otherwise returns nonzero only when the
 * endpoint's current lane count differs from num_lanes.
 */
static int ucp_wireup_should_reconfigure(ucp_ep_h ep,
1402+
const ucp_lane_index_t *reuse_lane_map,
1403+
ucp_lane_index_t num_lanes)
1404+
{
1405+
ucp_lane_index_t lane;
1406+
1407+
if (ucp_ep_has_cm_lane(ep)) {
1408+
return 1;
1409+
}
1410+
1411+
/* Check whether all lanes are reused */
1412+
for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
1413+
if (reuse_lane_map[lane] == UCP_NULL_LANE) {
1414+
return 1;
1415+
}
1416+
}
1417+
1418+
/* Every current lane has a map entry other than UCP_NULL_LANE; the only
 * remaining trigger is a lane count that differs from num_lanes. */
return ucp_ep_num_lanes(ep) != num_lanes;
13781419
}
13791420

13801421
static ucs_status_t
@@ -1384,15 +1425,16 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key,
13841425
{
13851426
uct_ep_h uct_ep = NULL;
13861427
ucp_lane_index_t old_lane, new_wireup_lane;
1387-
ucp_wireup_ep_t *old_wireup_ep, *new_wireup_ep;
1428+
ucp_wireup_ep_t *new_wireup_ep;
1429+
uct_ep_h old_wireup_ep, msg_ep;
13881430
ucp_rsc_index_t aux_rsc_index;
1389-
int is_p2p;
1431+
int is_p2p, is_wireup_ep, is_next;
13901432
ucs_status_t status;
13911433

13921434
/* Get old wireup lane */
13931435
old_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST);
1394-
old_wireup_ep = ucp_wireup_ep(ucp_ep_get_lane(ep, old_lane));
1395-
ucs_assert_always(old_wireup_ep != NULL);
1436+
old_wireup_ep = ucp_ep_get_lane(ep, old_lane);
1437+
is_wireup_ep = ucp_wireup_ep_test(old_wireup_ep);
13961438

13971439
/* Select CM/non-reused lane as new wireup lane */
13981440
new_wireup_lane = ucp_ep_find_non_reused_lane(ep, key, reuse_lane_map);
@@ -1417,32 +1459,31 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key,
14171459
}
14181460

14191461
ucs_assert(new_wireup_ep != NULL);
1462+
is_next = ucp_wireup_ep_is_next_ep_active(ucp_wireup_ep(old_wireup_ep));
14201463

14211464
/* Get correct aux_rsc_index either from next_ep or aux_ep */
1422-
aux_rsc_index = ucp_wireup_ep_is_next_ep_active(old_wireup_ep) ?
1465+
aux_rsc_index = (!is_wireup_ep || is_next) ?
14231466
ucp_ep_get_rsc_index(ep, old_lane) :
1424-
ucp_wireup_ep_get_aux_rsc_index(
1425-
&old_wireup_ep->super.super);
1467+
ucp_wireup_ep_get_aux_rsc_index(old_wireup_ep);
14261468

14271469
ucs_assert(aux_rsc_index != UCP_NULL_RESOURCE);
14281470
is_p2p = ucp_ep_config_connect_p2p(ep->worker, &ucp_ep_config(ep)->key,
14291471
aux_rsc_index);
14301472

14311473
/* Move aux EP to new wireup lane */
1432-
ucp_wireup_ep_set_aux(new_wireup_ep,
1433-
ucp_wireup_ep_extract_msg_ep(old_wireup_ep),
1474+
msg_ep = ucp_wireup_ep_extract_msg_ep(ucp_wireup_ep(old_wireup_ep));
1475+
ucp_wireup_ep_set_aux(new_wireup_ep, is_wireup_ep ? msg_ep : old_wireup_ep,
14341476
aux_rsc_index, is_p2p);
14351477

1436-
/* Remove old wireup_ep as it's not needed anymore.
1437-
* NOTICE: Next two lines are intentionally not merged with the lane
1438-
* removal loop in ucp_wireup_check_config_intersect, because of future
1439-
* support for non-wireup EPs reconfiguration (which will modify this
1440-
* code). */
1441-
uct_ep_destroy(&old_wireup_ep->super.super);
1442-
ucp_ep_set_lane(ep, old_lane, NULL);
1478+
if (is_wireup_ep) {
1479+
/* Remove old wireup_ep as it's not needed anymore. */
1480+
uct_ep_destroy(old_wireup_ep);
1481+
}
14431482

1483+
ucp_ep_set_lane(ep, old_lane, NULL);
14441484
new_uct_eps[new_wireup_lane] = &new_wireup_ep->super.super;
14451485
key->wireup_msg_lane = new_wireup_lane;
1486+
14461487
return UCS_OK;
14471488
}
14481489

@@ -1479,6 +1520,11 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key,
14791520
ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address,
14801521
addr_indices, reuse_lane_map);
14811522

1523+
if (!ucp_wireup_should_reconfigure(ep, reuse_lane_map,
1524+
new_key->num_lanes)) {
1525+
return UCS_OK;
1526+
}
1527+
14821528
if (ucp_ep_has_cm_lane(ep)) {
14831529
/* CM lane has to be reused by the new EP configuration */
14841530
ucs_assert(reuse_lane_map[ucp_ep_get_cm_lane(ep)] != UCP_NULL_LANE);
@@ -1585,16 +1631,36 @@ ucp_wireup_gather_pending_reqs(ucp_ep_h ep,
15851631
ucs_queue_head_t *replay_pending_queue)
15861632
{
15871633
ucp_request_t *req;
1634+
ucp_lane_index_t lane;
15881635

1636+
ucs_queue_head_init(replay_pending_queue);
1637+
1638+
if (ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) {
1639+
return;
1640+
}
1641+
1642+
/* Handle wireup EPs */
15891643
ucp_wireup_eps_pending_extract(ep, replay_pending_queue);
15901644

1645+
/* rkey ptr requests */
15911646
ucs_queue_for_each(req, &ep->worker->rkey_ptr_reqs,
15921647
send.rndv.rkey_ptr.queue_elem) {
15931648
if (req->send.ep == ep) {
15941649
ucs_queue_push(replay_pending_queue,
15951650
(ucs_queue_elem_t*)&req->send.uct.priv);
15961651
}
15971652
}
1653+
1654+
/* Fully connected lanes */
1655+
for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
1656+
if (ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) {
1657+
continue;
1658+
}
1659+
1660+
uct_ep_pending_purge(ucp_ep_get_lane(ep, lane),
1661+
ucp_request_purge_enqueue_cb,
1662+
replay_pending_queue);
1663+
}
15981664
}
15991665

16001666
ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags,

0 commit comments

Comments
 (0)