Skip to content

Commit aef7d6e

Browse files
committed
UCP/WIREUP: Calculate score when EP is promoted
1 parent 5e8c701 commit aef7d6e

File tree

6 files changed

+57
-28
lines changed

6 files changed

+57
-28
lines changed

src/ucp/core/ucp_ep.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,9 @@ enum {
166166
UCP_EP_INIT_CREATE_AM_LANE_ONLY = UCS_BIT(8), /**< Endpoint requires an AM lane only */
167167
UCP_EP_INIT_KA_FROM_EXIST_LANES = UCS_BIT(9), /**< Use only existing lanes to create
168168
keepalive lane */
169-
UCP_EP_INIT_ALLOW_AM_AUX_TL = UCS_BIT(10) /**< Endpoint allows selecting of auxiliary
169+
UCP_EP_INIT_ALLOW_AM_AUX_TL = UCS_BIT(10), /**< Endpoint allows selecting of auxiliary
170170
transports for AM lane */
171+
UCP_EP_INIT_FLAG_PROMOTED = UCS_BIT(11) /**< Endpoint is promoted by usage_tracker */
171172
};
172173

173174

src/ucp/core/ucp_worker.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -1699,10 +1699,11 @@ static void ucp_worker_init_device_atomics(ucp_worker_h worker)
16991699

17001700
UCS_STATIC_BITMAP_SET(&supp_tls, rsc_index);
17011701
priority = iface_attr->priority;
1702-
dummy_ae.iface_attr.lat_ovh = ucp_wireup_iface_lat_distance_v2(wiface);
1702+
dummy_ae.iface_attr.lat_ovh = ucp_wireup_iface_lat_distance_v2(wiface,
1703+
0);
17031704

17041705
score = ucp_wireup_amo_score_func(wiface, md_attr, &dummy_addr,
1705-
&dummy_ae, NULL);
1706+
&dummy_ae, 0, NULL);
17061707

17071708
ucs_trace(UCT_TL_RESOURCE_DESC_FMT " atomic score %.2f priority %d",
17081709
UCT_TL_RESOURCE_DESC_ARG(&rsc->tl_rsc), score, priority);

src/ucp/wireup/address.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,7 @@ ucp_address_pack_iface_attr_v2(const ucp_worker_iface_t *wiface, void *ptr,
794794
double latency_nsec, overhead_nsec, latency, bandwidth;
795795
size_t seg_size;
796796

797-
latency = ucp_wireup_iface_lat_distance_v2(wiface);
797+
latency = ucp_wireup_iface_lat_distance_v2(wiface, 0);
798798
bandwidth = ucp_wireup_iface_bw_distance(wiface);
799799

800800
latency_nsec = latency * UCS_NSEC_PER_SEC;

src/ucp/wireup/select.c

+34-21
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
607607
}
608608

609609
score = criteria->calc_score(wiface, md_attr, address, ae,
610+
select_params->ep_init_flags,
610611
criteria->arg);
611612
priority = iface_attr->priority + ae->iface_attr.priority;
612613
is_reachable = 1;
@@ -664,7 +665,8 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
664665
static inline double
665666
ucp_wireup_tl_iface_latency(const ucp_worker_iface_t *wiface,
666667
const ucp_unpacked_address_t *unpacked_addr,
667-
const ucp_address_iface_attr_t *remote_iface_attr)
668+
const ucp_address_iface_attr_t *remote_iface_attr,
669+
unsigned flags)
668670
{
669671
ucp_context_h context = wiface->worker->context;
670672
double local_lat, lat_lossy;
@@ -673,9 +675,10 @@ ucp_wireup_tl_iface_latency(const ucp_worker_iface_t *wiface,
673675
local_lat = ucp_wireup_iface_lat_distance_v1(wiface);
674676
/* Address v1 contains just latency overhead */
675677
return ((local_lat + remote_iface_attr->lat_ovh) / 2) +
676-
(wiface->attr.latency.m * context->config.est_num_eps);
678+
(ucp_wireup_adjusted_lat_multiplier(wiface, flags) *
679+
context->config.est_num_eps);
677680
} else {
678-
local_lat = ucp_wireup_iface_lat_distance_v2(wiface);
681+
local_lat = ucp_wireup_iface_lat_distance_v2(wiface, flags);
679682
/* FP8 is a lossy compression method, so in order to create a symmetric
680683
* calculation we pack/unpack the local latency as well */
681684
lat_lossy = ucp_wireup_fp8_pack_unpack_latency(local_lat);
@@ -988,7 +991,7 @@ static double ucp_wireup_rma_score_func(const ucp_worker_iface_t *wiface,
988991
const uct_md_attr_v2_t *md_attr,
989992
const ucp_unpacked_address_t *unpacked_addr,
990993
const ucp_address_entry_t *remote_addr,
991-
void *arg)
994+
unsigned ep_init_flags, void *arg)
992995
{
993996
/* best for 4k messages */
994997
double local_bw;
@@ -1001,8 +1004,9 @@ static double ucp_wireup_rma_score_func(const ucp_worker_iface_t *wiface,
10011004
}
10021005

10031006
return 1e-3 /
1004-
(ucp_wireup_tl_iface_latency(
1005-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1007+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1008+
&remote_addr->iface_attr,
1009+
ep_init_flags) +
10061010
wiface->attr.overhead +
10071011
(4096.0 / ucs_min(local_bw, remote_addr->iface_attr.bandwidth)));
10081012
}
@@ -1022,12 +1026,13 @@ static double ucp_wireup_aux_score_func(const ucp_worker_iface_t *wiface,
10221026
const uct_md_attr_v2_t *md_attr,
10231027
const ucp_unpacked_address_t *unpacked_addr,
10241028
const ucp_address_entry_t *remote_addr,
1025-
void *arg)
1029+
unsigned ep_init_flags, void *arg)
10261030
{
10271031
/* best end-to-end latency and larger bcopy size */
10281032
return (1e-3 /
1029-
(ucp_wireup_tl_iface_latency(
1030-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1033+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1034+
&remote_addr->iface_attr,
1035+
ep_init_flags) +
10311036
wiface->attr.overhead + remote_addr->iface_attr.overhead));
10321037
}
10331038

@@ -1170,12 +1175,13 @@ double ucp_wireup_amo_score_func(const ucp_worker_iface_t *wiface,
11701175
const uct_md_attr_v2_t *md_attr,
11711176
const ucp_unpacked_address_t *unpacked_addr,
11721177
const ucp_address_entry_t *remote_addr,
1173-
void *arg)
1178+
unsigned ep_init_flags, void *arg)
11741179
{
11751180
/* best one-sided latency */
11761181
return 1e-3 /
1177-
(ucp_wireup_tl_iface_latency(
1178-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1182+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1183+
&remote_addr->iface_attr,
1184+
ep_init_flags) +
11791185
wiface->attr.overhead);
11801186
}
11811187

@@ -1228,12 +1234,14 @@ static double
12281234
ucp_wireup_am_score_func(const ucp_worker_iface_t *wiface,
12291235
const uct_md_attr_v2_t *md_attr,
12301236
const ucp_unpacked_address_t *unpacked_addr,
1231-
const ucp_address_entry_t *remote_addr, void *arg)
1237+
const ucp_address_entry_t *remote_addr,
1238+
unsigned ep_init_flags, void *arg)
12321239
{
12331240
/* best end-to-end latency */
12341241
return 1e-3 /
1235-
(ucp_wireup_tl_iface_latency(
1236-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1242+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1243+
&remote_addr->iface_attr,
1244+
ep_init_flags) +
12371245
wiface->attr.overhead + remote_addr->iface_attr.overhead);
12381246
}
12391247

@@ -1327,7 +1335,8 @@ static double
13271335
ucp_wireup_rma_bw_score_func(const ucp_worker_iface_t *wiface,
13281336
const uct_md_attr_v2_t *md_attr,
13291337
const ucp_unpacked_address_t *unpacked_addr,
1330-
const ucp_address_entry_t *remote_addr, void *arg)
1338+
const ucp_address_entry_t *remote_addr,
1339+
unsigned ep_init_flags, void *arg)
13311340
{
13321341
ucp_wireup_dev_usage_count *dev_count = arg;
13331342
ucp_context_t *context = wiface->worker->context;
@@ -1342,7 +1351,8 @@ ucp_wireup_rma_bw_score_func(const ucp_worker_iface_t *wiface,
13421351
ucp_wireup_iface_avail_bandwidth(wiface, unpacked_addr,
13431352
remote_addr, dev_count)) +
13441353
ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1345-
&remote_addr->iface_attr) +
1354+
&remote_addr->iface_attr,
1355+
ep_init_flags) +
13461356
wiface->attr.overhead +
13471357
ucs_linear_func_apply(mem_reg_cost,
13481358
UCP_WIREUP_RMA_BW_TEST_MSG_SIZE));
@@ -1467,7 +1477,8 @@ static double
14671477
ucp_wireup_am_bw_score_func(const ucp_worker_iface_t *wiface,
14681478
const uct_md_attr_v2_t *md_attr,
14691479
const ucp_unpacked_address_t *unpacked_addr,
1470-
const ucp_address_entry_t *remote_addr, void *arg)
1480+
const ucp_address_entry_t *remote_addr,
1481+
unsigned ep_init_flags, void *arg)
14711482
{
14721483
ucp_wireup_dev_usage_count *dev_count = arg;
14731484

@@ -1480,7 +1491,8 @@ ucp_wireup_am_bw_score_func(const ucp_worker_iface_t *wiface,
14801491
wiface, unpacked_addr, remote_addr, dev_count)) +
14811492
wiface->attr.overhead + remote_addr->iface_attr.overhead +
14821493
ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1483-
&remote_addr->iface_attr);
1494+
&remote_addr->iface_attr,
1495+
ep_init_flags);
14841496

14851497
return size / t * 1e-5;
14861498
}
@@ -2106,7 +2118,7 @@ ucp_wireup_keepalive_score_func(const ucp_worker_iface_t *wiface,
21062118
const uct_md_attr_v2_t *md_attr,
21072119
const ucp_unpacked_address_t *unpacked_addr,
21082120
const ucp_address_entry_t *remote_addr,
2109-
void *arg)
2121+
unsigned ep_init_flags, void *arg)
21102122
{
21112123
uct_perf_attr_t perf_attr;
21122124
ucs_status_t status;
@@ -2122,7 +2134,8 @@ ucp_wireup_keepalive_score_func(const ucp_worker_iface_t *wiface,
21222134
return 0;
21232135
}
21242136

2125-
return ucp_wireup_am_score_func(wiface, md_attr, unpacked_addr, remote_addr, arg) *
2137+
return ucp_wireup_am_score_func(wiface, md_attr, unpacked_addr, remote_addr,
2138+
ep_init_flags, arg) *
21262139
((double)perf_attr.max_inflight_eps / (double)SIZE_MAX);
21272140
}
21282141

src/ucp/wireup/wireup.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -2068,21 +2068,30 @@ unsigned ucp_ep_init_flags(const ucp_worker_h worker,
20682068
return flags;
20692069
}
20702070

2071+
/**
 * Select the latency slope (the `m` term of the interface latency) to use
 * when scoring @a wiface.
 *
 * @param wiface Worker interface whose `attr.latency.m` is read.
 * @param flags  Endpoint init flags; only UCP_EP_INIT_FLAG_PROMOTED is
 *               examined here.
 *
 * @return 0 when UCP_EP_INIT_FLAG_PROMOTED is set in @a flags, otherwise
 *         `wiface->attr.latency.m`.
 */
double ucp_wireup_adjusted_lat_multiplier(const ucp_worker_iface_t *wiface,
2072+
unsigned flags)
2073+
{
2074+
return (flags & UCP_EP_INIT_FLAG_PROMOTED) ? 0 : wiface->attr.latency.m;
2075+
}
2076+
20712077
/**
 * Return the constant latency term of @a wiface.
 *
 * @param wiface Worker interface to query.
 *
 * @return `wiface->attr.latency.c`, with `wiface->distance.latency` added
 *         when the context option `proto_enable` is set.
 */
double ucp_wireup_iface_lat_distance_v1(const ucp_worker_iface_t *wiface)
20722078
{
20732079
return wiface->worker->context->config.ext.proto_enable ?
20742080
(wiface->attr.latency.c + wiface->distance.latency) :
20752081
wiface->attr.latency.c;
20762082
}
20772083

2078-
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface)
2084+
/**
 * Compute the latency of @a wiface from a local copy of its latency
 * function.
 *
 * The copy's constant term gets `wiface->distance.latency` added when the
 * context option `proto_enable` is set. Its slope is replaced by the value
 * of ucp_wireup_adjusted_lat_multiplier(), which is 0 when
 * UCP_EP_INIT_FLAG_PROMOTED is set in @a flags. The interface's own
 * attributes are not modified.
 *
 * @param wiface Worker interface to query.
 * @param flags  Endpoint init flags, forwarded to
 *               ucp_wireup_adjusted_lat_multiplier().
 *
 * @return The result of ucp_tl_iface_latency() applied to the adjusted
 *         latency function.
 */
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface,
2085+
unsigned flags)
20792086
{
20802087
ucp_context_h context = wiface->worker->context;
20812088
ucs_linear_func_t lat = wiface->attr.latency;
20822089

20832090
if (context->config.ext.proto_enable) {
20842091
lat.c += wiface->distance.latency;
20852092
}
2093+
2094+
lat.m = ucp_wireup_adjusted_lat_multiplier(wiface, flags);
20862095
return ucp_tl_iface_latency(context, &lat);
20872096
}
20882097

src/ucp/wireup/wireup.h

+7-2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ typedef struct {
100100
const uct_md_attr_v2_t *md_attr,
101101
const ucp_unpacked_address_t *unpacked_addr,
102102
const ucp_address_entry_t *remote_addr,
103+
unsigned ep_init_flags,
103104
void *arg);
104105

105106
/* Custom argument of @a calc_score function */
@@ -155,7 +156,7 @@ double ucp_wireup_amo_score_func(const ucp_worker_iface_t *wiface,
155156
const uct_md_attr_v2_t *md_attr,
156157
const ucp_unpacked_address_t *unpacked_address,
157158
const ucp_address_entry_t *remote_addr,
158-
void *arg);
159+
unsigned ep_init_flags, void *arg);
159160

160161
size_t ucp_wireup_msg_pack(void *dest, void *arg);
161162

@@ -212,9 +213,13 @@ uct_ep_h ucp_wireup_extract_lane(ucp_ep_h ep, ucp_lane_index_t lane);
212213

213214
unsigned ucp_wireup_eps_progress(void *arg);
214215

216+
double ucp_wireup_adjusted_lat_multiplier(const ucp_worker_iface_t *wiface,
217+
unsigned flags);
218+
215219
double ucp_wireup_iface_lat_distance_v1(const ucp_worker_iface_t *wiface);
216220

217-
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface);
221+
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface,
222+
unsigned flags);
218223

219224
double ucp_wireup_iface_bw_distance(const ucp_worker_iface_t *wiface);
220225

0 commit comments

Comments
 (0)