Skip to content

Commit 25f8800

Browse files
committed
UCP/WIREUP: Calculate score when EP is promoted
1 parent 5e8c701 commit 25f8800

File tree

6 files changed

+58
-28
lines changed

6 files changed

+58
-28
lines changed

src/ucp/core/ucp_ep.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,9 @@ enum {
166166
UCP_EP_INIT_CREATE_AM_LANE_ONLY = UCS_BIT(8), /**< Endpoint requires an AM lane only */
167167
UCP_EP_INIT_KA_FROM_EXIST_LANES = UCS_BIT(9), /**< Use only existing lanes to create
168168
keepalive lane */
169-
UCP_EP_INIT_ALLOW_AM_AUX_TL = UCS_BIT(10) /**< Endpoint allows selecting of auxiliary
169+
UCP_EP_INIT_ALLOW_AM_AUX_TL = UCS_BIT(10), /**< Endpoint allows selecting of auxiliary
170170
transports for AM lane */
171+
UCP_EP_INIT_FLAG_PROMOTED = UCS_BIT(11) /**< Endpoint is promoted by usage_tracker */
171172
};
172173

173174

src/ucp/core/ucp_worker.c

+3 -2
Original file line number | Diff line number | Diff line change
@@ -1699,10 +1699,11 @@ static void ucp_worker_init_device_atomics(ucp_worker_h worker)
16991699

17001700
UCS_STATIC_BITMAP_SET(&supp_tls, rsc_index);
17011701
priority = iface_attr->priority;
1702-
dummy_ae.iface_attr.lat_ovh = ucp_wireup_iface_lat_distance_v2(wiface);
1702+
dummy_ae.iface_attr.lat_ovh = ucp_wireup_iface_lat_distance_v2(wiface,
1703+
0);
17031704

17041705
score = ucp_wireup_amo_score_func(wiface, md_attr, &dummy_addr,
1705-
&dummy_ae, NULL);
1706+
&dummy_ae, 0, NULL);
17061707

17071708
ucs_trace(UCT_TL_RESOURCE_DESC_FMT " atomic score %.2f priority %d",
17081709
UCT_TL_RESOURCE_DESC_ARG(&rsc->tl_rsc), score, priority);

src/ucp/wireup/address.c

+1 -1
Original file line number | Diff line number | Diff line change
@@ -794,7 +794,7 @@ ucp_address_pack_iface_attr_v2(const ucp_worker_iface_t *wiface, void *ptr,
794794
double latency_nsec, overhead_nsec, latency, bandwidth;
795795
size_t seg_size;
796796

797-
latency = ucp_wireup_iface_lat_distance_v2(wiface);
797+
latency = ucp_wireup_iface_lat_distance_v2(wiface, 0);
798798
bandwidth = ucp_wireup_iface_bw_distance(wiface);
799799

800800
latency_nsec = latency * UCS_NSEC_PER_SEC;

src/ucp/wireup/select.c

+35 -21
Original file line number | Diff line number | Diff line change
@@ -607,6 +607,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
607607
}
608608

609609
score = criteria->calc_score(wiface, md_attr, address, ae,
610+
select_params->ep_init_flags,
610611
criteria->arg);
611612
priority = iface_attr->priority + ae->iface_attr.priority;
612613
is_reachable = 1;
@@ -664,7 +665,8 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
664665
static inline double
665666
ucp_wireup_tl_iface_latency(const ucp_worker_iface_t *wiface,
666667
const ucp_unpacked_address_t *unpacked_addr,
667-
const ucp_address_iface_attr_t *remote_iface_attr)
668+
const ucp_address_iface_attr_t *remote_iface_attr,
669+
unsigned ep_init_flags)
668670
{
669671
ucp_context_h context = wiface->worker->context;
670672
double local_lat, lat_lossy;
@@ -673,9 +675,11 @@ ucp_wireup_tl_iface_latency(const ucp_worker_iface_t *wiface,
673675
local_lat = ucp_wireup_iface_lat_distance_v1(wiface);
674676
/* Address v1 contains just latency overhead */
675677
return ((local_lat + remote_iface_attr->lat_ovh) / 2) +
676-
(wiface->attr.latency.m * context->config.est_num_eps);
678+
(ucp_wireup_adjusted_lat_multiplier(&wiface->attr.latency,
679+
ep_init_flags) *
680+
context->config.est_num_eps);
677681
} else {
678-
local_lat = ucp_wireup_iface_lat_distance_v2(wiface);
682+
local_lat = ucp_wireup_iface_lat_distance_v2(wiface, ep_init_flags);
679683
/* FP8 is a lossy compression method, so in order to create a symmetric
680684
* calculation we pack/unpack the local latency as well */
681685
lat_lossy = ucp_wireup_fp8_pack_unpack_latency(local_lat);
@@ -988,7 +992,7 @@ static double ucp_wireup_rma_score_func(const ucp_worker_iface_t *wiface,
988992
const uct_md_attr_v2_t *md_attr,
989993
const ucp_unpacked_address_t *unpacked_addr,
990994
const ucp_address_entry_t *remote_addr,
991-
void *arg)
995+
unsigned ep_init_flags, void *arg)
992996
{
993997
/* best for 4k messages */
994998
double local_bw;
@@ -1001,8 +1005,9 @@ static double ucp_wireup_rma_score_func(const ucp_worker_iface_t *wiface,
10011005
}
10021006

10031007
return 1e-3 /
1004-
(ucp_wireup_tl_iface_latency(
1005-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1008+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1009+
&remote_addr->iface_attr,
1010+
ep_init_flags) +
10061011
wiface->attr.overhead +
10071012
(4096.0 / ucs_min(local_bw, remote_addr->iface_attr.bandwidth)));
10081013
}
@@ -1022,12 +1027,13 @@ static double ucp_wireup_aux_score_func(const ucp_worker_iface_t *wiface,
10221027
const uct_md_attr_v2_t *md_attr,
10231028
const ucp_unpacked_address_t *unpacked_addr,
10241029
const ucp_address_entry_t *remote_addr,
1025-
void *arg)
1030+
unsigned ep_init_flags, void *arg)
10261031
{
10271032
/* best end-to-end latency and larger bcopy size */
10281033
return (1e-3 /
1029-
(ucp_wireup_tl_iface_latency(
1030-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1034+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1035+
&remote_addr->iface_attr,
1036+
ep_init_flags) +
10311037
wiface->attr.overhead + remote_addr->iface_attr.overhead));
10321038
}
10331039

@@ -1170,12 +1176,13 @@ double ucp_wireup_amo_score_func(const ucp_worker_iface_t *wiface,
11701176
const uct_md_attr_v2_t *md_attr,
11711177
const ucp_unpacked_address_t *unpacked_addr,
11721178
const ucp_address_entry_t *remote_addr,
1173-
void *arg)
1179+
unsigned ep_init_flags, void *arg)
11741180
{
11751181
/* best one-sided latency */
11761182
return 1e-3 /
1177-
(ucp_wireup_tl_iface_latency(
1178-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1183+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1184+
&remote_addr->iface_attr,
1185+
ep_init_flags) +
11791186
wiface->attr.overhead);
11801187
}
11811188

@@ -1228,12 +1235,14 @@ static double
12281235
ucp_wireup_am_score_func(const ucp_worker_iface_t *wiface,
12291236
const uct_md_attr_v2_t *md_attr,
12301237
const ucp_unpacked_address_t *unpacked_addr,
1231-
const ucp_address_entry_t *remote_addr, void *arg)
1238+
const ucp_address_entry_t *remote_addr,
1239+
unsigned ep_init_flags, void *arg)
12321240
{
12331241
/* best end-to-end latency */
12341242
return 1e-3 /
1235-
(ucp_wireup_tl_iface_latency(
1236-
wiface, unpacked_addr, &remote_addr->iface_attr) +
1243+
(ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1244+
&remote_addr->iface_attr,
1245+
ep_init_flags) +
12371246
wiface->attr.overhead + remote_addr->iface_attr.overhead);
12381247
}
12391248

@@ -1327,7 +1336,8 @@ static double
13271336
ucp_wireup_rma_bw_score_func(const ucp_worker_iface_t *wiface,
13281337
const uct_md_attr_v2_t *md_attr,
13291338
const ucp_unpacked_address_t *unpacked_addr,
1330-
const ucp_address_entry_t *remote_addr, void *arg)
1339+
const ucp_address_entry_t *remote_addr,
1340+
unsigned ep_init_flags, void *arg)
13311341
{
13321342
ucp_wireup_dev_usage_count *dev_count = arg;
13331343
ucp_context_t *context = wiface->worker->context;
@@ -1342,7 +1352,8 @@ ucp_wireup_rma_bw_score_func(const ucp_worker_iface_t *wiface,
13421352
ucp_wireup_iface_avail_bandwidth(wiface, unpacked_addr,
13431353
remote_addr, dev_count)) +
13441354
ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1345-
&remote_addr->iface_attr) +
1355+
&remote_addr->iface_attr,
1356+
ep_init_flags) +
13461357
wiface->attr.overhead +
13471358
ucs_linear_func_apply(mem_reg_cost,
13481359
UCP_WIREUP_RMA_BW_TEST_MSG_SIZE));
@@ -1467,7 +1478,8 @@ static double
14671478
ucp_wireup_am_bw_score_func(const ucp_worker_iface_t *wiface,
14681479
const uct_md_attr_v2_t *md_attr,
14691480
const ucp_unpacked_address_t *unpacked_addr,
1470-
const ucp_address_entry_t *remote_addr, void *arg)
1481+
const ucp_address_entry_t *remote_addr,
1482+
unsigned ep_init_flags, void *arg)
14711483
{
14721484
ucp_wireup_dev_usage_count *dev_count = arg;
14731485

@@ -1480,7 +1492,8 @@ ucp_wireup_am_bw_score_func(const ucp_worker_iface_t *wiface,
14801492
wiface, unpacked_addr, remote_addr, dev_count)) +
14811493
wiface->attr.overhead + remote_addr->iface_attr.overhead +
14821494
ucp_wireup_tl_iface_latency(wiface, unpacked_addr,
1483-
&remote_addr->iface_attr);
1495+
&remote_addr->iface_attr,
1496+
ep_init_flags);
14841497

14851498
return size / t * 1e-5;
14861499
}
@@ -2106,7 +2119,7 @@ ucp_wireup_keepalive_score_func(const ucp_worker_iface_t *wiface,
21062119
const uct_md_attr_v2_t *md_attr,
21072120
const ucp_unpacked_address_t *unpacked_addr,
21082121
const ucp_address_entry_t *remote_addr,
2109-
void *arg)
2122+
unsigned ep_init_flags, void *arg)
21102123
{
21112124
uct_perf_attr_t perf_attr;
21122125
ucs_status_t status;
@@ -2122,7 +2135,8 @@ ucp_wireup_keepalive_score_func(const ucp_worker_iface_t *wiface,
21222135
return 0;
21232136
}
21242137

2125-
return ucp_wireup_am_score_func(wiface, md_attr, unpacked_addr, remote_addr, arg) *
2138+
return ucp_wireup_am_score_func(wiface, md_attr, unpacked_addr, remote_addr,
2139+
ep_init_flags, arg) *
21262140
((double)perf_attr.max_inflight_eps / (double)SIZE_MAX);
21272141
}
21282142

src/ucp/wireup/wireup.c

+10 -1
Original file line number | Diff line number | Diff line change
@@ -2068,21 +2068,30 @@ unsigned ucp_ep_init_flags(const ucp_worker_h worker,
20682068
return flags;
20692069
}
20702070

2071+
double ucp_wireup_adjusted_lat_multiplier(const ucs_linear_func_t *latency,
2072+
unsigned ep_init_flags)
2073+
{
2074+
return (ep_init_flags & UCP_EP_INIT_FLAG_PROMOTED) ? 0 : latency->m;
2075+
}
2076+
20712077
double ucp_wireup_iface_lat_distance_v1(const ucp_worker_iface_t *wiface)
20722078
{
20732079
return wiface->worker->context->config.ext.proto_enable ?
20742080
(wiface->attr.latency.c + wiface->distance.latency) :
20752081
wiface->attr.latency.c;
20762082
}
20772083

2078-
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface)
2084+
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface,
2085+
unsigned ep_init_flags)
20792086
{
20802087
ucp_context_h context = wiface->worker->context;
20812088
ucs_linear_func_t lat = wiface->attr.latency;
20822089

20832090
if (context->config.ext.proto_enable) {
20842091
lat.c += wiface->distance.latency;
20852092
}
2093+
2094+
lat.m = ucp_wireup_adjusted_lat_multiplier(&lat, ep_init_flags);
20862095
return ucp_tl_iface_latency(context, &lat);
20872096
}
20882097

src/ucp/wireup/wireup.h

+7 -2
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ typedef struct {
100100
const uct_md_attr_v2_t *md_attr,
101101
const ucp_unpacked_address_t *unpacked_addr,
102102
const ucp_address_entry_t *remote_addr,
103+
unsigned ep_init_flags,
103104
void *arg);
104105

105106
/* Custom argument of @a calc_score function */
@@ -155,7 +156,7 @@ double ucp_wireup_amo_score_func(const ucp_worker_iface_t *wiface,
155156
const uct_md_attr_v2_t *md_attr,
156157
const ucp_unpacked_address_t *unpacked_address,
157158
const ucp_address_entry_t *remote_addr,
158-
void *arg);
159+
unsigned ep_init_flags, void *arg);
159160

160161
size_t ucp_wireup_msg_pack(void *dest, void *arg);
161162

@@ -212,9 +213,13 @@ uct_ep_h ucp_wireup_extract_lane(ucp_ep_h ep, ucp_lane_index_t lane);
212213

213214
unsigned ucp_wireup_eps_progress(void *arg);
214215

216+
double ucp_wireup_adjusted_lat_multiplier(const ucs_linear_func_t *latency,
217+
unsigned ep_init_flags);
218+
215219
double ucp_wireup_iface_lat_distance_v1(const ucp_worker_iface_t *wiface);
216220

217-
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface);
221+
double ucp_wireup_iface_lat_distance_v2(const ucp_worker_iface_t *wiface,
222+
unsigned ep_init_flags);
218223

219224
double ucp_wireup_iface_bw_distance(const ucp_worker_iface_t *wiface);
220225

0 commit comments

Comments
 (0)