Skip to content

Commit 869bf03

Browse files
committed
UCX/RNDV/CUDA: RNDV protocol improvements for CUDA
1 parent cac4887 commit 869bf03

File tree

6 files changed

+194
-106
lines changed

6 files changed

+194
-106
lines changed

src/ucp/core/ucp_context.c

+4
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ static ucs_config_field_t ucp_config_table[] = {
256256
"RNDV fragment size \n",
257257
ucs_offsetof(ucp_config_t, ctx.rndv_frag_size), UCS_CONFIG_TYPE_MEMUNITS},
258258

259+
{"RNDV_PIPELINE_SEND_THRESH", "inf",
260+
"RNDV size threshold to enable sender side pipeline for mem type\n",
261+
ucs_offsetof(ucp_config_t, ctx.rndv_pipeline_send_thresh), UCS_CONFIG_TYPE_MEMUNITS},
262+
259263
{"MEMTYPE_CACHE", "y",
260264
"Enable memory type (cuda/rocm) cache \n",
261265
ucs_offsetof(ucp_config_t, ctx.enable_memtype_cache), UCS_CONFIG_TYPE_BOOL},

src/ucp/core/ucp_context.h

+2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ typedef struct ucp_context_config {
6161
size_t seg_size;
6262
/** RNDV pipeline fragment size */
6363
size_t rndv_frag_size;
64+
/** RNDV pipeline send threshold */
65+
size_t rndv_pipeline_send_thresh;
6466
/** Threshold for using tag matching offload capabilities. Smaller buffers
6567
* will not be posted to the transport. */
6668
size_t tm_thresh;

src/ucp/core/ucp_request.h

+9-6
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,15 @@ struct ucp_request {
154154
} proxy;
155155

156156
struct {
157-
uint64_t remote_address; /* address of the sender's data buffer */
158-
uintptr_t remote_request; /* pointer to the sender's request */
159-
ucp_request_t *rreq; /* receive request on the recv side */
160-
ucp_rkey_h rkey; /* key for remote send buffer */
161-
ucp_lane_map_t lanes_map; /* used lanes map */
162-
ucp_lane_index_t lane_count; /* number of lanes used in transaction */
157+
uint64_t remote_address; /* address of the sender's data buffer */
158+
uintptr_t remote_request; /* pointer to the sender's request */
159+
ucp_request_t *rreq; /* receive request on the recv side */
160+
ucp_rkey_h rkey; /* key for remote send buffer */
161+
ucp_lane_map_t lanes_map_avail; /* used lanes map */
162+
ucp_lane_map_t lanes_map_all; /* actual lanes map */
163+
uint8_t lanes_count; /* actual lanes count */
164+
uint8_t rkey_index[UCP_MAX_LANES];
165+
163166
} rndv_get;
164167

165168
struct {

src/ucp/tag/offload.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -561,8 +561,7 @@ ucs_status_t ucp_tag_offload_sw_rndv(uct_pending_req_t *self)
561561
rndv_rts_hdr = ucs_alloca(rndv_hdr_len);
562562
packed_len = ucp_tag_rndv_rts_pack(rndv_rts_hdr, req);
563563
ucs_assert((rndv_rts_hdr->address != 0) || !UCP_DT_IS_CONTIG(req->send.datatype) ||
564-
!ucp_rndv_is_get_zcopy(req->send.mem_type,
565-
ep->worker->context->config.ext.rndv_mode));
564+
!ucp_rndv_is_get_zcopy(req, ep->worker->context));
566565
return uct_ep_tag_rndv_request(ep->uct_eps[req->send.lane],
567566
req->send.msg_proto.tag.tag,
568567
rndv_rts_hdr, packed_len, 0);

0 commit comments

Comments
 (0)