Skip to content

Commit d74fd54

Browse files
authored
Merge pull request #7841 from brminich/uct/intra_ka_fix_v1.12.x
UCT/SM/CUDA: Fix common intra-node keepalive protocol - v1.12.x
2 parents 2b8c4bd + b1ada98 commit d74fd54

12 files changed

Lines changed: 113 additions & 159 deletions

File tree

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
* Reduced default value of keep-alive interval to 20 seconds
123123
* Fixes in tag_send datatype processing
124124
#### UCT
125+
* Fixed keep-alive protocol for intra-node transports (sm, cuda)
125126
* Fixed deadlock in TCP
126127
* Suppressed EHOSTUNREACH error in TCP sockcm
127128
* Restricted connecting loop-back to other devices in TCP

src/ucs/sys/sys.c

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#define UCS_PROCESS_NS_DIR "/proc/self/ns"
4646
#define UCS_PROCESS_BOOTID_FILE "/proc/sys/kernel/random/boot_id"
4747
#define UCS_PROCESS_BOOTID_FMT "%x-%4hx-%4hx-%4hx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx"
48+
#define UCS_PROCCESS_STAT_FMT "/proc/%d/stat"
4849
#define UCS_PROCESS_NS_FIRST 0xF0000000U
4950
#define UCS_PROCESS_NS_NET_DFLT 0xF0000080U
5051

@@ -1462,32 +1463,6 @@ ucs_status_t ucs_sys_enum_threads(ucs_sys_enum_threads_cb_t cb, void *ctx)
14621463
return ucs_sys_readdir(task_dir, &ucs_sys_enum_threads_cb, &param);
14631464
}
14641465

1465-
ucs_status_t ucs_sys_get_file_time(const char *name, ucs_sys_file_time_t type,
1466-
struct timespec *ts)
1467-
{
1468-
struct stat stat_buf;
1469-
int res;
1470-
1471-
res = stat(name, &stat_buf);
1472-
if (res != 0) {
1473-
return UCS_ERR_IO_ERROR; /* failed to get file info */
1474-
}
1475-
1476-
switch (type) {
1477-
case UCS_SYS_FILE_TIME_CTIME:
1478-
*ts = stat_buf.st_ctim;
1479-
return UCS_OK;
1480-
case UCS_SYS_FILE_TIME_ATIME:
1481-
*ts = stat_buf.st_atim;
1482-
return UCS_OK;
1483-
case UCS_SYS_FILE_TIME_MTIME:
1484-
*ts = stat_buf.st_mtim;
1485-
return UCS_OK;
1486-
default:
1487-
return UCS_ERR_INVALID_PARAM;
1488-
}
1489-
}
1490-
14911466
ucs_status_t ucs_sys_check_fd_limit_per_process()
14921467
{
14931468
int fd;
@@ -1541,3 +1516,37 @@ long ucs_sys_get_num_cpus()
15411516

15421517
return num_cpus;
15431518
}
1519+
1520+
unsigned long ucs_sys_get_proc_create_time(pid_t pid)
1521+
{
1522+
char stat[1024];
1523+
char *start_str;
1524+
ssize_t size;
1525+
unsigned long stime;
1526+
int res;
1527+
1528+
size = ucs_read_file_str(stat, sizeof(stat), 1, UCS_PROCCESS_STAT_FMT, pid);
1529+
if (size < 0) {
1530+
goto err;
1531+
}
1532+
1533+
/* Start sscanf at the last ')', i.e. right after the executable name, which
1534+
* may itself contain spaces or parentheses */
1535+
start_str = strrchr(stat, ')');
1536+
if (start_str == NULL) {
1537+
goto scan_err;
1538+
}
1539+
1540+
res = sscanf(start_str, ") %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u"
1541+
"%*u %*d %*d %*d %*d %*d %*d %lu", &stime);
1542+
if (res == 1) {
1543+
return stime;
1544+
}
1545+
1546+
scan_err:
1547+
ucs_error("failed to scan "UCS_PROCCESS_STAT_FMT, pid);
1548+
err:
1549+
return 0ul;
1550+
}
1551+
1552+

src/ucs/sys/sys.h

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,6 @@ typedef enum {
9191
} ucs_sys_vma_info_flags_t;
9292

9393

94-
/* file time information */
95-
typedef enum {
96-
UCS_SYS_FILE_TIME_CTIME, /**< create time */
97-
UCS_SYS_FILE_TIME_ATIME, /**< access time */
98-
UCS_SYS_FILE_TIME_MTIME /**< modification time */
99-
} ucs_sys_file_time_t;
100-
101-
10294
/* information about virtual memory area */
10395
typedef struct {
10496
unsigned long start;
@@ -596,19 +588,6 @@ ucs_status_t ucs_sys_readdir(const char *path, ucs_sys_readdir_cb_t cb, void *ct
596588
ucs_status_t ucs_sys_enum_threads(ucs_sys_enum_threads_cb_t cb, void *ctx);
597589

598590

599-
/**
600-
* Get file time
601-
*
602-
* @param [in] name File name
603-
* @param [in] type Type of file time information
604-
* @param [out] ts File time information
605-
*
606-
* @return UCS_OK if file is found and got information.
607-
*/
608-
ucs_status_t ucs_sys_get_file_time(const char *name, ucs_sys_file_time_t type,
609-
struct timespec *ts);
610-
611-
612591
/**
613592
* Check the per-process limit on the number of open file descriptors.
614593
*
@@ -639,6 +618,16 @@ ucs_status_t ucs_pthread_create(pthread_t *thread_id_p,
639618
*/
640619
long ucs_sys_get_num_cpus();
641620

621+
622+
/**
623+
* Get process creation time.
624+
*
625+
* @param [in] pid Id of the process whose start time is queried.
626+
*
627+
* @return Process start time in clock ticks since system boot (as reported in /proc/<pid>/stat), or 0 in case of error.
628+
*/
629+
unsigned long ucs_sys_get_proc_create_time(pid_t pid);
630+
642631
END_C_DECLS
643632

644633
#endif

src/uct/base/uct_iface.c

Lines changed: 19 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -787,52 +787,6 @@ ucs_status_t uct_base_ep_am_short_iov(uct_ep_h ep, uint8_t id, const uct_iov_t *
787787
return status;
788788
}
789789

790-
int uct_ep_get_process_proc_dir(char *buffer, size_t max_len, pid_t pid)
791-
{
792-
ucs_assert((buffer != NULL) || (max_len == 0));
793-
/* cppcheck-suppress nullPointer */
794-
/* cppcheck-suppress ctunullpointer */
795-
return snprintf(buffer, max_len, "/proc/%d", (int)pid);
796-
}
797-
798-
ucs_status_t uct_ep_keepalive_create(pid_t pid, uct_keepalive_info_t **ka_p)
799-
{
800-
uct_keepalive_info_t *ka;
801-
ucs_status_t status;
802-
int proc_len;
803-
804-
proc_len = uct_ep_get_process_proc_dir(NULL, 0, pid);
805-
if (proc_len <= 0) {
806-
ucs_error("failed to get length to hold path to a process directory");
807-
status = UCS_ERR_NO_MEMORY;
808-
goto err;
809-
}
810-
811-
ka = ucs_malloc(sizeof(*ka) + proc_len + 1, "keepalive");
812-
if (ka == NULL) {
813-
ucs_error("failed to allocate keepalive info");
814-
status = UCS_ERR_NO_MEMORY;
815-
goto err;
816-
}
817-
818-
uct_ep_get_process_proc_dir(ka->proc, proc_len + 1, pid);
819-
820-
status = ucs_sys_get_file_time(ka->proc, UCS_SYS_FILE_TIME_CTIME,
821-
&ka->start_time);
822-
if (status != UCS_OK) {
823-
ucs_error("failed to get process start time");
824-
goto err_free_ka;
825-
}
826-
827-
*ka_p = ka;
828-
return UCS_OK;
829-
830-
err_free_ka:
831-
ucs_free(ka);
832-
err:
833-
return status;
834-
}
835-
836790
static ucs_status_t uct_iface_schedule_ep_err(uct_ep_h ep, ucs_status_t status)
837791
{
838792
uct_base_iface_t *iface = ucs_derived_of(ep->iface, uct_base_iface_t);
@@ -856,31 +810,32 @@ static ucs_status_t uct_iface_schedule_ep_err(uct_ep_h ep, ucs_status_t status)
856810
return UCS_OK;
857811
}
858812

859-
ucs_status_t uct_ep_keepalive_check(uct_ep_h ep, uct_keepalive_info_t **ka_p,
813+
ucs_status_t uct_ep_keepalive_init(uct_keepalive_info_t *ka, pid_t pid)
814+
{
815+
ka->start_time = ucs_sys_get_proc_create_time(pid);
816+
if (ka->start_time == 0) {
817+
ucs_diag("failed to get start time for pid %d", pid);
818+
return UCS_ERR_ENDPOINT_TIMEOUT;
819+
}
820+
821+
return UCS_OK;
822+
}
823+
824+
ucs_status_t uct_ep_keepalive_check(uct_ep_h ep, uct_keepalive_info_t *ka,
860825
pid_t pid, unsigned flags,
861826
uct_completion_t *comp)
862827
{
863-
struct timespec create_time;
864-
uct_keepalive_info_t *ka;
865-
ucs_status_t status;
828+
unsigned long start_time;
866829

867830
UCT_EP_KEEPALIVE_CHECK_PARAM(flags, comp);
868831

869-
if (*ka_p == NULL) {
870-
status = uct_ep_keepalive_create(pid, ka_p);
871-
} else {
872-
ka = *ka_p;
873-
status = ucs_sys_get_file_time(ka->proc, UCS_SYS_FILE_TIME_CTIME,
874-
&create_time);
875-
if ((status != UCS_OK) ||
876-
(ka->start_time.tv_sec != create_time.tv_sec) ||
877-
(ka->start_time.tv_nsec != create_time.tv_nsec)) {
878-
status = UCS_ERR_ENDPOINT_TIMEOUT;
879-
}
880-
}
832+
ucs_assert(ka->start_time != 0);
881833

882-
if (status != UCS_OK) {
883-
return uct_iface_schedule_ep_err(ep, status);
834+
start_time = ucs_sys_get_proc_create_time(pid);
835+
if (ka->start_time != start_time) {
836+
ucs_diag("ka failed for pid %d start time %lu != %lu", pid,
837+
ka->start_time, start_time);
838+
return uct_iface_schedule_ep_err(ep, UCS_ERR_ENDPOINT_TIMEOUT);
884839
}
885840

886841
return UCS_OK;

src/uct/base/uct_iface.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,7 @@ typedef struct uct_failed_iface {
295295
* Keepalive info used by EP
296296
*/
297297
typedef struct uct_keepalive_info {
298-
struct timespec start_time; /* Process start time */
299-
char proc[]; /* Process owner proc dir */
298+
unsigned long start_time; /* Process start time */
300299
} uct_keepalive_info_t;
301300

302301

@@ -849,9 +848,9 @@ ucs_status_t uct_base_ep_am_short_iov(uct_ep_h ep, uint8_t id, const uct_iov_t *
849848

850849
int uct_ep_get_process_proc_dir(char *buffer, size_t max_len, pid_t pid);
851850

852-
ucs_status_t uct_ep_keepalive_create(pid_t pid, uct_keepalive_info_t **ka_p);
851+
ucs_status_t uct_ep_keepalive_init(uct_keepalive_info_t *ka, pid_t pid);
853852

854-
ucs_status_t uct_ep_keepalive_check(uct_ep_h ep, uct_keepalive_info_t **ka_p,
853+
ucs_status_t uct_ep_keepalive_check(uct_ep_h ep, uct_keepalive_info_t *ka,
855854
pid_t pid, unsigned flags,
856855
uct_completion_t *comp);
857856

src/uct/cuda/cuda_ipc/cuda_ipc_ep.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,12 @@ static UCS_CLASS_INIT_FUNC(uct_cuda_ipc_ep_t, const uct_ep_params_t *params)
3232
UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);
3333

3434
self->remote_pid = *(const pid_t*)params->iface_addr;
35-
self->keepalive = NULL;
3635

37-
return UCS_OK;
36+
return uct_ep_keepalive_init(&self->keepalive, self->remote_pid);
3837
}
3938

4039
static UCS_CLASS_CLEANUP_FUNC(uct_cuda_ipc_ep_t)
4140
{
42-
ucs_free(self->keepalive);
4341
}
4442

4543
UCS_CLASS_DEFINE(uct_cuda_ipc_ep_t, uct_base_ep_t)

src/uct/cuda/cuda_ipc/cuda_ipc_ep.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
typedef struct uct_cuda_ipc_ep {
1515
uct_base_ep_t super;
1616
pid_t remote_pid;
17-
uct_keepalive_info_t *keepalive; /* keepalive metadata */
17+
uct_keepalive_info_t keepalive; /* keepalive metadata */
1818
} uct_cuda_ipc_ep_t;
1919

2020

src/uct/sm/mm/base/mm_ep.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,19 @@ static void uct_mm_ep_signal_remote(uct_mm_ep_t *ep)
124124
}
125125
}
126126

127+
void uct_mm_ep_cleanup_remote_segs(uct_mm_ep_t *ep)
128+
{
129+
uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface,
130+
uct_mm_iface_t);
131+
uct_mm_remote_seg_t remote_seg;
132+
133+
kh_foreach_value(&ep->remote_segs, remote_seg, {
134+
uct_mm_iface_mapper_call(iface, mem_detach, &remote_seg);
135+
})
136+
137+
kh_destroy_inplace(uct_mm_remote_seg, &ep->remote_segs);
138+
}
139+
127140
static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, const uct_ep_params_t *params)
128141
{
129142
uct_mm_iface_t *iface = ucs_derived_of(params->iface, uct_mm_iface_t);
@@ -163,14 +176,20 @@ static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, const uct_ep_params_t *params)
163176
/* Initialize remote FIFO control structure */
164177
uct_mm_iface_set_fifo_ptrs(fifo_ptr, &self->fifo_ctl, &self->fifo_elems);
165178
self->cached_tail = self->fifo_ctl->tail;
166-
self->keepalive = NULL;
167179
ucs_arbiter_elem_init(&self->arb_elem);
168180

181+
status = uct_ep_keepalive_init(&self->keepalive, self->fifo_ctl->pid);
182+
if (status != UCS_OK) {
183+
goto err_free_segs;
184+
}
185+
169186
ucs_debug("created mm ep %p, connected to remote FIFO id 0x%"PRIx64,
170187
self, addr->fifo_seg_id);
171188

172189
return UCS_OK;
173190

191+
err_free_segs:
192+
uct_mm_ep_cleanup_remote_segs(self);
174193
err_free_md_addr:
175194
ucs_free(self->remote_iface_addr);
176195
err:
@@ -179,18 +198,9 @@ static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, const uct_ep_params_t *params)
179198

180199
static UCS_CLASS_CLEANUP_FUNC(uct_mm_ep_t)
181200
{
182-
uct_mm_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_mm_iface_t);
183-
uct_mm_remote_seg_t remote_seg;
184-
185-
ucs_free(self->keepalive);
186201
uct_mm_ep_pending_purge(&self->super.super, NULL, NULL);
187-
188-
kh_foreach_value(&self->remote_segs, remote_seg, {
189-
uct_mm_iface_mapper_call(iface, mem_detach, &remote_seg);
190-
})
191-
202+
uct_mm_ep_cleanup_remote_segs(self);
192203
ucs_free(self->remote_iface_addr);
193-
kh_destroy_inplace(uct_mm_remote_seg, &self->remote_segs);
194204
}
195205

196206
UCS_CLASS_DEFINE(uct_mm_ep_t, uct_base_ep_t)

src/uct/sm/mm/base/mm_ep.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ typedef struct uct_mm_ep {
4848
the interface as long as one of the endpoints is unable to send */
4949
ucs_arbiter_elem_t arb_elem;
5050

51-
uct_keepalive_info_t *keepalive; /* keepalive info */
51+
uct_keepalive_info_t keepalive; /* keepalive info */
5252
} uct_mm_ep_t;
5353

5454

src/uct/sm/scopy/cma/cma_ep.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,14 @@ static UCS_CLASS_INIT_FUNC(uct_cma_ep_t, const uct_ep_params_t *params)
4242
UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params);
4343
UCS_CLASS_CALL_SUPER_INIT(uct_scopy_ep_t, params);
4444

45-
self->remote_pid = *(const pid_t*)params->iface_addr &
46-
~UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
47-
self->keepalive = NULL;
45+
self->remote_pid = *(const pid_t*)params->iface_addr &
46+
~UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
4847

49-
return UCS_OK;
48+
return uct_ep_keepalive_init(&self->keepalive, self->remote_pid);
5049
}
5150

5251
static UCS_CLASS_CLEANUP_FUNC(uct_cma_ep_t)
5352
{
54-
ucs_free(self->keepalive);
5553
}
5654

5755
UCS_CLASS_DEFINE(uct_cma_ep_t, uct_scopy_ep_t)

0 commit comments

Comments
 (0)