@@ -787,52 +787,6 @@ ucs_status_t uct_base_ep_am_short_iov(uct_ep_h ep, uint8_t id, const uct_iov_t *
787787 return status ;
788788}
789789
/*
 * Format the procfs directory path of a process ("/proc/<pid>") into @a buffer.
 *
 * @param buffer   Destination buffer; may be NULL only when @a max_len is 0.
 * @param max_len  Size passed to snprintf() as the buffer capacity.
 * @param pid      Process id; cast to int before formatting with "%d".
 *
 * @return The value returned by snprintf(). With (NULL, 0) this is the number
 *         of characters the path needs, excluding the terminating NUL, which
 *         lets a caller size an allocation before formatting for real.
 */
790- int uct_ep_get_process_proc_dir (char * buffer , size_t max_len , pid_t pid )
791- {
/* A NULL buffer is only legal for the "query the length" call (max_len == 0) */
792- ucs_assert ((buffer != NULL ) || (max_len == 0 ));
793- /* cppcheck-suppress nullPointer */
794- /* cppcheck-suppress ctunullpointer */
795- return snprintf (buffer , max_len , "/proc/%d" , (int )pid );
796- }
797-
/*
 * Allocate a keepalive info object for process @a pid and record the ctime of
 * its "/proc/<pid>" directory as the process start time.
 *
 * @param pid   Process whose procfs directory is inspected.
 * @param ka_p  Filled with the newly allocated object on success only; the
 *              object comes from ucs_malloc() and is not freed here on success.
 *
 * @return UCS_OK on success.
 *         UCS_ERR_NO_MEMORY if the path length query returns <= 0 or the
 *         allocation fails.
 *         Otherwise the error status returned by ucs_sys_get_file_time().
 */
798- ucs_status_t uct_ep_keepalive_create (pid_t pid , uct_keepalive_info_t * * ka_p )
799- {
800- uct_keepalive_info_t * ka ;
801- ucs_status_t status ;
802- int proc_len ;
803-
/* First pass: NULL buffer with zero size only computes the path length */
804- proc_len = uct_ep_get_process_proc_dir (NULL , 0 , pid );
805- if (proc_len <= 0 ) {
806- ucs_error ("failed to get length to hold path to a process directory" );
/* NOTE(review): reported as NO_MEMORY although no allocation was attempted */
807- status = UCS_ERR_NO_MEMORY ;
808- goto err ;
809- }
810-
/*
 * One allocation holds the struct plus proc_len + 1 extra bytes for the
 * NUL-terminated path (presumably ka->proc is a trailing array member;
 * its declaration is not visible here - TODO confirm).
 */
811- ka = ucs_malloc (sizeof (* ka ) + proc_len + 1 , "keepalive" );
812- if (ka == NULL ) {
813- ucs_error ("failed to allocate keepalive info" );
814- status = UCS_ERR_NO_MEMORY ;
815- goto err ;
816- }
817-
/* Second pass: actually write the path; the return value is not checked */
818- uct_ep_get_process_proc_dir (ka -> proc , proc_len + 1 , pid );
819-
/* The ctime of the procfs directory is stored as the process start time */
820- status = ucs_sys_get_file_time (ka -> proc , UCS_SYS_FILE_TIME_CTIME ,
821- & ka -> start_time );
822- if (status != UCS_OK ) {
823- ucs_error ("failed to get process start time" );
824- goto err_free_ka ;
825- }
826-
/* *ka_p is written only on the success path */
827- * ka_p = ka ;
828- return UCS_OK ;
829-
/* Error unwinding: release the allocation (if any), then return the status */
830- err_free_ka :
831- ucs_free (ka );
832- err :
833- return status ;
834- }
835-
836790static ucs_status_t uct_iface_schedule_ep_err (uct_ep_h ep , ucs_status_t status )
837791{
838792 uct_base_iface_t * iface = ucs_derived_of (ep -> iface , uct_base_iface_t );
@@ -856,31 +810,32 @@ static ucs_status_t uct_iface_schedule_ep_err(uct_ep_h ep, ucs_status_t status)
856810 return UCS_OK ;
857811}
858812
859- ucs_status_t uct_ep_keepalive_check (uct_ep_h ep , uct_keepalive_info_t * * ka_p ,
/*
 * Initialize caller-provided keepalive info by recording the creation time of
 * process @a pid, as reported by ucs_sys_get_proc_create_time().
 *
 * @param ka   Keepalive info to fill; only ka->start_time is written here.
 * @param pid  Process whose creation time is recorded.
 *
 * @return UCS_OK when a non-zero creation time was stored;
 *         UCS_ERR_ENDPOINT_TIMEOUT when the reported creation time is 0, which
 *         this function treats as failure (a diagnostic message is logged).
 */
813+ ucs_status_t uct_ep_keepalive_init (uct_keepalive_info_t * ka , pid_t pid )
814+ {
815+ ka -> start_time = ucs_sys_get_proc_create_time (pid );
/* A creation time of 0 is treated as "could not be obtained" */
816+ if (ka -> start_time == 0 ) {
/* NOTE(review): pid_t is passed to "%d" without a cast - confirm it is int-compatible */
817+ ucs_diag ("failed to get start time for pid %d" , pid );
818+ return UCS_ERR_ENDPOINT_TIMEOUT ;
819+ }
820+
821+ return UCS_OK ;
822+ }
823+
824+ ucs_status_t uct_ep_keepalive_check (uct_ep_h ep , uct_keepalive_info_t * ka ,
860825 pid_t pid , unsigned flags ,
861826 uct_completion_t * comp )
862827{
863- struct timespec create_time ;
864- uct_keepalive_info_t * ka ;
865- ucs_status_t status ;
828+ unsigned long start_time ;
866829
867830 UCT_EP_KEEPALIVE_CHECK_PARAM (flags , comp );
868831
869- if (* ka_p == NULL ) {
870- status = uct_ep_keepalive_create (pid , ka_p );
871- } else {
872- ka = * ka_p ;
873- status = ucs_sys_get_file_time (ka -> proc , UCS_SYS_FILE_TIME_CTIME ,
874- & create_time );
875- if ((status != UCS_OK ) ||
876- (ka -> start_time .tv_sec != create_time .tv_sec ) ||
877- (ka -> start_time .tv_nsec != create_time .tv_nsec )) {
878- status = UCS_ERR_ENDPOINT_TIMEOUT ;
879- }
880- }
832+ ucs_assert (ka -> start_time != 0 );
881833
882- if (status != UCS_OK ) {
883- return uct_iface_schedule_ep_err (ep , status );
834+ start_time = ucs_sys_get_proc_create_time (pid );
835+ if (ka -> start_time != start_time ) {
836+ ucs_diag ("ka failed for pid %d start time %lu != %lu" , pid ,
837+ ka -> start_time , start_time );
838+ return uct_iface_schedule_ep_err (ep , UCS_ERR_ENDPOINT_TIMEOUT );
884839 }
885840
886841 return UCS_OK ;
0 commit comments