Skip to content

Commit b1f4dad

Browse files
author
Minh Quan Ho
committed
OPAL: enable async progress thread based on OPAL_ASYNC_PROGRESS
- The SW-based async progress thread was planned a long time ago in 683efcb, but has never been enabled/implemented since.
- This commit enables the spawning of an async progress thread to execute the _opal_progress() routine when OPAL_ENABLE_PROGRESS_THREADS is set at both compile time and runtime (env OPAL_ASYNC_PROGRESS).
- Fix a minor typo in the opal_progress.h doxygen comment.

Signed-off-by: Minh Quan Ho <[email protected]>
1 parent a477b22 commit b1f4dad

File tree

11 files changed

+173
-45
lines changed

11 files changed

+173
-45
lines changed

Diff for: config/opal_configure_options.m4

+17-5
Original file line numberDiff line numberDiff line change
@@ -544,9 +544,21 @@ fi
544544
AC_DEFINE_UNQUOTED([OPAL_ENABLE_GETPWUID], [$opal_want_getpwuid],
545545
[Disable getpwuid support (default: enabled)])
546546

547-
dnl We no longer support the old OPAL_ENABLE_PROGRESS_THREADS. At
548-
dnl some point, this should die.
549-
AC_DEFINE([OPAL_ENABLE_PROGRESS_THREADS],
550-
[0],
551-
[Whether we want BTL progress threads enabled])
547+
#
548+
# Disable progress threads
549+
#
550+
AC_MSG_CHECKING([if want asynchronous progress threads])
551+
AC_ARG_ENABLE([progress_threads],
552+
[AS_HELP_STRING([--disable-progress-threads],
553+
[Disable asynchronous progress threads (default: enabled)])])
554+
if test "$enable_progress_threads" = "no"; then
555+
AC_MSG_RESULT([no])
556+
opal_want_progress_threads=0
557+
else
558+
AC_MSG_RESULT([yes])
559+
opal_want_progress_threads=1
560+
fi
561+
AC_DEFINE_UNQUOTED([OPAL_ENABLE_PROGRESS_THREADS], [$opal_want_progress_threads],
562+
[Disable BTL asynchronous progress threads (default: enabled)])
563+
552564
])dnl

Diff for: ompi/instance/instance.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
512512
ddt_init, but before mca_coll_base_open, since some collective
513513
modules (e.g., the hierarchical coll component) may need ops in
514514
their query function. */
515-
if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
515+
if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
516516
return ompi_instance_print_error ("ompi_op_base_find_available() failed", ret);
517517
}
518518

@@ -532,7 +532,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
532532
return ompi_instance_print_error ("mca_smsc_base_select() failed", ret);
533533
}
534534

535-
if (OMPI_SUCCESS != (ret = mca_pml_base_select (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
535+
if (OMPI_SUCCESS != (ret = mca_pml_base_select (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
536536
return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
537537
}
538538

@@ -613,11 +613,11 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
613613
return ompi_instance_print_error ("mca_pml_base_bsend_init() failed", ret);
614614
}
615615

616-
if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
616+
if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
617617
return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret);
618618
}
619619

620-
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
620+
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
621621
return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
622622
}
623623

Diff for: ompi/mpi/c/request_get_status.c

+3-7
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ static const char FUNC_NAME[] = "MPI_Request_get_status";
4545
int MPI_Request_get_status(MPI_Request request, int *flag,
4646
MPI_Status *status)
4747
{
48-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
4948
int do_it_once = 0;
50-
#endif
5149

5250
MEMCHECKER(
5351
memchecker_request(&request);
@@ -63,9 +61,7 @@ int MPI_Request_get_status(MPI_Request request, int *flag,
6361
}
6462
}
6563

66-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
6764
recheck_request_status:
68-
#endif
6965
opal_atomic_mb();
7066
if( (request == MPI_REQUEST_NULL) || (request->req_state == OMPI_REQUEST_INACTIVE) ) {
7167
*flag = true;
@@ -87,16 +83,16 @@ int MPI_Request_get_status(MPI_Request request, int *flag,
8783
}
8884
return MPI_SUCCESS;
8985
}
90-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
91-
if( 0 == do_it_once ) {
86+
87+
if( 0 == do_it_once && !opal_async_progress_thread_spawned ) {
9288
/* If we run the opal_progress then check the status of the
9389
request before leaving. We will call the opal_progress only
9490
once per call. */
9591
opal_progress();
9692
do_it_once++;
9793
goto recheck_request_status;
9894
}
99-
#endif
95+
10096
*flag = false;
10197
return MPI_SUCCESS;
10298
}

Diff for: ompi/request/req_test.c

+12-14
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,9 @@ int ompi_request_default_test(ompi_request_t ** rptr,
3232
{
3333
ompi_request_t *request = *rptr;
3434

35-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
3635
int do_it_once = 0;
3736

3837
recheck_request_status:
39-
#endif
4038
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
4139
*completed = true;
4240
if (MPI_STATUS_IGNORE != status) {
@@ -81,8 +79,8 @@ int ompi_request_default_test(ompi_request_t ** rptr,
8179
return MPI_ERR_PROC_FAILED_PENDING;
8280
}
8381
#endif
84-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
85-
if( 0 == do_it_once ) {
82+
83+
if( 0 == do_it_once && !opal_async_progress_thread_spawned ) {
8684
/**
8785
* If we run the opal_progress then check the status of the request before
8886
* leaving. We will call the opal_progress only once per call.
@@ -92,7 +90,7 @@ int ompi_request_default_test(ompi_request_t ** rptr,
9290
goto recheck_request_status;
9391
}
9492
}
95-
#endif
93+
9694
*completed = false;
9795
return OMPI_SUCCESS;
9896
}
@@ -163,9 +161,9 @@ int ompi_request_default_test_any(
163161
*index = MPI_UNDEFINED;
164162
if(num_requests_null_inactive != count) {
165163
*completed = false;
166-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
167-
opal_progress();
168-
#endif
164+
if (!opal_async_progress_thread_spawned) {
165+
opal_progress();
166+
}
169167
} else {
170168
*completed = true;
171169
if (MPI_STATUS_IGNORE != status) {
@@ -208,16 +206,16 @@ int ompi_request_default_test_all(
208206
return MPI_ERR_PROC_FAILED_PENDING;
209207
}
210208
#endif /* OPAL_ENABLE_FT_MPI */
211-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
212-
if (0 == do_it_once) {
209+
210+
if (0 == do_it_once && !opal_async_progress_thread_spawned) {
213211
++do_it_once;
214212
if (0 != opal_progress()) {
215213
/* continue walking the list, retest the current request */
216214
--i;
217215
continue;
218216
}
219217
}
220-
#endif /* OPAL_ENABLE_PROGRESS_THREADS */
218+
221219
/* short-circuit */
222220
break;
223221
}
@@ -353,9 +351,9 @@ int ompi_request_default_test_some(
353351
*outcount = num_requests_done;
354352

355353
if (num_requests_done == 0) {
356-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
357-
opal_progress();
358-
#endif
354+
if (!opal_async_progress_thread_spawned) {
355+
opal_progress();
356+
}
359357
return OMPI_SUCCESS;
360358
}
361359

Diff for: ompi/runtime/ompi_mpi_finalize.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,10 @@ int ompi_mpi_finalize(void)
193193
opal_atomic_swap_32(&ompi_mpi_state,
194194
OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
195195

196-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
197-
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
198-
#endif
196+
/* shutdown async progress thread before tearing down further services */
197+
if (opal_async_progress_thread_spawned) {
198+
opal_progress_shutdown_async_progress_thread();
199+
}
199200

200201
/* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
201202
*all* connected processes. This only means that all processes

Diff for: ompi/runtime/ompi_mpi_init.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -522,16 +522,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
522522
time if so, then start the clock again */
523523
OMPI_TIMING_NEXT("barrier");
524524

525-
#if OPAL_ENABLE_PROGRESS_THREADS == 0
526525
/* Start setting up the event engine for MPI operations. Don't
527526
block in the event library, so that communications don't take
528527
forever between procs in the dynamic code. This will increase
529528
CPU utilization for the remainder of MPI_INIT when we are
530529
blocking on RTE-level events, but may greatly reduce non-TCP
531530
latency. */
532-
int old_event_flags = opal_progress_set_event_flag(0);
533-
opal_progress_set_event_flag(old_event_flags | OPAL_EVLOOP_NONBLOCK);
534-
#endif
531+
if (!opal_async_progress_thread_spawned) {
532+
int old_event_flags = opal_progress_set_event_flag(0);
533+
opal_progress_set_event_flag(old_event_flags | OPAL_EVLOOP_NONBLOCK);
534+
}
535535

536536
/* wire up the mpi interface, if requested. Do this after the
537537
non-block switch for non-TCP performance. Do before the

Diff for: opal/mca/btl/smcuda/btl_smcuda.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ static struct mca_btl_base_endpoint_t *create_sm_endpoint(int local_proc, struct
487487
OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
488488
#if OPAL_ENABLE_PROGRESS_THREADS == 1
489489
sprintf(path, "%s" OPAL_PATH_SEP "sm_fifo.%lu", opal_process_info.job_session_dir,
490-
(unsigned long) proc->proc_name);
490+
(unsigned long) proc->proc_name.vpid);
491491
ep->fifo_fd = open(path, O_WRONLY);
492492
if (ep->fifo_fd < 0) {
493493
opal_output(0, "mca_btl_smcuda_add_procs: open(%s) failed with errno=%d\n", path, errno);

Diff for: opal/mca/btl/smcuda/btl_smcuda_component.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,7 @@ mca_btl_smcuda_component_init(int *num_btls, bool enable_progress_threads, bool
859859
#if OPAL_ENABLE_PROGRESS_THREADS == 1
860860
/* create a named pipe to receive events */
861861
sprintf(mca_btl_smcuda_component.sm_fifo_path, "%s" OPAL_PATH_SEP "sm_fifo.%lu",
862-
opal_process_info.job_session_dir, (unsigned long) OPAL_PROC_MY_NAME->vpid);
862+
opal_process_info.job_session_dir, (unsigned long) OPAL_PROC_MY_NAME.vpid);
863863
if (mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) {
864864
opal_output(0, "mca_btl_smcuda_component_init: mkfifo failed with errno=%d\n", errno);
865865
return NULL;

Diff for: opal/runtime/opal_params_core.h

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ OPAL_DECLSPEC extern int opal_initialized;
5959
OPAL_DECLSPEC extern bool opal_built_with_cuda_support;
6060
OPAL_DECLSPEC extern bool opal_built_with_rocm_support;
6161
OPAL_DECLSPEC extern bool opal_built_with_ze_support;
62+
OPAL_DECLSPEC extern bool opal_async_progress_thread_spawned;
6263

6364
/**
6465
* * Whether we want to enable CUDA GPU buffer send and receive support.

0 commit comments

Comments
 (0)