From 956f893dc5fba9ac6783a05100b76ebb39a519b3 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 17 Nov 2023 15:22:15 -0800 Subject: [PATCH 01/90] make hypre_MPI_Comm a struct --- src/IJ_mv/HYPRE_IJMatrix.c | 6 +- src/IJ_mv/HYPRE_IJVector.c | 5 +- src/IJ_mv/IJMatrix_parcsr.c | 5 +- src/IJ_mv/IJVector_parcsr.c | 3 +- src/distributed_ls/Euclid/Euclid_dh.c | 7 +- src/distributed_ls/Euclid/ExternalRows_dh.c | 36 +++--- src/distributed_ls/Euclid/Factor_dh.c | 25 ++-- src/distributed_ls/Euclid/Mat_dh.c | 27 ++-- src/distributed_ls/Euclid/SubdomainGraph_dh.c | 36 +++--- src/distributed_ls/Euclid/TimeLog_dh.c | 5 +- src/distributed_ls/Euclid/blas_dh.c | 6 +- src/distributed_ls/Euclid/mat_dh_private.c | 31 ++--- src/distributed_ls/ParaSails/ConjGrad.c | 3 +- src/distributed_ls/ParaSails/DiagScale.c | 12 +- src/distributed_ls/ParaSails/FGmres.c | 3 +- src/distributed_ls/ParaSails/LoadBal.c | 19 +-- src/distributed_ls/ParaSails/Matrix.c | 37 +++--- src/distributed_ls/ParaSails/ParaSails.c | 57 +++++---- .../HYPRE_DistributedMatrixPilutSolver.c | 5 +- src/distributed_ls/pilut/comm.c | 19 +-- src/distributed_ls/pilut/parilut.c | 16 +-- src/distributed_ls/pilut/serilut.c | 3 +- src/distributed_ls/pilut/trifactor.c | 24 ++-- src/parcsr_block_mv/par_csr_block_comm.c | 9 +- src/parcsr_block_mv/par_csr_block_interp.c | 15 ++- src/parcsr_block_mv/par_csr_block_matrix.c | 6 +- .../par_csr_block_rap_communication.c | 9 +- src/parcsr_ls/amg_hybrid.c | 3 +- src/parcsr_ls/ams.c | 10 +- src/parcsr_ls/gen_redcs_mat.c | 48 ++++---- src/parcsr_ls/par_2s_interp.c | 10 +- src/parcsr_ls/par_amg_setup.c | 5 +- src/parcsr_ls/par_amgdd_helpers.c | 40 +++--- src/parcsr_ls/par_amgdd_setup.c | 13 +- src/parcsr_ls/par_amgdd_solve.c | 5 +- src/parcsr_ls/par_cgc_coarsen.c | 15 ++- src/parcsr_ls/par_coarse_parms.c | 3 +- src/parcsr_ls/par_coarsen.c | 6 +- src/parcsr_ls/par_cr.c | 11 +- src/parcsr_ls/par_gauss_elim.c | 9 +- src/parcsr_ls/par_gsmg.c | 3 +- src/parcsr_ls/par_ilu.c | 6 +- 
src/parcsr_ls/par_ilu_setup.c | 66 +++++----- src/parcsr_ls/par_interp.c | 15 ++- src/parcsr_ls/par_lr_interp.c | 18 ++- src/parcsr_ls/par_lr_restr.c | 6 +- src/parcsr_ls/par_mgr.c | 32 +++-- src/parcsr_ls/par_mgr_coarsen.c | 3 +- src/parcsr_ls/par_mgr_setup.c | 3 +- src/parcsr_ls/par_mod_lr_interp.c | 9 +- src/parcsr_ls/par_mod_multi_interp.c | 21 ++-- src/parcsr_ls/par_multi_interp.c | 7 +- src/parcsr_ls/par_rap_communication.c | 18 +-- src/parcsr_ls/par_relax.c | 10 +- src/parcsr_ls/par_relax_more.c | 5 +- src/parcsr_ls/par_restr.c | 3 +- src/parcsr_ls/par_scaled_matnorm.c | 3 +- src/parcsr_ls/par_stats.c | 5 +- src/parcsr_ls/par_strength.c | 8 +- src/parcsr_ls/par_sv_interp.c | 3 +- src/parcsr_ls/par_sv_interp_ln.c | 3 +- src/parcsr_ls/partial.c | 15 ++- src/parcsr_mv/HYPRE_parcsr_matrix.c | 16 +-- src/parcsr_mv/communicationT.c | 9 +- src/parcsr_mv/gen_fffc.c | 35 +++--- src/parcsr_mv/par_csr_assumed_part.c | 5 +- src/parcsr_mv/par_csr_bool_matrix.c | 18 +-- src/parcsr_mv/par_csr_communication.c | 34 ++--- src/parcsr_mv/par_csr_matop.c | 24 ++-- src/parcsr_mv/par_csr_matrix.c | 54 ++++---- src/parcsr_mv/par_csr_matrix_stats.c | 7 +- src/parcsr_mv/par_vector.c | 27 ++-- src/parcsr_mv/par_vector_batched.c | 6 +- src/sstruct_ls/maxwell_TV_setup.c | 4 +- src/sstruct_ls/sstruct_sharedDOFComm.c | 9 +- src/sstruct_mv/HYPRE_sstruct_graph.c | 4 +- src/sstruct_mv/sstruct_grid.c | 5 +- src/struct_ls/pfmg_setup.c | 7 +- src/struct_mv/assumed_part.c | 6 +- src/struct_mv/box_manager.c | 16 +-- src/struct_mv/struct_communication.c | 5 +- src/struct_mv/struct_grid.c | 10 +- src/struct_mv/struct_innerprod.c | 5 +- src/test/ij.c | 10 +- src/test/maxwell_unscaled.c | 4 +- src/test/sstruct.c | 4 +- src/test/sstruct_fac.c | 4 +- src/utilities/_hypre_utilities.h | 33 +++-- src/utilities/error.c | 2 +- src/utilities/exchange_data.c | 28 +++-- src/utilities/memory.c | 4 +- src/utilities/mpistubs.c | 116 +++++++++++------- src/utilities/mpistubs.h | 33 +++-- src/utilities/timing.c | 
12 +- 94 files changed, 845 insertions(+), 580 deletions(-) diff --git a/src/IJ_mv/HYPRE_IJMatrix.c b/src/IJ_mv/HYPRE_IJMatrix.c index 1ea0ce8de3..36b39913de 100644 --- a/src/IJ_mv/HYPRE_IJMatrix.c +++ b/src/IJ_mv/HYPRE_IJMatrix.c @@ -48,7 +48,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); - + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (ilower > iupper + 1 || ilower < 0) { @@ -92,7 +92,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, info[0] = ilower; info[1] = jlower; } - hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, 0, comm); + hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, 0, hcomm); row0 = info[0]; col0 = info[1]; @@ -102,7 +102,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, info[0] = iupper; info[1] = jupper; } - hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); rowN = info[0]; colN = info[1]; diff --git a/src/IJ_mv/HYPRE_IJVector.c b/src/IJ_mv/HYPRE_IJVector.c index 99ac4faf1d..ff15ce9935 100644 --- a/src/IJ_mv/HYPRE_IJVector.c +++ b/src/IJ_mv/HYPRE_IJVector.c @@ -39,6 +39,7 @@ HYPRE_IJVectorCreate( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (jlower > jupper + 1 || jlower < 0) { @@ -60,13 +61,13 @@ HYPRE_IJVectorCreate( MPI_Comm comm, { row0 = jlower; } - hypre_MPI_Bcast(&row0, 1, HYPRE_MPI_BIG_INT, 0, comm); + hypre_MPI_Bcast(&row0, 1, HYPRE_MPI_BIG_INT, 0, hcomm); /* proc (num_procs-1) has the last row */ if (my_id == (num_procs - 1)) { rowN = jupper; } - hypre_MPI_Bcast(&rowN, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&rowN, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); hypre_IJVectorGlobalFirstRow(vec) = row0; hypre_IJVectorGlobalNumRows(vec) = rowN - row0 + 1; diff --git a/src/IJ_mv/IJMatrix_parcsr.c b/src/IJ_mv/IJMatrix_parcsr.c index 281989a6d6..8405f941ee 100644 --- 
a/src/IJ_mv/IJMatrix_parcsr.c +++ b/src/IJ_mv/IJMatrix_parcsr.c @@ -2548,6 +2548,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) //HYPRE_Int row_len; HYPRE_Int max_num_threads; HYPRE_Int aux_flag, aux_flag_global; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_ANNOTATE_FUNC_BEGIN; @@ -2561,7 +2562,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) { aux_flag = 1; } - hypre_MPI_Allreduce(&aux_flag, &aux_flag_global, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&aux_flag, &aux_flag_global, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (aux_flag_global && (!aux_flag)) { hypre_MPI_Comm_rank(comm, &my_id); @@ -2619,7 +2620,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) }*/ off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); hypre_MPI_Allreduce(&off_proc_i_indx, &offd_proc_elmts, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); if (offd_proc_elmts) { max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); diff --git a/src/IJ_mv/IJVector_parcsr.c b/src/IJ_mv/IJVector_parcsr.c index 23e7307ac1..e6ad5ca337 100644 --- a/src/IJ_mv/IJVector_parcsr.c +++ b/src/IJ_mv/IJVector_parcsr.c @@ -607,6 +607,7 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); MPI_Comm comm = hypre_IJVectorComm(vector); HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!par_vector) { @@ -627,7 +628,7 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) HYPRE_Complex *off_proc_data; current_num_elmts = hypre_AuxParVectorCurrentOffProcElmts(aux_vector); hypre_MPI_Allreduce(¤t_num_elmts, &off_proc_elmts, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); if (off_proc_elmts) { max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(aux_vector); diff --git a/src/distributed_ls/Euclid/Euclid_dh.c 
b/src/distributed_ls/Euclid/Euclid_dh.c index 1c16c8d03b..4b25e929c6 100644 --- a/src/distributed_ls/Euclid/Euclid_dh.c +++ b/src/distributed_ls/Euclid/Euclid_dh.c @@ -407,7 +407,8 @@ void compute_rho_private(Euclid_dh ctx) bufGlobal[1] = bufLocal[1]; bufGlobal[2] = bufLocal[2]; } else { - hypre_MPI_Reduce(bufLocal, bufGlobal, 3, hypre_MPI_REAL, hypre_MPI_SUM, 0, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Reduce(bufLocal, bufGlobal, 3, hypre_MPI_REAL, hypre_MPI_SUM, 0, hcomm); } if (myid_dh == 0) { @@ -885,7 +886,9 @@ void reduce_timings_private(Euclid_dh ctx) HYPRE_Real bufOUT[TIMING_BINS]; hypre_TMemcpy(bufOUT, ctx->timing, HYPRE_Real, TIMING_BINS, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - hypre_MPI_Reduce(bufOUT, ctx->timing, TIMING_BINS, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); + + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Reduce(bufOUT, ctx->timing, TIMING_BINS, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); } ctx->timingsWereReduced = true; diff --git a/src/distributed_ls/Euclid/ExternalRows_dh.c b/src/distributed_ls/Euclid/ExternalRows_dh.c index e72e520ef3..2e35535175 100644 --- a/src/distributed_ls/Euclid/ExternalRows_dh.c +++ b/src/distributed_ls/Euclid/ExternalRows_dh.c @@ -187,11 +187,13 @@ void rcv_ext_storage_private(ExternalRows_dh er) if (logFile != NULL && er->debug) debug = true; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + /* get number of rows, and total nonzeros, that each lo-nabor will send */ for (i=0; ireq1+i); - hypre_MPI_Irecv(rcv_nz_counts+i, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, comm_dh, er->req2+i); + hypre_MPI_Irecv(rcv_row_counts+i, 1, HYPRE_MPI_INT, nabor, ROW_CT_TAG, hcomm, er->req1+i); + hypre_MPI_Irecv(rcv_nz_counts+i, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, hcomm, er->req2+i); } hypre_MPI_Waitall(loCount, er->req1, er->status); hypre_MPI_Waitall(loCount, er->req2, er->status); @@ -209,8 +211,8 @@ void rcv_ext_storage_private(ExternalRows_dh er) 
HYPRE_Int nabor = loNabors[i]; lengths[i] = (HYPRE_Int*)MALLOC_DH(nz*sizeof(HYPRE_Int)); CHECK_V_ERROR; numbers[i] = (HYPRE_Int*)MALLOC_DH(nz*sizeof(HYPRE_Int)); CHECK_V_ERROR; - hypre_MPI_Irecv(lengths[i], nz, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, comm_dh, er->req1+i); - hypre_MPI_Irecv(numbers[i], nz, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, comm_dh, er->req2+i); + hypre_MPI_Irecv(lengths[i], nz, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, hcomm, er->req1+i); + hypre_MPI_Irecv(numbers[i], nz, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, hcomm, er->req2+i); } hypre_MPI_Waitall(loCount, er->req1, er->status); hypre_MPI_Waitall(loCount, er->req2, er->status); @@ -305,14 +307,16 @@ void rcv_external_rows_private(ExternalRows_dh er) HYPRE_Int *extRowCval = er->cvalExt, *extRowFill = er->fillExt; HYPRE_Real *extRowAval = er->avalExt; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + /* start receives of external rows */ nz = 0; for (i=0; ireq1+i); - hypre_MPI_Irecv(extRowFill+offset, nz, HYPRE_MPI_INT, nabor, FILL_TAG, comm_dh, er->req2+i); - hypre_MPI_Irecv(extRowAval+offset, nz, hypre_MPI_REAL, nabor, AVAL_TAG, comm_dh, er->req3+i); + hypre_MPI_Irecv(extRowCval+offset, nz, HYPRE_MPI_INT, nabor, CVAL_TAG, hcomm, er->req1+i); + hypre_MPI_Irecv(extRowFill+offset, nz, HYPRE_MPI_INT, nabor, FILL_TAG, hcomm, er->req2+i); + hypre_MPI_Irecv(extRowAval+offset, nz, hypre_MPI_REAL, nabor, AVAL_TAG, hcomm, er->req3+i); offset += nz; } @@ -443,11 +447,13 @@ void send_ext_storage_private(ExternalRows_dh er) hypre_fprintf(logFile, "EXR send_ext_storage_private:: nz Count = %i\n", nz); } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + /* send number of rows, and total nonzeros, to higher ordered nabors */ for (i=0; ireq1+i); - hypre_MPI_Isend(&nz, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, comm_dh, er->req2+i); + hypre_MPI_Isend(&rowCount, 1, HYPRE_MPI_INT, nabor, ROW_CT_TAG, hcomm, er->req1+i); + hypre_MPI_Isend(&nz, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, hcomm, er->req2+i); } 
/* set up array for global row numbers */ @@ -462,8 +468,8 @@ void send_ext_storage_private(ExternalRows_dh er) */ for (i=0; ireq3+i); - hypre_MPI_Isend(nzCounts, rowCount, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, comm_dh, er->req4+i); + hypre_MPI_Isend(nzNumbers, rowCount, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, hcomm, er->req3+i); + hypre_MPI_Isend(nzCounts, rowCount, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, hcomm, er->req4+i); } END_FUNC_DH @@ -527,12 +533,14 @@ void send_external_rows_private(ExternalRows_dh er) } } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + /* start sends to higher-ordred nabors */ for (i=0; icval_req+i); - hypre_MPI_Isend(fillSend, nz, HYPRE_MPI_INT, nabor, FILL_TAG, comm_dh, er->fill_req+i); - hypre_MPI_Isend(avalSend, nz, hypre_MPI_REAL, nabor, AVAL_TAG, comm_dh, er->aval_req+i); + hypre_MPI_Isend(cvalSend, nz, HYPRE_MPI_INT, nabor, CVAL_TAG, hcomm, er->cval_req+i); + hypre_MPI_Isend(fillSend, nz, HYPRE_MPI_INT, nabor, FILL_TAG, hcomm, er->fill_req+i); + hypre_MPI_Isend(avalSend, nz, hypre_MPI_REAL, nabor, AVAL_TAG, hcomm, er->aval_req+i); } END_FUNC_DH } diff --git a/src/distributed_ls/Euclid/Factor_dh.c b/src/distributed_ls/Euclid/Factor_dh.c index 7934c064ba..51bf7d33ed 100644 --- a/src/distributed_ls/Euclid/Factor_dh.c +++ b/src/distributed_ls/Euclid/Factor_dh.c @@ -158,7 +158,8 @@ HYPRE_Int Factor_dhReadNz(Factor_dh mat) START_FUNC_DH HYPRE_Int ierr, retval = mat->rp[mat->m]; HYPRE_Int nz = retval; - ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm_dh); CHECK_MPI_ERROR(ierr); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); CHECK_MPI_ERROR(ierr); END_FUNC_VAL(retval) } @@ -370,12 +371,13 @@ static HYPRE_Int setup_receives_private(Factor_dh mat, HYPRE_Int *beg_rows, HYPR receive; this matching receive will be started later, in setup_sends_private. 
*/ - hypre_MPI_Isend(reqind+i, j-i, HYPRE_MPI_INT, this_pe, 444, comm_dh, &request); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Isend(reqind+i, j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); hypre_MPI_Request_free(&request); /* set up persistent comms for receiving the values from this_pe */ hypre_MPI_Recv_init(recvBuf+i, j-i, hypre_MPI_REAL, this_pe, 555, - comm_dh, req+num_recv); + hcomm, req+num_recv); ++num_recv; } @@ -401,6 +403,7 @@ static void setup_sends_private(Factor_dh mat, HYPRE_Int *inlist, HYPRE_Real *sendBuf; HYPRE_Int myidNEW = o2n_subdomain[myid_dh]; HYPRE_Int count; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); if (debug) { hypre_fprintf(logFile, "FACT \nSTARTING: setup_sends_private\n"); @@ -448,11 +451,11 @@ static void setup_sends_private(Factor_dh mat, HYPRE_Int *inlist, /* matching receive, for list of unknowns that will be sent, during the triangular solves, from ourselves to P_i */ - hypre_MPI_Irecv(rcvBuf, inlist[i], HYPRE_MPI_INT, i, 444, comm_dh, requests+count); + hypre_MPI_Irecv(rcvBuf, inlist[i], HYPRE_MPI_INT, i, 444, hcomm, requests+count); ++count; /* Set up the send */ - hypre_MPI_Send_init(sendBuf, inlist[i], hypre_MPI_REAL, i, 555, comm_dh, sendReq); + hypre_MPI_Send_init(sendBuf, inlist[i], hypre_MPI_REAL, i, 555, hcomm, sendReq); } } @@ -555,7 +558,8 @@ void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) outlist, debug); CHECK_V_ERROR; } - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); /* At this point, inlist[j] contains the number of indices that this processor must send to P_j. 
Processors next need to exchange the actual lists of required indices; this is done @@ -1123,7 +1127,8 @@ HYPRE_Real Factor_dhMaxPivotInverse(Factor_dh mat) if (np_dh == 1) { minGlobal = min; } else { - hypre_MPI_Reduce(&min, &minGlobal, 1, hypre_MPI_REAL, hypre_MPI_MIN, 0, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Reduce(&min, &minGlobal, 1, hypre_MPI_REAL, hypre_MPI_MIN, 0, hcomm); } if (minGlobal == 0) { @@ -1150,7 +1155,8 @@ HYPRE_Real Factor_dhMaxValue(Factor_dh mat) if (np_dh == 1) { maxGlobal = max; } else { - hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); } END_FUNC_VAL(maxGlobal) } @@ -1180,7 +1186,8 @@ HYPRE_Real Factor_dhCondEst(Factor_dh mat, Euclid_dh ctx) if (np_dh == 1) { maxGlobal = max; } else { - hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); } END_FUNC_VAL(maxGlobal) } diff --git a/src/distributed_ls/Euclid/Mat_dh.c b/src/distributed_ls/Euclid/Mat_dh.c index 400c0fe8fe..c691110aad 100644 --- a/src/distributed_ls/Euclid/Mat_dh.c +++ b/src/distributed_ls/Euclid/Mat_dh.c @@ -151,6 +151,7 @@ void Mat_dhMatVecSetup(Mat_dh mat) HYPRE_Int firstLocal = mat->beg_row; HYPRE_Int lastLocal = firstLocal+m; HYPRE_Int *beg_rows, *end_rows; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); mat->recv_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; mat->send_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; @@ -162,11 +163,9 @@ void Mat_dhMatVecSetup(Mat_dh mat) beg_rows[0] = 0; end_rows[0] = m; } else { - ierr = hypre_MPI_Allgather(&firstLocal, 1, HYPRE_MPI_INT, beg_rows, 1, 
HYPRE_MPI_INT, comm_dh); + ierr = hypre_MPI_Allgather(&firstLocal, 1, HYPRE_MPI_INT, beg_rows, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); - CHECK_MPI_V_ERROR(ierr); - - ierr = hypre_MPI_Allgather(&lastLocal, 1, HYPRE_MPI_INT, end_rows, 1, HYPRE_MPI_INT, comm_dh); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Allgather(&lastLocal, 1, HYPRE_MPI_INT, end_rows, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); } outlist = (HYPRE_Int *)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -187,7 +186,7 @@ void Mat_dhMatVecSetup(Mat_dh mat) if (np_dh == 1) { /* this is for debugging purposes in some of the drivers */ inlist[0] = outlist[0]; } else { - ierr = hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, comm_dh); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); } setup_matvec_sends_private(mat, inlist); CHECK_V_ERROR; @@ -220,6 +219,7 @@ void setup_matvec_receives_private(Mat_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *e HYPRE_Int ierr, i, j, this_pe; hypre_MPI_Request request; HYPRE_Int m = mat->m; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); mat->num_recv = 0; @@ -240,14 +240,14 @@ void setup_matvec_receives_private(Mat_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *e } /* Request rows in reqind[i..j-1] */ - ierr = hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm_dh, &request); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); CHECK_MPI_V_ERROR(ierr); ierr = hypre_MPI_Request_free(&request); CHECK_MPI_V_ERROR(ierr); /* Count of number of number of indices needed from this_pe */ outlist[this_pe] = j-i; ierr = hypre_MPI_Recv_init(&mat->recvbuf[i+m], j-i, hypre_MPI_REAL, this_pe, 555, - comm_dh, &mat->recv_req[mat->num_recv]); CHECK_MPI_V_ERROR(ierr); + hcomm, &mat->recv_req[mat->num_recv]); CHECK_MPI_V_ERROR(ierr); mat->num_recv++; mat->recvlen += j-i; /* only 
used for statistical reporting */ @@ -265,6 +265,7 @@ void setup_matvec_sends_private(Mat_dh mat, HYPRE_Int *inlist) HYPRE_Int ierr, i, j, sendlen, first = mat->beg_row; hypre_MPI_Request *requests; hypre_MPI_Status *statuses; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); requests = (hypre_MPI_Request *) MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; statuses = (hypre_MPI_Status *) MALLOC_DH(np_dh * sizeof(hypre_MPI_Status)); CHECK_V_ERROR; @@ -281,10 +282,10 @@ void setup_matvec_sends_private(Mat_dh mat, HYPRE_Int *inlist) for (i=0; isendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm_dh, + ierr = hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, hcomm, &requests[mat->num_send]); CHECK_MPI_V_ERROR(ierr); /* Set up the send */ - ierr = hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm_dh, + ierr = hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, hcomm, &mat->send_req[mat->num_send]); CHECK_MPI_V_ERROR(ierr); mat->num_send++; @@ -538,7 +539,8 @@ HYPRE_Int Mat_dhReadNz(Mat_dh mat) START_FUNC_DH HYPRE_Int ierr, retval = mat->rp[mat->m]; HYPRE_Int nz = retval; - ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm_dh); CHECK_MPI_ERROR(ierr); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); CHECK_MPI_ERROR(ierr); END_FUNC_VAL(retval) } @@ -596,8 +598,9 @@ void Mat_dhReduceTiming(Mat_dh mat) if (mat->time[MATVEC_MPI_TIME]) { mat->time[MATVEC_RATIO] = mat->time[MATVEC_TIME] / mat->time[MATVEC_MPI_TIME]; } - hypre_MPI_Allreduce(mat->time, mat->time_min, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MIN, comm_dh); - hypre_MPI_Allreduce(mat->time, mat->time_max, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MAX, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Allreduce(mat->time, mat->time_min, MAT_DH_BINS, hypre_MPI_REAL, 
hypre_MPI_MIN, hcomm); + hypre_MPI_Allreduce(mat->time, mat->time_max, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); END_FUNC_DH } diff --git a/src/distributed_ls/Euclid/SubdomainGraph_dh.c b/src/distributed_ls/Euclid/SubdomainGraph_dh.c index c0c0ceb6c4..4740cee78b 100644 --- a/src/distributed_ls/Euclid/SubdomainGraph_dh.c +++ b/src/distributed_ls/Euclid/SubdomainGraph_dh.c @@ -467,6 +467,7 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) HYPRE_Int m, n, beg_row; bool symmetric; HYPRE_Real t1; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); symmetric = Parser_dhHasSwitch(parser_dh, "-sym"); CHECK_V_ERROR; if (Parser_dhHasSwitch(parser_dh, "-makeSymmetric")) { @@ -494,8 +495,8 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) * At this point, beg_rowP[] is a copy of beg_row[]) *-------------------------------------------------------------*/ if (!bj) { - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, s->beg_row, 1, HYPRE_MPI_INT, comm_dh); - hypre_MPI_Allgather(&m, 1, HYPRE_MPI_INT, s->row_count, 1, HYPRE_MPI_INT, comm_dh); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, s->beg_row, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&m, 1, HYPRE_MPI_INT, s->row_count, 1, HYPRE_MPI_INT, hcomm); hypre_TMemcpy(s->beg_rowP, s->beg_row, HYPRE_Int, np_dh, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); } else { s->beg_row[myid_dh] = beg_row; @@ -548,7 +549,7 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) } /* exchange number of boundary rows with all neighbors */ - hypre_MPI_Allgather(&bdryCount, 1, HYPRE_MPI_INT, s->bdry_count, 1, HYPRE_MPI_INT, comm_dh); + hypre_MPI_Allgather(&bdryCount, 1, HYPRE_MPI_INT, s->bdry_count, 1, HYPRE_MPI_INT, hcomm); /* form local permutation */ idx = 0; @@ -649,6 +650,7 @@ void SubdomainGraph_dhExchangePerms(SubdomainGraph_dh s) HYPRE_Int myFirstBdry = m - myBdryCount; HYPRE_Int *n2o_row = s->n2o_row; Hash_i_dh n2o_table, 
o2n_table; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); if (logFile != NULL && s->debug) debug = true; @@ -704,15 +706,14 @@ void SubdomainGraph_dhExchangePerms(SubdomainGraph_dh s) HYPRE_Int *buf = recvBuf + naborIdx[i]; HYPRE_Int ct = 2*bdryNodeCounts[nabr]; - - hypre_MPI_Isend(sendBuf, 2*myBdryCount, HYPRE_MPI_INT, nabr, 444, comm_dh, &(send_req[i])); + hypre_MPI_Isend(sendBuf, 2*myBdryCount, HYPRE_MPI_INT, nabr, 444, hcomm, &(send_req[i])); if (debug) { hypre_fprintf(logFile , "SUBG sending %i elts to %i\n", 2*myBdryCount, nabr); fflush(logFile); } - hypre_MPI_Irecv(buf, ct, HYPRE_MPI_INT, nabr, 444, comm_dh, &(recv_req[i])); + hypre_MPI_Irecv(buf, ct, HYPRE_MPI_INT, nabr, 444, hcomm, &(recv_req[i])); if (debug) { hypre_fprintf(logFile, "SUBG receiving %i elts from %i\n", ct, nabr); @@ -765,19 +766,20 @@ void form_subdomaingraph_mpi_private(SubdomainGraph_dh s) HYPRE_Int i, j, nz, *adj, *ptrs = s->ptrs; hypre_MPI_Request *recvReqs = NULL, sendReq; hypre_MPI_Status *statuses = NULL, status; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); /* all processors tell root how many nabors they have */ if (myid_dh == 0) { idxAll = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; } - hypre_MPI_Gather(&nct, 1, HYPRE_MPI_INT, idxAll, 1, HYPRE_MPI_INT, 0, comm_dh); + hypre_MPI_Gather(&nct, 1, HYPRE_MPI_INT, idxAll, 1, HYPRE_MPI_INT, 0, hcomm); /* root counts edges in graph, and broacasts to all */ if (myid_dh == 0) { nz = 0; for (i=0; iallNabors, *myNabors; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); myNabors = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -987,7 +990,7 @@ hypre_fprintf(stderr, "\n"); */ /* find out who my neighbors are that I cannot discern locally */ - hypre_MPI_Alltoall(marker, 1, HYPRE_MPI_INT, nabors, 1, HYPRE_MPI_INT, comm_dh); CHECK_V_ERROR; + hypre_MPI_Alltoall(marker, 1, HYPRE_MPI_INT, nabors, 1, 
HYPRE_MPI_INT, hcomm); CHECK_V_ERROR; /* add in neighbors that I know about from scanning my adjacency lists */ for (i=0; idesc[t->last], "========== totals, and reset ==========\n"); t->last += 1; - hypre_MPI_Allreduce(t->time, timeMax, t->last, hypre_MPI_REAL, hypre_MPI_MAX, comm_dh); - hypre_MPI_Allreduce(t->time, timeMin, t->last, hypre_MPI_REAL, hypre_MPI_MIN, comm_dh); + hypre_MPI_Allreduce(t->time, timeMax, t->last, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(t->time, timeMin, t->last, hypre_MPI_REAL, hypre_MPI_MIN, hcomm); wasSummed = true; } diff --git a/src/distributed_ls/Euclid/blas_dh.c b/src/distributed_ls/Euclid/blas_dh.c index a75cbc5d26..c4f9feeffd 100644 --- a/src/distributed_ls/Euclid/blas_dh.c +++ b/src/distributed_ls/Euclid/blas_dh.c @@ -114,7 +114,8 @@ HYPRE_Real InnerProd(HYPRE_Int n, HYPRE_Real *x, HYPRE_Real *y) } if (np_dh > 1) { - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); } else { result = local_result; } @@ -140,7 +141,8 @@ HYPRE_Real Norm2(HYPRE_Int n, HYPRE_Real *x) } if (np_dh > 1) { - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); } else { result = local_result; } diff --git a/src/distributed_ls/Euclid/mat_dh_private.c b/src/distributed_ls/Euclid/mat_dh_private.c index 688d49e50a..d267e3d08e 100644 --- a/src/distributed_ls/Euclid/mat_dh_private.c +++ b/src/distributed_ls/Euclid/mat_dh_private.c @@ -1028,7 +1028,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) /* broadcast number of rows to all processors */ if (myid_dh == 0) m = A->m; - hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); + 
hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /* broadcast number of nonzeros in each row to all processors */ rowLengths = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1040,7 +1040,8 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) rowLengths[i] = tmp[i+1] - tmp[i]; } } - hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, comm_dh); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, hcomm); /* partition matrix */ if (myid_dh == 0) { @@ -1060,7 +1061,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) } /* broadcast partitiioning information to all processors */ - hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, comm_dh); + hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, hcomm); /* allocate storage for local portion of matrix */ mat_par_read_allocate_private(&B, m, rowLengths, rowToBlock); CHECK_V_ERROR; @@ -1081,8 +1082,8 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, comm_dh, send_req+2*i); - hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, comm_dh, send_req+2*i+1); + hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, hcomm, send_req+2*i); + hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, hcomm, send_req+2*i+1); } } @@ -1104,8 +1105,8 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, comm_dh, rcv_req+2*i); - hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, comm_dh, rcv_req+2*i+1); + hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, hcomm, rcv_req+2*i); + hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, hcomm, rcv_req+2*i+1); } } @@ -1152,7 +1153,7 @@ void 
partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) /* broadcast number of rows to all processors */ if (myid_dh == 0) m = A->m; - hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /* broadcast number of nonzeros in each row to all processors */ rowLengths = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1162,7 +1163,9 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) rowLengths[i] = tmp[i+1] - tmp[i]; } } - hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, comm_dh); + + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); + hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, hcomm); /* partition matrix */ rowToBlock = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1173,7 +1176,7 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) } /* broadcast partitiioning information to all processors */ - hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, comm_dh); + hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, hcomm); /* allocate storage for local portion of matrix */ mat_par_read_allocate_private(&B, m, rowLengths, rowToBlock); CHECK_V_ERROR; @@ -1194,8 +1197,8 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, comm_dh, send_req+2*i); - hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, comm_dh, send_req+2*i+1); + hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, hcomm, send_req+2*i); + hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, hcomm, send_req+2*i+1); } } @@ -1217,8 +1220,8 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, comm_dh, rcv_req+2*i); - hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, comm_dh, 
rcv_req+2*i+1); + hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, hcomm, rcv_req+2*i); + hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, hcomm, rcv_req+2*i+1); } } diff --git a/src/distributed_ls/ParaSails/ConjGrad.c b/src/distributed_ls/ParaSails/ConjGrad.c index 0ef71b36fc..659f869cd3 100644 --- a/src/distributed_ls/ParaSails/ConjGrad.c +++ b/src/distributed_ls/ParaSails/ConjGrad.c @@ -21,11 +21,12 @@ static HYPRE_Real InnerProd(HYPRE_Int n, HYPRE_Real *x, HYPRE_Real *y, MPI_Comm comm) { HYPRE_Real local_result, result; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int one = 1; local_result = hypre_ddot(&n, x, &one, y, &one); - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); return result; } diff --git a/src/distributed_ls/ParaSails/DiagScale.c b/src/distributed_ls/ParaSails/DiagScale.c index a347cf6bd9..24efb44258 100644 --- a/src/distributed_ls/ParaSails/DiagScale.c +++ b/src/distributed_ls/ParaSails/DiagScale.c @@ -48,6 +48,7 @@ static void ExchangeDiagEntries(MPI_Comm comm, Matrix *mat, HYPRE_Int reqlen, { hypre_MPI_Request request; HYPRE_Int i, j, this_pe; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_shell_sort(reqlen, reqind); @@ -69,11 +70,11 @@ static void ExchangeDiagEntries(MPI_Comm comm, Matrix *mat, HYPRE_Int reqlen, /* Post receive for diagonal values */ hypre_MPI_Irecv(&diags[i], j-i, hypre_MPI_REAL, this_pe, DIAG_VALS_TAG, - comm, &requests[*num_requests]); + hcomm, &requests[*num_requests]); /* Request rows in reqind[i..j-1] */ hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, DIAG_INDS_TAG, - comm, &request); + hcomm, &request); hypre_MPI_Request_free(&request); (*num_requests)++; @@ -99,13 +100,14 @@ static void ExchangeDiagEntriesServer(MPI_Comm comm, Matrix *mat, HYPRE_Int *recvbuf; HYPRE_Real *sendbuf; HYPRE_Int i, j, source, count; + 
hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* recvbuf contains requested indices */ /* sendbuf contains corresponding diagonal entries */ for (i=0; ibeg_row - 1; /* imaginary end of previous block */ @@ -184,7 +186,7 @@ void LoadBalDonorSend(MPI_Comm comm, Matrix *mat, Numbering *numb, } hypre_MPI_Isend(donor_data[i].buffer, buflen, HYPRE_MPI_INT, donor_data[i].pe, - LOADBAL_REQ_TAG, comm, &request[i]); + LOADBAL_REQ_TAG, hcomm, &request[i]); } *local_beg_row = send_end_row + 1; @@ -204,16 +206,17 @@ void LoadBalRecipRecv(MPI_Comm comm, Numbering *numb, HYPRE_Int *buffer, *bufferp; HYPRE_Int beg_row, end_row; HYPRE_Int len; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); for (i=0; ibeg_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); mat->end_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, comm); - hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, hcomm); mat->num_recv = 0; mat->num_send = 0; @@ -236,6 +237,8 @@ HYPRE_Int MatrixRowPe(Matrix *mat, HYPRE_Int row) HYPRE_Int MatrixNnz(Matrix *mat) { HYPRE_Int num_local, i, total, alltotal; + MPI_Comm comm = mat->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_local = mat->end_row - mat->beg_row + 1; @@ -243,7 +246,7 @@ HYPRE_Int MatrixNnz(Matrix *mat) for (i=0; ilens[i]; - hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, mat->comm); + hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); return alltotal; } @@ -315,6 +318,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) hypre_MPI_Comm_size(mat->comm, &npes); hypre_MPI_Comm_rank(mat->comm, &mype); + hypre_MPI_Comm hcomm = 
hypre_MPI_CommFromMPI_Comm(comm); file = fopen(filename, "r"); hypre_assert(file != NULL); @@ -344,7 +348,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) { hypre_MPI_Wait(&request, &status); outbuf = offset; - hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, comm, &request); + hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, hcomm, &request); curr_proc++; } offset = ftell(file); @@ -421,6 +425,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) static void MatrixReadSlave(Matrix *mat, char *filename) { MPI_Comm comm = mat->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status status; HYPRE_Int mype; FILE *file; @@ -441,7 +446,7 @@ static void MatrixReadSlave(Matrix *mat, char *filename) hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, comm, &status); + hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, hcomm, &status); time0 = hypre_MPI_Wtime(); ret = fseek(file, offset, SEEK_SET); @@ -533,12 +538,13 @@ void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) hypre_MPI_Comm_size(mat->comm, &npes); hypre_MPI_Comm_rank(mat->comm, &mype); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(mat->comm); num_local = mat->end_row - mat->beg_row + 1; if (mype != 0) { - hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, mat->comm, &status); + hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, hcomm, &status); return; } @@ -577,7 +583,7 @@ void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) else hypre_fscanf(file, "%lf", &buffer[i]); - hypre_MPI_Send(buffer, num_local, hypre_MPI_REAL, pe, 0, mat->comm); + hypre_MPI_Send(buffer, num_local, hypre_MPI_REAL, pe, 0, hcomm); } hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -592,6 +598,7 @@ static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPR HYPRE_Int i, j, this_pe, mype; hypre_MPI_Request request; MPI_Comm comm = mat->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; hypre_MPI_Comm_rank(comm, &mype); @@ -618,17 +625,17 @@ static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPR } /* Request rows in reqind[i..j-1] */ - hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm, &request); + hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); hypre_MPI_Request_free(&request); /* Count of number of number of indices needed from this_pe */ outlist[this_pe] = j-i; hypre_MPI_Recv_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 555, - comm, &mat->recv_req[mat->num_recv]); + hcomm, &mat->recv_req[mat->num_recv]); hypre_MPI_Send_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 666, - comm, &mat->send_req2[mat->num_recv]); + hcomm, &mat->send_req2[mat->num_recv]); mat->num_recv++; } @@ -645,6 +652,7 @@ static void SetupSends(Matrix *mat, HYPRE_Int *inlist) hypre_MPI_Request *requests; hypre_MPI_Status *statuses; MPI_Comm comm = mat->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); @@ -671,15 +679,15 @@ static void SetupSends(Matrix *mat, HYPRE_Int *inlist) if (inlist[i] != 0) { /* Post receive for the actual indices */ - hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm, + hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, hcomm, &requests[mat->num_send]); /* Set up the send */ - hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm, + hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, hcomm, &mat->send_req[mat->num_send]); /* Set up the receive for the transpose */ - hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, comm, + hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, hcomm, &mat->recv_req2[mat->num_send]); mat->num_send++; @@ -711,6 +719,7 @@ void MatrixComplete(Matrix *mat) 
hypre_MPI_Comm_rank(mat->comm, &mype); hypre_MPI_Comm_size(mat->comm, &npes); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(mat->comm); mat->recv_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); mat->send_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); @@ -727,7 +736,7 @@ void MatrixComplete(Matrix *mat) SetupReceives(mat, mat->numb->num_ind - mat->numb->num_loc, &mat->numb->local_to_global[mat->numb->num_loc], outlist); - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, mat->comm); + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); SetupSends(mat, inlist); diff --git a/src/distributed_ls/ParaSails/ParaSails.c b/src/distributed_ls/ParaSails/ParaSails.c index e09236d417..808852e89f 100644 --- a/src/distributed_ls/ParaSails/ParaSails.c +++ b/src/distributed_ls/ParaSails/ParaSails.c @@ -63,10 +63,11 @@ HYPRE_Int FindNumReplies(MPI_Comm comm, HYPRE_Int *replies_list) hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); replies_list2 = hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); - hypre_MPI_Allreduce(replies_list, replies_list2, npes, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(replies_list, replies_list2, npes, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); num_replies = replies_list2[mype]; hypre_TFree(replies_list2,HYPRE_MEMORY_HOST); @@ -98,6 +99,7 @@ static void SendRequests(MPI_Comm comm, HYPRE_Int tag, Matrix *mat, HYPRE_Int re { hypre_MPI_Request request; HYPRE_Int i, j, this_pe; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_shell_sort(reqlen, reqind); @@ -119,7 +121,7 @@ static void SendRequests(MPI_Comm comm, HYPRE_Int tag, Matrix *mat, HYPRE_Int re /* Request rows in reqind[i..j-1] */ hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, tag, - comm, &request); + hcomm, &request); hypre_MPI_Request_free(&request); (*num_requests)++; @@ -148,9 +150,10 @@ static 
void SendRequests(MPI_Comm comm, HYPRE_Int tag, Matrix *mat, HYPRE_Int re static void ReceiveRequest(MPI_Comm comm, HYPRE_Int *source, HYPRE_Int tag, HYPRE_Int **buffer, HYPRE_Int *buflen, HYPRE_Int *count) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status status; - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, tag, comm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, tag, hcomm, &status); *source = status.hypre_MPI_SOURCE; hypre_MPI_Get_count(&status, HYPRE_MPI_INT, count); @@ -161,7 +164,7 @@ static void ReceiveRequest(MPI_Comm comm, HYPRE_Int *source, HYPRE_Int tag, HYPR *buffer = hypre_TAlloc(HYPRE_Int, *buflen , HYPRE_MEMORY_HOST); } - hypre_MPI_Recv(*buffer, *count, HYPRE_MPI_INT, *source, tag, comm, &status); + hypre_MPI_Recv(*buffer, *count, HYPRE_MPI_INT, *source, tag, hcomm, &status); } /*-------------------------------------------------------------------------- @@ -190,6 +193,7 @@ static void SendReplyPrunedRows(MPI_Comm comm, Numbering *numb, HYPRE_Int sendbacksize, j; HYPRE_Int len, *ind, *indbuf, *indbufp; HYPRE_Int temp; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Determine the size of the integer message we need to send back */ sendbacksize = count+1; /* length of header part */ @@ -226,7 +230,7 @@ static void SendReplyPrunedRows(MPI_Comm comm, Numbering *numb, } hypre_MPI_Isend(indbuf, indbufp-indbuf, HYPRE_MPI_INT, dest, ROW_REPI_TAG, - comm, request); + hcomm, request); } /*-------------------------------------------------------------------------- @@ -245,15 +249,16 @@ static void ReceiveReplyPrunedRows(MPI_Comm comm, Numbering *numb, hypre_MPI_Status status; HYPRE_Int source, count; HYPRE_Int len, *ind, num_rows, *row_nums, j; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Don't know the size of reply, so use probe and get count */ - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, comm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, hcomm, &status); source = 
status.hypre_MPI_SOURCE; hypre_MPI_Get_count(&status, HYPRE_MPI_INT, &count); /* Allocate space in stored rows data structure */ ind = PrunedRowsAlloc(pruned_rows, count); - hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, comm, &status); + hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, hcomm, &status); /* Parse the message */ num_rows = *ind++; /* number of rows */ @@ -304,6 +309,7 @@ static void SendReplyStoredRows(MPI_Comm comm, Numbering *numb, HYPRE_Int len, *ind, *indbuf, *indbufp; HYPRE_Real *val, *valbuf, *valbufp; HYPRE_Int temp; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Determine the size of the integer message we need to send back */ sendbacksize = count+1; /* length of header part */ @@ -345,12 +351,12 @@ static void SendReplyStoredRows(MPI_Comm comm, Numbering *numb, } hypre_MPI_Isend(indbuf, indbufp-indbuf, HYPRE_MPI_INT, dest, ROW_REPI_TAG, - comm, request); + hcomm, request); hypre_MPI_Request_free(request); hypre_MPI_Isend(valbuf, valbufp-valbuf, hypre_MPI_REAL, dest, ROW_REPV_TAG, - comm, request); + hcomm, request); } /*-------------------------------------------------------------------------- @@ -368,17 +374,18 @@ static void ReceiveReplyStoredRows(MPI_Comm comm, Numbering *numb, HYPRE_Int source, count; HYPRE_Int len, *ind, num_rows, *row_nums, j; HYPRE_Real *val; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Don't know the size of reply, so use probe and get count */ - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, comm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, hcomm, &status); source = status.hypre_MPI_SOURCE; hypre_MPI_Get_count(&status, HYPRE_MPI_INT, &count); /* Allocate space in stored rows data structure */ ind = StoredRowsAllocInd(stored_rows, count); - hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, comm, &status); + hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, hcomm, &status); val = 
StoredRowsAllocVal(stored_rows, count); - hypre_MPI_Recv(val, count, hypre_MPI_REAL, source, ROW_REPV_TAG, comm, &status); + hypre_MPI_Recv(val, count, hypre_MPI_REAL, source, ROW_REPV_TAG, hcomm, &status); /* Parse the message */ num_rows = *ind++; /* number of rows */ @@ -1407,6 +1414,7 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, HYPRE_Real *val; HYPRE_Real localsum = 0.0, sum; HYPRE_Real temp; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Buffer for storing the values in each row when computing the i-th smallest element - buffer will grow if necessary */ @@ -1442,7 +1450,7 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, } /* Find the average across all processors */ - hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_MPI_Comm_size(comm, &npes); hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -1461,6 +1469,7 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, HYPRE_Real *val; HYPRE_Real localsum = 0.0, sum; HYPRE_Real temp = 1.0; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Buffer for storing the values in each row when computing the i-th smallest element - buffer will grow if necessary */ @@ -1498,7 +1507,7 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, } /* Find the average across all processors */ - hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_MPI_Comm_size(comm, &npes); hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -1643,12 +1652,13 @@ ParaSails *ParaSailsCreate(MPI_Comm comm, HYPRE_Int beg_row, HYPRE_Int end_row, ps->end_row = end_row; hypre_MPI_Comm_size(comm, &npes); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); ps->beg_rows = hypre_TAlloc(HYPRE_Int, 
npes , HYPRE_MEMORY_HOST); ps->end_rows = hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, ps->beg_rows, 1, HYPRE_MPI_INT, comm); - hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, ps->end_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, ps->beg_rows, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, ps->end_rows, 1, HYPRE_MPI_INT, hcomm); return ps; } @@ -1782,6 +1792,7 @@ HYPRE_Int ParaSailsSetupValues(ParaSails *ps, Matrix *A, HYPRE_Real filter) HYPRE_Int i; HYPRE_Real time0, time1; MPI_Comm comm = ps->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int error = 0, error_sum; time0 = hypre_MPI_Wtime(); @@ -1846,7 +1857,7 @@ HYPRE_Int ParaSailsSetupValues(ParaSails *ps, Matrix *A, HYPRE_Real filter) LoadBalReturn(load_bal, ps->comm, ps->M); /* check if there was an error in computing the approximate inverse */ - hypre_MPI_Allreduce(&error, &error_sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&error, &error_sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (error_sum != 0) { hypre_printf("Hypre-ParaSails detected a problem. 
The input matrix\n"); @@ -1979,6 +1990,7 @@ HYPRE_Real ParaSailsStatsPattern(ParaSails *ps, Matrix *A) HYPRE_Int n, nnzm, nnza; MPI_Comm comm = ps->comm; HYPRE_Real max_pattern_time, max_cost, ave_cost; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); @@ -1992,9 +2004,9 @@ HYPRE_Real ParaSailsStatsPattern(ParaSails *ps, Matrix *A) } hypre_MPI_Allreduce(&ps->setup_pattern_time, &max_pattern_time, - 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); - hypre_MPI_Allreduce(&ps->cost, &max_cost, 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); - hypre_MPI_Allreduce(&ps->cost, &ave_cost, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); + 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&ps->cost, &max_cost, 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&ps->cost, &ave_cost, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); ave_cost = ave_cost / (HYPRE_Real) npes; if (mype) @@ -2031,6 +2043,7 @@ void ParaSailsStatsValues(ParaSails *ps, Matrix *A) hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); nnzm = MatrixNnz(ps->M); nnza = MatrixNnz(A); @@ -2041,13 +2054,13 @@ void ParaSailsStatsValues(ParaSails *ps, Matrix *A) } hypre_MPI_Allreduce(&ps->setup_values_time, &max_values_time, - 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); + 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); if (!mype) setup_times = hypre_TAlloc(HYPRE_Real, npes , HYPRE_MEMORY_HOST); temp = ps->setup_pattern_time + ps->setup_values_time; - hypre_MPI_Gather(&temp, 1, hypre_MPI_REAL, setup_times, 1, hypre_MPI_REAL, 0, comm); + hypre_MPI_Gather(&temp, 1, hypre_MPI_REAL, setup_times, 1, hypre_MPI_REAL, 0, hcomm); if (mype) return; diff --git a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c index a8e415b0a8..d063168f4e 100644 --- a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c 
+++ b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c @@ -368,8 +368,9 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS rowdist = DataDistTypeRowdist( hypre_DistributedMatrixPilutSolverDataDist( solver ) ); - hypre_MPI_Allgather( &start, 1, HYPRE_MPI_INT, rowdist, 1, HYPRE_MPI_INT, - hypre_DistributedMatrixPilutSolverComm(solver) ); + MPI_Comm comm = hypre_DistributedMatrixPilutSolverComm(solver); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Allgather( &start, 1, HYPRE_MPI_INT, rowdist, 1, HYPRE_MPI_INT, hcomm ); rowdist[ nprocs ] = n; diff --git a/src/distributed_ls/pilut/comm.c b/src/distributed_ls/pilut/comm.c index cca3302ccb..136c03f70b 100644 --- a/src/distributed_ls/pilut/comm.c +++ b/src/distributed_ls/pilut/comm.c @@ -34,7 +34,8 @@ HYPRE_Int hypre_GlobalSEMax(HYPRE_Int value, MPI_Comm hypre_MPI_Context ) { HYPRE_Int max; - hypre_MPI_Allreduce( &value, &max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm ); return max; } @@ -46,7 +47,8 @@ HYPRE_Int hypre_GlobalSEMax(HYPRE_Int value, MPI_Comm hypre_MPI_Context ) HYPRE_Int hypre_GlobalSEMin(HYPRE_Int value, MPI_Comm hypre_MPI_Context) { HYPRE_Int min; - hypre_MPI_Allreduce( &value, &min, 1, HYPRE_MPI_INT, hypre_MPI_MIN, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &min, 1, HYPRE_MPI_INT, hypre_MPI_MIN, hcomm ); return min; } @@ -57,8 +59,8 @@ HYPRE_Int hypre_GlobalSEMin(HYPRE_Int value, MPI_Comm hypre_MPI_Context) HYPRE_Int hypre_GlobalSESum(HYPRE_Int value, MPI_Comm hypre_MPI_Context) { HYPRE_Int sum; - - hypre_MPI_Allreduce( &value, &sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &sum, 1, 
HYPRE_MPI_INT, hypre_MPI_SUM, hcomm ); return sum; } @@ -69,7 +71,8 @@ HYPRE_Int hypre_GlobalSESum(HYPRE_Int value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSEMaxDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real max; - hypre_MPI_Allreduce( &value, &max, 1, hypre_MPI_REAL, hypre_MPI_MAX, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &max, 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm ); return max; } @@ -80,7 +83,8 @@ HYPRE_Real hypre_GlobalSEMaxDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSEMinDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real min; - hypre_MPI_Allreduce( &value, &min, 1, hypre_MPI_REAL, hypre_MPI_MIN, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &min, 1, hypre_MPI_REAL, hypre_MPI_MIN, hcomm ); return min; } @@ -91,7 +95,8 @@ HYPRE_Real hypre_GlobalSEMinDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSESumDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real sum; - hypre_MPI_Allreduce( &value, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hypre_MPI_Context ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); + hypre_MPI_Allreduce( &value, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm ); return sum; } diff --git a/src/distributed_ls/pilut/parilut.c b/src/distributed_ls/pilut/parilut.c index 2f2af49dba..34648df988 100644 --- a/src/distributed_ls/pilut/parilut.c +++ b/src/distributed_ls/pilut/parilut.c @@ -196,6 +196,7 @@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * HYPRE_Int *rrowind, *rnbrptr, *rnbrind, *srowind, *snbrind, *snbrptr; hypre_MPI_Status Status ; hypre_MPI_Request *index_requests; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(pilut_comm); #ifdef HYPRE_DEBUG hypre_PrintLine("hypre_ComputeCommInfo", globals); @@ -276,7 +277,7 
@@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * pilu_send[rnbrind[i]] = rnbrptr[i+1]-rnbrptr[i]; /* The # of rows I need */ hypre_MPI_Alltoall( pilu_send, 1, HYPRE_MPI_INT, - pilu_recv, 1, HYPRE_MPI_INT, pilut_comm ); + pilu_recv, 1, HYPRE_MPI_INT, hcomm ); nsend = 0; snnbr = 0; @@ -308,12 +309,12 @@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * /* issue asynchronous recieves */ for (i=0; i 0 ) { /* Something to recv */ hypre_MPI_Irecv( raddr[i]+rdone[i], rnum[i], hypre_MPI_REAL, - rpes[i], TAG, pilut_comm, &receive_requests[i] ); + rpes[i], TAG, hcomm, &receive_requests[i] ); rdone[i] += rnum[i] ; } @@ -132,7 +133,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR gatherbuf[l] = lx[sindex[j]]; hypre_MPI_Send( gatherbuf, l, hypre_MPI_REAL, - spes[i], TAG, pilut_comm ); + spes[i], TAG, hcomm ); auxsptr[i] = j; } @@ -205,7 +206,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR for (i=0; i 0 ) { /* Something to recv */ hypre_MPI_Irecv( raddr[i]+rdone[i], rnum[i], hypre_MPI_REAL, - rpes[i], TAG, pilut_comm, &receive_requests[ i ] ); + rpes[i], TAG, hcomm, &receive_requests[ i ] ); rdone[i] += rnum[i] ; } @@ -218,7 +219,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR gatherbuf[l] = ux[sindex[j]]; hypre_MPI_Send( gatherbuf, l, hypre_MPI_REAL, - spes[i], TAG, pilut_comm ); + spes[i], TAG, hcomm ); auxsptr[i] = j; } @@ -345,6 +346,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, hypre_MPI_Status Status; hypre_MPI_Request *receive_requests; hypre_MPI_Datatype MyColType_rnbr; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(pilut_comm); /* data common to L and U */ lnrows = ddist->ddist_lnrows; @@ -407,7 +409,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, TriSolveComm->rnbrpes = rnbrpes ; hypre_MPI_Alltoall( petotal, 1, 
HYPRE_MPI_INT, - lu_recv, 1, HYPRE_MPI_INT, pilut_comm ); + lu_recv, 1, HYPRE_MPI_INT, hcomm ); /* Determine to how many processors you will be sending data */ snbrpes = 0; @@ -459,7 +461,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, /* Start asynchronous receives */ for (i=0; i 0) { hypre_MPI_Send( rind+k, petotal[i], HYPRE_MPI_INT , - i, TAG_SetUp_rind, pilut_comm ); + i, TAG_SetUp_rind, hcomm ); /* recv info for hypre_LDUSolve */ raddr[rnbrpes] = x + k + lnrows; @@ -518,7 +520,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, for (i=0; i 0) { hypre_MPI_Irecv( rind+k, petotal[i], HYPRE_MPI_INT, - i, TAG_SetUp_reord, pilut_comm, &receive_requests[i] ); + i, TAG_SetUp_reord, hcomm, &receive_requests[i] ); k += petotal[i]; } } @@ -526,7 +528,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, /* Write them back to the processors that send them to me */ for (i=0; irnum+i, 1, MyColType_rnbr, - rpes[i], TAG_SetUp_rnum, pilut_comm, &Status ); + rpes[i], TAG_SetUp_rnum, hcomm, &Status ); } hypre_MPI_Type_free( &MyColType_rnbr ); diff --git a/src/parcsr_block_mv/par_csr_block_comm.c b/src/parcsr_block_mv/par_csr_block_comm.c index af5c08f791..27954dc253 100644 --- a/src/parcsr_block_mv/par_csr_block_comm.c +++ b/src/parcsr_block_mv/par_csr_block_comm.c @@ -46,6 +46,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); j = 0; @@ -60,7 +61,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, vec_len = (hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start) * bnnz; hypre_MPI_Irecv(&d_recv_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_sends; i++) { @@ -69,7 +70,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, 
(hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start) * bnnz; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Isend(&d_send_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); } break; } @@ -83,7 +84,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, (hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start) * bnnz; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Irecv(&d_recv_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -92,7 +93,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, vec_len = (hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start) * bnnz; hypre_MPI_Isend(&d_send_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); } break; } diff --git a/src/parcsr_block_mv/par_csr_block_interp.c b/src/parcsr_block_mv/par_csr_block_interp.c index 8c2f859df5..8080740319 100644 --- a/src/parcsr_block_mv/par_csr_block_interp.c +++ b/src/parcsr_block_mv/par_csr_block_interp.c @@ -141,12 +141,13 @@ hypre_BoomerAMGBuildBlockInterp( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* num_threads = hypre_NumThreads(); */ num_threads = 1; my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1710,12 +1711,13 @@ hypre_BoomerAMGBuildBlockInterpDiag( hypre_ParCSRBlockMatrix *A, 
hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2833,11 +2835,12 @@ hypre_BoomerAMGBuildBlockInterpRV( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -3885,11 +3888,12 @@ hypre_BoomerAMGBuildBlockInterpRV2( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -4899,11 +4903,12 @@ hypre_BoomerAMGBuildBlockDirInterp( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); 
hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_block_mv/par_csr_block_matrix.c b/src/parcsr_block_mv/par_csr_block_matrix.c index 1b1549254f..2f38caaf9f 100644 --- a/src/parcsr_block_mv/par_csr_block_matrix.c +++ b/src/parcsr_block_mv/par_csr_block_matrix.c @@ -163,6 +163,7 @@ HYPRE_Int hypre_ParCSRBlockMatrixSetNumNonzeros( hypre_ParCSRBlockMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); @@ -174,7 +175,7 @@ hypre_ParCSRBlockMatrixSetNumNonzeros( hypre_ParCSRBlockMatrix *matrix) local_num_nonzeros = (HYPRE_BigInt)(diag_i[local_num_rows] + offd_i[local_num_rows]); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); hypre_ParCSRBlockMatrixNumNonzeros(matrix) = total_num_nonzeros; return ierr; @@ -188,6 +189,7 @@ HYPRE_Int hypre_ParCSRBlockMatrixSetDNumNonzeros( hypre_ParCSRBlockMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); @@ -199,7 +201,7 @@ 
hypre_ParCSRBlockMatrixSetDNumNonzeros( hypre_ParCSRBlockMatrix *matrix) local_num_nonzeros = (HYPRE_Real) diag_i[local_num_rows] + (HYPRE_Real) offd_i[local_num_rows]; hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRBlockMatrixDNumNonzeros(matrix) = total_num_nonzeros; return ierr; diff --git a/src/parcsr_block_mv/par_csr_block_rap_communication.c b/src/parcsr_block_mv/par_csr_block_rap_communication.c index 5c6ef6886c..317f249003 100644 --- a/src/parcsr_block_mv/par_csr_block_rap_communication.c +++ b/src/parcsr_block_mv/par_csr_block_rap_communication.c @@ -57,6 +57,7 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------------------------------------------------------------------- * determine num_recvs, recv_procs and recv_vec_starts for RT @@ -120,11 +121,11 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, j = 0; for (i = 0; i < num_sends_A; i++) - hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, + hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, &requests[j++]); for (i = 0; i < num_recvs_A; i++) - hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, comm, + hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, &requests[j++]); hypre_MPI_Waitall(num_requests, requests, status); @@ -168,7 +169,7 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, vec_start = send_map_starts_RT[i]; vec_len = send_map_starts_RT[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs_RT[i], 0, comm, &requests[j++]); + send_procs_RT[i], 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs_RT; i++) @@ -176,7 +177,7 @@ 
hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, vec_start = recv_vec_starts_RT[i]; vec_len = recv_vec_starts_RT[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd_RT[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs_RT[i], 0, comm, &requests[j++]); + recv_procs_RT[i], 0, hcomm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); diff --git a/src/parcsr_ls/amg_hybrid.c b/src/parcsr_ls/amg_hybrid.c index 11f79ad62b..fc2fb51bb4 100644 --- a/src/parcsr_ls/amg_hybrid.c +++ b/src/parcsr_ls/amg_hybrid.c @@ -1581,8 +1581,9 @@ hypre_AMGHybridGetSetupSolveTime( void *AMGhybrid_vdata, t[3] = AMGhybrid_data->solve_time2; MPI_Comm comm = AMGhybrid_data->comm; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allreduce(t, time, 4, hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(t, time, 4, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); return hypre_error_flag; } diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index 2a8947eb15..1344392155 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -2927,6 +2927,9 @@ HYPRE_Int hypre_AMSSetup(void *solver, ams_data -> A = A; + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + /* Modifications for problems with zero-conductivity regions */ if (ams_data -> interior_nodes) { @@ -3111,7 +3114,7 @@ HYPRE_Int hypre_AMSSetup(void *solver, } lfactor *= 1e-10; /* scaling factor: max|A_ij|*1e-10 */ - hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_ParCSRMatrixComm(A)); + hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); } hypre_ParCSRMatrixAdd(factor, A, 1.0, B, &C); @@ -4342,14 +4345,15 @@ HYPRE_Int hypre_AMSFEISetup(void *solver, HYPRE_Real *x_data, *y_data, *z_data; MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt vert_part[2], num_global_vert; HYPRE_BigInt vert_start, vert_end; HYPRE_BigInt 
big_local_vert = (HYPRE_BigInt) num_local_vert; /* Find the processor partitioning of the vertices */ - hypre_MPI_Scan(&big_local_vert, &vert_part[1], 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_local_vert, &vert_part[1], 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); vert_part[0] = vert_part[1] - big_local_vert; - hypre_MPI_Allreduce(&big_local_vert, &num_global_vert, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_local_vert, &num_global_vert, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* Construct hypre parallel vectors for the vertex coordinates */ x_coord = hypre_ParVectorCreate(comm, num_global_vert, vert_part); diff --git a/src/parcsr_ls/gen_redcs_mat.c b/src/parcsr_ls/gen_redcs_mat.c index e443e9ea1a..6157ee20aa 100644 --- a/src/parcsr_ls/gen_redcs_mat.c +++ b/src/parcsr_ls/gen_redcs_mat.c @@ -90,7 +90,7 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, HYPRE_BigInt row_starts[2]; hypre_GenerateSubComm(comm, num_rows, &new_comm); - + hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); /*hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; @@ -124,11 +124,11 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, if (redundant) { - hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); + hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); } else { - hypre_MPI_Gather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm); + hypre_MPI_Gather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, hnew_comm); } /* alloc space in seq data structure only for participating procs*/ @@ -210,11 +210,11 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, if (redundant) { hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, - displs, HYPRE_MPI_INT, new_comm ); + displs, HYPRE_MPI_INT, hnew_comm ); if (num_functions > 1) { hypre_MPI_Allgatherv ( hypre_IntArrayData(dof_func_array[level]), 
num_rows, HYPRE_MPI_INT, - seq_dof_func, info, displs, HYPRE_MPI_INT, new_comm ); + seq_dof_func, info, displs, HYPRE_MPI_INT, hnew_comm ); HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func); } } @@ -222,14 +222,14 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, { if (A_seq_i) hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, - displs, HYPRE_MPI_INT, 0, new_comm ); + displs, HYPRE_MPI_INT, 0, hnew_comm ); else hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, A_seq_i, info, - displs, HYPRE_MPI_INT, 0, new_comm ); + displs, HYPRE_MPI_INT, 0, hnew_comm ); if (num_functions > 1) { hypre_MPI_Gatherv ( hypre_IntArrayData(dof_func_array[level]), num_rows, HYPRE_MPI_INT, - seq_dof_func, info, displs, HYPRE_MPI_INT, 0, new_comm ); + seq_dof_func, info, displs, HYPRE_MPI_INT, 0, hnew_comm ); if (my_id == 0) { HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func); } } } @@ -267,21 +267,21 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, { hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, - HYPRE_MPI_INT, new_comm ); + HYPRE_MPI_INT, hnew_comm ); hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL, A_seq_data, info, displs2, - HYPRE_MPI_REAL, new_comm ); + HYPRE_MPI_REAL, hnew_comm ); } else { hypre_MPI_Gatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, - HYPRE_MPI_INT, 0, new_comm ); + HYPRE_MPI_INT, 0, hnew_comm ); hypre_MPI_Gatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL, A_seq_data, info, displs2, - HYPRE_MPI_REAL, 0, new_comm ); + HYPRE_MPI_REAL, 0, hnew_comm ); } hypre_TFree(info, HYPRE_MEMORY_HOST); @@ -358,6 +358,7 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, /* Acquire seq data */ MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data); + hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data); hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data); hypre_ParVector 
*F_coarse = hypre_ParAMGDataFCoarse(amg_data); @@ -399,11 +400,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, local_info = nf; if (redundant) { - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); + hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); } else { - hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm); + hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, hnew_comm); } if (redundant || my_id == 0) @@ -425,11 +426,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, if (redundant) hypre_MPI_Allgatherv ( f_data, nf, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, new_comm ); + HYPRE_MPI_REAL, hnew_comm ); else hypre_MPI_Gatherv ( f_data, nf, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, 0, new_comm ); + HYPRE_MPI_REAL, 0, hnew_comm ); if (redundant || my_id == 0) { @@ -442,14 +443,14 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, { hypre_MPI_Allgatherv ( u_data, n, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, new_comm ); + HYPRE_MPI_REAL, hnew_comm ); hypre_TFree(displs, HYPRE_MEMORY_HOST); hypre_TFree(info, HYPRE_MEMORY_HOST); } else hypre_MPI_Gatherv ( u_data, n, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, 0, new_comm ); + HYPRE_MPI_REAL, 0, hnew_comm ); /* clean up */ if (redundant || my_id == 0) @@ -479,11 +480,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, } hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL, - u_data, n, HYPRE_MPI_REAL, 0, new_comm ); + u_data, n, HYPRE_MPI_REAL, 0, hnew_comm ); /*if (my_id == 0) local_data = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse)); hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL, - f_data, n, HYPRE_MPI_REAL, 0, new_comm );*/ + f_data, n, HYPRE_MPI_REAL, 0, hnew_comm );*/ if (my_id == 0) { hypre_TFree(displs, HYPRE_MEMORY_HOST); } hypre_TFree(info, HYPRE_MEMORY_HOST); } @@ -510,6 +511,7 @@ 
hypre_GenerateSubComm(MPI_Comm comm, HYPRE_Int *list_len; hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (participate) { @@ -520,7 +522,7 @@ hypre_GenerateSubComm(MPI_Comm comm, my_info = 0; } - hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (new_num_procs == 0) { @@ -538,7 +540,7 @@ hypre_GenerateSubComm(MPI_Comm comm, { my_info = my_id; } - hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); } else { @@ -560,7 +562,7 @@ hypre_GenerateSubComm(MPI_Comm comm, hypre_MPI_Op_create((hypre_MPI_User_function *)hypre_merge_lists, 0, &hypre_MPI_MERGE); - hypre_MPI_Allreduce(info, ranks, list_len[0], HYPRE_MPI_INT, hypre_MPI_MERGE, comm); + hypre_MPI_Allreduce(info, ranks, list_len[0], HYPRE_MPI_INT, hypre_MPI_MERGE, hcomm); hypre_MPI_Op_free (&hypre_MPI_MERGE); diff --git a/src/parcsr_ls/par_2s_interp.c b/src/parcsr_ls/par_2s_interp.c index 1b3523da23..20d248d078 100644 --- a/src/parcsr_ls/par_2s_interp.c +++ b/src/parcsr_ls/par_2s_interp.c @@ -126,11 +126,12 @@ hypre_BoomerAMGBuildModPartialExtInterpHost( hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } if (my_id == (num_procs - 1)) { total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); n_Cpts = num_cpts_global[1] - 
num_cpts_global[0]; n_old_Cpts = num_old_cpts_global[1] - num_old_cpts_global[0]; @@ -769,11 +770,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpHost( hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } if (my_id == (num_procs - 1)) { total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; n_old_Cpts = num_old_cpts_global[1] - num_old_cpts_global[0]; diff --git a/src/parcsr_ls/par_amg_setup.c b/src/parcsr_ls/par_amg_setup.c index 5480ec33fe..7da265c243 100644 --- a/src/parcsr_ls/par_amg_setup.c +++ b/src/parcsr_ls/par_amg_setup.c @@ -237,6 +237,7 @@ hypre_BoomerAMGSetup( void *amg_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*A_new = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixDiag(A), 1.e-16); hypre_CSRMatrixPrint(A_new, "Atestnew"); */ @@ -1641,7 +1642,7 @@ hypre_BoomerAMGSetup( void *amg_vdata, { coarse_size = coarse_pnts_global[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /* if no coarse-grid, stop coarsening, and set the * coarsest solve to be a single sweep of default smoother or smoother set by user */ @@ -2094,7 +2095,7 @@ hypre_BoomerAMGSetup( void *amg_vdata, { coarse_size = coarse_pnts_global[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + 
hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else /* no aggressive coarsening */ { diff --git a/src/parcsr_ls/par_amgdd_helpers.c b/src/parcsr_ls/par_amgdd_helpers.c index b3153cc7cd..ea3dd59b84 100644 --- a/src/parcsr_ls/par_amgdd_helpers.c +++ b/src/parcsr_ls/par_amgdd_helpers.c @@ -317,8 +317,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level], HYPRE_MEMORY_HOST); for (i = 0; i < hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; i++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 6, hypre_MPI_COMM_WORLD, + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 6, hcomm, &(requests[request_cnt++])); } HYPRE_Int *send_sizes = hypre_CTAlloc(HYPRE_Int, @@ -332,8 +333,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, send_sizes[i]++; } } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 6, hypre_MPI_COMM_WORLD, + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 6, hcomm, &(requests[request_cnt++])); } @@ -356,8 +358,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, for (i = 0; i < hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; i++) { recv_buffers[i] = hypre_CTAlloc(HYPRE_Int, recv_sizes[i], HYPRE_MEMORY_HOST); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(recv_buffers[i], recv_sizes[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 7, hypre_MPI_COMM_WORLD, + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 7, hcomm, &(requests[request_cnt++])); } // Setup and send the send buffers @@ -375,8 +378,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( 
hypre_ParCSRMatrix *A, } } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(send_buffers[i], send_sizes[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 7, hypre_MPI_COMM_WORLD, + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 7, hcomm, &(requests[request_cnt++])); } @@ -448,8 +452,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, request_cnt = 0; for (i = 0; i < csr_num_sends; i++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[i]), 1, HYPRE_MPI_INT, hypre_ParCSRCommPkgSendProc(commPkg, i), 4, - hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); + hcomm, &(requests[request_cnt++])); } for (i = 0; i < csr_num_recvs; i++) { @@ -461,8 +466,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, send_sizes[i] += 2 + 2 * num_req_dofs[i][j]; } } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[i]), 1, HYPRE_MPI_INT, hypre_ParCSRCommPkgRecvProc(commPkg, i), 4, - hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); + hcomm, &(requests[request_cnt++])); } // Wait on the recv sizes, then free and re-allocate the requests and statuses @@ -478,8 +484,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, for (i = 0; i < csr_num_sends; i++) { recv_buffers[i] = hypre_CTAlloc(HYPRE_Int, recv_sizes[i], HYPRE_MEMORY_HOST); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(recv_buffers[i], recv_sizes[i], HYPRE_MPI_INT, hypre_ParCSRCommPkgSendProc(commPkg, - i), 5, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); + i), 5, hcomm, &(requests[request_cnt++])); } // Setup the send buffer and post the sends @@ -502,8 +509,9 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, } } } + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(send_buffers[i], 
send_sizes[i], HYPRE_MPI_INT, hypre_ParCSRCommPkgRecvProc(commPkg, - i), 5, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); + i), 5, hcomm, &(requests[request_cnt++])); } // Free the req dof info for (i = 0; i < csr_num_recvs; i++) @@ -2624,14 +2632,16 @@ hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data for (proc = 0; proc < num_recv_procs; proc++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[2 * proc]), 2, HYPRE_MPI_INT, recv_procs[proc], 1, - hypre_MPI_COMM_WORLD, + hcomm, &(size_requests[request_cnt++])); } for (proc = 0; proc < num_send_procs; proc++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[2 * proc]), 2, HYPRE_MPI_INT, send_procs[proc], 1, - hypre_MPI_COMM_WORLD, + hcomm, &(size_requests[request_cnt++])); } @@ -2815,10 +2825,11 @@ hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data for (proc = 0; proc < num_send_procs; proc++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(int_send_buffers[proc], send_sizes[2 * proc], HYPRE_MPI_INT, send_procs[proc], 2, - hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); + hcomm, &(buf_requests[request_cnt++])); hypre_MPI_Isend(complex_send_buffers[proc], send_sizes[2 * proc + 1], HYPRE_MPI_COMPLEX, - send_procs[proc], 3, hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); + send_procs[proc], 3, hcomm, &(buf_requests[request_cnt++])); } // Wait on buffer sizes @@ -2827,13 +2838,14 @@ hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data // Allocate and post recvs for (proc = 0; proc < num_recv_procs; proc++) { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); int_recv_buffers[proc] = hypre_CTAlloc(HYPRE_Int, recv_sizes[2 * proc], HYPRE_MEMORY_HOST); complex_recv_buffers[proc] = hypre_CTAlloc(HYPRE_Complex, recv_sizes[2 * proc + 1], 
HYPRE_MEMORY_HOST); hypre_MPI_Irecv(int_recv_buffers[proc], recv_sizes[2 * proc], HYPRE_MPI_INT, recv_procs[proc], 2, - hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); + hcomm, &(buf_requests[request_cnt++])); hypre_MPI_Irecv(complex_recv_buffers[proc], recv_sizes[2 * proc + 1], HYPRE_MPI_COMPLEX, - recv_procs[proc], 3, hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); + recv_procs[proc], 3, hcomm, &(buf_requests[request_cnt++])); } // Wait on buffers diff --git a/src/parcsr_ls/par_amgdd_setup.c b/src/parcsr_ls/par_amgdd_setup.c index 0fcc7ee718..d3b6d3e105 100644 --- a/src/parcsr_ls/par_amgdd_setup.c +++ b/src/parcsr_ls/par_amgdd_setup.c @@ -168,6 +168,7 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (level = num_levels - 1; level >= amgdd_start_level; level--) { comm = hypre_ParCSRMatrixComm(A_array[level]); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_send_procs = hypre_AMGDDCommPkgNumSendProcs(compGridCommPkg)[level]; num_recv_procs = hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; num_requests = num_send_procs + num_recv_procs; @@ -192,7 +193,7 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (i = 0; i < num_recv_procs; i++) { hypre_MPI_Irecv(&(recv_buffer_size[level][i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 0, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 0, hcomm, &(requests[request_counter++])); } } @@ -214,7 +215,7 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (i = 0; i < num_send_procs; i++) { hypre_MPI_Isend(&(send_buffer_size[level][i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 0, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 0, hcomm, &(requests[request_counter++])); } } @@ -227,13 +228,13 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, { recv_buffer[i] = hypre_CTAlloc(HYPRE_Int, recv_buffer_size[level][i], HYPRE_MEMORY_HOST); 
hypre_MPI_Irecv(recv_buffer[i], recv_buffer_size[level][i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 1, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 1, hcomm, &(requests[request_counter++])); } for (i = 0; i < num_send_procs; i++) { hypre_MPI_Isend(send_buffer[i], send_buffer_size[level][i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 1, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 1, hcomm, &(requests[request_counter++])); } // Wait for buffers to be received @@ -270,14 +271,14 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, { send_flag_buffer[i] = hypre_CTAlloc(HYPRE_Int, send_flag_buffer_size[i], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(send_flag_buffer[i], send_flag_buffer_size[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 2, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 2, hcomm, &(requests[request_counter++])); } // send the recv_map_send_buffer's for (i = 0; i < num_recv_procs; i++) { hypre_MPI_Isend(recv_map_send_buffer[i], recv_map_send_buffer_size[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 2, comm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 2, hcomm, &(requests[request_counter++])); } // wait for maps to be received diff --git a/src/parcsr_ls/par_amgdd_solve.c b/src/parcsr_ls/par_amgdd_solve.c index 0af33b34b2..4043df3061 100644 --- a/src/parcsr_ls/par_amgdd_solve.c +++ b/src/parcsr_ls/par_amgdd_solve.c @@ -415,6 +415,7 @@ hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) { // Get some communication info comm = hypre_ParCSRMatrixComm(A_array[level]); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs); if (num_procs > 1) @@ -437,7 +438,7 @@ 
hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) recv_buffer_size = hypre_AMGDDCommPkgRecvBufferSize(compGridCommPkg)[level][i]; recv_buffers[i] = hypre_CTAlloc(HYPRE_Complex, recv_buffer_size, HYPRE_MEMORY_HOST); hypre_MPI_Irecv(recv_buffers[i], recv_buffer_size, HYPRE_MPI_COMPLEX, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 3, comm, &requests[request_counter++]); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 3, hcomm, &requests[request_counter++]); } for (i = 0; i < num_sends; i++) @@ -445,7 +446,7 @@ hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) send_buffer_size = hypre_AMGDDCommPkgSendBufferSize(compGridCommPkg)[level][i]; send_buffers[i] = hypre_BoomerAMGDD_PackResidualBuffer(compGrid, compGridCommPkg, level, i); hypre_MPI_Isend(send_buffers[i], send_buffer_size, HYPRE_MPI_COMPLEX, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 3, comm, &requests[request_counter++]); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 3, hcomm, &requests[request_counter++]); } // wait for buffers to be received diff --git a/src/parcsr_ls/par_cgc_coarsen.c b/src/parcsr_ls/par_cgc_coarsen.c index 55c88a977a..6d02a3ee6d 100644 --- a/src/parcsr_ls/par_cgc_coarsen.c +++ b/src/parcsr_ls/par_cgc_coarsen.c @@ -669,6 +669,7 @@ HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S, HYPRE_Int numbero hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if 0 if (!mpirank) @@ -706,7 +707,7 @@ HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S, HYPRE_Int numbero anyway, here it is: */ HYPRE_Int nlocal = vertexrange[1] - vertexrange[0]; vertexrange_all = hypre_CTAlloc(HYPRE_Int, mpisize + 1, HYPRE_MEMORY_HOST); - hypre_MPI_Allgather (&nlocal, 1, HYPRE_MPI_INT, vertexrange_all + 1, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather (&nlocal, 1, HYPRE_MPI_INT, vertexrange_all + 1, 1, HYPRE_MPI_INT, hcomm); 
vertexrange_all[0] = 0; for (j = 2; j <= mpisize; j++) { vertexrange_all[j] += vertexrange_all[j - 1]; } } @@ -832,6 +833,7 @@ HYPRE_Int hypre_AmgCGCPrepare (hypre_ParCSRMatrix *S, HYPRE_Int nlocal, HYPRE_In hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -845,7 +847,7 @@ HYPRE_Int hypre_AmgCGCPrepare (hypre_ParCSRMatrix *S, HYPRE_Int nlocal, HYPRE_In HYPRE_Int scan_recv; vertexrange = hypre_CTAlloc(HYPRE_Int, 2, HYPRE_MEMORY_HOST); - hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ vertexrange[0] = scan_recv - nlocal; /* first point in next proc's range */ @@ -948,6 +950,7 @@ HYPRE_Int hypre_AmgCGCGraphAssemble (hypre_ParCSRMatrix *S, HYPRE_Int *vertexran hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* determine neighbor processors */ num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg); @@ -973,10 +976,10 @@ HYPRE_Int hypre_AmgCGCGraphAssemble (hypre_ParCSRMatrix *S, HYPRE_Int *vertexran for (i = 0; i < num_recvs; i++) { - hypre_MPI_Irecv (pointrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_pointrange, comm, + hypre_MPI_Irecv (pointrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_pointrange, hcomm, &recvrequest[2 * i]); hypre_MPI_Irecv (vertexrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_vertexrange, - comm, + hcomm, &recvrequest[2 * i + 1]); } for (i = 0; i < num_sends; i++) @@ -985,9 +988,9 @@ HYPRE_Int hypre_AmgCGCGraphAssemble (hypre_ParCSRMatrix *S, HYPRE_Int *vertexran int_buf_data[2 * i + 1] = pointrange_end; int_buf_data2[2 * i] = vertexrange_start; int_buf_data2[2 * i + 1] = vertexrange_end; - hypre_MPI_Isend (int_buf_data + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_pointrange, comm, + 
hypre_MPI_Isend (int_buf_data + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_pointrange, hcomm, &sendrequest[2 * i]); - hypre_MPI_Isend (int_buf_data2 + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_vertexrange, comm, + hypre_MPI_Isend (int_buf_data2 + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_vertexrange, hcomm, &sendrequest[2 * i + 1]); } hypre_MPI_Waitall (2 * (num_sends + num_recvs), sendrequest, hypre_MPI_STATUSES_IGNORE); diff --git a/src/parcsr_ls/par_coarse_parms.c b/src/parcsr_ls/par_coarse_parms.c index 22b5f2dc0f..d284024291 100644 --- a/src/parcsr_ls/par_coarse_parms.c +++ b/src/parcsr_ls/par_coarse_parms.c @@ -87,7 +87,8 @@ hypre_BoomerAMGCoarseParmsHost(MPI_Comm comm, { HYPRE_BigInt scan_recv; - hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ coarse_pnts_global[0] = scan_recv - local_coarse_size; diff --git a/src/parcsr_ls/par_coarsen.c b/src/parcsr_ls/par_coarsen.c index e41421d897..754a02cbd0 100644 --- a/src/parcsr_ls/par_coarsen.c +++ b/src/parcsr_ls/par_coarsen.c @@ -167,6 +167,7 @@ hypre_BoomerAMGCoarsen( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -548,7 +549,7 @@ hypre_BoomerAMGCoarsen( hypre_ParCSRMatrix *S, *------------------------------------------------*/ big_graph_size = (HYPRE_BigInt) graph_size; - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); if (global_graph_size == 0) { @@ -2186,6 +2187,7 @@ hypre_BoomerAMGCoarsenPMISHost( hypre_ParCSRMatrix *S, 
hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -2468,7 +2470,7 @@ hypre_BoomerAMGCoarsenPMISHost( hypre_ParCSRMatrix *S, big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* if (my_id == 0) { hypre_printf("graph size %b\n", global_graph_size); } */ diff --git a/src/parcsr_ls/par_cr.c b/src/parcsr_ls/par_cr.c index 25738643f0..133981db0e 100644 --- a/src/parcsr_ls/par_cr.c +++ b/src/parcsr_ls/par_cr.c @@ -1685,6 +1685,7 @@ hypre_BoomerAMGIndepPMIS( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -1950,7 +1951,7 @@ hypre_BoomerAMGIndepPMIS( hypre_ParCSRMatrix *S, HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); if (global_graph_size == 0) { @@ -2303,6 +2304,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -2562,7 +2564,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S, HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, 
&global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); if (global_graph_size == 0) { @@ -2894,6 +2896,7 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); @@ -3185,7 +3188,7 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, } } - hypre_MPI_Allreduce(&local_max, &global_max, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&local_max, &global_max, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); if (num_functions == 1) /*if(CRaddCpoints == 0)*/ { @@ -3330,7 +3333,7 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, } } nstages += 1; - hypre_MPI_Allreduce(&num_coarse, &global_nc, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&num_coarse, &global_nc, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); } else { diff --git a/src/parcsr_ls/par_gauss_elim.c b/src/parcsr_ls/par_gauss_elim.c index 1895a204f4..57ac4ac990 100644 --- a/src/parcsr_ls/par_gauss_elim.c +++ b/src/parcsr_ls/par_gauss_elim.c @@ -154,6 +154,7 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, /* Generate sub communicator - processes that have nonzero num_rows */ hypre_GenerateSubComm(comm, num_rows, &new_comm); hypre_ParAMGDataNewComm(amg_data) = new_comm; + hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); if (num_rows) { @@ -176,7 +177,7 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, displs = &comm_info[new_num_procs]; hypre_ParAMGDataCommInfo(amg_data) = comm_info; - hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); + hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); displs[0] = 0; mat_displs[0] = 0; @@ -215,7 +216,7 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, } hypre_MPI_Allgatherv(A_mat_local, 
A_mat_local_size, HYPRE_MPI_REAL, A_mat, mat_info, - mat_displs, HYPRE_MPI_REAL, new_comm); + mat_displs, HYPRE_MPI_REAL, hnew_comm); /* Set dense matrix - We store it in row-major format when using hypre's internal Gaussian Elimination or in column-major format if using LAPACK solvers */ @@ -458,6 +459,8 @@ hypre_GaussElimSolve(hypre_ParAMGData *amg_data, HYPRE_Int *displs, *info; HYPRE_Int new_num_procs; + hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); + #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_GS_ELIM_SOLVE] -= hypre_MPI_Wtime(); #endif @@ -532,7 +535,7 @@ hypre_GaussElimSolve(hypre_ParAMGData *amg_data, /* TODO (VPM): Add GPU-aware MPI support to buffers */ hypre_MPI_Allgatherv(f_data_h, num_rows, HYPRE_MPI_REAL, b_data_h, - info, displs, HYPRE_MPI_REAL, new_comm); + info, displs, HYPRE_MPI_REAL, hnew_comm); if (f_data_h != f_data) { diff --git a/src/parcsr_ls/par_gsmg.c b/src/parcsr_ls/par_gsmg.c index 23383c7047..1dac1452e6 100644 --- a/src/parcsr_ls/par_gsmg.c +++ b/src/parcsr_ls/par_gsmg.c @@ -286,7 +286,8 @@ hypre_ParCSRMatrixChooseThresh(hypre_ParCSRMatrix *S) } } - hypre_MPI_Allreduce(&minimax, &minmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Allreduce(&minimax, &minmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, hcomm); return minmin; } diff --git a/src/parcsr_ls/par_ilu.c b/src/parcsr_ls/par_ilu.c index 9662f21c5a..8b378c4615 100644 --- a/src/parcsr_ls/par_ilu.c +++ b/src/parcsr_ls/par_ilu.c @@ -3794,6 +3794,7 @@ hypre_ParCSRMatrixNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) HYPRE_Real local_norm = 0.0; HYPRE_Real global_norm; MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); @@ -3807,7 +3808,7 @@ hypre_ParCSRMatrixNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) local_norm += global_norm * 
global_norm; /* do communication to get global total sum */ - hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); *norm_io = hypre_sqrt(global_norm); @@ -3830,6 +3831,7 @@ hypre_ParCSRMatrixResNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) HYPRE_Real local_norm = 0.0; HYPRE_Real global_norm; MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); @@ -3845,7 +3847,7 @@ hypre_ParCSRMatrixResNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) local_norm += global_norm * global_norm; /* do communication to get global total sum */ - hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); *norm_io = hypre_sqrt(global_norm); return hypre_error_flag; diff --git a/src/parcsr_ls/par_ilu_setup.c b/src/parcsr_ls/par_ilu_setup.c index 36e1de3840..0e01a820b0 100644 --- a/src/parcsr_ls/par_ilu_setup.c +++ b/src/parcsr_ls/par_ilu_setup.c @@ -137,6 +137,7 @@ hypre_ILUSetup( void *ilu_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if defined(HYPRE_USING_GPU) hypre_CSRMatrixDestroy(matALU_d); matALU_d = NULL; @@ -976,7 +977,7 @@ hypre_ILUSetup( void *ilu_vdata, HYPRE_BigInt global_start, S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt) m; - hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); if (S_total_rows > 0) { @@ -990,7 +991,7 @@ hypre_ILUSetup( void *ilu_vdata, hypre_ParCSRMatrixRowStarts(matA)); hypre_ParVectorInitialize(Ytemp); - 
hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; @@ -1214,7 +1215,7 @@ hypre_ILUSetup( void *ilu_vdata, /* borrow i for local nnz of S */ nnzS_offd_local = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matS)); hypre_MPI_Allreduce(&nnzS_offd_local, &nnzS_offd, 1, HYPRE_MPI_REAL, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); nnzS = nnzS * hypre_ParILUDataOperatorComplexity(schur_precond_ilu) + nnzS_offd; break; @@ -1850,6 +1851,7 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int test_opt) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int *rperm = NULL; HYPRE_Int m = n - nLU; HYPRE_Int i; @@ -1970,13 +1972,13 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); if (S_total_rows > 0) { { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; } @@ -2243,6 +2245,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* setup if not yet built */ @@ -2659,7 +2662,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, 
hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* only form when total_rows > 0 */ if (total_rows > 0) @@ -2668,7 +2671,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -2780,7 +2783,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -2807,7 +2810,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); @@ -3395,6 +3398,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* set Comm_Pkg if not yet built */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { @@ -3655,7 +3659,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* only form when 
total_rows > 0 */ if ( total_rows > 0 ) { @@ -3663,7 +3667,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -3780,7 +3784,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -3806,7 +3810,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free */ @@ -3987,6 +3991,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* create if not yet built */ if (!comm_pkg) @@ -4527,7 +4532,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* only form when total_rows > 0 */ if ( total_rows > 0 ) @@ -4536,7 +4541,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt 
global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -4655,7 +4660,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -4682,7 +4687,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free working array */ @@ -4913,6 +4918,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, * get communication stuffs first */ hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* Setup if not yet built */ @@ -5403,12 +5409,12 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; - hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ 
-5437,7 +5443,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -5464,7 +5470,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); @@ -6136,6 +6142,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, /* communication */ hypre_ParCSRCommPkg *comm_pkg; hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* reverse permutation array */ HYPRE_Int *rperm; @@ -6523,11 +6530,11 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, */ HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ -6555,7 +6562,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, 
hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -6581,7 +6588,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free */ @@ -6721,6 +6728,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, * setup communication stuffs first */ hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* create if not yet built */ if (!comm_pkg) @@ -7466,11 +7474,11 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, */ HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ -7500,7 +7508,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -7527,7 +7535,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); - 
hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free working array */ diff --git a/src/parcsr_ls/par_interp.c b/src/parcsr_ls/par_interp.c index de2252f937..7aae5e5b59 100644 --- a/src/parcsr_ls/par_interp.c +++ b/src/parcsr_ls/par_interp.c @@ -127,11 +127,12 @@ hypre_BoomerAMGBuildInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1097,12 +1098,13 @@ hypre_BoomerAMGBuildInterpHE( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1973,11 +1975,12 @@ hypre_BoomerAMGBuildDirInterpHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if 
(my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2771,12 +2774,13 @@ hypre_BoomerAMGBuildInterpModUnk( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -3931,10 +3935,11 @@ hypre_BoomerAMGBuildInterpOnePntHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_lr_interp.c b/src/parcsr_ls/par_lr_interp.c index 6920aaea6b..bff74a356c 100644 --- a/src/parcsr_ls/par_lr_interp.c +++ b/src/parcsr_ls/par_lr_interp.c @@ -145,10 +145,11 @@ hypre_BoomerAMGBuildStdInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = 
hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -1146,10 +1147,11 @@ hypre_BoomerAMGBuildExtPIInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -2042,10 +2044,11 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -3029,10 +3032,11 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -3945,10 +3949,11 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, 
&num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -4880,13 +4885,14 @@ hypre_BoomerAMGBuildExtInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { diff --git a/src/parcsr_ls/par_lr_restr.c b/src/parcsr_ls/par_lr_restr.c index 60020e6d85..67fbca91a3 100644 --- a/src/parcsr_ls/par_lr_restr.c +++ b/src/parcsr_ls/par_lr_restr.c @@ -167,6 +167,7 @@ hypre_BoomerAMGBuildRestrDist2AIR( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------- global number of C points and my start position */ /*my_first_cpt = num_cpts_global[0];*/ @@ -174,7 +175,7 @@ hypre_BoomerAMGBuildRestrDist2AIR( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1711,6 +1712,7 @@ hypre_BoomerAMGBuildRestrNeumannAIRHost( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); 
hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location_R = hypre_ParCSRMatrixMemoryLocation(A); @@ -1720,7 +1722,7 @@ hypre_BoomerAMGBuildRestrNeumannAIRHost( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_mgr.c b/src/parcsr_ls/par_mgr.c index 5c56aa0b92..72342c785c 100644 --- a/src/parcsr_ls/par_mgr.c +++ b/src/parcsr_ls/par_mgr.c @@ -1396,6 +1396,7 @@ hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); nfpoints = 0; for (i = 0; i < A_nr_of_rows; i++) @@ -1463,7 +1464,7 @@ hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, { nC_global = num_cpts_global[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } /* Construct P from matrix product W_diag */ @@ -1626,6 +1627,7 @@ hypre_MGRBuildP( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -1633,7 +1635,7 @@ hypre_MGRBuildP( hypre_ParCSRMatrix *A, //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * 
Get the CF_marker data for the off-processor columns @@ -2202,6 +2204,7 @@ hypre_MGRBuildPDRS( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -2209,7 +2212,7 @@ hypre_MGRBuildPDRS( hypre_ParCSRMatrix *A, //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2740,6 +2743,7 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); // Count total F-points // Also setup F to C column map @@ -2770,7 +2774,7 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_row_cpts_global[0], num_row_cpts_global[1]); if (my_id == (num_procs - 1)) { total_global_row_cpts = num_row_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /* get the number of coarse rows */ hypre_IntArrayData(wrap_cf) = f_marker; @@ -2784,7 +2788,7 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_col_fpts_global[0], num_col_fpts_global[1]); if (my_id == (num_procs - 1)) { total_global_col_fpts = num_col_fpts_global[1]; } - hypre_MPI_Bcast(&total_global_col_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_col_fpts, 1, HYPRE_MPI_BIG_INT, 
num_procs - 1, hcomm); // First pass: count the nnz of A_CF jj_counter = 0; @@ -3490,9 +3494,10 @@ hypre_MGRBuildInterpApproximateInverse(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*----------------------------------------------------------------------- * Allocate arrays. @@ -4772,6 +4777,7 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Sanity check */ if ((num_rows_A > 0) && (num_rows_A < blk_size)) @@ -4809,7 +4815,7 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, if (CF_marker) { num_rows_big = (HYPRE_BigInt) B_diag_num_rows; - hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ row_starts_B[0] = scan_recv - num_rows_big; @@ -4820,7 +4826,7 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, { num_rows_B = row_starts_B[1]; } - hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { @@ -6048,6 +6054,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -6066,7 +6073,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, // my_first_row_cpt = num_row_cpts_global[0]; 
if (my_id == (num_procs - 1)) { total_global_row_cpts = num_row_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /* get the number of coarse rows */ hypre_IntArrayData(wrap_cf) = col_cf_marker; @@ -6079,7 +6086,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, // my_first_col_cpt = num_col_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_col_cpts = num_col_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_col_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_col_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -6673,6 +6680,7 @@ hypre_MGRDataPrint(void *mgr_vdata) /* Get rank ID */ comm = hypre_ParCSRMatrixComm(par_A); hypre_MPI_Comm_rank(comm, &myid); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Create new "ls_" folder (info_path) */ if (((print_level & HYPRE_MGR_PRINT_INFO_PARAMS) || @@ -6690,7 +6698,7 @@ hypre_MGRDataPrint(void *mgr_vdata) hypre_CreateNextDirOfSequence(topdir, "ls_", &info_path); info_path_length = strlen(info_path) + 1; } - hypre_MPI_Bcast(&info_path_length, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Bcast(&info_path_length, 1, HYPRE_MPI_INT, 0, hcomm); if (info_path_length > 0) { @@ -6704,7 +6712,7 @@ hypre_MGRDataPrint(void *mgr_vdata) hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unable to create info path!"); return hypre_error_flag; } - hypre_MPI_Bcast(info_path, info_path_length, hypre_MPI_CHAR, 0, comm); + hypre_MPI_Bcast(info_path, info_path_length, hypre_MPI_CHAR, 0, hcomm); /* Save info_path */ (mgr_data -> info_path) = info_path; diff --git a/src/parcsr_ls/par_mgr_coarsen.c b/src/parcsr_ls/par_mgr_coarsen.c index 18f024bc3c..b98e3909cb 100644 --- a/src/parcsr_ls/par_mgr_coarsen.c +++ 
b/src/parcsr_ls/par_mgr_coarsen.c @@ -38,7 +38,8 @@ hypre_MGRCoarseParms(MPI_Comm comm, /* Scan global starts */ sbuffer_send[0] = (HYPRE_BigInt) num_cpts; sbuffer_send[1] = (HYPRE_BigInt) num_fpts; - hypre_MPI_Scan(&sbuffer_send, &sbuffer_recv, 2, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Scan(&sbuffer_send, &sbuffer_recv, 2, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* First points in next processor's range */ row_starts_cpts[1] = sbuffer_recv[0]; diff --git a/src/parcsr_ls/par_mgr_setup.c b/src/parcsr_ls/par_mgr_setup.c index 3a3b5f1d2a..d325cbfca8 100644 --- a/src/parcsr_ls/par_mgr_setup.c +++ b/src/parcsr_ls/par_mgr_setup.c @@ -2017,6 +2017,7 @@ hypre_MGRSetupFrelaxVcycleData( void *mgr_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); local_size = hypre_ParCSRMatrixNumRows(A); @@ -2185,7 +2186,7 @@ hypre_MGRSetupFrelaxVcycleData( void *mgr_vdata, { coarse_size = coarse_pnts_global_lvl[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (coarse_size == 0) // stop coarsening { diff --git a/src/parcsr_ls/par_mod_lr_interp.c b/src/parcsr_ls/par_mod_lr_interp.c index a8c1ebe01d..1efb865bec 100644 --- a/src/parcsr_ls/par_mod_lr_interp.c +++ b/src/parcsr_ls/par_mod_lr_interp.c @@ -117,9 +117,10 @@ hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; 
hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF); @@ -697,9 +698,10 @@ hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF); @@ -1358,9 +1360,10 @@ hypre_BoomerAMGBuildModExtPEInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF); diff --git a/src/parcsr_ls/par_mod_multi_interp.c b/src/parcsr_ls/par_mod_multi_interp.c index 1280ba36ba..a0e1e60c02 100644 --- a/src/parcsr_ls/par_mod_multi_interp.c +++ b/src/parcsr_ls/par_mod_multi_interp.c @@ -98,6 +98,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -105,7 +106,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + 
hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { @@ -199,7 +200,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, current_pass = 1; num_passes = 1; /* color points according to pass number */ - hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); while (global_remaining > 0) { HYPRE_Int remaining_pts = (HYPRE_Int) remaining; @@ -265,7 +266,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, hypre_ParCSRCommHandleDestroy(comm_handle); } old_global_remaining = global_remaining; - hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* if the number of remaining points does not change, we have a situation of isolated areas of * fine points that are not connected to any C-points, and the pass generation process breaks * down. Those points can be ignored, i.e. 
the corresponding rows in P will just be 0 @@ -596,6 +597,7 @@ hypre_GenerateMultipassPi( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* define P matrices */ @@ -623,15 +625,15 @@ hypre_GenerateMultipassPi( hypre_ParCSRMatrix *A, HYPRE_BigInt big_Fpts; big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { @@ -922,6 +924,7 @@ hypre_GenerateMultiPi( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* define P matrices */ @@ -949,15 +952,15 @@ hypre_GenerateMultiPi( hypre_ParCSRMatrix *A, HYPRE_BigInt big_Fpts; big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, 
num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { diff --git a/src/parcsr_ls/par_multi_interp.c b/src/parcsr_ls/par_multi_interp.c index 45dca9abdd..c0f7007868 100644 --- a/src/parcsr_ls/par_multi_interp.c +++ b/src/parcsr_ls/par_multi_interp.c @@ -204,11 +204,12 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /* total_global_cpts = 0; */ if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -518,7 +519,7 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, pass = 2; local_pass_array_size = (HYPRE_BigInt)(pass_array_size - cnt); hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); while (global_pass_array_size && pass < max_num_passes) { for (i = pass_array_size - 1; i > cnt - 1; i--) @@ -559,7 +560,7 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, local_pass_array_size = (HYPRE_BigInt)(pass_array_size - cnt); hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); index = 0; for (i = 0; i < num_sends; i++) { diff --git a/src/parcsr_ls/par_rap_communication.c b/src/parcsr_ls/par_rap_communication.c index 073ad96654..89345e5049 100644 --- a/src/parcsr_ls/par_rap_communication.c +++ b/src/parcsr_ls/par_rap_communication.c @@ -62,6 +62,7 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
/*-------------------------------------------------------------------------- * determine num_recvs, recv_procs and recv_vec_starts for RT @@ -177,11 +178,11 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, j = 0; for (i = 0; i < num_sends_A; i++) - hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, + hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, &requests[j++]); for (i = 0; i < num_recvs_A; i++) - hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, comm, + hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, &requests[j++]); hypre_MPI_Waitall(num_requests, requests, status); @@ -227,7 +228,7 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, vec_start = send_map_starts_RT[i]; vec_len = send_map_starts_RT[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs_RT[i], 0, comm, &requests[j++]); + send_procs_RT[i], 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs_RT; i++) @@ -235,7 +236,7 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, vec_start = recv_vec_starts_RT[i]; vec_len = recv_vec_starts_RT[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd_RT[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs_RT[i], 0, comm, &requests[j++]); + recv_procs_RT[i], 0, hcomm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -278,6 +279,7 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_BigInt *send_big_elmts = NULL; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------------------------------------------------------------------- * generate send_map_starts and send_map_elmts @@ -289,14 +291,14 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu j = 0; for (i = 0; i < 
num_sends; i++) { - hypre_MPI_Irecv(&send_map_starts[i + 1], 1, HYPRE_MPI_INT, send_procs[i], 0, comm, + hypre_MPI_Irecv(&send_map_starts[i + 1], 1, HYPRE_MPI_INT, send_procs[i], 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { vec_len = recv_vec_starts[i + 1] - recv_vec_starts[i]; - hypre_MPI_Isend(&vec_len, 1, HYPRE_MPI_INT, recv_procs[i], 0, comm, &requests[j++]); + hypre_MPI_Isend(&vec_len, 1, HYPRE_MPI_INT, recv_procs[i], 0, hcomm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -316,7 +318,7 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu vec_start = send_map_starts[i]; vec_len = send_map_starts[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs[i], 0, comm, &requests[j++]); + send_procs[i], 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) @@ -324,7 +326,7 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu vec_start = recv_vec_starts[i]; vec_len = recv_vec_starts[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs[i], 0, comm, &requests[j++]); + recv_procs[i], 0, hcomm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index e457a1463c..438df84a1a 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -414,6 +414,7 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -449,7 +450,7 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - comm, &requests[jr++]); + hcomm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -465,7 +466,7 
@@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - comm, &requests[jr++]); + hcomm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } @@ -558,6 +559,7 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -612,7 +614,7 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - comm, &requests[jr++]); + hcomm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -628,7 +630,7 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - comm, &requests[jr++]); + hcomm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } diff --git a/src/parcsr_ls/par_relax_more.c b/src/parcsr_ls/par_relax_more.c index a19ca1bd9d..0e415d1e81 100644 --- a/src/parcsr_ls/par_relax_more.c +++ b/src/parcsr_ls/par_relax_more.c @@ -48,6 +48,9 @@ hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A, /* matrix to relax HYPRE_Real e_max, e_min; HYPRE_Real send_buf[2], recv_buf[2]; + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); if (scale > 1) @@ -102,7 +105,7 @@ hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A, /* matrix to relax /* get e_min e_max across procs */ hypre_MPI_Allreduce(send_buf, 
recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, - hypre_ParCSRMatrixComm(A)); + hcomm); e_min = -recv_buf[0]; e_max = recv_buf[1]; diff --git a/src/parcsr_ls/par_restr.c b/src/parcsr_ls/par_restr.c index 22263a7d28..6b833aabe0 100644 --- a/src/parcsr_ls/par_restr.c +++ b/src/parcsr_ls/par_restr.c @@ -112,6 +112,7 @@ hypre_BoomerAMGBuildRestrAIR( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------- global number of C points and my start position */ /*my_first_cpt = num_cpts_global[0];*/ @@ -119,7 +120,7 @@ hypre_BoomerAMGBuildRestrAIR( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_scaled_matnorm.c b/src/parcsr_ls/par_scaled_matnorm.c index d4e24e569b..b8af4a944a 100644 --- a/src/parcsr_ls/par_scaled_matnorm.c +++ b/src/parcsr_ls/par_scaled_matnorm.c @@ -23,6 +23,7 @@ hypre_ParCSRMatrixScaledNorm( hypre_ParCSRMatrix *A, HYPRE_Real *scnorm) hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *diag_i = hypre_CSRMatrixI(diag); HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag); @@ -116,7 +117,7 @@ hypre_ParCSRMatrixScaledNorm( hypre_ParCSRMatrix *A, HYPRE_Real *scnorm) } } - hypre_MPI_Allreduce(&max_row_sum, &mat_norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&max_row_sum, &mat_norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); hypre_ParVectorDestroy(dinvsqrt); 
hypre_SeqVectorDestroy(sum); diff --git a/src/parcsr_ls/par_stats.c b/src/parcsr_ls/par_stats.c index ef0d69adf7..b5c0c77d17 100644 --- a/src/parcsr_ls/par_stats.c +++ b/src/parcsr_ls/par_stats.c @@ -154,6 +154,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); @@ -655,7 +656,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; - hypre_MPI_Reduce(send_buff, gather_buff, 4, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); + hypre_MPI_Reduce(send_buff, gather_buff, 4, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); if (my_id == 0) { @@ -951,7 +952,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, send_buff[4] = - min_weight; send_buff[5] = max_weight; - hypre_MPI_Reduce(send_buff, gather_buff, 6, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); + hypre_MPI_Reduce(send_buff, gather_buff, 6, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); if (my_id == 0) { diff --git a/src/parcsr_ls/par_strength.c b/src/parcsr_ls/par_strength.c index dc667cdfef..3d40f34470 100644 --- a/src/parcsr_ls/par_strength.c +++ b/src/parcsr_ls/par_strength.c @@ -1539,6 +1539,7 @@ hypre_BoomerAMGCreateSCommPkg(hypre_ParCSRMatrix *A, HYPRE_Int **col_offd_S_to_A_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status *status; hypre_MPI_Request *requests; hypre_ParCSRCommPkg *comm_pkg_A = hypre_ParCSRMatrixCommPkg(A); @@ -1689,12 +1690,12 @@ hypre_BoomerAMGCreateSCommPkg(hypre_ParCSRMatrix *A, j = 0; for (i = 0; i < num_sends_A; i++) { - hypre_MPI_Irecv(&send_change[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, &requests[j++]); + hypre_MPI_Irecv(&send_change[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs_A; i++) { - hypre_MPI_Isend(&recv_change[i], 1, 
HYPRE_MPI_INT, recv_procs_A[i], 0, comm, &requests[j++]); + hypre_MPI_Isend(&recv_change[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, &requests[j++]); } status = hypre_CTAlloc(hypre_MPI_Status, j, HYPRE_MEMORY_HOST); @@ -1890,11 +1891,12 @@ hypre_BoomerAMGCreate2ndSHost( hypre_ParCSRMatrix *S, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = coarse_row_starts[0]; my_last_cpt = coarse_row_starts[1] - 1; if (my_id == (num_procs - 1)) { global_num_coarse = coarse_row_starts[1]; } - hypre_MPI_Bcast(&global_num_coarse, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&global_num_coarse, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (num_cols_offd_S) { diff --git a/src/parcsr_ls/par_sv_interp.c b/src/parcsr_ls/par_sv_interp.c index 9037e2cb7e..22d0cece8a 100644 --- a/src/parcsr_ls/par_sv_interp.c +++ b/src/parcsr_ls/par_sv_interp.c @@ -355,6 +355,7 @@ hypre_BoomerAMG_GMExpandInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if SV_DEBUG { @@ -1407,7 +1408,7 @@ hypre_BoomerAMG_GMExpandInterp( hypre_ParCSRMatrix *A, new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf; if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; } - hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else /* not first level */ { diff --git a/src/parcsr_ls/par_sv_interp_ln.c b/src/parcsr_ls/par_sv_interp_ln.c index 3989378a14..e06255fde1 100644 --- a/src/parcsr_ls/par_sv_interp_ln.c +++ b/src/parcsr_ls/par_sv_interp_ln.c @@ -223,6 +223,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if SV_DEBUG 
@@ -2498,7 +2499,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { g_nc = new_col_starts[1]; } - hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else /* not first level */ { diff --git a/src/parcsr_ls/partial.c b/src/parcsr_ls/partial.c index f7bd7dfcfe..c7482d3279 100644 --- a/src/parcsr_ls/partial.c +++ b/src/parcsr_ls/partial.c @@ -132,6 +132,7 @@ hypre_BoomerAMGBuildPartialExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_mark /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); max_num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; @@ -143,8 +144,8 @@ hypre_BoomerAMGBuildPartialExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_mark total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -997,6 +998,7 @@ hypre_BoomerAMGBuildPartialStdInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /*my_first_old_cpt = num_old_cpts_global[0];*/ @@ -1008,8 +1010,8 @@ hypre_BoomerAMGBuildPartialStdInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); 
+ hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -2001,6 +2003,7 @@ hypre_BoomerAMGBuildPartialExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /*my_first_old_cpt = num_old_cpts_global[0];*/ @@ -2011,8 +2014,8 @@ hypre_BoomerAMGBuildPartialExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { diff --git a/src/parcsr_mv/HYPRE_parcsr_matrix.c b/src/parcsr_mv/HYPRE_parcsr_matrix.c index 1595425dba..bd04b4a699 100644 --- a/src/parcsr_mv/HYPRE_parcsr_matrix.c +++ b/src/parcsr_mv/HYPRE_parcsr_matrix.c @@ -147,8 +147,8 @@ HYPRE_ParCSRMatrixGetRowPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - hypre_MPI_Comm_size(hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix), - &num_procs); + MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); + hypre_MPI_Comm_size(comm, &num_procs); row_starts = hypre_ParCSRMatrixRowStarts((hypre_ParCSRMatrix *) matrix); if (!row_starts) { return -1; } row_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); @@ -169,7 +169,6 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, HYPRE_Int all_procs, HYPRE_BigInt **row_partitioning_ptr ) { - MPI_Comm comm; HYPRE_Int my_id; HYPRE_BigInt *row_partitioning = 
NULL; @@ -179,7 +178,8 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); + MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &my_id); HYPRE_Int num_procs; @@ -195,12 +195,12 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, if (all_procs) { hypre_MPI_Allgather(&row_start, 1, HYPRE_MPI_BIG_INT, row_partitioning, - 1, HYPRE_MPI_BIG_INT, comm); + 1, HYPRE_MPI_BIG_INT, hcomm); } else { hypre_MPI_Gather(&row_start, 1, HYPRE_MPI_BIG_INT, row_partitioning, - 1, HYPRE_MPI_BIG_INT, 0, comm); + 1, HYPRE_MPI_BIG_INT, 0, hcomm); } if (my_id == 0 || all_procs) @@ -230,8 +230,8 @@ HYPRE_ParCSRMatrixGetColPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - hypre_MPI_Comm_size(hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix), - &num_procs); + MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); + hypre_MPI_Comm_size(comm, &num_procs); col_starts = hypre_ParCSRMatrixColStarts((hypre_ParCSRMatrix *) matrix); if (!col_starts) { return -1; } col_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/communicationT.c b/src/parcsr_mv/communicationT.c index d5244a5a6a..762eeeb0d5 100644 --- a/src/parcsr_mv/communicationT.c +++ b/src/parcsr_mv/communicationT.c @@ -167,6 +167,7 @@ hypre_MatTCommPkgCreate_core ( HYPRE_BigInt col, kc; HYPRE_Int * recv_sz_buf; HYPRE_Int * row_marker; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -234,7 +235,7 @@ hypre_MatTCommPkgCreate_core ( num_recvs = num_procs - 1; local_info = num_procs + num_cols_offd + num_cols_diag; - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&local_info, 
1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hcomm); /* ---------------------------------------------------------------------- * generate information to be send: tmp contains for each recv_proc: @@ -274,7 +275,7 @@ hypre_MatTCommPkgCreate_core ( hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_BIG_INT, - comm); + hcomm); /* ---------------------------------------------------------------------- * determine send_procs and actual elements to be send (in send_map_elmts) @@ -423,7 +424,7 @@ hypre_MatTCommPkgCreate_core ( /* scatter-gather num_sends, to set up the size for the main comm. step */ i = 3 * num_sends; - hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm ); + hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, hcomm ); displs[0] = 0; for ( p = 0; p < num_procs; ++p ) { @@ -442,7 +443,7 @@ hypre_MatTCommPkgCreate_core ( }; hypre_MPI_Allgatherv( send_buf, 3 * num_sends, HYPRE_MPI_INT, - recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm); + recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, hcomm); recv_vec_starts[0] = 0; j2 = 0; j = 0; diff --git a/src/parcsr_mv/gen_fffc.c b/src/parcsr_mv/gen_fffc.c index 4526813bc9..8fc8ff6520 100644 --- a/src/parcsr_mv/gen_fffc.c +++ b/src/parcsr_mv/gen_fffc.c @@ -22,7 +22,8 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); if (!hypre_ParCSRMatrixCommPkg(A)) { @@ -170,15 +171,15 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, n_Fpts = fpt_array[num_threads]; big_Fpts = n_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, 
HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); fpts_starts[0] = fpts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier @@ -531,7 +532,8 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -687,8 +689,8 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, big_Fpts = n_Fpts; big_new_Fpts = n_new_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); - hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); fpts_starts[0] = fpts_starts[1] - big_Fpts; new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts; if (my_id == num_procs - 1) @@ -697,9 +699,9 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - 
hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier @@ -1072,7 +1074,8 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FF_ptr, HYPRE_Real **D_lambda_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -1227,9 +1230,9 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, big_Fpts = n_Fpts; big_new_Fpts = n_new_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); fpts_starts[0] = fpts_starts[1] - big_Fpts; new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts; if (my_id == num_procs - 1) @@ -1238,9 +1241,9 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + 
hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier diff --git a/src/parcsr_mv/par_csr_assumed_part.c b/src/parcsr_mv/par_csr_assumed_part.c index 7356168d2a..c5a06aaf0b 100644 --- a/src/parcsr_mv/par_csr_assumed_part.c +++ b/src/parcsr_mv/par_csr_assumed_part.c @@ -49,6 +49,7 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt hypre_MPI_Request *requests; hypre_MPI_Status status0, *statuses; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_ANNOTATE_FUNC_BEGIN; @@ -179,7 +180,7 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt for (i = 0; i < contact_list_length; i++) { hypre_MPI_Isend(&CONTACT(i, 1), 2, HYPRE_MPI_BIG_INT, CONTACT(i, 0), flag1, - comm, &requests[i]); + hcomm, &requests[i]); /*hypre_MPI_COMM_WORLD, &requests[i]);*/ } @@ -261,7 +262,7 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt while (rows_found != locate_row_count) { hypre_MPI_Recv( tmp_range, 2, HYPRE_MPI_BIG_INT, hypre_MPI_ANY_SOURCE, - flag1, comm, &status0); + flag1, hcomm, &status0); /*flag1 , hypre_MPI_COMM_WORLD, &status0);*/ if (part->length == part->storage_length) diff --git a/src/parcsr_mv/par_csr_bool_matrix.c b/src/parcsr_mv/par_csr_bool_matrix.c index 1f56344b0a..36c05cbd65 100644 --- a/src/parcsr_mv/par_csr_bool_matrix.c +++ b/src/parcsr_mv/par_csr_bool_matrix.c @@ -351,6 +351,7 @@ HYPRE_Int hypre_ParCSRBooleanMatrixInitialize( hypre_ParCSRBooleanMatrix *matrix HYPRE_Int hypre_ParCSRBooleanMatrixSetNNZ( hypre_ParCSRBooleanMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBooleanMatrix *diag = hypre_ParCSRBooleanMatrix_Get_Diag(matrix); HYPRE_Int *diag_i = hypre_CSRBooleanMatrix_Get_I(diag); hypre_CSRBooleanMatrix *offd = hypre_ParCSRBooleanMatrix_Get_Offd(matrix); @@ -362,7 +363,7 @@ HYPRE_Int 
hypre_ParCSRBooleanMatrixSetNNZ( hypre_ParCSRBooleanMatrix *matrix) local_num_nonzeros = diag_i[local_num_rows] + offd_i[local_num_rows]; hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); hypre_ParCSRBooleanMatrix_Get_NNZ(matrix) = total_num_nonzeros; return ierr; } @@ -646,7 +647,8 @@ HYPRE_Int hypre_ParCSRBooleanMatrixGetLocalRange(hypre_ParCSRBooleanMatrix *matr HYPRE_Int ierr = 0; HYPRE_Int my_id; - hypre_MPI_Comm_rank( hypre_ParCSRBooleanMatrix_Get_Comm(matrix), &my_id ); + MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(matrix); + hypre_MPI_Comm_rank(comm, &my_id ); *row_start = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix)[ my_id ]; *row_end = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix)[ my_id + 1 ] - 1; @@ -688,7 +690,8 @@ HYPRE_Int hypre_ParCSRBooleanMatrixGetRow(hypre_ParCSRBooleanMatrix *mat, if (hypre_ParCSRBooleanMatrix_Get_Getrowactive(mat)) { return (-1); } - hypre_MPI_Comm_rank( hypre_ParCSRBooleanMatrix_Get_Comm(mat), &my_id ); + MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(mat); + hypre_MPI_Comm_rank(comm, &my_id); hypre_ParCSRBooleanMatrix_Get_Getrowactive(mat) = 1; @@ -841,6 +844,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix HYPRE_Int i, j, ind; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); @@ -851,7 +855,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix a_i = hypre_CSRBooleanMatrix_Get_I(A); a_j = hypre_CSRBooleanMatrix_Get_J(A); } - hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm); + hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, hcomm); global_num_rows = global_data[0]; global_num_cols = global_data[1]; @@ -878,7 +882,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix local_num_nonzeros[num_procs - 1] = a_i[(HYPRE_Int)global_num_rows] - a_i[(HYPRE_Int)row_starts[num_procs - 1]]; } - hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT, 
&num_nonzeros, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, hcomm); if (my_id == 0) { num_nonzeros = local_num_nonzeros[0]; } @@ -897,7 +901,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix &a_i[(HYPRE_Int)row_starts[i]], &a_j[ind], &csr_matrix_datatypes[i]); - hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm, + hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, hcomm, &requests[j++]); hypre_MPI_Type_free(&csr_matrix_datatypes[i]); } @@ -916,7 +920,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix hypre_CSRBooleanMatrix_Get_I(local_A), hypre_CSRBooleanMatrix_Get_J(local_A), csr_matrix_datatypes); - hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0); + hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, hcomm, &status0); hypre_MPI_Type_free(csr_matrix_datatypes); } diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 70a355f1ba..75db057537 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -367,6 +367,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_send_bytes = 0; HYPRE_Int num_recv_bytes = 0; hypre_ParCSRCommHandle *comm_handle; @@ -490,7 +491,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_sends; i++) { @@ -498,7 +499,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start 
= hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -512,7 +513,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -520,7 +521,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -534,7 +535,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_sends; i++) { @@ -542,7 +543,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -556,7 +557,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, 
&requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -564,7 +565,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -578,7 +579,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_sends; i++) { @@ -586,7 +587,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -600,7 +601,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -608,7 +609,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } break; } @@ -742,6 +743,7 @@ hypre_ParCSRCommPkgCreate_core( hypre_MPI_Request *requests; hypre_MPI_Status *status; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, 
&num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -802,7 +804,7 @@ hypre_ParCSRCommPkgCreate_core( local_info = 2 * num_recvs; - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hcomm); /* ---------------------------------------------------------------------- * generate information to be sent: tmp contains for each recv_proc: @@ -837,7 +839,7 @@ hypre_ParCSRCommPkgCreate_core( } hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_INT, recv_buf, info, - displs, HYPRE_MPI_INT, comm); + displs, HYPRE_MPI_INT, hcomm); /* ---------------------------------------------------------------------- * determine num_sends and number of elements to be sent @@ -898,7 +900,7 @@ hypre_ParCSRCommPkgCreate_core( vec_len = send_map_starts[i + 1] - vec_start; ip = send_procs[i]; hypre_MPI_Irecv(&big_buf_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -906,7 +908,7 @@ hypre_ParCSRCommPkgCreate_core( vec_len = recv_vec_starts[i + 1] - vec_start; ip = recv_procs[i]; hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); + ip, 0, hcomm, &requests[j++]); } if (num_requests) diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 95296c848b..6586b83dfb 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -2344,6 +2344,7 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, /* fetch the communication information from */ comm = hypre_ParCSRMatrixComm(G_csr); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); comm_pkg = hypre_ParCSRMatrixCommPkg(G_csr); @@ -2384,7 +2385,7 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, pgraph_i = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); 
recv_cnts = hypre_TAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&n_proc_array, 1, HYPRE_MPI_INT, recv_cnts, 1, - HYPRE_MPI_INT, comm); + HYPRE_MPI_INT, hcomm); pgraph_i[0] = 0; for (i = 1; i <= nprocs; i++) { @@ -2392,7 +2393,7 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, } pgraph_j = hypre_TAlloc(HYPRE_Int, pgraph_i[nprocs], HYPRE_MEMORY_HOST); hypre_MPI_Allgatherv(proc_array, n_proc_array, HYPRE_MPI_INT, pgraph_j, - recv_cnts, pgraph_i, HYPRE_MPI_INT, comm); + recv_cnts, pgraph_i, HYPRE_MPI_INT, hcomm); hypre_TFree(recv_cnts, HYPRE_MEMORY_HOST); /* BFS on the processor graph to determine parent and children */ @@ -2549,6 +2550,7 @@ void hypre_ParCSRMatrixExtractSubmatrices( hypre_ParCSRMatrix *A_csr, A_diag_j = hypre_CSRMatrixJ(A_diag); A_diag_a = hypre_CSRMatrixData(A_diag); comm = hypre_ParCSRMatrixComm(A_csr); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); if (nprocs > 1) @@ -2564,7 +2566,7 @@ void hypre_ParCSRMatrixExtractSubmatrices( hypre_ParCSRMatrix *A_csr, proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1, - HYPRE_MPI_INT, comm); + HYPRE_MPI_INT, hcomm); k = 0; for (i = 0; i < nprocs; i++) { @@ -2914,6 +2916,7 @@ void hypre_ParCSRMatrixExtractRowSubmatrices( hypre_ParCSRMatrix *A_csr, A_offd_i = hypre_CSRMatrixI(A_offd); A_offd_j = hypre_CSRMatrixJ(A_offd); comm = hypre_ParCSRMatrixComm(A_csr); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); @@ -2924,7 +2927,7 @@ void hypre_ParCSRMatrixExtractRowSubmatrices( hypre_ParCSRMatrix *A_csr, proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); 
hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1, - HYPRE_MPI_INT, comm); + HYPRE_MPI_INT, hcomm); k = 0; for (i = 0; i < nprocs; i++) { @@ -5424,13 +5427,14 @@ HYPRE_Real hypre_ParCSRMatrixFnorm( hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Real f_diag, f_offd, local_result, result; f_diag = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixDiag(A)); f_offd = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixOffd(A)); local_result = f_diag * f_diag + f_offd * f_offd; - hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); return hypre_sqrt(result); } @@ -5448,6 +5452,7 @@ hypre_ParCSRMatrixInfNorm( hypre_ParCSRMatrix *A, HYPRE_Real *norm ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* diag part of A */ hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); @@ -5518,7 +5523,7 @@ hypre_ParCSRMatrixInfNorm( hypre_ParCSRMatrix *A, } #endif - hypre_MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); return hypre_error_flag; } @@ -5693,6 +5698,7 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, HYPRE_Real strength_thresh) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -5750,7 +5756,7 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, { total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); nc_local = (HYPRE_Int)(cpts_starts[1] - cpts_starts[0]); } @@ -5766,13 +5772,13 @@ 
hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, } } big_nf_local = (HYPRE_BigInt) nf_local; - hypre_MPI_Scan(&big_nf_local, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_nf_local, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); fpts_starts[0] = fpts_starts[1] - nf_local; if (my_id == num_procs - 1) { total_global_fpts = fpts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } if (row_set == -1 && col_set == -1) diff --git a/src/parcsr_mv/par_csr_matrix.c b/src/parcsr_mv/par_csr_matrix.c index 0703b49f17..2500bf7d64 100644 --- a/src/parcsr_mv/par_csr_matrix.c +++ b/src/parcsr_mv/par_csr_matrix.c @@ -325,6 +325,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f } comm = hypre_ParCSRMatrixComm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); diag = hypre_ParCSRMatrixDiag(matrix); offd = hypre_ParCSRMatrixOffd(matrix); @@ -341,7 +342,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f hypre_CSRMatrixNumNonzeros(offd) ); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixNumNonzeros(matrix) = total_num_nonzeros; } @@ -353,7 +354,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f hypre_CSRMatrixNumNonzeros(offd) ); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixDNumNonzeros(matrix) = total_num_nonzeros; } @@ -393,6 +394,7 @@ HYPRE_Int hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix ) { MPI_Comm comm = hypre_ParCSRMatrixComm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *diag = 
hypre_ParCSRMatrixDiag(matrix); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix); HYPRE_Int *rownnz_diag = hypre_CSRMatrixRownnz(diag); @@ -427,7 +429,7 @@ hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix ) local_num_rownnz += (HYPRE_BigInt) ((num_rownnz_diag - i) + (num_rownnz_offd - j)); hypre_MPI_Allreduce(&local_num_rownnz, &global_num_rownnz, 1, - HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rownnz; @@ -1605,6 +1607,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); total_size = 4; if (my_id == 0) @@ -1678,7 +1681,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, A_i = hypre_CSRMatrixI(A); A_j = hypre_CSRMatrixJ(A); } - hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, comm); + hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, hcomm); global_num_rows = global_data[0]; global_num_cols = global_data[1]; global_size = global_data[2]; @@ -1691,29 +1694,29 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, { send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); + &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); + &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); send_start = 4 + (num_procs + 1); hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); + &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); send_start = 5 + (num_procs + 1); hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); + &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); } else if ((global_data[3] == 0) || (global_data[3] == 1)) 
{ send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); + &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); + &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); if (global_data[3] == 0) { @@ -1725,11 +1728,11 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, { send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); + &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); + &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); } } hypre_TFree(global_data, HYPRE_MEMORY_HOST); @@ -1763,8 +1766,8 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, } //num_nonzeros_proc[num_procs-1] = A_i[(HYPRE_Int)global_num_rows] - A_i[(HYPRE_Int)row_starts[num_procs-1]]; } - hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, comm); - hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, hcomm); /* RL: this is not correct: (HYPRE_Int) global_num_cols */ local_A = hypre_CSRMatrixCreate(num_rows, (HYPRE_Int) global_num_cols, num_nonzeros); @@ -1784,7 +1787,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, &A_i[(HYPRE_Int) global_row_starts[i]], &A_j[ind], &csr_matrix_datatypes[i]); - hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm, + hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, hcomm, &requests[i - 1]); hypre_MPI_Type_free(&csr_matrix_datatypes[i]); } @@ -1818,7 +1821,7 @@ 
hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, hypre_CSRMatrixI(local_A), hypre_CSRMatrixJ(local_A), &csr_matrix_datatypes[0]); - hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0); + hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, hcomm, &status0); hypre_MPI_Type_free(csr_matrix_datatypes); } @@ -2161,6 +2164,7 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Clone input matrix to host memory */ par_temp = hypre_ParCSRMatrixClone_v2(par_matrix, 1, HYPRE_MEMORY_HOST); @@ -2227,11 +2231,11 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, if (local_num_rows) { /* look for a message from processor 0 */ - hypre_MPI_Probe(0, tag1, comm, &status1); + hypre_MPI_Probe(0, tag1, hcomm, &status1); hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count); send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST); - hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1); + hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, hcomm, &status1); /* now unpack */ num_types = send_info[0]; @@ -2299,7 +2303,7 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, for (i = start; i < num_types; i++) { hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1, - comm, &requests[i - start]); + hcomm, &requests[i - start]); } hypre_MPI_Waitall(num_types - start, requests, status); @@ -2341,13 +2345,13 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, proc_id = used_procs[i]; vec_len = (HYPRE_Int)(new_vec_starts[i + 1] - new_vec_starts[i]); hypre_MPI_Irecv(&matrix_i[new_vec_starts[i] + 1], vec_len, HYPRE_MPI_INT, - proc_id, tag2, comm, &requests[j++]); + proc_id, tag2, hcomm, &requests[j++]); } for (i = 0; i < num_types; i++) { proc_id = used_procs[i]; hypre_MPI_Isend(&local_matrix_i[1], 
local_num_rows, HYPRE_MPI_INT, - proc_id, tag2, comm, &requests[j++]); + proc_id, tag2, hcomm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -2380,17 +2384,17 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, start_index = matrix_i[(HYPRE_Int)new_vec_starts[i]]; num_data = matrix_i[(HYPRE_Int)new_vec_starts[i + 1]] - start_index; hypre_MPI_Irecv(&matrix_data[start_index], num_data, HYPRE_MPI_COMPLEX, - used_procs[i], tag1, comm, &requests[j++]); + used_procs[i], tag1, hcomm, &requests[j++]); hypre_MPI_Irecv(&matrix_j[start_index], num_data, HYPRE_MPI_INT, - used_procs[i], tag3, comm, &requests[j++]); + used_procs[i], tag3, hcomm, &requests[j++]); } local_num_nonzeros = local_matrix_i[local_num_rows]; for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_matrix_data, local_num_nonzeros, HYPRE_MPI_COMPLEX, - used_procs[i], tag1, comm, &requests[j++]); + used_procs[i], tag1, hcomm, &requests[j++]); hypre_MPI_Isend(local_matrix_j, local_num_nonzeros, HYPRE_MPI_INT, - used_procs[i], tag3, comm, &requests[j++]); + used_procs[i], tag3, hcomm, &requests[j++]); } hypre_MPI_Waitall(num_requests, requests, status); diff --git a/src/parcsr_mv/par_csr_matrix_stats.c b/src/parcsr_mv/par_csr_matrix_stats.c index 7925d1a203..e892f7a846 100644 --- a/src/parcsr_mv/par_csr_matrix_stats.c +++ b/src/parcsr_mv/par_csr_matrix_stats.c @@ -347,6 +347,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, /* We assume all MPI communicators are equal */ comm = hypre_ParCSRMatrixComm(matrices[0]); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Allocate MPI buffers */ recvbuffer = hypre_CTAlloc(HYPRE_Real, 4 * num_matrices, HYPRE_MEMORY_HOST); @@ -388,7 +389,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 4 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); + HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) @@ 
-419,7 +420,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 3 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_SUM, 0, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, 0, hcomm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) @@ -466,7 +467,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 2 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_SUM, 0, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, 0, hcomm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) diff --git a/src/parcsr_mv/par_vector.c b/src/parcsr_mv/par_vector.c index 6e1c5f954f..9186b45bca 100644 --- a/src/parcsr_mv/par_vector.c +++ b/src/parcsr_mv/par_vector.c @@ -498,6 +498,7 @@ hypre_ParVectorInnerProd( hypre_ParVector *x, hypre_ParVector *y ) { MPI_Comm comm = hypre_ParVectorComm(x); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); @@ -508,7 +509,7 @@ hypre_ParVectorInnerProd( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] -= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif @@ -584,6 +585,7 @@ hypre_VectorToParVector ( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == 0) { @@ -593,9 +595,9 @@ hypre_VectorToParVector ( MPI_Comm comm, global_vecstride = hypre_VectorVectorStride(v); } - hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm); - hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm); - hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, hcomm); + 
hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, hcomm); if (num_vectors == 1) { @@ -616,7 +618,7 @@ hypre_VectorToParVector ( MPI_Comm comm, global_vec_starts = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); } hypre_MPI_Gather(&first_index, 1, HYPRE_MPI_BIG_INT, global_vec_starts, - 1, HYPRE_MPI_BIG_INT, 0, comm); + 1, HYPRE_MPI_BIG_INT, 0, hcomm); if (my_id == 0) { global_vec_starts[num_procs] = hypre_ParVectorGlobalSize(par_vector); @@ -640,7 +642,7 @@ hypre_VectorToParVector ( MPI_Comm comm, { hypre_MPI_Isend( &v_data[(HYPRE_Int) global_vec_starts[p]] + j * global_vecstride, (HYPRE_Int)(global_vec_starts[p + 1] - global_vec_starts[p]), - HYPRE_MPI_COMPLEX, p, 0, comm, &requests[k++] ); + HYPRE_MPI_COMPLEX, p, 0, hcomm, &requests[k++] ); } if (num_vectors == 1) { @@ -667,7 +669,7 @@ hypre_VectorToParVector ( MPI_Comm comm, { for ( j = 0; j < num_vectors; ++j ) hypre_MPI_Recv( local_data + j * vecstride, local_size, HYPRE_MPI_COMPLEX, - 0, 0, comm, &status0 ); + 0, 0, hcomm, &status0 ); } if (global_vec_starts) @@ -739,6 +741,7 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); local_size = (HYPRE_Int)(last_index - first_index + 1); if (hypre_GetActualMemLocation(hypre_ParVectorMemoryLocation(par_v)) != @@ -805,11 +808,11 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, if (local_size) { /* look for a message from processor 0 */ - hypre_MPI_Probe(0, tag1, comm, &status1); + hypre_MPI_Probe(0, tag1, hcomm, &status1); hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count); send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST); - hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1); + hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, hcomm, &status1); /* now unpack */ num_types = send_info[0]; @@ -876,7 
+879,7 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, for (i = start; i < num_types; i++) { hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], - tag1, comm, &requests[i - start]); + tag1, hcomm, &requests[i - start]); } hypre_MPI_Waitall(num_types - start, requests, status); @@ -923,12 +926,12 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, { vec_len = (HYPRE_Int) (new_vec_starts[i + 1] - new_vec_starts[i]); hypre_MPI_Irecv(&vector_data[(HYPRE_Int)new_vec_starts[i]], num_vectors * vec_len, - HYPRE_MPI_COMPLEX, used_procs[i], tag2, comm, &requests[j++]); + HYPRE_MPI_COMPLEX, used_procs[i], tag2, hcomm, &requests[j++]); } for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_data, num_vectors * local_size, HYPRE_MPI_COMPLEX, - used_procs[i], tag2, comm, &requests[j++]); + used_procs[i], tag2, hcomm, &requests[j++]); } hypre_MPI_Waitall(num_requests, requests, status); diff --git a/src/parcsr_mv/par_vector_batched.c b/src/parcsr_mv/par_vector_batched.c index 18dc8f8008..9e20f4bbd4 100644 --- a/src/parcsr_mv/par_vector_batched.c +++ b/src/parcsr_mv/par_vector_batched.c @@ -53,6 +53,7 @@ hypre_ParVectorMassInnerProd( hypre_ParVector *x, HYPRE_Real *result ) { MPI_Comm comm = hypre_ParVectorComm(x); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); HYPRE_Real *local_result; HYPRE_Int i; @@ -72,7 +73,7 @@ hypre_ParVectorMassInnerProd( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] -= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(local_result, result, k, HYPRE_MPI_REAL, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif @@ -97,6 +98,7 @@ hypre_ParVectorMassDotpTwo ( hypre_ParVector *x, HYPRE_Real *result_y ) { MPI_Comm comm = hypre_ParVectorComm(x); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); 
hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Real *local_result, *result; @@ -119,7 +121,7 @@ hypre_ParVectorMassDotpTwo ( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] -= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(local_result, result, 2 * k, HYPRE_MPI_REAL, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif diff --git a/src/sstruct_ls/maxwell_TV_setup.c b/src/sstruct_ls/maxwell_TV_setup.c index 88bea6e23c..5d58f827e3 100644 --- a/src/sstruct_ls/maxwell_TV_setup.c +++ b/src/sstruct_ls/maxwell_TV_setup.c @@ -869,8 +869,10 @@ hypre_MaxwellTV_Setup(void *maxwell_vdata, } lev_nboxes = 0; + MPI_Comm comm = hypre_SStructGridComm(egrid_l[l + 1]); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Allreduce(&nboxes, &lev_nboxes, 1, HYPRE_MPI_INT, hypre_MPI_SUM, - hypre_SStructGridComm(egrid_l[l + 1])); + hcomm); if (lev_nboxes) /* there were coarsen boxes */ { diff --git a/src/sstruct_ls/sstruct_sharedDOFComm.c b/src/sstruct_ls/sstruct_sharedDOFComm.c index 88f0e022b8..e6282e90b6 100644 --- a/src/sstruct_ls/sstruct_sharedDOFComm.c +++ b/src/sstruct_ls/sstruct_sharedDOFComm.c @@ -153,6 +153,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, hypre_MPI_Comm_rank(A_comm, &myproc); hypre_MPI_Comm_size(grid_comm, &nprocs); + hypre_MPI_Comm hgrid_comm = hypre_MPI_CommFromMPI_Comm(grid_comm); start_rank = hypre_ParCSRMatrixFirstRowIndex(A); end_rank = hypre_ParCSRMatrixLastRowIndex(A); @@ -781,7 +782,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, { rbuffer_RowsNcols[proc] = hypre_TAlloc(HYPRE_Int, 2 * RecvFromProcs[proc], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(rbuffer_RowsNcols[proc], 2 * RecvFromProcs[proc], HYPRE_MPI_INT, - proc, 0, grid_comm, &requests[j++]); + proc, 0, hgrid_comm, &requests[j++]); } /* if (RecvFromProcs[proc]) */ } /* for (proc= 0; proc< nprocs; proc++) */ @@ -791,7 
+792,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (tot_nsendRowsNcols[proc]) { hypre_MPI_Isend(send_RowsNcols[proc], tot_nsendRowsNcols[proc], HYPRE_MPI_INT, proc, - 0, grid_comm, &requests[j++]); + 0, hgrid_comm, &requests[j++]); } } @@ -835,7 +836,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (RecvFromProcs[proc]) { hypre_MPI_Irecv(rbuffer_ColsData[proc], 2 * send_RowsNcols_alloc[proc], HYPRE_MPI_REAL, - proc, 1, grid_comm, &requests[j++]); + proc, 1, hgrid_comm, &requests[j++]); } /* if (RecvFromProcs[proc]) */ } /* for (proc= 0; proc< nprocs; proc++) */ @@ -844,7 +845,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (tot_sendColsData[proc]) { hypre_MPI_Isend(vals[proc], tot_sendColsData[proc], HYPRE_MPI_REAL, proc, - 1, grid_comm, &requests[j++]); + 1, hgrid_comm, &requests[j++]); } } diff --git a/src/sstruct_mv/HYPRE_sstruct_graph.c b/src/sstruct_mv/HYPRE_sstruct_graph.c index 5691325e7d..f5bd343f25 100644 --- a/src/sstruct_mv/HYPRE_sstruct_graph.c +++ b/src/sstruct_mv/HYPRE_sstruct_graph.c @@ -391,8 +391,10 @@ HYPRE_SStructGraphAssemble( HYPRE_SStructGraph graph ) /* if any processor has added entries, then all need to participate */ + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Allreduce(&n_add_entries, &global_n_add_entries, - 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (global_n_add_entries > 0 ) { diff --git a/src/sstruct_mv/sstruct_grid.c b/src/sstruct_mv/sstruct_grid.c index 5b2601e04e..b9d1620d66 100644 --- a/src/sstruct_mv/sstruct_grid.c +++ b/src/sstruct_mv/sstruct_grid.c @@ -480,6 +480,7 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) hypre_MPI_Comm_size(comm, &nprocs); hypre_MPI_Comm_rank(comm, &myproc); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*find offset and ghost offsets */ { @@ -488,7 +489,7 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) 
/* offsets */ hypre_MPI_Scan( - &local_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + &local_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ offsets[0] = scan_recv - local_size; /* first point in next proc's range */ @@ -498,7 +499,7 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) /* ghost offsets */ hypre_MPI_Scan( - &ghlocal_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + &ghlocal_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ ghoffsets[0] = scan_recv - ghlocal_size; /* first point in next proc's range */ diff --git a/src/struct_ls/pfmg_setup.c b/src/struct_ls/pfmg_setup.c index 116475d75b..2315291cec 100644 --- a/src/struct_ls/pfmg_setup.c +++ b/src/struct_ls/pfmg_setup.c @@ -773,6 +773,9 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, HYPRE_Int constant_coefficient; HYPRE_Int i, d; + MPI_Comm comm = hypre_StructMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + /*---------------------------------------------------------- * Initialize some things *----------------------------------------------------------*/ @@ -844,13 +847,13 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, tcxyz[1] = cxyz[1]; tcxyz[2] = cxyz[2]; hypre_MPI_Allreduce(tcxyz, cxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - hypre_StructMatrixComm(A)); + hcomm); tcxyz[0] = sqcxyz[0]; tcxyz[1] = sqcxyz[1]; tcxyz[2] = sqcxyz[2]; hypre_MPI_Allreduce(tcxyz, sqcxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - hypre_StructMatrixComm(A)); + hcomm); for (d = 0; d < 3; d++) { diff --git a/src/struct_mv/assumed_part.c b/src/struct_mv/assumed_part.c index 28b4a1161d..c0482d2346 100644 --- a/src/struct_mv/assumed_part.c +++ b/src/struct_mv/assumed_part.c @@ -284,6 +284,7 @@ hypre_APGetAllBoxesInRegions( hypre_BoxArray *region_array, HYPRE_Real *send_buf_vol; HYPRE_Real *vol_array; HYPRE_Real *dbl_vol_and_count; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
count_array = *p_count_array; vol_array = *p_vol_array; @@ -309,7 +310,7 @@ hypre_APGetAllBoxesInRegions( hypre_BoxArray *region_array, } hypre_MPI_Allreduce(send_buf_vol, dbl_vol_and_count, num_regions * 2, - HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); /* Unpack */ for (i = 0; i < num_regions; i++) @@ -348,6 +349,7 @@ hypre_APShrinkRegions( hypre_BoxArray *region_array, hypre_Box *my_box, *result_box, *grow_box, *region; hypre_Index grow_index, imin, imax; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); ndim = hypre_BoxArrayNDim(my_box_array); ndim2 = 2 * ndim; @@ -455,7 +457,7 @@ hypre_APShrinkRegions( hypre_BoxArray *region_array, /* Do an Allreduce on size and volume to get the global information */ hypre_MPI_Allreduce(indices, recvbuf, num_regions * ndim2, HYPRE_MPI_INT, - hypre_MPI_MIN, comm); + hypre_MPI_MIN, hcomm); /* Unpack the "shrunk" regions */ /* For each region */ diff --git a/src/struct_mv/box_manager.c b/src/struct_mv/box_manager.c index edd357d91c..e9ebfd9dd6 100644 --- a/src/struct_mv/box_manager.c +++ b/src/struct_mv/box_manager.c @@ -273,13 +273,14 @@ hypre_BoxManGetGlobalIsGatherCalled( hypre_BoxManager *manager, HYPRE_Int nprocs; hypre_MPI_Comm_size(comm, &nprocs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); loc_is_gather = hypre_BoxManIsGatherCalled(manager); if (nprocs > 1) { hypre_MPI_Allreduce(&loc_is_gather, is_gather, 1, HYPRE_MPI_INT, - hypre_MPI_LOR, comm); + hypre_MPI_LOR, hcomm); } else /* just one proc */ { @@ -1159,6 +1160,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) /* initilize */ hypre_MPI_Comm_rank(comm, &myid); hypre_MPI_Comm_size(comm, &nprocs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); gather_regions = hypre_BoxManGatherRegions(manager); nentries = hypre_BoxManNEntries(manager); @@ -1176,7 +1178,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) { is_gather = hypre_BoxManIsGatherCalled(manager); hypre_MPI_Allreduce(&is_gather, 
&global_is_gather, 1, HYPRE_MPI_INT, - hypre_MPI_LOR, comm); + hypre_MPI_LOR, hcomm); } else /* just one proc */ { @@ -1317,7 +1319,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) sendbuf2[1] = (HYPRE_Real) num_my_entries; hypre_MPI_Allreduce(&sendbuf2, &recvbuf2, 2, HYPRE_MPI_REAL, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); global_volume = recvbuf2[0]; global_num_boxes = (HYPRE_Int) recvbuf2[1]; @@ -1425,7 +1427,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) send_statbuf[2] = num_my_entries; hypre_MPI_Allreduce(send_statbuf, statbuf, 3, HYPRE_MPI_INT, - hypre_MPI_MAX, comm); + hypre_MPI_MAX, hcomm); //max_proc_count = statbuf[0]; @@ -1773,7 +1775,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) recv_counts = hypre_CTAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&send_count_bytes, 1, HYPRE_MPI_INT, - recv_counts, 1, HYPRE_MPI_INT, comm); + recv_counts, 1, HYPRE_MPI_INT, hcomm); displs = hypre_CTAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); displs[0] = 0; @@ -1842,7 +1844,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) /* now send_buf is ready to go! 
*/ hypre_MPI_Allgatherv(send_buf, send_count_bytes, hypre_MPI_BYTE, - recv_buf, recv_counts, displs, hypre_MPI_BYTE, comm); + recv_buf, recv_counts, displs, hypre_MPI_BYTE, hcomm); /* unpack recv_buf into entries - let's just unpack them all into the entries table - this way they will already be sorted - so we set @@ -2203,7 +2205,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) if (global_num_boxes == nentries) { all_known = 1; } hypre_MPI_Allreduce(&all_known, &global_all_known, 1, HYPRE_MPI_INT, - hypre_MPI_LAND, comm); + hypre_MPI_LAND, hcomm); hypre_BoxManAllGlobalKnown(manager) = global_all_known; } diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 37f42c7233..d18c9660ab 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -829,6 +829,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_CommPkgComm(comm_pkg); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_requests; hypre_MPI_Request *requests; @@ -1033,7 +1034,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Irecv(recv_buffers_mpi[i], hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); + tag, hcomm, &requests[j++]); if ( hypre_CommPkgFirstComm(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); @@ -1048,7 +1049,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Isend(send_buffers_mpi[i], hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); + tag, hcomm, &requests[j++]); if ( hypre_CommPkgFirstComm(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); diff --git a/src/struct_mv/struct_grid.c 
b/src/struct_mv/struct_grid.c index e88ba25c80..bde92f32d4 100644 --- a/src/struct_mv/struct_grid.c +++ b/src/struct_mv/struct_grid.c @@ -276,6 +276,7 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* has the box manager been created? */ if (boxman == NULL) @@ -375,7 +376,7 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) HYPRE_BigInt big_size = (HYPRE_BigInt)size; hypre_MPI_Allreduce(&big_size, &global_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, comm); + hypre_MPI_SUM, hcomm); hypre_StructGridGlobalSize(grid) = global_size; /* TO DO: this HYPRE_Int * could overflow! (used * to calc flops) */ @@ -436,7 +437,7 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) sendbuf6[d + ndim] = -hypre_BoxIMaxD(bounding_box, d); } hypre_MPI_Allreduce(sendbuf6, recvbuf6, 2 * ndim, HYPRE_MPI_INT, - hypre_MPI_MIN, comm); + hypre_MPI_MIN, hcomm); /* unpack buffer */ for (d = 0; d < ndim; d++) { @@ -593,6 +594,7 @@ hypre_GatherAllBoxes(MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_all_procs); hypre_MPI_Comm_rank(comm, &my_rank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* compute recvcounts and displs */ item_size = 2 * ndim + 1; @@ -600,7 +602,7 @@ hypre_GatherAllBoxes(MPI_Comm comm, recvcounts = hypre_TAlloc(HYPRE_Int, num_all_procs, HYPRE_MEMORY_HOST); displs = hypre_TAlloc(HYPRE_Int, num_all_procs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&sendcount, 1, HYPRE_MPI_INT, - recvcounts, 1, HYPRE_MPI_INT, comm); + recvcounts, 1, HYPRE_MPI_INT, hcomm); displs[0] = 0; recvbuf_size = recvcounts[0]; for (p = 1; p < num_all_procs; p++) @@ -629,7 +631,7 @@ hypre_GatherAllBoxes(MPI_Comm comm, /* get global grid info */ hypre_MPI_Allgatherv(sendbuf, sendcount, HYPRE_MPI_INT, - recvbuf, recvcounts, displs, HYPRE_MPI_INT, comm); + recvbuf, recvcounts, displs, HYPRE_MPI_INT, hcomm); /* sort recvbuf by process rank? 
*/ diff --git a/src/struct_mv/struct_innerprod.c b/src/struct_mv/struct_innerprod.c index d15b5cc0b8..d7ec309ef2 100644 --- a/src/struct_mv/struct_innerprod.c +++ b/src/struct_mv/struct_innerprod.c @@ -40,6 +40,9 @@ hypre_StructInnerProd( hypre_StructVector *x, HYPRE_Int ndim = hypre_StructVectorNDim(x); HYPRE_Int i; + MPI_Comm comm = hypre_StructVectorComm(x); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + #if 0 //defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) const HYPRE_Int data_location = hypre_StructGridDataLocation(hypre_StructVectorGrid(y)); #endif @@ -99,7 +102,7 @@ hypre_StructInnerProd( hypre_StructVector *x, process_result = (HYPRE_Real) local_result; hypre_MPI_Allreduce(&process_result, &final_innerprod_result, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, hypre_StructVectorComm(x)); + HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); hypre_IncFLOPCount(2 * hypre_StructVectorGlobalSize(x)); diff --git a/src/test/ij.c b/src/test/ij.c index e64355e32b..3ac0feafc8 100644 --- a/src/test/ij.c +++ b/src/test/ij.c @@ -10143,6 +10143,7 @@ BuildFuncsFromOneFile( HYPRE_Int argc, comm = hypre_MPI_COMM_WORLD; hypre_MPI_Comm_rank(comm, &myid ); hypre_MPI_Comm_size(comm, &num_procs ); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------- * Parse command line @@ -10196,7 +10197,7 @@ BuildFuncsFromOneFile( HYPRE_Int argc, { hypre_MPI_Isend(&dof_func[partitioning[i]], (partitioning[i + 1] - partitioning[i]), - HYPRE_MPI_INT, i, 0, comm, &requests[i - 1]); + HYPRE_MPI_INT, i, 0, hcomm, &requests[i - 1]); } for (i = 0; i < local_size; i++) { @@ -10208,7 +10209,7 @@ BuildFuncsFromOneFile( HYPRE_Int argc, } else { - hypre_MPI_Recv(dof_func_local, local_size, HYPRE_MPI_INT, 0, 0, comm, &status0); + hypre_MPI_Recv(dof_func_local, local_size, HYPRE_MPI_INT, 0, 0, hcomm, &status0); } *dof_func_ptr = dof_func_local; @@ -10315,6 +10316,7 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, 
*-----------------------------------------------------------*/ hypre_MPI_Comm_rank(comm, &myid); hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------- * Parse command line @@ -10389,7 +10391,7 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, displs[proc + 1] = displs[proc] + send_counts[proc]; } } - hypre_MPI_Scatter(send_counts, 1, HYPRE_MPI_INT, size, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Scatter(send_counts, 1, HYPRE_MPI_INT, size, 1, HYPRE_MPI_INT, 0, hcomm); if (myid == 0) { @@ -10412,7 +10414,7 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, array = hypre_CTAlloc(HYPRE_BigInt, *size, HYPRE_MEMORY_HOST); hypre_MPI_Scatterv(send_buffer, send_counts, displs, HYPRE_MPI_BIG_INT, - array, *size, HYPRE_MPI_BIG_INT, 0, comm); + array, *size, HYPRE_MPI_BIG_INT, 0, hcomm); *array_ptr = array; /* Free memory */ diff --git a/src/test/maxwell_unscaled.c b/src/test/maxwell_unscaled.c index d50bae7f02..7d5bf2dd7e 100644 --- a/src/test/maxwell_unscaled.c +++ b/src/test/maxwell_unscaled.c @@ -275,11 +275,11 @@ ReadData( char *filename, } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure diff --git a/src/test/sstruct.c b/src/test/sstruct.c index 1034b17500..2f5282423e 100644 --- a/src/test/sstruct.c +++ b/src/test/sstruct.c @@ -462,11 +462,11 @@ ReadData( char *filename, } } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, 
HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure diff --git a/src/test/sstruct_fac.c b/src/test/sstruct_fac.c index 8310f51156..5892e48c5a 100644 --- a/src/test/sstruct_fac.c +++ b/src/test/sstruct_fac.c @@ -321,11 +321,11 @@ ReadData( char *filename, } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index cd8c2cf6eb..9bdb4cdbf7 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -729,7 +729,13 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; +typedef struct +{ + MPI_Comm mpi_comm; +} hypre_MPI_Comm; + +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) + typedef MPI_Group hypre_MPI_Group; typedef MPI_Request hypre_MPI_Request; typedef MPI_Datatype hypre_MPI_Datatype; @@ 
-787,20 +793,20 @@ typedef MPI_User_function hypre_MPI_User_function; /* mpistubs.c */ HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Int hypre_MPI_Abort( MPI_Comm comm, HYPRE_Int errorcode ); HYPRE_Real hypre_MPI_Wtime( void ); HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, - hypre_MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); -hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); -HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - hypre_MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, + MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); +MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + MPI_Comm * comms ); HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypre_MPI_Group *newgroup ); HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); @@ -876,6 +882,7 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int 
hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); +hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm); diff --git a/src/utilities/error.c b/src/utilities/error.c index 865ae5f23c..8a7981675b 100644 --- a/src/utilities/error.c +++ b/src/utilities/error.c @@ -202,8 +202,8 @@ HYPRE_PrintErrorMessages(MPI_Comm comm) char *msg; hypre_MPI_Barrier(comm); - hypre_MPI_Comm_rank(comm, &myid); + for (msg = err.memory; msg < (err.memory + err.msg_sz); msg += strlen(msg) + 1) { hypre_fprintf(stderr, "%d: %s", myid, msg); diff --git a/src/utilities/exchange_data.c b/src/utilities/exchange_data.c index 7b596f72da..534c7e98d8 100644 --- a/src/utilities/exchange_data.c +++ b/src/utilities/exchange_data.c @@ -98,7 +98,8 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, HYPRE_Int response_obj_size, hypre_DataExchangeResponse *response_obj, HYPRE_Int max_response_size, - HYPRE_Int rnum, MPI_Comm comm, + HYPRE_Int rnum, + MPI_Comm comm, void **p_response_recv_buf, HYPRE_Int **p_response_recv_buf_starts) { @@ -184,6 +185,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, const HYPRE_Int term_tag = 1004 * rnum; const HYPRE_Int post_tag = 1006 * rnum; + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs ); hypre_MPI_Comm_rank(comm, &myid ); @@ -248,7 +250,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, hypre_MPI_Irecv(response_ptrs[i], max_response_total_bytes, hypre_MPI_BYTE, contact_proc_list[i], - response_tag, comm, &response_requests[i]); + response_tag, hcomm, &response_requests[i]); } /* send out contact messages */ @@ -259,7 +261,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, size = contact_send_buf_starts[i + 1] - 
contact_send_buf_starts[i] ; hypre_MPI_Isend(contact_ptrs[i], size * contact_obj_size, hypre_MPI_BYTE, contact_proc_list[i], - contact_tag, comm, &contact_requests[i]); + contact_tag, hcomm, &contact_requests[i]); /* start_ptr += (size*contact_obj_size); */ start_ptr = (void *) ((char *) start_ptr + (size * contact_obj_size)); } @@ -286,7 +288,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, for (i = 0; i < tree.num_child; i++) { - hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.child_id[i], term_tag, comm, + hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.child_id[i], term_tag, hcomm, &term_requests[i]); } @@ -306,7 +308,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, while (!terminate) { /* did I receive any contact messages? */ - hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm, + hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, hcomm, &contact_flag, &status); while (contact_flag) @@ -332,7 +334,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, /* this must be blocking - can't fill recv without the buffer*/ hypre_MPI_Recv(recv_contact_buf, contact_size * contact_obj_size, - hypre_MPI_BYTE, proc, contact_tag, comm, &fill_status); + hypre_MPI_BYTE, proc, contact_tag, hcomm, &fill_status); response_obj->fill_response(recv_contact_buf, contact_size, proc, response_obj, comm, &send_response_buf, @@ -373,7 +375,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, hypre_MPI_Isend(post_array[post_array_size], size, hypre_MPI_BYTE, proc, post_tag, /*hypre_MPI_COMM_WORLD, */ - comm, + hcomm, &post_send_requests[post_array_size]); post_array_size++; @@ -390,12 +392,12 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, /*send the block of data that includes the overhead */ /* this is a blocking send - the recv has already been posted */ hypre_MPI_Send(send_response_buf, max_response_total_bytes, - hypre_MPI_BYTE, proc, response_tag, comm); + hypre_MPI_BYTE, proc, response_tag, hcomm); 
/*--------------------------------------------------------------*/ /* look for any more contact messages*/ - hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm, + hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, hcomm, &contact_flag, &status); } @@ -422,10 +424,10 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, if (children_complete & (myid > 0)) /*root does not have a parent*/ { hypre_MPI_Isend(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag, - comm, &request_parent); + hcomm, &request_parent); hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree.parent_id, term_tag, - comm, &term_request1); + hcomm, &term_request1); } } else /*have we gotten a term message from our parent? */ @@ -446,7 +448,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, { /*a blocking send - recv has been posted already*/ hypre_MPI_Send(NULL, 0, HYPRE_MPI_INT, tree.child_id[i], - term_tag, comm); + term_tag, hcomm); } } } @@ -516,7 +518,7 @@ HYPRE_Int hypre_DataExchangeList(HYPRE_Int num_contacts, post_ptrs[count] = index_ptr; hypre_MPI_Irecv(post_ptrs[count], size, hypre_MPI_BYTE, contact_proc_list[i], post_tag, - comm, &post_recv_requests[count]); + hcomm, &post_recv_requests[count]); count++; /* index_ptr+=size;*/ index_ptr = (void *) ((char *) index_ptr + size); diff --git a/src/utilities/memory.c b/src/utilities/memory.c index 129059b523..cc1bb986a4 100644 --- a/src/utilities/memory.c +++ b/src/utilities/memory.c @@ -417,7 +417,6 @@ hypre_MAlloc_core(size_t size, HYPRE_Int zeroinit, hypre_MemoryLocation location if (!ptr) { hypre_OutOfMemory(size); - hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1); } return ptr; @@ -976,9 +975,8 @@ hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location) if (hypre_GetActualMemLocation(location) != hypre_MEMORY_HOST) { - hypre_printf("hypre_TReAlloc only works with HYPRE_MEMORY_HOST; Use hypre_TReAlloc_v2 instead!\n"); + hypre_error_w_msg(HYPRE_ERROR_MEMORY, "hypre_TReAlloc only works with HYPRE_MEMORY_HOST; Use 
hypre_TReAlloc_v2 instead!\n"); hypre_assert(0); - hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1); return NULL; } diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 988081923e..b22116d9a3 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -14,13 +14,13 @@ * a Fortran integer and hence usually the size of hypre_int. ****************************************************************************/ -hypre_MPI_Comm +MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ) { #ifdef HYPRE_HAVE_MPI_COMM_F2C - return (hypre_MPI_Comm) MPI_Comm_f2c(comm); + return (MPI_Comm) MPI_Comm_f2c(comm); #else - return (hypre_MPI_Comm) (size_t)comm; + return (MPI_Comm) (size_t)comm; #endif } @@ -759,6 +759,16 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) #else +hypre_MPI_Comm +hypre_MPI_CommFromMPI_Comm(MPI_Comm comm) +{ + hypre_MPI_Comm hcomm; + hypre_Memset(&hcomm, 0, sizeof(hypre_MPI_Comm), HYPRE_MEMORY_HOST); + hypre_MPI_CommMPI_Comm(hcomm) = comm; + + return hcomm; +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -773,8 +783,8 @@ hypre_MPI_Finalize( void ) } HYPRE_Int -hypre_MPI_Abort( hypre_MPI_Comm comm, - HYPRE_Int errorcode ) +hypre_MPI_Abort( MPI_Comm comm, + HYPRE_Int errorcode ) { return (HYPRE_Int) MPI_Abort(comm, (hypre_int)errorcode); } @@ -792,29 +802,29 @@ hypre_MPI_Wtick( void ) } HYPRE_Int -hypre_MPI_Barrier( hypre_MPI_Comm comm ) +hypre_MPI_Barrier( MPI_Comm comm ) { return (HYPRE_Int) MPI_Barrier(comm); } HYPRE_Int -hypre_MPI_Comm_create( hypre_MPI_Comm comm, - hypre_MPI_Group group, - hypre_MPI_Comm *newcomm ) +hypre_MPI_Comm_create( MPI_Comm comm, + hypre_MPI_Group group, + MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_create(comm, group, newcomm); } HYPRE_Int -hypre_MPI_Comm_dup( hypre_MPI_Comm comm, - hypre_MPI_Comm *newcomm ) +hypre_MPI_Comm_dup( MPI_Comm comm, + MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_dup(comm, newcomm); } HYPRE_Int -hypre_MPI_Comm_size( hypre_MPI_Comm comm, - HYPRE_Int *size ) 
+hypre_MPI_Comm_size( MPI_Comm comm, + HYPRE_Int *size ) { hypre_int mpi_size; HYPRE_Int ierr; @@ -824,8 +834,8 @@ hypre_MPI_Comm_size( hypre_MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_rank( hypre_MPI_Comm comm, - HYPRE_Int *rank ) +hypre_MPI_Comm_rank( MPI_Comm comm, + HYPRE_Int *rank ) { hypre_int mpi_rank; HYPRE_Int ierr; @@ -835,25 +845,25 @@ hypre_MPI_Comm_rank( hypre_MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_free( hypre_MPI_Comm *comm ) +hypre_MPI_Comm_free( MPI_Comm *comm ) { return (HYPRE_Int) MPI_Comm_free(comm); } HYPRE_Int -hypre_MPI_Comm_group( hypre_MPI_Comm comm, +hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ) { return (HYPRE_Int) MPI_Comm_group(comm, group); } HYPRE_Int -hypre_MPI_Comm_split( hypre_MPI_Comm comm, - HYPRE_Int n, - HYPRE_Int m, - hypre_MPI_Comm *comms ) +hypre_MPI_Comm_split( MPI_Comm comm, + HYPRE_Int color, + HYPRE_Int key, + MPI_Comm *newcomm ) { - return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int)n, (hypre_int)m, comms); + return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int) color, (hypre_int) key, newcomm); } HYPRE_Int @@ -916,7 +926,8 @@ hypre_MPI_Alltoall( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Alltoall(sendbuf, (hypre_int)sendcount, sendtype, - recvbuf, (hypre_int)recvcount, recvtype, comm); + recvbuf, (hypre_int)recvcount, recvtype, + hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -929,7 +940,8 @@ hypre_MPI_Allgather( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Allgather(sendbuf, (hypre_int)sendcount, sendtype, - recvbuf, (hypre_int)recvcount, recvtype, comm); + recvbuf, (hypre_int)recvcount, recvtype, + hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -946,7 +958,7 @@ hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(comm, &csize); + MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); mpi_recvcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); mpi_displs = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); for (i = 0; i < csize; i++) 
@@ -956,7 +968,7 @@ hypre_MPI_Allgatherv( void *sendbuf, } ierr = (HYPRE_Int) MPI_Allgatherv(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, mpi_recvcounts, mpi_displs, - recvtype, comm); + recvtype, hypre_MPI_CommMPI_Comm(comm)); hypre_TFree(mpi_recvcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -975,7 +987,7 @@ hypre_MPI_Gather( void *sendbuf, { return (HYPRE_Int) MPI_Gather(sendbuf, (hypre_int) sendcount, sendtype, recvbuf, (hypre_int) recvcount, recvtype, - (hypre_int)root, comm); + (hypre_int)root, hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -995,8 +1007,8 @@ hypre_MPI_Gatherv(void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(comm, &csize); - MPI_Comm_rank(comm, &croot); + MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); + MPI_Comm_rank(hypre_MPI_CommMPI_Comm(comm), &croot); if (croot == (hypre_int) root) { mpi_recvcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); @@ -1009,7 +1021,7 @@ hypre_MPI_Gatherv(void *sendbuf, } ierr = (HYPRE_Int) MPI_Gatherv(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, mpi_recvcounts, mpi_displs, - recvtype, (hypre_int) root, comm); + recvtype, (hypre_int) root, hypre_MPI_CommMPI_Comm(comm)); hypre_TFree(mpi_recvcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -1028,7 +1040,7 @@ hypre_MPI_Scatter( void *sendbuf, { return (HYPRE_Int) MPI_Scatter(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, (hypre_int)recvcount, recvtype, - (hypre_int)root, comm); + (hypre_int)root, hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -1048,8 +1060,8 @@ hypre_MPI_Scatterv(void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(comm, &csize); - MPI_Comm_rank(comm, &croot); + MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); + MPI_Comm_rank(hypre_MPI_CommMPI_Comm(comm), &croot); if (croot == (hypre_int) root) { mpi_sendcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); @@ -1062,7 +1074,8 @@ hypre_MPI_Scatterv(void *sendbuf, } ierr = (HYPRE_Int) 
MPI_Scatterv(sendbuf, mpi_sendcounts, mpi_displs, sendtype, recvbuf, (hypre_int) recvcount, - recvtype, (hypre_int) root, comm); + recvtype, (hypre_int) root, + hypre_MPI_CommMPI_Comm(comm)); hypre_TFree(mpi_sendcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -1077,7 +1090,8 @@ hypre_MPI_Bcast( void *buffer, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Bcast(buffer, (hypre_int)count, datatype, - (hypre_int)root, comm); + (hypre_int)root, + hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -1089,7 +1103,8 @@ hypre_MPI_Send( void *buf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Send(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm); + (hypre_int)dest, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -1102,7 +1117,8 @@ hypre_MPI_Recv( void *buf, hypre_MPI_Status *status ) { return (HYPRE_Int) MPI_Recv(buf, (hypre_int)count, datatype, - (hypre_int)source, (hypre_int)tag, comm, status); + (hypre_int)source, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), status); } HYPRE_Int @@ -1115,7 +1131,8 @@ hypre_MPI_Isend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Isend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, request); + (hypre_int)dest, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), request); } HYPRE_Int @@ -1128,7 +1145,8 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irecv(buf, (hypre_int)count, datatype, - (hypre_int)source, (hypre_int)tag, comm, request); + (hypre_int)source, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), request); } HYPRE_Int @@ -1142,7 +1160,7 @@ hypre_MPI_Send_init( void *buf, { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, - comm, request); + hypre_MPI_CommMPI_Comm(comm), request); } HYPRE_Int @@ -1156,7 +1174,7 @@ hypre_MPI_Recv_init( void *buf, { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, (hypre_int)dest, 
(hypre_int)tag, - comm, request); + hypre_MPI_CommMPI_Comm(comm), request); } HYPRE_Int @@ -1169,7 +1187,8 @@ hypre_MPI_Irsend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irsend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, request); + (hypre_int)dest, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), request); } HYPRE_Int @@ -1185,7 +1204,8 @@ hypre_MPI_Probe( HYPRE_Int source, hypre_MPI_Comm comm, hypre_MPI_Status *status ) { - return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, comm, status); + return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), status); } HYPRE_Int @@ -1197,7 +1217,8 @@ hypre_MPI_Iprobe( HYPRE_Int source, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, comm, + ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, + hypre_MPI_CommMPI_Comm(comm), &mpi_flag, status); *flag = (HYPRE_Int) mpi_flag; return ierr; @@ -1270,7 +1291,7 @@ hypre_MPI_Allreduce( void *sendbuf, hypre_GpuProfilingPushRange("MPI_Allreduce"); HYPRE_Int result = MPI_Allreduce(sendbuf, recvbuf, (hypre_int)count, - datatype, op, comm); + datatype, op, hypre_MPI_CommMPI_Comm(comm)); hypre_GpuProfilingPopRange(); @@ -1287,7 +1308,8 @@ hypre_MPI_Reduce( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Reduce(sendbuf, recvbuf, (hypre_int)count, - datatype, op, (hypre_int)root, comm); + datatype, op, (hypre_int)root, + hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int @@ -1299,7 +1321,7 @@ hypre_MPI_Scan( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Scan(sendbuf, recvbuf, (hypre_int)count, - datatype, op, comm); + datatype, op, hypre_MPI_CommMPI_Comm(comm)); } HYPRE_Int diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index f3071536a2..db8465b924 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -193,7 +193,13 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs 
to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; +typedef struct +{ + MPI_Comm mpi_comm; +} hypre_MPI_Comm; + +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) + typedef MPI_Group hypre_MPI_Group; typedef MPI_Request hypre_MPI_Request; typedef MPI_Datatype hypre_MPI_Datatype; @@ -251,20 +257,20 @@ typedef MPI_User_function hypre_MPI_User_function; /* mpistubs.c */ HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Int hypre_MPI_Abort( MPI_Comm comm, HYPRE_Int errorcode ); HYPRE_Real hypre_MPI_Wtime( void ); HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, - hypre_MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); -hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); -HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - hypre_MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, + MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); +MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int 
hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + MPI_Comm * comms ); HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypre_MPI_Group *newgroup ); HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); @@ -340,6 +346,7 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); +hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm); diff --git a/src/utilities/timing.c b/src/utilities/timing.c index af64058c0a..521796a9c6 100644 --- a/src/utilities/timing.c +++ b/src/utilities/timing.c @@ -345,7 +345,8 @@ hypre_PrintTiming( const char *heading, return ierr; } - hypre_MPI_Comm_rank(comm, &myrank ); + hypre_MPI_Comm_rank(comm, &myrank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* print heading */ if (myrank == 0) @@ -362,9 +363,9 @@ hypre_PrintTiming( const char *heading, local_wall_time = hypre_TimingWallTime(i); local_cpu_time = hypre_TimingCPUTime(i); hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1, - hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_REAL, hypre_MPI_MAX, hcomm); hypre_MPI_Allreduce(&local_cpu_time, &cpu_time, 1, - hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_REAL, hypre_MPI_MAX, hcomm); if (myrank == 0) { @@ -422,7 +423,8 @@ hypre_GetTiming( const char *heading, return ierr; } - hypre_MPI_Comm_rank(comm, &myrank ); + hypre_MPI_Comm_rank(comm, &myrank); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* print heading */ if (myrank == 0) @@ -438,7 +440,7 @@ hypre_GetTiming( const char *heading, { local_wall_time = hypre_TimingWallTime(i); hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1, - 
hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_REAL, hypre_MPI_MAX, hcomm); if (myrank == 0) { From 5eba421c416a6fb2d329301de8790f41d4888ec2 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Sat, 18 Nov 2023 16:04:33 -0800 Subject: [PATCH 02/90] fix GPU build --- src/IJ_mv/IJMatrix_parcsr_device.c | 3 ++- src/IJ_mv/IJVector_parcsr_device.c | 3 ++- src/parcsr_ls/par_coarse_parms_device.c | 3 ++- src/parcsr_ls/par_coarsen_device.c | 3 ++- src/parcsr_ls/par_ilu_setup.c | 4 ++-- src/parcsr_ls/par_ilu_setup_device.c | 5 +++-- src/parcsr_ls/par_interp_device.c | 6 ++++-- src/parcsr_ls/par_lr_restr_device.c | 3 ++- src/parcsr_ls/par_mgr_device.c | 9 ++++++--- src/parcsr_ls/par_mod_multi_interp_device.c | 21 ++++++++++++--------- src/parcsr_ls/par_relax_more_device.c | 6 ++++-- src/parcsr_mv/par_csr_fffc_device.c | 10 ++++++---- src/utilities/_hypre_utilities.h | 4 ++-- src/utilities/device_utils.c | 2 +- src/utilities/mpistubs.c | 4 ++-- src/utilities/mpistubs.h | 4 ++-- 16 files changed, 54 insertions(+), 36 deletions(-) diff --git a/src/IJ_mv/IJMatrix_parcsr_device.c b/src/IJ_mv/IJMatrix_parcsr_device.c index 09c14a7e6c..f485b59835 100644 --- a/src/IJ_mv/IJMatrix_parcsr_device.c +++ b/src/IJ_mv/IJMatrix_parcsr_device.c @@ -573,6 +573,7 @@ HYPRE_Int hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) { MPI_Comm comm = hypre_IJMatrixComm(matrix); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); HYPRE_BigInt row_start = row_partitioning[0]; @@ -609,7 +610,7 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) #endif HYPRE_Int nelms_off = nelms - nelms_on; HYPRE_Int nelms_off_max; - hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); /* communicate for aux off-proc and add to 
remote aux on-proc */ if (nelms_off_max) diff --git a/src/IJ_mv/IJVector_parcsr_device.c b/src/IJ_mv/IJVector_parcsr_device.c index b61ac633cb..c452bf5385 100644 --- a/src/IJ_mv/IJVector_parcsr_device.c +++ b/src/IJ_mv/IJVector_parcsr_device.c @@ -436,6 +436,7 @@ HYPRE_Int hypre_IJVectorAssembleParDevice(hypre_IJVector *vector) { MPI_Comm comm = hypre_IJVectorComm(vector); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); HYPRE_BigInt *IJpartitioning = hypre_IJVectorPartitioning(vector); @@ -470,7 +471,7 @@ hypre_IJVectorAssembleParDevice(hypre_IJVector *vector) #endif HYPRE_Int nelms_off = nelms - nelms_on; HYPRE_Int nelms_off_max; - hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); /* communicate for aux off-proc and add to remote aux on-proc */ if (nelms_off_max) diff --git a/src/parcsr_ls/par_coarse_parms_device.c b/src/parcsr_ls/par_coarse_parms_device.c index b6e863870c..1b592336c5 100644 --- a/src/parcsr_ls/par_coarse_parms_device.c +++ b/src/parcsr_ls/par_coarse_parms_device.c @@ -104,8 +104,9 @@ hypre_BoomerAMGCoarseParmsDevice(MPI_Comm comm, } { + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt scan_recv; - hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ coarse_pnts_global[0] = scan_recv - local_coarse_size; diff --git a/src/parcsr_ls/par_coarsen_device.c b/src/parcsr_ls/par_coarsen_device.c index f9b008b114..4d77d2db79 100644 --- a/src/parcsr_ls/par_coarsen_device.c +++ b/src/parcsr_ls/par_coarsen_device.c @@ -58,6 +58,7 @@ hypre_BoomerAMGCoarsenPMISDevice( 
hypre_ParCSRMatrix *S, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -137,7 +138,7 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, big_graph_size = graph_diag_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* if (my_id == 0) { hypre_printf("graph size %b\n", global_graph_size); } */ diff --git a/src/parcsr_ls/par_ilu_setup.c b/src/parcsr_ls/par_ilu_setup.c index 0e01a820b0..9e477f40b3 100644 --- a/src/parcsr_ls/par_ilu_setup.c +++ b/src/parcsr_ls/par_ilu_setup.c @@ -1158,7 +1158,7 @@ hypre_ILUSetup( void *ilu_vdata, { nnzBEF += hypre_CSRMatrixNumNonzeros(matF_d); } - hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); @@ -1186,7 +1186,7 @@ hypre_ILUSetup( void *ilu_vdata, { nnzBEF += hypre_CSRMatrixNumNonzeros(matF_d); } - hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); diff --git a/src/parcsr_ls/par_ilu_setup_device.c b/src/parcsr_ls/par_ilu_setup_device.c index 52c351a4b3..c012e49479 100644 --- a/src/parcsr_ls/par_ilu_setup_device.c +++ b/src/parcsr_ls/par_ilu_setup_device.c @@ -101,6 +101,7 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Build the inverse permutation arrays */ if (perm_data && qperm_data) @@ -198,7 +199,7 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, /* Compute 
total rows in Schur block */ HYPRE_BigInt big_m = (HYPRE_BigInt) m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* only form when total_rows > 0 */ if (total_rows > 0) @@ -206,7 +207,7 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, /* now create S - need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); col_starts[0] = global_start - m; col_starts[1] = global_start; } diff --git a/src/parcsr_ls/par_interp_device.c b/src/parcsr_ls/par_interp_device.c index 67528b6e47..13fd7a8b29 100644 --- a/src/parcsr_ls/par_interp_device.c +++ b/src/parcsr_ls/par_interp_device.c @@ -120,12 +120,13 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast( &total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast( &total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); if (!comm_pkg) { @@ -1092,10 +1093,11 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /* fine to coarse mapping */ fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); diff --git a/src/parcsr_ls/par_lr_restr_device.c 
b/src/parcsr_ls/par_lr_restr_device.c index 33aae87221..d90712bab6 100644 --- a/src/parcsr_ls/par_lr_restr_device.c +++ b/src/parcsr_ls/par_lr_restr_device.c @@ -61,13 +61,14 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* global number of C points and my start position */ if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); /* get AFF and ACF */ hypre_ParCSRMatrix *AFF, *ACF, *Dinv, *N, *X, *X2, *Z, *Z2; diff --git a/src/parcsr_ls/par_mgr_device.c b/src/parcsr_ls/par_mgr_device.c index 7f0d7912ed..3e3aca02b6 100644 --- a/src/parcsr_ls/par_mgr_device.c +++ b/src/parcsr_ls/par_mgr_device.c @@ -140,6 +140,8 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_GpuProfilingPushRange("MGRBuildP"); #if defined(HYPRE_USING_SYCL) @@ -241,7 +243,7 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, { nC_global = num_cpts_global[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } W_nr_of_rows = hypre_CSRMatrixNumRows(W_diag); @@ -912,6 +914,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------------- * Count the number of points matching point_type in CF_marker @@ -945,7 +948,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, if (CF_marker) { num_rows_big = 
(HYPRE_BigInt) B_diag_num_rows; - hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* first point in my range */ row_starts_B[0] = scan_recv - num_rows_big; @@ -956,7 +959,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, { num_rows_B = row_starts_B[1]; } - hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { diff --git a/src/parcsr_ls/par_mod_multi_interp_device.c b/src/parcsr_ls/par_mod_multi_interp_device.c index cda32dd3f5..5eead8d5c0 100644 --- a/src/parcsr_ls/par_mod_multi_interp_device.c +++ b/src/parcsr_ls/par_mod_multi_interp_device.c @@ -255,6 +255,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -262,7 +263,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { @@ -438,7 +439,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, num_passes = 1; /* color points according to pass number */ remaining_big = remaining; - hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); hypre_GpuProfilingPopRange(); @@ -588,7 +589,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, HYPRE_BigInt old_global_remaining = global_remaining; remaining_big = remaining; - hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, 
HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); /* if the number of remaining points does not change, we have a situation of isolated areas of * fine points that are not connected to any C-points, and the pass generation process breaks @@ -1042,6 +1043,7 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); @@ -1051,7 +1053,7 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, { HYPRE_BigInt big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; @@ -1060,8 +1062,8 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { @@ -1338,6 +1340,7 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); @@ -1347,7 +1350,7 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, { HYPRE_BigInt big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + 
hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; @@ -1356,8 +1359,8 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } else { diff --git a/src/parcsr_ls/par_relax_more_device.c b/src/parcsr_ls/par_relax_more_device.c index 5c21011ae2..22e785b95a 100644 --- a/src/parcsr_ls/par_relax_more_device.c +++ b/src/parcsr_ls/par_relax_more_device.c @@ -181,8 +181,10 @@ hypre_ParCSRMaxEigEstimateDevice( hypre_ParCSRMatrix *A, send_buf[0] = -e_min; send_buf[1] = e_max; - hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, - hypre_ParCSRMatrixComm(A)); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + + hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); /* return */ if ( hypre_abs(e_min) > hypre_abs(e_max) ) diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index dbd7dd133d..32e810207a 100644 --- a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -283,6 +283,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); n_local = hypre_ParCSRMatrixNumRows(A); row_starts = hypre_ParCSRMatrixRowStarts(A); @@ -291,7 +292,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, { nC_global = cpts_starts[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, 
num_procs - 1, comm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); nC_local = (HYPRE_Int) (cpts_starts[1] - cpts_starts[0]); fpts_starts[0] = row_starts[0] - cpts_starts[0]; fpts_starts[1] = row_starts[1] - cpts_starts[1]; @@ -320,13 +321,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_BigInt nF2_local_big = nF2_local; - hypre_MPI_Scan(&nF2_local_big, f2pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&nF2_local_big, f2pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); f2pts_starts[0] = f2pts_starts[1] - nF2_local_big; if (my_id == (num_procs - 1)) { nF2_global = f2pts_starts[1]; } - hypre_MPI_Bcast(&nF2_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&nF2_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); } /* map from all points (i.e, F+C) to F/C indices */ @@ -1497,6 +1498,7 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); n_local = hypre_ParCSRMatrixNumRows(A); row_starts = hypre_ParCSRMatrixRowStarts(A); @@ -1513,7 +1515,7 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, { nC_global = cpts_starts[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); nC_local = (HYPRE_Int) (cpts_starts[1] - cpts_starts[0]); fpts_starts[0] = row_starts[0] - cpts_starts[0]; fpts_starts[1] = row_starts[1] - cpts_starts[1]; diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 9bdb4cdbf7..a3b338dc4a 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -884,8 +884,8 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); #if 
defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index 4d000c0595..5a407a075a 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -2971,7 +2971,7 @@ hypre_bind_device( HYPRE_Int device_id_in, /* device id that want to bind */ hypre_int device_id; - hypre_MPI_Comm node_comm; + MPI_Comm node_comm; hypre_MPI_Comm_split_type( comm, hypre_MPI_COMM_TYPE_SHARED, myid, hypre_MPI_INFO_NULL, &node_comm ); hypre_MPI_Comm_rank(node_comm, &myNodeid); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index b22116d9a3..4e77b268c1 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1423,8 +1423,8 @@ hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int -hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) +hypre_MPI_Comm_split_type( MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_split_type(comm, split_type, key, info, newcomm ); } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index db8465b924..8072071713 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -348,8 +348,8 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); #if 
defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif From 6483d531c24635fad4cd23fca595632c396f27d1 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sat, 18 Nov 2023 19:08:19 -0800 Subject: [PATCH 03/90] fix consistent mpi --- src/parcsr_mv/par_csr_communication.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 75db057537..54c9bb0409 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -59,6 +59,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_requests = num_sends + num_recvs; hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); @@ -83,7 +84,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + i ); + ip, 0, hcomm, requests + i ); } for (i = 0; i < num_sends; ++i) { @@ -91,7 +92,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = 
hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + num_recvs + i ); + ip, 0, hcomm, requests + num_recvs + i ); } break; @@ -108,7 +109,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + i ); + ip, 0, hcomm, requests + i ); } for (i = 0; i < num_recvs; ++i) { @@ -116,7 +117,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + num_sends + i ); + ip, 0, hcomm, requests + num_sends + i ); } break; @@ -133,7 +134,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + i ); + ip, 0, hcomm, requests + i ); } for (i = 0; i < num_sends; ++i) { @@ -141,7 +142,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + num_recvs + i ); + ip, 
0, hcomm, requests + num_recvs + i ); } break; @@ -158,7 +159,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + i ); + ip, 0, hcomm, requests + i ); } for (i = 0; i < num_recvs; ++i) { @@ -166,7 +167,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + num_sends + i ); + ip, 0, hcomm, requests + num_sends + i ); } break; @@ -184,7 +185,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + i ); + ip, 0, hcomm, requests + i ); } for (i = 0; i < num_sends; ++i) { @@ -193,7 +194,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + num_recvs + i); + ip, 0, hcomm, requests + num_recvs + i); } break; @@ -211,7 +212,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + i ); + ip, 0, 
hcomm, requests + i ); } for (i = 0; i < num_recvs; ++i) { @@ -221,7 +222,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + num_sends + i); + ip, 0, hcomm, requests + num_sends + i); } break; default: From c2546c121e6fcfa2391c9603fa72b5ed9cca0cdc Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 22 Nov 2023 08:50:50 -0800 Subject: [PATCH 04/90] change orders of headers --- src/utilities/headers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/headers b/src/utilities/headers index f8ecf7d7c3..1b071ebe32 100755 --- a/src/utilities/headers +++ b/src/utilities/headers @@ -40,10 +40,10 @@ cat magma.h >> $INTERNAL_HEADER cat matrix_stats.h >> $INTERNAL_HEADER cat printf.h >> $INTERNAL_HEADER cat error.h >> $INTERNAL_HEADER -cat mpistubs.h >> $INTERNAL_HEADER cat smp.h >> $INTERNAL_HEADER cat memory.h >> $INTERNAL_HEADER cat memory_tracker.h >> $INTERNAL_HEADER +cat mpistubs.h >> $INTERNAL_HEADER cat omp_device.h >> $INTERNAL_HEADER cat threading.h >> $INTERNAL_HEADER cat timing.h >> $INTERNAL_HEADER From a791d175711a89834459b72d3be35efdbf04f706 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 22 Nov 2023 08:52:38 -0800 Subject: [PATCH 05/90] add new mpi interface --- src/utilities/_hypre_utilities.h | 1115 +++++++++++++++--------------- src/utilities/mpistubs.c | 50 ++ src/utilities/mpistubs.h | 13 +- 3 files changed, 621 insertions(+), 557 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index a3b338dc4a..26767ccd4c 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -534,6 +534,19 @@ void hypre_error_handler(const char *filename, HYPRE_Int line, HYPRE_Int ierr, c #endif /* hypre_ERROR_HEADER */ +/****************************************************************************** + * Copyright (c) 
1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef HYPRE_SMP_HEADER +#define HYPRE_SMP_HEADER +#endif + +#define HYPRE_SMP_SCHEDULE schedule(static) + /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -543,595 +556,221 @@ void hypre_error_handler(const char *filename, HYPRE_Int line, HYPRE_Int ierr, c /****************************************************************************** * - * Fake mpi stubs to generate serial codes without mpi + * Header file for memory management utilities + * + * The abstract memory model has a Host (think CPU) and a Device (think GPU) and + * three basic types of memory management utilities: + * + * 1. Malloc(..., location) + * location=LOCATION_DEVICE - malloc memory on the device + * location=LOCATION_HOST - malloc memory on the host + * 2. MemCopy(..., method) + * method=HOST_TO_DEVICE - copy from host to device + * method=DEVICE_TO_HOST - copy from device to host + * method=DEVICE_TO_DEVICE - copy from device to device + * 3. SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host + * + * Although the abstract model does not explicitly reflect a managed memory + * model (i.e., unified memory), it can support it. 
Here is a summary of how + * the abstract model would be mapped to specific hardware scenarios: + * + * Not using a device, not using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - host malloc e.g., malloc + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to host e.g., memcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the host + * location=LOCATION_HOST - execute on the host + * + * Using a device, not using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - device malloc e.g., cudaMalloc + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMemcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMemcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMemcpy + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host + * + * Using a device, using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - managed malloc e.g., cudaMallocManaged + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMallocManaged + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMallocManaged + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMallocManaged + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host * 
*****************************************************************************/ -#ifndef hypre_MPISTUBS -#define hypre_MPISTUBS +#ifndef hypre_MEMORY_HEADER +#define hypre_MEMORY_HEADER + +#include +#include + +#if defined(HYPRE_USING_UNIFIED_MEMORY) && defined(HYPRE_USING_DEVICE_OPENMP) +//#pragma omp requires unified_shared_memory +#endif + +#if defined(HYPRE_USING_UMPIRE) +#include "umpire/config.hpp" +#if UMPIRE_VERSION_MAJOR >= 2022 +#include "umpire/interface/c_fortran/umpire.h" +#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool +#else +#include "umpire/interface/umpire.h" +#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool +#endif +#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 +#endif + +/* stringification: + * _Pragma(string-literal), so we need to cast argument to a string + * The three dots as last argument of the macro tells compiler that this is a variadic macro. + * I.e. this is a macro that receives variable number of arguments. + */ +#define HYPRE_STR(...) #__VA_ARGS__ +#define HYPRE_XSTR(...) 
HYPRE_STR(__VA_ARGS__) #ifdef __cplusplus extern "C" { #endif -#ifdef HYPRE_SEQUENTIAL +typedef enum _hypre_MemoryLocation +{ + hypre_MEMORY_UNDEFINED = -1, + hypre_MEMORY_HOST, + hypre_MEMORY_HOST_PINNED, + hypre_MEMORY_DEVICE, + hypre_MEMORY_UNIFIED, + hypre_NUM_MEMORY_LOCATION +} hypre_MemoryLocation; -/****************************************************************************** - * MPI stubs to generate serial codes without mpi - *****************************************************************************/ +/*------------------------------------------------------- + * hypre_GetActualMemLocation + * return actual location based on the selected memory model + *-------------------------------------------------------*/ +static inline hypre_MemoryLocation +hypre_GetActualMemLocation(HYPRE_MemoryLocation location) +{ + if (location == HYPRE_MEMORY_HOST) + { + return hypre_MEMORY_HOST; + } -/*-------------------------------------------------------------------------- - * Change all MPI names to hypre_MPI names to avoid link conflicts. - * - * NOTE: MPI_Comm is the only MPI symbol in the HYPRE user interface, - * and is defined in `HYPRE_utilities.h'. - *--------------------------------------------------------------------------*/ + if (location == HYPRE_MEMORY_DEVICE) + { +#if defined(HYPRE_USING_HOST_MEMORY) + return hypre_MEMORY_HOST; +#elif defined(HYPRE_USING_DEVICE_MEMORY) + return hypre_MEMORY_DEVICE; +#elif defined(HYPRE_USING_UNIFIED_MEMORY) + return hypre_MEMORY_UNIFIED; +#else +#error Wrong HYPRE memory setting. 
+#endif + } -#define MPI_Comm hypre_MPI_Comm -#define MPI_Group hypre_MPI_Group -#define MPI_Request hypre_MPI_Request -#define MPI_Datatype hypre_MPI_Datatype -#define MPI_Status hypre_MPI_Status -#define MPI_Op hypre_MPI_Op -#define MPI_Aint hypre_MPI_Aint -#define MPI_Info hypre_MPI_Info + return hypre_MEMORY_UNDEFINED; +} -#define MPI_COMM_WORLD hypre_MPI_COMM_WORLD -#define MPI_COMM_NULL hypre_MPI_COMM_NULL -#define MPI_COMM_SELF hypre_MPI_COMM_SELF -#define MPI_COMM_TYPE_SHARED hypre_MPI_COMM_TYPE_SHARED -#define MPI_BOTTOM hypre_MPI_BOTTOM +#if !defined(HYPRE_USING_MEMORY_TRACKER) -#define MPI_FLOAT hypre_MPI_FLOAT -#define MPI_DOUBLE hypre_MPI_DOUBLE -#define MPI_LONG_DOUBLE hypre_MPI_LONG_DOUBLE -#define MPI_INT hypre_MPI_INT -#define MPI_LONG_LONG_INT hypre_MPI_LONG_LONG_INT -#define MPI_CHAR hypre_MPI_CHAR -#define MPI_LONG hypre_MPI_LONG -#define MPI_BYTE hypre_MPI_BYTE +#define hypre_TAlloc(type, count, location) \ +( (type *) hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) -#define MPI_C_FLOAT_COMPLEX hypre_MPI_COMPLEX -#define MPI_C_LONG_DOUBLE_COMPLEX hypre_MPI_COMPLEX -#define MPI_C_DOUBLE_COMPLEX hypre_MPI_COMPLEX +#define _hypre_TAlloc(type, count, location) \ +( (type *) _hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) -#define MPI_SUM hypre_MPI_SUM -#define MPI_MIN hypre_MPI_MIN -#define MPI_MAX hypre_MPI_MAX -#define MPI_LOR hypre_MPI_LOR -#define MPI_LAND hypre_MPI_LAND -#define MPI_SUCCESS hypre_MPI_SUCCESS -#define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE +#define hypre_CTAlloc(type, count, location) \ +( (type *) hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location) ) -#define MPI_UNDEFINED hypre_MPI_UNDEFINED -#define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL -#define MPI_INFO_NULL hypre_MPI_INFO_NULL -#define MPI_ANY_SOURCE hypre_MPI_ANY_SOURCE -#define MPI_ANY_TAG hypre_MPI_ANY_TAG -#define MPI_SOURCE hypre_MPI_SOURCE -#define MPI_TAG hypre_MPI_TAG +#define hypre_TReAlloc(ptr, type, count, location) \ +( 
(type *) hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location) ) -#define MPI_Init hypre_MPI_Init -#define MPI_Finalize hypre_MPI_Finalize -#define MPI_Abort hypre_MPI_Abort -#define MPI_Wtime hypre_MPI_Wtime -#define MPI_Wtick hypre_MPI_Wtick -#define MPI_Barrier hypre_MPI_Barrier -#define MPI_Comm_create hypre_MPI_Comm_create -#define MPI_Comm_dup hypre_MPI_Comm_dup -#define MPI_Comm_f2c hypre_MPI_Comm_f2c -#define MPI_Comm_group hypre_MPI_Comm_group -#define MPI_Comm_size hypre_MPI_Comm_size -#define MPI_Comm_rank hypre_MPI_Comm_rank -#define MPI_Comm_free hypre_MPI_Comm_free -#define MPI_Comm_split hypre_MPI_Comm_split -#define MPI_Comm_split_type hypre_MPI_Comm_split_type -#define MPI_Group_incl hypre_MPI_Group_incl -#define MPI_Group_free hypre_MPI_Group_free -#define MPI_Address hypre_MPI_Address -#define MPI_Get_count hypre_MPI_Get_count -#define MPI_Alltoall hypre_MPI_Alltoall -#define MPI_Allgather hypre_MPI_Allgather -#define MPI_Allgatherv hypre_MPI_Allgatherv -#define MPI_Gather hypre_MPI_Gather -#define MPI_Gatherv hypre_MPI_Gatherv -#define MPI_Scatter hypre_MPI_Scatter -#define MPI_Scatterv hypre_MPI_Scatterv -#define MPI_Bcast hypre_MPI_Bcast -#define MPI_Send hypre_MPI_Send -#define MPI_Recv hypre_MPI_Recv -#define MPI_Isend hypre_MPI_Isend -#define MPI_Irecv hypre_MPI_Irecv -#define MPI_Send_init hypre_MPI_Send_init -#define MPI_Recv_init hypre_MPI_Recv_init -#define MPI_Irsend hypre_MPI_Irsend -#define MPI_Startall hypre_MPI_Startall -#define MPI_Probe hypre_MPI_Probe -#define MPI_Iprobe hypre_MPI_Iprobe -#define MPI_Test hypre_MPI_Test -#define MPI_Testall hypre_MPI_Testall -#define MPI_Wait hypre_MPI_Wait -#define MPI_Waitall hypre_MPI_Waitall -#define MPI_Waitany hypre_MPI_Waitany -#define MPI_Allreduce hypre_MPI_Allreduce -#define MPI_Reduce hypre_MPI_Reduce -#define MPI_Scan hypre_MPI_Scan -#define MPI_Request_free hypre_MPI_Request_free -#define MPI_Type_contiguous hypre_MPI_Type_contiguous -#define MPI_Type_vector 
hypre_MPI_Type_vector -#define MPI_Type_hvector hypre_MPI_Type_hvector -#define MPI_Type_struct hypre_MPI_Type_struct -#define MPI_Type_commit hypre_MPI_Type_commit -#define MPI_Type_free hypre_MPI_Type_free -#define MPI_Op_free hypre_MPI_Op_free -#define MPI_Op_create hypre_MPI_Op_create -#define MPI_User_function hypre_MPI_User_function -#define MPI_Info_create hypre_MPI_Info_create +#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ +( (new_type *) hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), (size_t)(sizeof(new_type)*(new_count)), location) ) -/*-------------------------------------------------------------------------- - * Types, etc. - *--------------------------------------------------------------------------*/ +#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) -/* These types have associated creation and destruction routines */ -typedef HYPRE_Int hypre_MPI_Comm; -typedef HYPRE_Int hypre_MPI_Group; -typedef HYPRE_Int hypre_MPI_Request; -typedef HYPRE_Int hypre_MPI_Datatype; -typedef void (hypre_MPI_User_function) (void); - -typedef struct -{ - HYPRE_Int hypre_MPI_SOURCE; - HYPRE_Int hypre_MPI_TAG; -} hypre_MPI_Status; - -typedef HYPRE_Int hypre_MPI_Op; -typedef HYPRE_Int hypre_MPI_Aint; -typedef HYPRE_Int hypre_MPI_Info; - -#define hypre_MPI_COMM_SELF 1 -#define hypre_MPI_COMM_WORLD 0 -#define hypre_MPI_COMM_NULL -1 - -#define hypre_MPI_COMM_TYPE_SHARED 0 - -#define hypre_MPI_BOTTOM 0x0 - -#define hypre_MPI_FLOAT 0 -#define hypre_MPI_DOUBLE 1 -#define hypre_MPI_LONG_DOUBLE 2 -#define hypre_MPI_INT 3 -#define hypre_MPI_CHAR 4 -#define hypre_MPI_LONG 5 -#define hypre_MPI_BYTE 6 -#define hypre_MPI_REAL 7 -#define hypre_MPI_COMPLEX 8 -#define hypre_MPI_LONG_LONG_INT 9 - -#define hypre_MPI_SUM 0 -#define hypre_MPI_MIN 1 -#define hypre_MPI_MAX 2 -#define hypre_MPI_LOR 3 -#define hypre_MPI_LAND 4 
-#define hypre_MPI_SUCCESS 0 -#define hypre_MPI_STATUSES_IGNORE 0 - -#define hypre_MPI_UNDEFINED -9999 -#define hypre_MPI_REQUEST_NULL 0 -#define hypre_MPI_INFO_NULL 0 -#define hypre_MPI_ANY_SOURCE 1 -#define hypre_MPI_ANY_TAG 1 - -#else - -/****************************************************************************** - * MPI stubs to do casting of HYPRE_Int and hypre_int correctly - *****************************************************************************/ - -typedef struct -{ - MPI_Comm mpi_comm; -} hypre_MPI_Comm; +#define hypre_TFree(ptr, location) \ +( hypre_Free((void *)ptr, location), ptr = NULL ) -#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define _hypre_TFree(ptr, location) \ +( _hypre_Free((void *)ptr, location), ptr = NULL ) -typedef MPI_Group hypre_MPI_Group; -typedef MPI_Request hypre_MPI_Request; -typedef MPI_Datatype hypre_MPI_Datatype; -typedef MPI_Status hypre_MPI_Status; -typedef MPI_Op hypre_MPI_Op; -typedef MPI_Aint hypre_MPI_Aint; -typedef MPI_Info hypre_MPI_Info; -typedef MPI_User_function hypre_MPI_User_function; +#endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ -#define hypre_MPI_COMM_WORLD MPI_COMM_WORLD -#define hypre_MPI_COMM_NULL MPI_COMM_NULL -#define hypre_MPI_BOTTOM MPI_BOTTOM -#define hypre_MPI_COMM_SELF MPI_COMM_SELF -#define hypre_MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_SHARED -#define hypre_MPI_FLOAT MPI_FLOAT -#define hypre_MPI_DOUBLE MPI_DOUBLE -#define hypre_MPI_LONG_DOUBLE MPI_LONG_DOUBLE -/* HYPRE_MPI_INT is defined in HYPRE_utilities.h */ -#define hypre_MPI_INT HYPRE_MPI_INT -#define hypre_MPI_CHAR MPI_CHAR -#define hypre_MPI_LONG MPI_LONG -#define hypre_MPI_BYTE MPI_BYTE -/* HYPRE_MPI_REAL is defined in HYPRE_utilities.h */ -#define hypre_MPI_REAL HYPRE_MPI_REAL -/* HYPRE_MPI_COMPLEX is defined in HYPRE_utilities.h */ -#define hypre_MPI_COMPLEX HYPRE_MPI_COMPLEX +/*-------------------------------------------------------------------------- + * Prototypes + 
*--------------------------------------------------------------------------*/ -#define hypre_MPI_SUM MPI_SUM -#define hypre_MPI_MIN MPI_MIN -#define hypre_MPI_MAX MPI_MAX -#define hypre_MPI_LOR MPI_LOR -#define hypre_MPI_SUCCESS MPI_SUCCESS -#define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +/* memory.c */ +HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, + char *memory_location_name); +void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); +void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); +void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); +void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); +void hypre_Free(void *ptr, HYPRE_MemoryLocation location); +void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, + HYPRE_MemoryLocation loc_src); +void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); -#define hypre_MPI_UNDEFINED MPI_UNDEFINED -#define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL -#define hypre_MPI_INFO_NULL MPI_INFO_NULL -#define hypre_MPI_ANY_SOURCE MPI_ANY_SOURCE -#define hypre_MPI_ANY_TAG MPI_ANY_TAG -#define hypre_MPI_SOURCE MPI_SOURCE -#define hypre_MPI_TAG MPI_TAG -#define hypre_MPI_LAND MPI_LAND +void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); +void _hypre_Free(void *ptr, hypre_MemoryLocation location); -#endif +HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); +HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, + HYPRE_MemoryLocation location2); -/****************************************************************************** - * Everything below this applies to both ifdef cases above - 
*****************************************************************************/ +HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); +HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, + size_t max_cached_bytes ); +HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); +void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); +HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ +/* memory_dmalloc.c */ +HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); +HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); +char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); +char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); +char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); +void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); -/* mpistubs.c */ -HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); -HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( MPI_Comm comm, HYPRE_Int errorcode ); -HYPRE_Real hypre_MPI_Wtime( void ); -HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, - MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); -MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); 
-HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - MPI_Comm * comms ); -HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, - hypre_MPI_Group *newgroup ); -HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Address( void *location, hypre_MPI_Aint *address ); -HYPRE_Int hypre_MPI_Get_count( hypre_MPI_Status *status, hypre_MPI_Datatype datatype, - HYPRE_Int *count ); -HYPRE_Int hypre_MPI_Alltoall( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Allgather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Gather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Gatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scatter( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scatterv( void *sendbuf, HYPRE_Int *sendcounts, 
HYPRE_Int *displs, - hypre_MPI_Datatype sendtype, void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Bcast( void *buffer, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Send( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Recv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int source, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Isend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Irecv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ); -HYPRE_Int hypre_MPI_Probe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Iprobe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, HYPRE_Int *flag, - hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Test( hypre_MPI_Request *request, HYPRE_Int *flag, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, HYPRE_Int *flag, - hypre_MPI_Status 
*array_of_statuses ); -HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, - hypre_MPI_Status *array_of_statuses ); -HYPRE_Int hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Request *array_of_requests, - HYPRE_Int *index, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Allreduce( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, - hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, - hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_hvector( HYPRE_Int count, HYPRE_Int blocklength, hypre_MPI_Aint stride, - hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_struct( HYPRE_Int count, HYPRE_Int *array_of_blocklengths, - hypre_MPI_Aint *array_of_displacements, hypre_MPI_Datatype *array_of_types, - hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_commit( hypre_MPI_Datatype *datatype ); -HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); -HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); -HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, - hypre_MPI_Op *op ); -hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, 
HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm); -HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); -#endif +/* GPU malloc prototype */ +typedef void (*GPUMallocFunc)(void **, size_t); +typedef void (*GPUMfreeFunc)(void *); #ifdef __cplusplus } #endif -#endif -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -#ifndef HYPRE_SMP_HEADER -#define HYPRE_SMP_HEADER -#endif - -#define HYPRE_SMP_SCHEDULE schedule(static) - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -/****************************************************************************** - * - * Header file for memory management utilities - * - * The abstract memory model has a Host (think CPU) and a Device (think GPU) and - * three basic types of memory management utilities: - * - * 1. Malloc(..., location) - * location=LOCATION_DEVICE - malloc memory on the device - * location=LOCATION_HOST - malloc memory on the host - * 2. MemCopy(..., method) - * method=HOST_TO_DEVICE - copy from host to device - * method=DEVICE_TO_HOST - copy from device to host - * method=DEVICE_TO_DEVICE - copy from device to device - * 3. 
SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host - * - * Although the abstract model does not explicitly reflect a managed memory - * model (i.e., unified memory), it can support it. Here is a summary of how - * the abstract model would be mapped to specific hardware scenarios: - * - * Not using a device, not using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - host malloc e.g., malloc - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to host e.g., memcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the host - * location=LOCATION_HOST - execute on the host - * - * Using a device, not using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - device malloc e.g., cudaMalloc - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMemcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMemcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMemcpy - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host - * - * Using a device, using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - managed malloc e.g., cudaMallocManaged - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMallocManaged - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device 
e.g., cudaMallocManaged - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMallocManaged - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host - * - *****************************************************************************/ - -#ifndef hypre_MEMORY_HEADER -#define hypre_MEMORY_HEADER - -#include -#include - -#if defined(HYPRE_USING_UNIFIED_MEMORY) && defined(HYPRE_USING_DEVICE_OPENMP) -//#pragma omp requires unified_shared_memory -#endif - -#if defined(HYPRE_USING_UMPIRE) -#include "umpire/config.hpp" -#if UMPIRE_VERSION_MAJOR >= 2022 -#include "umpire/interface/c_fortran/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool -#else -#include "umpire/interface/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool -#endif -#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 -#endif - -/* stringification: - * _Pragma(string-literal), so we need to cast argument to a string - * The three dots as last argument of the macro tells compiler that this is a variadic macro. - * I.e. this is a macro that receives variable number of arguments. - */ -#define HYPRE_STR(...) #__VA_ARGS__ -#define HYPRE_XSTR(...) 
HYPRE_STR(__VA_ARGS__) - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum _hypre_MemoryLocation -{ - hypre_MEMORY_UNDEFINED = -1, - hypre_MEMORY_HOST, - hypre_MEMORY_HOST_PINNED, - hypre_MEMORY_DEVICE, - hypre_MEMORY_UNIFIED, - hypre_NUM_MEMORY_LOCATION -} hypre_MemoryLocation; - -/*------------------------------------------------------- - * hypre_GetActualMemLocation - * return actual location based on the selected memory model - *-------------------------------------------------------*/ -static inline hypre_MemoryLocation -hypre_GetActualMemLocation(HYPRE_MemoryLocation location) -{ - if (location == HYPRE_MEMORY_HOST) - { - return hypre_MEMORY_HOST; - } - - if (location == HYPRE_MEMORY_DEVICE) - { -#if defined(HYPRE_USING_HOST_MEMORY) - return hypre_MEMORY_HOST; -#elif defined(HYPRE_USING_DEVICE_MEMORY) - return hypre_MEMORY_DEVICE; -#elif defined(HYPRE_USING_UNIFIED_MEMORY) - return hypre_MEMORY_UNIFIED; -#else -#error Wrong HYPRE memory setting. -#endif - } - - return hypre_MEMORY_UNDEFINED; -} - - -#if !defined(HYPRE_USING_MEMORY_TRACKER) - -#define hypre_TAlloc(type, count, location) \ -( (type *) hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) - -#define _hypre_TAlloc(type, count, location) \ -( (type *) _hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) - -#define hypre_CTAlloc(type, count, location) \ -( (type *) hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location) ) - -#define hypre_TReAlloc(ptr, type, count, location) \ -( (type *) hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location) ) - -#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ -( (new_type *) hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), (size_t)(sizeof(new_type)*(new_count)), location) ) - -#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ -(hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) - -#define hypre_TFree(ptr, 
location) \ -( hypre_Free((void *)ptr, location), ptr = NULL ) - -#define _hypre_TFree(ptr, location) \ -( _hypre_Free((void *)ptr, location), ptr = NULL ) - -#endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ - - -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ - -/* memory.c */ -HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, - char *memory_location_name); -void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); -void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); -void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); -void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); -void hypre_Free(void *ptr, HYPRE_MemoryLocation location); -void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, - HYPRE_MemoryLocation loc_src); -void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); - -void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); -void _hypre_Free(void *ptr, hypre_MemoryLocation location); - -HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); -HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, - HYPRE_MemoryLocation location2); - -HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); -HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, - size_t max_cached_bytes ); -HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); -void *hypre_umpire_host_pooled_realloc(void *ptr, size_t 
size); -HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); - -/* memory_dmalloc.c */ -HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); -HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); -char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); -char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); -char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); -void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); - -/* GPU malloc prototype */ -typedef void (*GPUMallocFunc)(void **, size_t); -typedef void (*GPUMfreeFunc)(void *); - -#ifdef __cplusplus -} -#endif - -#endif /* hypre_MEMORY_HEADER */ +#endif /* hypre_MEMORY_HEADER */ /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -1308,6 +947,374 @@ extern hypre_MemoryTracker *_hypre_memory_tracker; #endif /* #if defined(HYPRE_USING_MEMORY_TRACKER) */ #endif /* #ifndef hypre_MEMORY_TRACKER_HEADER */ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** + * + * Fake mpi stubs to generate serial codes without mpi + * + *****************************************************************************/ + +#ifndef hypre_MPISTUBS +#define hypre_MPISTUBS + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HYPRE_SEQUENTIAL + +/****************************************************************************** + * MPI stubs to generate serial codes without mpi + *****************************************************************************/ + +/*-------------------------------------------------------------------------- + * Change all MPI names to hypre_MPI names to avoid link conflicts. + * + * NOTE: MPI_Comm is the only MPI symbol in the HYPRE user interface, + * and is defined in `HYPRE_utilities.h'. + *--------------------------------------------------------------------------*/ + +#define MPI_Comm hypre_MPI_Comm +#define MPI_Group hypre_MPI_Group +#define MPI_Request hypre_MPI_Request +#define MPI_Datatype hypre_MPI_Datatype +#define MPI_Status hypre_MPI_Status +#define MPI_Op hypre_MPI_Op +#define MPI_Aint hypre_MPI_Aint +#define MPI_Info hypre_MPI_Info + +#define MPI_COMM_WORLD hypre_MPI_COMM_WORLD +#define MPI_COMM_NULL hypre_MPI_COMM_NULL +#define MPI_COMM_SELF hypre_MPI_COMM_SELF +#define MPI_COMM_TYPE_SHARED hypre_MPI_COMM_TYPE_SHARED + +#define MPI_BOTTOM hypre_MPI_BOTTOM + +#define MPI_FLOAT hypre_MPI_FLOAT +#define MPI_DOUBLE hypre_MPI_DOUBLE +#define MPI_LONG_DOUBLE hypre_MPI_LONG_DOUBLE +#define MPI_INT hypre_MPI_INT +#define MPI_LONG_LONG_INT hypre_MPI_LONG_LONG_INT +#define MPI_CHAR hypre_MPI_CHAR +#define MPI_LONG hypre_MPI_LONG +#define MPI_BYTE hypre_MPI_BYTE + +#define MPI_C_FLOAT_COMPLEX hypre_MPI_COMPLEX +#define MPI_C_LONG_DOUBLE_COMPLEX hypre_MPI_COMPLEX +#define MPI_C_DOUBLE_COMPLEX 
hypre_MPI_COMPLEX + +#define MPI_SUM hypre_MPI_SUM +#define MPI_MIN hypre_MPI_MIN +#define MPI_MAX hypre_MPI_MAX +#define MPI_LOR hypre_MPI_LOR +#define MPI_LAND hypre_MPI_LAND +#define MPI_SUCCESS hypre_MPI_SUCCESS +#define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE + +#define MPI_UNDEFINED hypre_MPI_UNDEFINED +#define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL +#define MPI_INFO_NULL hypre_MPI_INFO_NULL +#define MPI_ANY_SOURCE hypre_MPI_ANY_SOURCE +#define MPI_ANY_TAG hypre_MPI_ANY_TAG +#define MPI_SOURCE hypre_MPI_SOURCE +#define MPI_TAG hypre_MPI_TAG + +#define MPI_Init hypre_MPI_Init +#define MPI_Finalize hypre_MPI_Finalize +#define MPI_Abort hypre_MPI_Abort +#define MPI_Wtime hypre_MPI_Wtime +#define MPI_Wtick hypre_MPI_Wtick +#define MPI_Barrier hypre_MPI_Barrier +#define MPI_Comm_create hypre_MPI_Comm_create +#define MPI_Comm_dup hypre_MPI_Comm_dup +#define MPI_Comm_f2c hypre_MPI_Comm_f2c +#define MPI_Comm_group hypre_MPI_Comm_group +#define MPI_Comm_size hypre_MPI_Comm_size +#define MPI_Comm_rank hypre_MPI_Comm_rank +#define MPI_Comm_free hypre_MPI_Comm_free +#define MPI_Comm_split hypre_MPI_Comm_split +#define MPI_Comm_split_type hypre_MPI_Comm_split_type +#define MPI_Group_incl hypre_MPI_Group_incl +#define MPI_Group_free hypre_MPI_Group_free +#define MPI_Address hypre_MPI_Address +#define MPI_Get_count hypre_MPI_Get_count +#define MPI_Alltoall hypre_MPI_Alltoall +#define MPI_Allgather hypre_MPI_Allgather +#define MPI_Allgatherv hypre_MPI_Allgatherv +#define MPI_Gather hypre_MPI_Gather +#define MPI_Gatherv hypre_MPI_Gatherv +#define MPI_Scatter hypre_MPI_Scatter +#define MPI_Scatterv hypre_MPI_Scatterv +#define MPI_Bcast hypre_MPI_Bcast +#define MPI_Send hypre_MPI_Send +#define MPI_Recv hypre_MPI_Recv +#define MPI_Isend hypre_MPI_Isend +#define MPI_Irecv hypre_MPI_Irecv +#define MPI_Send_init hypre_MPI_Send_init +#define MPI_Recv_init hypre_MPI_Recv_init +#define MPI_Irsend hypre_MPI_Irsend +#define MPI_Startall hypre_MPI_Startall +#define MPI_Probe 
hypre_MPI_Probe +#define MPI_Iprobe hypre_MPI_Iprobe +#define MPI_Test hypre_MPI_Test +#define MPI_Testall hypre_MPI_Testall +#define MPI_Wait hypre_MPI_Wait +#define MPI_Waitall hypre_MPI_Waitall +#define MPI_Waitany hypre_MPI_Waitany +#define MPI_Allreduce hypre_MPI_Allreduce +#define MPI_Reduce hypre_MPI_Reduce +#define MPI_Scan hypre_MPI_Scan +#define MPI_Request_free hypre_MPI_Request_free +#define MPI_Type_contiguous hypre_MPI_Type_contiguous +#define MPI_Type_vector hypre_MPI_Type_vector +#define MPI_Type_hvector hypre_MPI_Type_hvector +#define MPI_Type_struct hypre_MPI_Type_struct +#define MPI_Type_commit hypre_MPI_Type_commit +#define MPI_Type_free hypre_MPI_Type_free +#define MPI_Op_free hypre_MPI_Op_free +#define MPI_Op_create hypre_MPI_Op_create +#define MPI_User_function hypre_MPI_User_function +#define MPI_Info_create hypre_MPI_Info_create + +/*-------------------------------------------------------------------------- + * Types, etc. + *--------------------------------------------------------------------------*/ + +/* These types have associated creation and destruction routines */ +typedef HYPRE_Int hypre_MPI_Comm; +typedef HYPRE_Int hypre_MPI_Group; +typedef HYPRE_Int hypre_MPI_Request; +typedef HYPRE_Int hypre_MPI_Datatype; +typedef void (hypre_MPI_User_function) (void); + +typedef struct +{ + HYPRE_Int hypre_MPI_SOURCE; + HYPRE_Int hypre_MPI_TAG; +} hypre_MPI_Status; + +typedef HYPRE_Int hypre_MPI_Op; +typedef HYPRE_Int hypre_MPI_Aint; +typedef HYPRE_Int hypre_MPI_Info; + +#define hypre_MPI_COMM_SELF 1 +#define hypre_MPI_COMM_WORLD 0 +#define hypre_MPI_COMM_NULL -1 + +#define hypre_MPI_COMM_TYPE_SHARED 0 + +#define hypre_MPI_BOTTOM 0x0 + +#define hypre_MPI_FLOAT 0 +#define hypre_MPI_DOUBLE 1 +#define hypre_MPI_LONG_DOUBLE 2 +#define hypre_MPI_INT 3 +#define hypre_MPI_CHAR 4 +#define hypre_MPI_LONG 5 +#define hypre_MPI_BYTE 6 +#define hypre_MPI_REAL 7 +#define hypre_MPI_COMPLEX 8 +#define hypre_MPI_LONG_LONG_INT 9 + +#define hypre_MPI_SUM 0 
+#define hypre_MPI_MIN 1 +#define hypre_MPI_MAX 2 +#define hypre_MPI_LOR 3 +#define hypre_MPI_LAND 4 +#define hypre_MPI_SUCCESS 0 +#define hypre_MPI_STATUSES_IGNORE 0 + +#define hypre_MPI_UNDEFINED -9999 +#define hypre_MPI_REQUEST_NULL 0 +#define hypre_MPI_INFO_NULL 0 +#define hypre_MPI_ANY_SOURCE 1 +#define hypre_MPI_ANY_TAG 1 + +#else + +/****************************************************************************** + * MPI stubs to do casting of HYPRE_Int and hypre_int correctly + *****************************************************************************/ + +typedef struct +{ + MPI_Comm mpi_comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; +} hypre_MPI_Comm; + +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) +#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) + +typedef MPI_Group hypre_MPI_Group; +typedef MPI_Request hypre_MPI_Request; +typedef MPI_Datatype hypre_MPI_Datatype; +typedef MPI_Status hypre_MPI_Status; +typedef MPI_Op hypre_MPI_Op; +typedef MPI_Aint hypre_MPI_Aint; +typedef MPI_Info hypre_MPI_Info; +typedef MPI_User_function hypre_MPI_User_function; + +#define hypre_MPI_COMM_WORLD MPI_COMM_WORLD +#define hypre_MPI_COMM_NULL MPI_COMM_NULL +#define hypre_MPI_BOTTOM MPI_BOTTOM +#define hypre_MPI_COMM_SELF MPI_COMM_SELF +#define hypre_MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_SHARED + +#define hypre_MPI_FLOAT MPI_FLOAT +#define hypre_MPI_DOUBLE MPI_DOUBLE +#define hypre_MPI_LONG_DOUBLE MPI_LONG_DOUBLE +/* HYPRE_MPI_INT is defined in HYPRE_utilities.h */ +#define hypre_MPI_INT HYPRE_MPI_INT +#define hypre_MPI_CHAR MPI_CHAR +#define hypre_MPI_LONG MPI_LONG +#define hypre_MPI_BYTE MPI_BYTE +/* HYPRE_MPI_REAL is defined in HYPRE_utilities.h */ +#define hypre_MPI_REAL HYPRE_MPI_REAL +/* HYPRE_MPI_COMPLEX is defined in HYPRE_utilities.h */ +#define hypre_MPI_COMPLEX HYPRE_MPI_COMPLEX + +#define hypre_MPI_SUM MPI_SUM +#define hypre_MPI_MIN 
MPI_MIN +#define hypre_MPI_MAX MPI_MAX +#define hypre_MPI_LOR MPI_LOR +#define hypre_MPI_SUCCESS MPI_SUCCESS +#define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE + +#define hypre_MPI_UNDEFINED MPI_UNDEFINED +#define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL +#define hypre_MPI_INFO_NULL MPI_INFO_NULL +#define hypre_MPI_ANY_SOURCE MPI_ANY_SOURCE +#define hypre_MPI_ANY_TAG MPI_ANY_TAG +#define hypre_MPI_SOURCE MPI_SOURCE +#define hypre_MPI_TAG MPI_TAG +#define hypre_MPI_LAND MPI_LAND + +#endif + +/****************************************************************************** + * Everything below this applies to both ifdef cases above + *****************************************************************************/ + +/*-------------------------------------------------------------------------- + * Prototypes + *--------------------------------------------------------------------------*/ + +/* mpistubs.c */ +HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); +HYPRE_Int hypre_MPI_Finalize( void ); +HYPRE_Int hypre_MPI_Abort( MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Real hypre_MPI_Wtime( void ); +HYPRE_Real hypre_MPI_Wtick( void ); +HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, + MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); +MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, + hypre_MPI_Group *newgroup ); +HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Address( void *location, 
hypre_MPI_Aint *address ); +HYPRE_Int hypre_MPI_Get_count( hypre_MPI_Status *status, hypre_MPI_Datatype datatype, + HYPRE_Int *count ); +HYPRE_Int hypre_MPI_Alltoall( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Allgather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Gather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Gatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scatter( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scatterv( void *sendbuf, HYPRE_Int *sendcounts, HYPRE_Int *displs, + hypre_MPI_Datatype sendtype, void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Bcast( void *buffer, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Send( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Recv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int source, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Status 
*status ); +HYPRE_Int hypre_MPI_Isend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Irecv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ); +HYPRE_Int hypre_MPI_Probe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, + hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Iprobe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, HYPRE_Int *flag, + hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Test( hypre_MPI_Request *request, HYPRE_Int *flag, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, HYPRE_Int *flag, + hypre_MPI_Status *array_of_statuses ); +HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, + hypre_MPI_Status *array_of_statuses ); +HYPRE_Int hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Request *array_of_requests, + HYPRE_Int *index, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Allreduce( void *sendbuf, void *recvbuf, HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, 
HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, + hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, + hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_hvector( HYPRE_Int count, HYPRE_Int blocklength, hypre_MPI_Aint stride, + hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_struct( HYPRE_Int count, HYPRE_Int *array_of_blocklengths, + hypre_MPI_Aint *array_of_displacements, hypre_MPI_Datatype *array_of_types, + hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_commit( hypre_MPI_Datatype *datatype ); +HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); +HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); +HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, + hypre_MPI_Op *op ); +hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); +HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); +#endif +HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm 
comm, hypre_MPI_Request *requests ); +#ifdef __cplusplus +} +#endif + +#endif /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 4e77b268c1..3fc72cbf97 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1149,6 +1149,56 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_CommMPI_Comm(comm), request); } +#define TYPE_MACRO(MPI_CMD, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ +{ \ + if (datatype == HYPRE_MPI_DTYPE) \ + { \ + HYPRE_Int i; \ + HYPRE_DTYPE *data = (HYPRE_DTYPE *) buf; \ + for (i = 0; i < num; i++) \ + { \ + HYPRE_Int ip = procs[i]; \ + HYPRE_Int start = displs[i]; \ + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ + MPI_CMD(data + start, len, HYPRE_MPI_COMPLEX, \ + ip, tag, hypre_MPI_CommMPI_Comm(comm), requests + i); \ + } \ + return hypre_error_flag; \ + } \ +} + +HYPRE_Int +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) +{ + TYPE_MACRO(MPI_Isend, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Isend, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Isend, HYPRE_BigInt, HYPRE_MPI_BIG_INT); +} + +HYPRE_Int +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) +{ + TYPE_MACRO(MPI_Irecv, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Irecv, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Irecv, HYPRE_BigInt, HYPRE_MPI_BIG_INT); +} + HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 
8072071713..d559389ccf 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -195,10 +195,14 @@ typedef HYPRE_Int hypre_MPI_Info; typedef struct { - MPI_Comm mpi_comm; + MPI_Comm mpi_comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; } hypre_MPI_Comm; -#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) +#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) typedef MPI_Group hypre_MPI_Group; typedef MPI_Request hypre_MPI_Request; @@ -353,7 +357,10 @@ HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_I HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif - +HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #ifdef __cplusplus } #endif From e0b077c76dc61ce68a7f1c2676bd329a8da0c23e Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 22 Nov 2023 08:53:24 -0800 Subject: [PATCH 06/90] use new mpi interface --- src/parcsr_mv/par_csr_communication.c | 33 ++++++++++++--------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 54c9bb0409..a358ddab01 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -380,6 +380,9 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, void *send_data; void *recv_data; + hypre_MPI_CommMPI_SendLocation(hcomm) = 
hypre_GetActualMemLocation(send_memory_location); + hypre_MPI_CommMPI_RecvLocation(hcomm) = hypre_GetActualMemLocation(recv_memory_location); + /*-------------------------------------------------------------------- * hypre_Initialize sets up a communication handle, * posts receives and initiates sends. It always requires num_sends, @@ -484,24 +487,18 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, { case 1: { - HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data; - HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, &requests[j++]); - } + hypre_MPI_Irecv_Multiple(recv_data, num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, HYPRE_MPI_COMPLEX, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, hcomm, requests); + + hypre_MPI_Isend_Multiple(send_data, num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, HYPRE_MPI_COMPLEX, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, hcomm, requests); + break; } case 2: From ac9e0d5c7b2717a4495b609848066dcfc63db583 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Thu, 7 Dec 2023 17:36:13 -0800 Subject: [PATCH 07/90] bug fix --- src/utilities/mpistubs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 3fc72cbf97..1795f73864 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c 
@@ -1160,7 +1160,7 @@ hypre_MPI_Irecv( void *buf, HYPRE_Int ip = procs[i]; \ HYPRE_Int start = displs[i]; \ HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ - MPI_CMD(data + start, len, HYPRE_MPI_COMPLEX, \ + MPI_CMD(data + start, len, HYPRE_MPI_DTYPE, \ ip, tag, hypre_MPI_CommMPI_Comm(comm), requests + i); \ } \ return hypre_error_flag; \ @@ -1237,7 +1237,7 @@ hypre_MPI_Irsend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irsend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, + (hypre_int)dest, (hypre_int)tag, hypre_MPI_CommMPI_Comm(comm), request); } @@ -1254,7 +1254,7 @@ hypre_MPI_Probe( HYPRE_Int source, hypre_MPI_Comm comm, hypre_MPI_Status *status ) { - return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, + return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, hypre_MPI_CommMPI_Comm(comm), status); } @@ -1267,7 +1267,7 @@ hypre_MPI_Iprobe( HYPRE_Int source, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, + ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, hypre_MPI_CommMPI_Comm(comm), &mpi_flag, status); *flag = (HYPRE_Int) mpi_flag; From feb50055dd304ea75848584cb76bd599d6d46099 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 8 Dec 2023 09:59:10 -0800 Subject: [PATCH 08/90] restruct par_csr_communication.c --- src/parcsr_mv/par_csr_communication.c | 136 +++++--------------------- 1 file changed, 24 insertions(+), 112 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index a358ddab01..295a1eb5cf 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -374,9 +374,8 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommHandle *comm_handle; HYPRE_Int num_requests; hypre_MPI_Request *requests; - HYPRE_Int i, j; HYPRE_Int my_id, num_procs; - HYPRE_Int ip, vec_start, vec_len; + hypre_MPI_Datatype mpi_dtype; 
void *send_data; void *recv_data; @@ -482,136 +481,49 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - j = 0; + mpi_dtype = job <= 2 ? HYPRE_MPI_COMPLEX : job <= 12 ? HYPRE_MPI_INT : HYPRE_MPI_BIG_INT; + switch (job) { case 1: + case 11: + case 21: { hypre_MPI_Irecv_Multiple(recv_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, HYPRE_MPI_COMPLEX, + NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), 0, hcomm, requests); hypre_MPI_Isend_Multiple(send_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, HYPRE_MPI_COMPLEX, + NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, hcomm, requests); + 0, hcomm, requests + num_recvs); break; } + case 2: + case 12: + case 22: { - HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data; - HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data; - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, &requests[j++]); - } - break; - } - case 11: - { - HYPRE_Int *i_send_data = (HYPRE_Int *) send_data; - HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, 
HYPRE_MPI_INT, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, &requests[j++]); - } - break; - } - case 12: - { - HYPRE_Int *i_send_data = (HYPRE_Int *) send_data; - HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data; - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, &requests[j++]); - } - break; - } - case 21: - { - HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; - HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); - } - break; - } - case 22: - { - 
HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; - HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; - for (i = 0; i < num_sends; i++) - { - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); - } + hypre_MPI_Irecv_Multiple(recv_data, num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, hcomm, requests); + + hypre_MPI_Isend_Multiple(send_data, num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, hcomm, requests + num_sends); + break; } } + /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ From 2bbc23df62bd4bb816240ec7d33368be87c89956 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 8 Dec 2023 15:35:10 -0800 Subject: [PATCH 09/90] make hypre_MPI_Request a struct --- src/utilities/_hypre_utilities.h | 15 ++++- src/utilities/mpistubs.c | 101 ++++++++++++++++++++++++++----- src/utilities/mpistubs.h | 15 ++++- 3 files changed, 113 insertions(+), 18 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 26767ccd4c..cda12a45a0 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1154,7 +1154,19 @@ typedef struct #define hypre_MPI_CommMPI_RecvLocation(comm) 
((comm).recv_location) typedef MPI_Group hypre_MPI_Group; -typedef MPI_Request hypre_MPI_Request; + +typedef HYPRE_Int (*hypre_mpi_request_action) (void *); +typedef struct +{ + MPI_Request mpi_request; + hypre_mpi_request_action post_wait_action; + void *post_wait_data; +} hypre_MPI_Request; + +#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) +#define hypre_MPI_RequestPostWaitAction(request) ((request).post_wait_action) +#define hypre_MPI_RequestPostWaitData(request) ((request).post_wait_data) + typedef MPI_Datatype hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; typedef MPI_Op hypre_MPI_Op; @@ -1300,6 +1312,7 @@ HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); +hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 1795f73864..e2db785137 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -769,6 +769,16 @@ hypre_MPI_CommFromMPI_Comm(MPI_Comm comm) return hcomm; } +hypre_MPI_Request +hypre_MPI_RequestFromMPI_Request(MPI_Request request) +{ + hypre_MPI_Request hrequest; + hypre_Memset(&hrequest, 0, sizeof(hypre_MPI_Request), HYPRE_MEMORY_HOST); + hypre_MPI_RequestMPI_Request(hrequest) = request; + + return hrequest; +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -1132,7 +1142,8 @@ hypre_MPI_Isend( void *buf, { return (HYPRE_Int) MPI_Isend(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), request); + hypre_MPI_CommMPI_Comm(comm), + &hypre_MPI_RequestMPI_Request(*request)); } HYPRE_Int @@ -1146,7 +1157,8 @@ 
hypre_MPI_Irecv( void *buf, { return (HYPRE_Int) MPI_Irecv(buf, (hypre_int)count, datatype, (hypre_int)source, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), request); + hypre_MPI_CommMPI_Comm(comm), + &hypre_MPI_RequestMPI_Request(*request)); } #define TYPE_MACRO(MPI_CMD, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ @@ -1161,7 +1173,8 @@ hypre_MPI_Irecv( void *buf, HYPRE_Int start = displs[i]; \ HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ MPI_CMD(data + start, len, HYPRE_MPI_DTYPE, \ - ip, tag, hypre_MPI_CommMPI_Comm(comm), requests + i); \ + ip, tag, hypre_MPI_CommMPI_Comm(comm), \ + &hypre_MPI_RequestMPI_Request(requests[i])); \ } \ return hypre_error_flag; \ } \ @@ -1181,6 +1194,8 @@ hypre_MPI_Isend_Multiple( void *buf, TYPE_MACRO(MPI_Isend, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Isend, HYPRE_Int, HYPRE_MPI_INT); TYPE_MACRO(MPI_Isend, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + + return hypre_error_flag; } HYPRE_Int @@ -1197,6 +1212,8 @@ hypre_MPI_Irecv_Multiple( void *buf, TYPE_MACRO(MPI_Irecv, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Irecv, HYPRE_Int, HYPRE_MPI_INT); TYPE_MACRO(MPI_Irecv, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + + return hypre_error_flag; } HYPRE_Int @@ -1210,7 +1227,8 @@ hypre_MPI_Send_init( void *buf, { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), request); + hypre_MPI_CommMPI_Comm(comm), + &hypre_MPI_RequestMPI_Request(*request)); } HYPRE_Int @@ -1224,7 +1242,8 @@ hypre_MPI_Recv_init( void *buf, { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), request); + hypre_MPI_CommMPI_Comm(comm), + &hypre_MPI_RequestMPI_Request(*request)); } HYPRE_Int @@ -1238,14 +1257,32 @@ hypre_MPI_Irsend( void *buf, { return (HYPRE_Int) MPI_Irsend(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), request); + hypre_MPI_CommMPI_Comm(comm), 
+ &hypre_MPI_RequestMPI_Request(*request)); } HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ) { - return (HYPRE_Int) MPI_Startall((hypre_int)count, array_of_requests); + HYPRE_Int i, ierr; + MPI_Request *array_of_mpi_requests = hypre_CTAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); + + for (i = 0; i < count; i++) + { + array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); + } + + ierr = (HYPRE_Int) MPI_Startall((hypre_int)count, array_of_mpi_requests); + + for (i = 0; i < count; i++) + { + hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; + } + + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); + + return ierr; } HYPRE_Int @@ -1281,7 +1318,7 @@ hypre_MPI_Test( hypre_MPI_Request *request, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Test(request, &mpi_flag, status); + ierr = (HYPRE_Int) MPI_Test(&hypre_MPI_RequestMPI_Request(*request), &mpi_flag, status); *flag = (HYPRE_Int) mpi_flag; return ierr; } @@ -1293,10 +1330,20 @@ hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Status *array_of_statuses ) { hypre_int mpi_flag; - HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Testall((hypre_int)count, array_of_requests, + HYPRE_Int i, ierr; + + MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); + for (i = 0; i < count; i++) + { + array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); + } + + ierr = (HYPRE_Int) MPI_Testall((hypre_int)count, array_of_mpi_requests, &mpi_flag, array_of_statuses); *flag = (HYPRE_Int) mpi_flag; + + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); + return ierr; } @@ -1304,7 +1351,7 @@ HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ) { - return (HYPRE_Int) MPI_Wait(request, status); + return (HYPRE_Int) MPI_Wait(&hypre_MPI_RequestMPI_Request(*request), status); } HYPRE_Int @@ -1312,8 +1359,20 @@ hypre_MPI_Waitall( HYPRE_Int count, 
hypre_MPI_Request *array_of_requests, hypre_MPI_Status *array_of_statuses ) { - return (HYPRE_Int) MPI_Waitall((hypre_int)count, - array_of_requests, array_of_statuses); + HYPRE_Int i, ierr; + + MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); + for (i = 0; i < count; i++) + { + array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); + } + + ierr = (HYPRE_Int) MPI_Waitall((hypre_int)count, + array_of_mpi_requests, array_of_statuses); + + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); + + return ierr; } HYPRE_Int @@ -1323,10 +1382,20 @@ hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Status *status ) { hypre_int mpi_index; - HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Waitany((hypre_int)count, array_of_requests, + HYPRE_Int i, ierr; + + MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); + for (i = 0; i < count; i++) + { + array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); + } + + ierr = (HYPRE_Int) MPI_Waitany((hypre_int)count, array_of_mpi_requests, &mpi_index, status); *index = (HYPRE_Int) mpi_index; + + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); + return ierr; } @@ -1377,7 +1446,7 @@ hypre_MPI_Scan( void *sendbuf, HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ) { - return (HYPRE_Int) MPI_Request_free(request); + return (HYPRE_Int) MPI_Request_free(&hypre_MPI_RequestMPI_Request(*request)); } HYPRE_Int diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index d559389ccf..dd1c331624 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -205,7 +205,19 @@ typedef struct #define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) typedef MPI_Group hypre_MPI_Group; -typedef MPI_Request hypre_MPI_Request; + +typedef HYPRE_Int (*hypre_mpi_request_action) (void *); +typedef struct +{ + MPI_Request mpi_request; + hypre_mpi_request_action post_wait_action; + void 
*post_wait_data; +} hypre_MPI_Request; + +#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) +#define hypre_MPI_RequestPostWaitAction(request) ((request).post_wait_action) +#define hypre_MPI_RequestPostWaitData(request) ((request).post_wait_data) + typedef MPI_Datatype hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; typedef MPI_Op hypre_MPI_Op; @@ -351,6 +363,7 @@ HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); +hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); From 538cf070d228714fe295f407b9aee2930c1dfdd8 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 8 Dec 2023 16:29:23 -0800 Subject: [PATCH 10/90] changes in euclid/parasails from hypre_mpi_request --- src/distributed_ls/Euclid/Factor_dh.c | 126 +++++++++++------------ src/distributed_ls/Euclid/Mat_dh.c | 140 +++++++++++++------------- src/distributed_ls/ParaSails/Matrix.c | 33 ++++-- 3 files changed, 159 insertions(+), 140 deletions(-) diff --git a/src/distributed_ls/Euclid/Factor_dh.c b/src/distributed_ls/Euclid/Factor_dh.c index 51bf7d33ed..5c68076ced 100644 --- a/src/distributed_ls/Euclid/Factor_dh.c +++ b/src/distributed_ls/Euclid/Factor_dh.c @@ -35,7 +35,7 @@ void Factor_dhCreate(Factor_dh *mat) { START_FUNC_DH HYPRE_Int i; - struct _factor_dh* tmp; + struct _factor_dh* tmp; if (np_dh > MAX_MPI_TASKS) { SET_V_ERROR("you must change MAX_MPI_TASKS and recompile!"); @@ -47,7 +47,7 @@ void Factor_dhCreate(Factor_dh *mat) tmp->m = 0; tmp->n = 0; tmp->id = myid_dh; - tmp->beg_row = 0; + tmp->beg_row = 0; tmp->first_bdry = 0; tmp->bdry_count = 0; tmp->blockJacobi = false; @@ -70,15 +70,15 @@ void 
Factor_dhCreate(Factor_dh *mat) tmp->numbSolve = NULL; tmp->debug = Parser_dhHasSwitch(parser_dh, "-debug_Factor"); - + /* initialize MPI request to null */ for(i=0; irecv_reqLo[i] = hypre_MPI_REQUEST_NULL; - tmp->recv_reqHi[i] = hypre_MPI_REQUEST_NULL; - tmp->send_reqLo[i] = hypre_MPI_REQUEST_NULL; - tmp->send_reqHi[i] = hypre_MPI_REQUEST_NULL; - tmp->requests[i] = hypre_MPI_REQUEST_NULL; + tmp->recv_reqLo[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + tmp->recv_reqHi[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + tmp->send_reqLo[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + tmp->send_reqHi[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + tmp->requests[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); } /* Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */ END_FUNC_DH @@ -105,17 +105,17 @@ void Factor_dhDestroy(Factor_dh mat) if (mat->sendindHi != NULL) { FREE_DH(mat->sendindHi); CHECK_V_ERROR; } if (mat->numbSolve != NULL) { Numbering_dhDestroy(mat->numbSolve); CHECK_V_ERROR; } - + /* cleanup MPI requests */ for(i=0; irecv_reqLo[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqLo[i])); - if(mat->recv_reqHi[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqHi[i])); - if(mat->send_reqLo[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqLo[i])); - if(mat->send_reqHi[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqHi[i])); - if(mat->requests[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->requests[i])); + if(hypre_MPI_RequestMPI_Request(mat->recv_reqLo[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqLo[i])); + if(hypre_MPI_RequestMPI_Request(mat->recv_reqHi[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqHi[i])); + if(hypre_MPI_RequestMPI_Request(mat->send_reqLo[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqLo[i])); + 
if(hypre_MPI_RequestMPI_Request(mat->send_reqHi[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqHi[i])); + if(hypre_MPI_RequestMPI_Request(mat->requests[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->requests[i])); } - FREE_DH(mat); CHECK_V_ERROR; + FREE_DH(mat); CHECK_V_ERROR; END_FUNC_DH } @@ -208,14 +208,14 @@ void Factor_dhPrintDiags(Factor_dh mat, FILE *fp) START_FUNC_DH HYPRE_Int beg_row = mat->beg_row; HYPRE_Int m = mat->m, i, pe, *diag = mat->diag; - REAL_DH *aval = mat->aval; + REAL_DH *aval = mat->aval; + - fprintf_dh(fp, "\n----------------------- Factor_dhPrintDiags ------------------\n"); fprintf_dh(fp, "(grep for 'ZERO')\n"); for (pe=0; peid == pe) { hypre_fprintf(fp, "----- subdomain: %i processor: %i\n", pe, myid_dh); for (i=0; iid == pe) { - if (pe == 0) { + if (pe == 0) { fp=openFile_dh(filename, "w"); CHECK_V_ERROR; - } - else { + } + else { fp=openFile_dh(filename, "a"); CHECK_V_ERROR; } @@ -299,7 +299,7 @@ void Factor_dhPrintTriples(Factor_dh mat, char *filename) if (noValues) { hypre_fprintf(fp, "%i %i\n", 1+i+beg_row, 1+mat->cval[j]); } else { - hypre_fprintf(fp, TRIPLES_FORMAT, + hypre_fprintf(fp, TRIPLES_FORMAT, 1+i+beg_row, 1+mat->cval[j], aval[j]); } } @@ -329,9 +329,9 @@ void Factor_dhPrintTriples(Factor_dh mat, char *filename) */ #undef __FUNC__ #define __FUNC__ "setup_receives_private" -static HYPRE_Int setup_receives_private(Factor_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *end_rows, +static HYPRE_Int setup_receives_private(Factor_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *end_rows, HYPRE_Real *recvBuf, hypre_MPI_Request *req, - HYPRE_Int *reqind, HYPRE_Int reqlen, + HYPRE_Int *reqind, HYPRE_Int reqlen, HYPRE_Int *outlist, bool debug) { START_FUNC_DH @@ -343,7 +343,7 @@ static HYPRE_Int setup_receives_private(Factor_dh mat, HYPRE_Int *beg_rows, HYPR hypre_fprintf(logFile, "FACT STARTING: setup_receives_private\n"); } - for (i=0; inum_sendLo = mat->num_sendHi */ - hypre_MPI_Waitall(count, requests, 
statuses); + hypre_MPI_Waitall(count, requests, statuses); if (debug) { HYPRE_Int j; @@ -494,7 +494,7 @@ static void setup_sends_private(Factor_dh mat, HYPRE_Int *inlist, -#undef __FUNC__ +#undef __FUNC__ #define __FUNC__ "Factor_dhSolveSetup" void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) { @@ -543,7 +543,7 @@ void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) mat->num_recvHi = 0; if (numb->num_extLo) { recvBuf = mat->work_y_lo + m; - mat->num_recvLo = setup_receives_private(mat, beg_rows, end_rows, + mat->num_recvLo = setup_receives_private(mat, beg_rows, end_rows, recvBuf, mat->recv_reqLo, numb->idx_extLo, numb->num_extLo, outlist, debug); CHECK_V_ERROR; @@ -554,13 +554,13 @@ void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) recvBuf = mat->work_x_hi + m + numb->num_extLo; mat->num_recvHi = setup_receives_private(mat, beg_rows, end_rows, recvBuf, mat->recv_reqHi, - numb->idx_extHi, numb->num_extHi, + numb->idx_extHi, numb->num_extHi, outlist, debug); CHECK_V_ERROR; } hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); - /* At this point, inlist[j] contains the number of indices + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); + /* At this point, inlist[j] contains the number of indices that this processor must send to P_j. Processors next need to exchange the actual lists of required indices; this is done in setup_sends_private() @@ -600,12 +600,12 @@ void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) so similar to MatVec, that I put it here, instead of with the other solves located in Euclid_apply.c. 
*/ -static void forward_solve_private(HYPRE_Int m, HYPRE_Int from, HYPRE_Int to, - HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Int *diag, HYPRE_Real *aval, +static void forward_solve_private(HYPRE_Int m, HYPRE_Int from, HYPRE_Int to, + HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Int *diag, HYPRE_Real *aval, HYPRE_Real *rhs, HYPRE_Real *work_y, bool debug); -static void backward_solve_private(HYPRE_Int m, HYPRE_Int from, HYPRE_Int to, - HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Int *diag, HYPRE_Real *aval, +static void backward_solve_private(HYPRE_Int m, HYPRE_Int from, HYPRE_Int to, + HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Int *diag, HYPRE_Real *aval, HYPRE_Real *work_y, HYPRE_Real *work_x, bool debug); static HYPRE_Int beg_rowG; @@ -625,7 +625,7 @@ void Factor_dhSolve(HYPRE_Real *rhs, HYPRE_Real *lhs, Euclid_dh ctx) HYPRE_Real *aval = mat->aval; HYPRE_Int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi; HYPRE_Int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi; - HYPRE_Real *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi; + HYPRE_Real *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi; HYPRE_Real *work_y = mat->work_y_lo; HYPRE_Real *work_x = mat->work_x_hi; bool debug = false; @@ -648,10 +648,10 @@ for (i=0; inum_recvLo) { - hypre_MPI_Startall(mat->num_recvLo, mat->recv_reqLo); + hypre_MPI_Startall(mat->num_recvLo, mat->recv_reqLo); } if (mat->num_recvHi) { - hypre_MPI_Startall(mat->num_recvHi, mat->recv_reqHi); + hypre_MPI_Startall(mat->num_recvHi, mat->recv_reqHi); } /*------------------------------------------------------------- @@ -661,7 +661,7 @@ for (i=0; inum_sendHi, mat->send_reqHi); + hypre_MPI_Startall(mat->num_sendHi, mat->send_reqHi); /* debug block */ if (debug) { @@ -730,7 +730,7 @@ for (i=0; i=to; --i) { @@ -910,7 +910,7 @@ void Factor_dhInit(void *A, bool fillFlag, bool avalFlag, EuclidGetDimensions(A, &beg_row, &m, &n); CHECK_V_ERROR; alloc = (HYPRE_Int)(rho*m); - Factor_dhCreate(&F); CHECK_V_ERROR; + Factor_dhCreate(&F); 
CHECK_V_ERROR; *Fout = F; F->m = m; @@ -970,7 +970,7 @@ void Factor_dhTranspose(Factor_dh A, Factor_dh *Bout) START_FUNC_DH Factor_dh B; - if (np_dh > 1) { SET_V_ERROR("only for sequential"); } + if (np_dh > 1) { SET_V_ERROR("only for sequential"); } Factor_dhCreate(&B); CHECK_V_ERROR; *Bout = B; @@ -1151,7 +1151,7 @@ HYPRE_Real Factor_dhMaxValue(Factor_dh mat) for (i=0; im = 0; tmp->n = 0; - tmp->beg_row = 0; + tmp->beg_row = 0; tmp->bs = 1; tmp->rp = NULL; @@ -117,7 +117,7 @@ void Mat_dhDestroy(Mat_dh mat) Mat_dhMatVecSetdown(mat); CHECK_V_ERROR; } if (mat->numb != NULL) { Numbering_dhDestroy(mat->numb); CHECK_V_ERROR; } - FREE_DH(mat); CHECK_V_ERROR; + FREE_DH(mat); CHECK_V_ERROR; END_FUNC_DH } @@ -152,52 +152,52 @@ void Mat_dhMatVecSetup(Mat_dh mat) HYPRE_Int lastLocal = firstLocal+m; HYPRE_Int *beg_rows, *end_rows; hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - + mat->recv_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; mat->send_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; mat->status = (hypre_MPI_Status *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Status)); CHECK_V_ERROR; beg_rows = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; end_rows = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; - + if (np_dh == 1) { /* this is for debugging purposes in some of the drivers */ beg_rows[0] = 0; end_rows[0] = m; } else { ierr = hypre_MPI_Allgather(&firstLocal, 1, HYPRE_MPI_INT, beg_rows, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); - + ierr = hypre_MPI_Allgather(&lastLocal, 1, HYPRE_MPI_INT, end_rows, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); } - + outlist = (HYPRE_Int *)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; inlist = (HYPRE_Int *)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; inumb)); CHECK_V_ERROR; numb = mat->numb; Numbering_dhSetup(numb, mat); CHECK_V_ERROR; - - setup_matvec_receives_private(mat, beg_rows, 
end_rows, numb->num_ext, + + setup_matvec_receives_private(mat, beg_rows, end_rows, numb->num_ext, numb->idx_ext, outlist); CHECK_V_ERROR; - + if (np_dh == 1) { /* this is for debugging purposes in some of the drivers */ inlist[0] = outlist[0]; } else { ierr = hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); } - + setup_matvec_sends_private(mat, inlist); CHECK_V_ERROR; - + /* Convert to local indices */ for (row=0; rowrecvbuf = (HYPRE_Real*)MALLOC_DH((reqlen+m) * sizeof(HYPRE_Real)); - for (i=0; iaval; HYPRE_Int *sendind = mat->sendind; HYPRE_Int sendlen = mat->sendlen; - HYPRE_Real *sendbuf = mat->sendbuf; + HYPRE_Real *sendbuf = mat->sendbuf; HYPRE_Real *recvbuf = mat->recvbuf; HYPRE_Real t1 = 0, t2 = 0, t3 = 0, t4 = 0; bool timeFlag = mat->matvec_timing; - - + + if (timeFlag) t1 = hypre_MPI_Wtime(); - + /* Put components of x into the right outgoing buffers */ if (! commsOnly) { - for (i=0; itime[MATVEC_TIME] += (t2 - t1); - + } - + ierr = hypre_MPI_Startall(mat->num_recv, mat->recv_req); CHECK_MPI_V_ERROR(ierr); ierr = hypre_MPI_Startall(mat->num_send, mat->send_req); CHECK_MPI_V_ERROR(ierr); ierr = hypre_MPI_Waitall(mat->num_recv, mat->recv_req, mat->status); CHECK_MPI_V_ERROR(ierr); ierr = hypre_MPI_Waitall(mat->num_send, mat->send_req, mat->status); CHECK_MPI_V_ERROR(ierr); - - + + if (timeFlag) { t3 = hypre_MPI_Wtime(); mat->time[MATVEC_MPI_TIME] += (t3 - t2); } - + /* Copy local part of x into top part of recvbuf */ if (! 
commsOnly) { for (i=0; itime[MATVEC_TOTAL_TIME] += (t4 - t1); @@ -391,7 +391,7 @@ void Mat_dhMatVec_omp(Mat_dh mat, HYPRE_Real *x, HYPRE_Real *b) HYPRE_Real *aval = mat->aval; HYPRE_Int *sendind = mat->sendind; HYPRE_Int sendlen = mat->sendlen; - HYPRE_Real *sendbuf = mat->sendbuf; + HYPRE_Real *sendbuf = mat->sendbuf; HYPRE_Real *recvbuf = mat->recvbuf; HYPRE_Real t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0; HYPRE_Real *val, temp; @@ -404,7 +404,7 @@ void Mat_dhMatVec_omp(Mat_dh mat, HYPRE_Real *x, HYPRE_Real *b) #ifdef USING_OPENMP_DH #pragma omp parallel for schedule(runtime) private(i) #endif - for (i=0; im, A->beg_row, A->rp, A->cval, + mat_dh_print_graph_private(A->m, A->beg_row, A->rp, A->cval, A->aval, NULL, NULL, NULL, fp); CHECK_V_ERROR; } else { HYPRE_Int beg_row = sg->beg_rowP[myid_dh]; - mat_dh_print_graph_private(A->m, beg_row, A->rp, A->cval, + mat_dh_print_graph_private(A->m, beg_row, A->rp, A->cval, A->aval, sg->n2o_row, sg->o2n_col, sg->o2n_ext, fp); CHECK_V_ERROR; } } @@ -689,14 +689,14 @@ void Mat_dhPrintGraph(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) { START_FUNC_DH - bool noValues; + bool noValues; HYPRE_Int m = A->m, *rp = A->rp, *cval = A->cval; HYPRE_Real *aval = A->aval; noValues = (Parser_dhHasSwitch(parser_dh, "-noValues")); if (noValues) aval = NULL; - /*---------------------------------------------------------------- + /*---------------------------------------------------------------- * case 1: print local portion of unpermuted matrix *----------------------------------------------------------------*/ if (sg == NULL) { @@ -717,7 +717,7 @@ void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) } } - /*---------------------------------------------------------------- + /*---------------------------------------------------------------- * case 2: single mpi task, with multiple subdomains *----------------------------------------------------------------*/ else if (np_dh == 1) 
{ @@ -736,7 +736,7 @@ void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) hypre_fprintf(fp, "\n"); hypre_fprintf(fp, "\n----- A, permuted, single mpi task ------------------\n"); hypre_fprintf(fp, "---- new subdomain: %i; old subdomain: %i\n", i, oldBlock); - hypre_fprintf(fp, " old beg_row: %i; new beg_row: %i\n", + hypre_fprintf(fp, " old beg_row: %i; new beg_row: %i\n", sg->beg_row[oldBlock], sg->beg_rowP[oldBlock]); hypre_fprintf(fp, " local rows in this block: %i\n", sg->row_count[oldBlock]); hypre_fprintf(fp, " bdry rows in this block: %i\n", sg->bdry_count[oldBlock]); @@ -752,7 +752,7 @@ void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) for (k=0; ko2n_col[cval[k]]); + hypre_fprintf(fp, "%i ", 1+sg->o2n_col[cval[k]]); } else { hypre_fprintf(fp, "%i,%g ; ", 1+sg->o2n_col[cval[k]], aval[k]); } @@ -764,7 +764,7 @@ void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) } } - /*---------------------------------------------------------------- + /*---------------------------------------------------------------- * case 3: multiple mpi tasks, one subdomain per task *----------------------------------------------------------------*/ else { @@ -784,14 +784,14 @@ void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp) /* case i: column is locally owned */ if (col >= beg_row && col < beg_row+m) { col = o2n_col[col-beg_row] + beg_rowP; - } + } /* case ii: column is external */ else { HYPRE_Int tmp = col; tmp = Hash_i_dhLookup(hash, col); CHECK_V_ERROR; - if (tmp == -1) { - hypre_sprintf(msgBuf_dh, "nonlocal column= %i not in hash table", 1+col); + if (tmp == -1) { + hypre_sprintf(msgBuf_dh, "nonlocal column= %i not in hash table", 1+col); SET_V_ERROR(msgBuf_dh); } else { col = tmp; @@ -819,7 +819,7 @@ void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename) START_FUNC_DH HYPRE_Int m = A->m, *rp = A->rp, *cval = A->cval; HYPRE_Real *aval = A->aval; - bool noValues; + bool noValues; bool matlab; FILE *fp; @@ -827,7 +827,7 
@@ void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename) if (noValues) aval = NULL; matlab = (Parser_dhHasSwitch(parser_dh, "-matlab")); - /*---------------------------------------------------------------- + /*---------------------------------------------------------------- * case 1: unpermuted matrix, single or multiple mpi tasks *----------------------------------------------------------------*/ if (sg == NULL) { @@ -836,11 +836,11 @@ void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename) HYPRE_Real val; for (pe=0; pen2o_row[j]; Mat_dhGetRow(A, oldRow, &len, &cval, &aval); CHECK_V_ERROR; - + if (noValues) { for (k=0; ko2n_col[cval[k]]); @@ -900,7 +900,7 @@ void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename) } } - /*---------------------------------------------------------------- + /*---------------------------------------------------------------- * case 3: multiple mpi tasks, one subdomain per task *----------------------------------------------------------------*/ else { @@ -914,10 +914,10 @@ void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename) for (pe=0; pe= beg_row && col < beg_row+m) { col = o2n_col[col-beg_row] + beg_rowP; - } + } /* case ii: column is external */ else { HYPRE_Int tmp = col; tmp = Hash_i_dhLookup(hash, col); CHECK_V_ERROR; - if (tmp == -1) { - hypre_sprintf(msgBuf_dh, "nonlocal column= %i not in hash table", 1+col); + if (tmp == -1) { + hypre_sprintf(msgBuf_dh, "nonlocal column= %i not in hash table", 1+col); SET_V_ERROR(msgBuf_dh); } else { col = tmp; @@ -1029,7 +1029,7 @@ void Mat_dhReadCSR(Mat_dh *mat, char *filename) } fp=openFile_dh(filename, "r"); CHECK_V_ERROR; - + Mat_dhCreate(&A); CHECK_V_ERROR; mat_dh_read_csr_private(&A->m, &A->rp, &A->cval, &A->aval, fp); CHECK_V_ERROR; A->n = A->m; @@ -1092,7 +1092,7 @@ void Mat_dhTranspose(Mat_dh A, Mat_dh *Bout) START_FUNC_DH Mat_dh B; - if (np_dh > 1) { SET_V_ERROR("only for sequential"); } + if (np_dh > 1) { 
SET_V_ERROR("only for sequential"); } Mat_dhCreate(&B); CHECK_V_ERROR; *Bout = B; @@ -1107,7 +1107,7 @@ void Mat_dhTranspose(Mat_dh A, Mat_dh *Bout) void Mat_dhMakeStructurallySymmetric(Mat_dh A) { START_FUNC_DH - if (np_dh > 1) { SET_V_ERROR("only for sequential"); } + if (np_dh > 1) { SET_V_ERROR("only for sequential"); } make_symmetric_private(A->m, &A->rp, &A->cval, &A->aval); CHECK_V_ERROR; END_FUNC_DH } @@ -1235,7 +1235,7 @@ void Mat_dhPrintDiags(Mat_dh A, FILE *fp) #undef __FUNC__ #define __FUNC__ "Mat_dhGetRow" -void Mat_dhGetRow(Mat_dh B, HYPRE_Int globalRow, HYPRE_Int *len, HYPRE_Int **ind, HYPRE_Real **val) +void Mat_dhGetRow(Mat_dh B, HYPRE_Int globalRow, HYPRE_Int *len, HYPRE_Int **ind, HYPRE_Real **val) { START_FUNC_DH HYPRE_Int row = globalRow - B->beg_row; @@ -1245,14 +1245,14 @@ void Mat_dhGetRow(Mat_dh B, HYPRE_Int globalRow, HYPRE_Int *len, HYPRE_Int **ind SET_V_ERROR(msgBuf_dh); } *len = B->rp[row+1] - B->rp[row]; - if (ind != NULL) *ind = B->cval + B->rp[row]; - if (val != NULL) *val = B->aval + B->rp[row]; + if (ind != NULL) *ind = B->cval + B->rp[row]; + if (val != NULL) *val = B->aval + B->rp[row]; END_FUNC_DH } #undef __FUNC__ #define __FUNC__ "Mat_dhRestoreRow" -void Mat_dhRestoreRow(Mat_dh B, HYPRE_Int row, HYPRE_Int *len, HYPRE_Int **ind, HYPRE_Real **val) +void Mat_dhRestoreRow(Mat_dh B, HYPRE_Int row, HYPRE_Int *len, HYPRE_Int **ind, HYPRE_Real **val) { START_FUNC_DH END_FUNC_DH @@ -1279,7 +1279,7 @@ void Mat_dhRowPermute(Mat_dh mat) * permuted matrix has as many entries on its diagonal as * possible. The values on the diagonal are of arbitrary size. * HSL subroutine MC21A/AD is used for this. - * = 2 : Compute a row permutation of the matrix so that the smallest + * = 2 : Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. * = 3 : Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. 
@@ -1289,9 +1289,9 @@ void Mat_dhRowPermute(Mat_dh mat) * of the diagonal entries of the permuted matrix is maximized. * = 5 : Compute a row permutation of the matrix so that the product * of the diagonal entries of the permuted matrix is maximized - * and vectors to scale the matrix so that the nonzero diagonal - * entries of the permuted matrix are one in absolute value and - * all the off-diagonal entries are less than or equal to one in + * and vectors to scale the matrix so that the nonzero diagonal + * entries of the permuted matrix are one in absolute value and + * all the off-diagonal entries are less than or equal to one in * absolute value. #endif @@ -1404,7 +1404,7 @@ void build_adj_lists_private(Mat_dh mat, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT) #undef __FUNC__ #define __FUNC__ "Mat_dhPartition" -void Mat_dhPartition(Mat_dh mat, HYPRE_Int blocks, +void Mat_dhPartition(Mat_dh mat, HYPRE_Int blocks, HYPRE_Int **beg_rowOUT, HYPRE_Int **row_countOUT, HYPRE_Int **n2oOUT, HYPRE_Int **o2nOUT) { START_FUNC_DH @@ -1437,11 +1437,11 @@ NULL, NULL, 0 /*no edge or vertex weights*/ 0 /*use zero-based numbering*/ blocksIN, -options[5] = +options[5] = 0 :: 0/1 use defauls; use uptions 1..4 - 1 :: + 1 :: edgecutOUT, -part[] +part[] ============================================================= #endif diff --git a/src/distributed_ls/ParaSails/Matrix.c b/src/distributed_ls/ParaSails/Matrix.c index 664fd14150..f5d42595b2 100644 --- a/src/distributed_ls/ParaSails/Matrix.c +++ b/src/distributed_ls/ParaSails/Matrix.c @@ -340,7 +340,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) offset = ftell(file); hypre_fscanf(file, "%d %d %lf", &row, &col, &value); - request = hypre_MPI_REQUEST_NULL; + request = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); curr_proc = 1; /* proc for which we are looking for the beginning */ while (curr_proc < npes) { @@ -770,8 +770,14 @@ void MatrixMatvec(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) for (i=0; isendlen; i++) 
mat->sendbuf[i] = x[mat->sendind[i]]; - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - hypre_MPI_Startall(mat->num_send, mat->send_req); + if (mat->num_recv) + { + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + } + if (mat->num_send) + { + hypre_MPI_Startall(mat->num_send, mat->send_req); + } /* Copy local part of x into top part of recvbuf */ for (i=0; isendlen; i++) mat->sendbuf[i] = x[mat->sendind[i]]; - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - hypre_MPI_Startall(mat->num_send, mat->send_req); + if (mat->num_recv) + { + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + } + + if (mat->num_send) + { + hypre_MPI_Startall(mat->num_send, mat->send_req); + } /* Copy local part of x into top part of recvbuf */ for (i=0; inum_send, mat->recv_req2); + if (mat->num_send) + { + hypre_MPI_Startall(mat->num_send, mat->recv_req2); + } /* initialize accumulator buffer to zero */ for (i=0; irecvlen+num_local; i++) @@ -871,7 +887,10 @@ void MatrixMatvecTrans(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) } /* Now can send nonlocal parts of solution to other procs */ - hypre_MPI_Startall(mat->num_recv, mat->send_req2); + if (mat->num_recv) + { + hypre_MPI_Startall(mat->num_recv, mat->send_req2); + } /* copy local part of solution into y */ for (i=0; i Date: Fri, 8 Dec 2023 16:30:59 -0800 Subject: [PATCH 11/90] bug fix --- src/utilities/mpistubs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index e2db785137..b90798a2fe 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1342,6 +1342,11 @@ hypre_MPI_Testall( HYPRE_Int count, &mpi_flag, array_of_statuses); *flag = (HYPRE_Int) mpi_flag; + for (i = 0; i < count; i++) + { + hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; + } + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); return ierr; @@ -1370,6 +1375,11 @@ hypre_MPI_Waitall( HYPRE_Int count, ierr = (HYPRE_Int) 
MPI_Waitall((hypre_int)count, array_of_mpi_requests, array_of_statuses); + for (i = 0; i < count; i++) + { + hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; + } + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); return ierr; @@ -1394,6 +1404,11 @@ hypre_MPI_Waitany( HYPRE_Int count, &mpi_index, status); *index = (HYPRE_Int) mpi_index; + for (i = 0; i < count; i++) + { + hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; + } + hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); return ierr; From 8d44472e1b8512c48300b9f2ae4ea1f2f7daaded Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Mon, 11 Dec 2023 14:18:28 -0800 Subject: [PATCH 12/90] update par_csr_communication --- src/parcsr_mv/_hypre_parcsr_mv.h | 20 +--- src/parcsr_mv/par_csr_communication.c | 135 +++----------------------- src/parcsr_mv/par_csr_communication.h | 20 +--- 3 files changed, 21 insertions(+), 154 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 7838668758..4807beab40 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -49,16 +49,10 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; - HYPRE_MemoryLocation send_memory_location; - HYPRE_MemoryLocation recv_memory_location; - HYPRE_Int num_send_bytes; - HYPRE_Int num_recv_bytes; - void *send_data; - void *recv_data; - void *send_data_buffer; - void *recv_data_buffer; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; + void *send_data; + void *recv_data; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; @@ -148,14 +142,8 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) *--------------------------------------------------------------------------*/ #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define 
hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) (comm_handle -> send_memory_location) -#define hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) (comm_handle -> recv_memory_location) -#define hypre_ParCSRCommHandleNumSendBytes(comm_handle) (comm_handle -> num_send_bytes) -#define hypre_ParCSRCommHandleNumRecvBytes(comm_handle) (comm_handle -> num_recv_bytes) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendDataBuffer(comm_handle) (comm_handle -> send_data_buffer) -#define hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) (comm_handle -> recv_data_buffer) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 295a1eb5cf..253cec47a8 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -359,9 +359,9 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, - void *send_data_in, + void *send_data, HYPRE_MemoryLocation recv_memory_location, - void *recv_data_in ) + void *recv_data ) { hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_v2"); @@ -369,19 +369,17 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - HYPRE_Int num_send_bytes = 0; - HYPRE_Int num_recv_bytes = 0; hypre_ParCSRCommHandle *comm_handle; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - HYPRE_Int my_id, num_procs; - hypre_MPI_Datatype mpi_dtype; - 
void *send_data; - void *recv_data; + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_CommMPI_SendLocation(hcomm) = hypre_GetActualMemLocation(send_memory_location); hypre_MPI_CommMPI_RecvLocation(hcomm) = hypre_GetActualMemLocation(recv_memory_location); + hypre_MPI_Datatype mpi_dtype = job <= 2 ? HYPRE_MPI_COMPLEX : + job <= 12 ? HYPRE_MPI_INT : + HYPRE_MPI_BIG_INT; + /*-------------------------------------------------------------------- * hypre_Initialize sets up a communication handle, * posts receives and initiates sends. It always requires num_sends, @@ -412,76 +410,6 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, * datatypes need to point to absolute * addresses, e.g. generated using hypre_MPI_Address . *--------------------------------------------------------------------*/ -#ifndef HYPRE_WITH_GPU_AWARE_MPI - switch (job) - { - case 1: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex); - break; - case 2: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex); - break; - case 11: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int); - break; - case 12: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int); - break; - case 21: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt); 
- break; - case 22: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt); - break; - } - - hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(send_memory_location); - - if ( act_send_memory_location == hypre_MEMORY_DEVICE || - act_send_memory_location == hypre_MEMORY_UNIFIED ) - { - //send_data = _hypre_TAlloc(char, num_send_bytes, hypre_MEMORY_HOST_PINNED); - send_data = hypre_TAlloc(char, num_send_bytes, HYPRE_MEMORY_HOST); - hypre_GpuProfilingPushRange("MPI-D2H"); - hypre_TMemcpy(send_data, send_data_in, char, num_send_bytes, HYPRE_MEMORY_HOST, - HYPRE_MEMORY_DEVICE); - hypre_GpuProfilingPopRange(); - } - else - { - send_data = send_data_in; - } - - hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(recv_memory_location); - - if ( act_recv_memory_location == hypre_MEMORY_DEVICE || - act_recv_memory_location == hypre_MEMORY_UNIFIED ) - { - //recv_data = hypre_TAlloc(char, num_recv_bytes, hypre_MEMORY_HOST_PINNED); - recv_data = hypre_TAlloc(char, num_recv_bytes, HYPRE_MEMORY_HOST); - } - else - { - recv_data = recv_data_in; - } -#else /* #ifndef HYPRE_WITH_GPU_AWARE_MPI */ - send_data = send_data_in; - recv_data = recv_data_in; -#endif - - num_requests = num_sends + num_recvs; - requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - - hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm_rank(comm, &my_id); - - mpi_dtype = job <= 2 ? HYPRE_MPI_COMPLEX : job <= 12 ? 
HYPRE_MPI_INT : HYPRE_MPI_BIG_INT; switch (job) { @@ -523,24 +451,17 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, break; } } - /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_send_bytes; - hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_recv_bytes; - hypre_ParCSRCommHandleSendData(comm_handle) = send_data_in; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data_in; - hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_data; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; hypre_GpuProfilingPopRange(); @@ -566,41 +487,11 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_MPI_Status *status0; status0 = hypre_CTAlloc(hypre_MPI_Status, hypre_ParCSRCommHandleNumRequests(comm_handle), HYPRE_MEMORY_HOST); - hypre_GpuProfilingPushRange("hypre_MPI_Waitall"); hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), hypre_ParCSRCommHandleRequests(comm_handle), status0); - hypre_GpuProfilingPopRange(); hypre_TFree(status0, HYPRE_MEMORY_HOST); } -#ifndef HYPRE_WITH_GPU_AWARE_MPI - hypre_MemoryLocation act_send_memory_location 
= hypre_GetActualMemLocation( - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle)); - if ( act_send_memory_location == hypre_MEMORY_DEVICE || - act_send_memory_location == hypre_MEMORY_UNIFIED ) - { - //hypre_HostPinnedFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - } - - hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation( - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle)); - if ( act_recv_memory_location == hypre_MEMORY_DEVICE || - act_recv_memory_location == hypre_MEMORY_UNIFIED ) - { - hypre_GpuProfilingPushRange("MPI-H2D"); - hypre_TMemcpy( hypre_ParCSRCommHandleRecvData(comm_handle), - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), - char, - hypre_ParCSRCommHandleNumRecvBytes(comm_handle), - HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST ); - hypre_GpuProfilingPopRange(); - //hypre_HostPinnedFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - } -#endif - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 13f5ea0719..1cdf7151ce 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -34,16 +34,10 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; - HYPRE_MemoryLocation send_memory_location; - HYPRE_MemoryLocation recv_memory_location; - HYPRE_Int num_send_bytes; - HYPRE_Int num_recv_bytes; - void *send_data; - void *recv_data; - void *send_data_buffer; - void *recv_data_buffer; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; + void *send_data; + void *recv_data; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; typedef hypre_ParCSRCommHandle 
hypre_ParCSRPersistentCommHandle; @@ -133,14 +127,8 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) *--------------------------------------------------------------------------*/ #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) (comm_handle -> send_memory_location) -#define hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) (comm_handle -> recv_memory_location) -#define hypre_ParCSRCommHandleNumSendBytes(comm_handle) (comm_handle -> num_send_bytes) -#define hypre_ParCSRCommHandleNumRecvBytes(comm_handle) (comm_handle -> num_recv_bytes) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendDataBuffer(comm_handle) (comm_handle -> send_data_buffer) -#define hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) (comm_handle -> recv_data_buffer) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) From 85f274b1170e95b157c776442a598d76b8bef841 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Mon, 11 Dec 2023 14:34:07 -0800 Subject: [PATCH 13/90] add lower level Tmemcpy --- src/utilities/memory.c | 8 ++++++++ src/utilities/memory.h | 5 +++++ src/utilities/memory_tracker.h | 11 +++++++++++ 3 files changed, 24 insertions(+) diff --git a/src/utilities/memory.c b/src/utilities/memory.c index cc1bb986a4..059b45b575 100644 --- a/src/utilities/memory.c +++ b/src/utilities/memory.c @@ -776,6 +776,14 @@ hypre_Memcpy_core(void *dst, void *src, size_t size, hypre_MemoryLocation loc_ds hypre_WrongMemoryLocation(); } + +void +_hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src) +{ + 
hypre_Memcpy_core(dst, src, size, loc_dst, loc_src); +} + /*--------------------------------------------------------------------------* * ExecPolicy *--------------------------------------------------------------------------*/ diff --git a/src/utilities/memory.h b/src/utilities/memory.h index 86464d644c..3fe4f282e6 100644 --- a/src/utilities/memory.h +++ b/src/utilities/memory.h @@ -158,6 +158,9 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ (hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(_hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) + #define hypre_TFree(ptr, location) \ ( hypre_Free((void *)ptr, location), ptr = NULL ) @@ -187,6 +190,8 @@ void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_Memor void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, diff --git a/src/utilities/memory_tracker.h b/src/utilities/memory_tracker.h index 72eb01469d..ee4abf5dee 100644 --- a/src/utilities/memory_tracker.h +++ b/src/utilities/memory_tracker.h @@ -160,6 +160,17 @@ extern hypre_MemoryTracker *_hypre_memory_tracker; } \ ) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +( \ +{ \ + _hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ + \ + hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ + location_dst, location_src, \ + __FILE__, __func__, __LINE__); \ +} \ +) + #define 
_hypre_TFree(ptr, location) \ ( \ { \ From 48caed5aa28a1523961594c2f5b00484460c63b8 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Mon, 11 Dec 2023 14:37:17 -0800 Subject: [PATCH 14/90] add actions in mpi_requests --- src/utilities/mpistubs.c | 214 ++++++++++++++++++++++++++++++++++----- src/utilities/mpistubs.h | 17 ++-- 2 files changed, 202 insertions(+), 29 deletions(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index b90798a2fe..33fcf54307 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -779,6 +779,123 @@ hypre_MPI_RequestFromMPI_Request(MPI_Request request) return hrequest; } +HYPRE_Int +hypre_MPI_RequestSetPostActionFree(void *ptr, + hypre_MemoryLocation ptr_location, + hypre_MPI_Request *request) +{ + hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + + HYPRE_Int nb = sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation); + char *data = hypre_CTAlloc(char, nb, HYPRE_MEMORY_HOST); + hypre_MPI_RequestPostAction(*request) = (void *) data; + + HYPRE_Int action_id = HYPRE_MPI_REQUEST_FREE; + + hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + // + hypre_TMemcpy(data, &ptr, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(data, &ptr_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_RequestSetPostActionCopy(void *dest, + hypre_MemoryLocation dest_location, + void *src, + hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, + hypre_MPI_Request *request) +{ + hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + + HYPRE_Int nb = 2 * (sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation)); + char * data = hypre_CTAlloc(char, nb, HYPRE_MEMORY_HOST); + hypre_MPI_RequestPostAction(*request) = (void *) data; + + HYPRE_Int action_id = HYPRE_MPI_REQUEST_COPY; + + 
hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + // + hypre_TMemcpy(data, &num_bytes, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + // + hypre_TMemcpy(data, &dest, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(data, &src, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(data, &dest_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // + hypre_TMemcpy(data, &src_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request) +{ + if (!hypre_MPI_RequestPostAction(*request)) + { + return hypre_error_flag; + } + + char *data = hypre_MPI_RequestPostAction(*request); + HYPRE_Int action_id; + + hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + + if (action_id == HYPRE_MPI_REQUEST_FREE) + { + void *ptr; + hypre_MemoryLocation ptr_location; + + hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + // action! 
+ _hypre_TFree(ptr, ptr_location); + } + else if (action_id == HYPRE_MPI_REQUEST_COPY) + { + void *dest, *src; + HYPRE_Int num_bytes; + hypre_MemoryLocation dest_location, src_location; + + hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + // + hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + // + hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // + hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + // action! + hypre_GpuProfilingPushRange("MPI-H2D"); + _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); + hypre_GpuProfilingPopRange(); + _hypre_TFree(src, src_location); + } + + hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -1161,23 +1278,65 @@ hypre_MPI_Irecv( void *buf, &hypre_MPI_RequestMPI_Request(*request)); } -#define TYPE_MACRO(MPI_CMD, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ -{ \ - if (datatype == HYPRE_MPI_DTYPE) \ - { \ - HYPRE_Int i; \ - HYPRE_DTYPE *data = (HYPRE_DTYPE *) buf; \ - for (i = 0; i < num; i++) \ - { \ - HYPRE_Int ip = procs[i]; \ - HYPRE_Int start = displs[i]; \ - HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; \ - MPI_CMD(data + start, len, HYPRE_MPI_DTYPE, \ - ip, tag, hypre_MPI_CommMPI_Comm(comm), \ - &hypre_MPI_RequestMPI_Request(requests[i])); \ - } \ - return hypre_error_flag; \ - } \ +#define TYPE_MACRO_SEND 0 +#define TYPE_MACRO_RECV 1 + +#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ +{ \ + if (datatype == HYPRE_MPI_DTYPE) \ + { \ + HYPRE_Int i; \ + HYPRE_DTYPE *data = (HYPRE_DTYPE *) buf; \ + HYPRE_Int ntot = displs[num]; \ + HYPRE_Int host_buf = memory_location != hypre_MEMORY_HOST; \ + if (host_buf) \ + { \ + data = _hypre_TAlloc(HYPRE_DTYPE, ntot, hypre_MEMORY_HOST); \ + if (SEND_RECV == TYPE_MACRO_SEND) \ + { \ + hypre_GpuProfilingPushRange("MPI-D2H"); \ + _hypre_TMemcpy(data, buf, HYPRE_DTYPE, ntot, \ + hypre_MEMORY_HOST, memory_location); \ + hypre_GpuProfilingPopRange(); \ + } \ + } \ + for (i = 0; i < num; i++) \ + { \ + HYPRE_Int ip = procs[i]; \ + HYPRE_Int start = displs[i]; \ + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ + MPI_CMD(data + start, len, HYPRE_MPI_DTYPE, \ + ip, tag, hypre_MPI_CommMPI_Comm(comm), \ + &hypre_MPI_RequestMPI_Request(requests[i])); \ + } \ + if (host_buf) \ + { \ + /* register post action in the first request */ \ + if (SEND_RECV == TYPE_MACRO_SEND) \ + { \ + if (num) \ + { \ + hypre_MPI_RequestSetPostActionFree(data, \ + hypre_MEMORY_HOST, \ + &requests[0]); \ + } \ + } \ + else if (SEND_RECV == TYPE_MACRO_RECV) \ + { \ + if (num) \ + { \ + HYPRE_Int num_bytes = ntot * sizeof(HYPRE_DTYPE); \ + hypre_MPI_RequestSetPostActionCopy(buf, \ + memory_location, \ + data, \ + hypre_MEMORY_HOST, \ + num_bytes, \ + &requests[0]); \ + } \ + } \ + } \ + return hypre_error_flag; \ + } \ } HYPRE_Int @@ -1191,9 +1350,11 @@ hypre_MPI_Isend_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - TYPE_MACRO(MPI_Isend, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Isend, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Isend, HYPRE_BigInt, 
HYPRE_MPI_BIG_INT); + hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); + + TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_BigInt, HYPRE_MPI_BIG_INT); return hypre_error_flag; } @@ -1209,9 +1370,11 @@ hypre_MPI_Irecv_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - TYPE_MACRO(MPI_Irecv, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Irecv, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Irecv, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); + + TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_BigInt, HYPRE_MPI_BIG_INT); return hypre_error_flag; } @@ -1364,6 +1527,8 @@ hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, hypre_MPI_Status *array_of_statuses ) { + hypre_GpuProfilingPushRange("hypre_MPI_Waitall"); + HYPRE_Int i, ierr; MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); @@ -1378,10 +1543,13 @@ hypre_MPI_Waitall( HYPRE_Int count, for (i = 0; i < count; i++) { hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; + hypre_MPI_RequestProcessPostAction(&array_of_requests[i]); } hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); + hypre_GpuProfilingPopRange(); + return ierr; } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index dd1c331624..384e4c3d45 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -206,17 +206,16 @@ typedef struct typedef MPI_Group hypre_MPI_Group; -typedef HYPRE_Int (*hypre_mpi_request_action) (void *); typedef struct { - MPI_Request mpi_request; - hypre_mpi_request_action post_wait_action; - void *post_wait_data; + 
MPI_Request mpi_request; + void *post_action; } hypre_MPI_Request; +#define HYPRE_MPI_REQUEST_FREE 1 +#define HYPRE_MPI_REQUEST_COPY 2 #define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestPostWaitAction(request) ((request).post_wait_action) -#define hypre_MPI_RequestPostWaitData(request) ((request).post_wait_data) +#define hypre_MPI_RequestPostAction(request) ((request).post_action) typedef MPI_Datatype hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; @@ -364,6 +363,12 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); +HYPRE_Int hypre_MPI_RequestSetPostActionCopy(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetPostActionFree(void *ptr, hypre_MemoryLocation ptr_location, + hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); From a2d56d992e100d944422513ffa18d7eb2e22b83e Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Mon, 11 Dec 2023 14:44:36 -0800 Subject: [PATCH 15/90] update util header --- src/utilities/_hypre_utilities.h | 33 ++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index cda12a45a0..987c72eab3 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -707,6 +707,9 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ (hypre_Memcpy((void 
*)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(_hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) + #define hypre_TFree(ptr, location) \ ( hypre_Free((void *)ptr, location), ptr = NULL ) @@ -736,6 +739,8 @@ void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_Memor void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, @@ -933,6 +938,17 @@ extern hypre_MemoryTracker *_hypre_memory_tracker; } \ ) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +( \ +{ \ + _hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ + \ + hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ + location_dst, location_src, \ + __FILE__, __func__, __LINE__); \ +} \ +) + #define _hypre_TFree(ptr, location) \ ( \ { \ @@ -1155,17 +1171,16 @@ typedef struct typedef MPI_Group hypre_MPI_Group; -typedef HYPRE_Int (*hypre_mpi_request_action) (void *); typedef struct { - MPI_Request mpi_request; - hypre_mpi_request_action post_wait_action; - void *post_wait_data; + MPI_Request mpi_request; + void *post_action; } hypre_MPI_Request; +#define HYPRE_MPI_REQUEST_FREE 1 +#define HYPRE_MPI_REQUEST_COPY 2 #define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestPostWaitAction(request) ((request).post_wait_action) -#define hypre_MPI_RequestPostWaitData(request) ((request).post_wait_data) +#define hypre_MPI_RequestPostAction(request) ((request).post_action) typedef MPI_Datatype 
hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; @@ -1313,6 +1328,12 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); +HYPRE_Int hypre_MPI_RequestSetPostActionCopy(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetPostActionFree(void *ptr, hypre_MemoryLocation ptr_location, + hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); From 6b87015211728bc252508dc0d18650fbe4c39f7a Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 12:33:12 -0800 Subject: [PATCH 16/90] make gpu aware mpi runtime option --- src/parcsr_ls/ame.c | 7 ++- src/parcsr_ls/ams.c | 7 ++- src/parcsr_ls/par_2s_interp_device.c | 21 +++++--- src/parcsr_ls/par_coarsen_device.c | 37 ++++++++----- src/parcsr_ls/par_indepset_device.c | 7 +-- src/parcsr_ls/par_interp_device.c | 28 +++++++--- src/parcsr_ls/par_lr_interp_device.c | 14 +++-- src/parcsr_ls/par_lr_restr_device.c | 7 ++- src/parcsr_ls/par_mod_multi_interp_device.c | 35 +++++++++---- src/parcsr_ls/par_strength_device.c | 7 ++- src/parcsr_mv/par_csr_fffc_device.c | 14 +++-- src/parcsr_mv/par_csr_matop_device.c | 21 +++++--- src/parcsr_mv/par_csr_matvec_device.c | 14 +++-- src/parcsr_mv/par_csr_triplemat_device.c | 14 +++-- src/struct_mv/struct_communication.c | 58 +++++++++++---------- src/utilities/HYPRE_handle.c | 8 +++ src/utilities/HYPRE_utilities.h | 1 + src/utilities/device_utils.c | 6 +++ src/utilities/handle.c | 19 +++++++ src/utilities/handle.h | 6 +++ 
src/utilities/protos.h | 2 + 21 files changed, 235 insertions(+), 98 deletions(-) diff --git a/src/parcsr_ls/ame.c b/src/parcsr_ls/ame.c index e587dac417..8d90f9c8b9 100644 --- a/src/parcsr_ls/ame.c +++ b/src/parcsr_ls/ame.c @@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver) int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index 1344392155..1125028e88 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -589,9 +589,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/par_2s_interp_device.c b/src/parcsr_ls/par_2s_interp_device.c index 0b6db66e81..680f560c75 100644 --- a/src/parcsr_ls/par_2s_interp_device.c +++ b/src/parcsr_ls/par_2s_interp_device.c @@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -423,9 +426,12 @@ 
hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_coarsen_device.c b/src/parcsr_ls/par_coarsen_device.c index 4d77d2db79..cc343c72ef 100644 --- a/src/parcsr_ls/par_coarsen_device.c +++ b/src/parcsr_ls/par_coarsen_device.c @@ -170,9 +170,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, (HYPRE_Int *) send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, @@ -255,10 +258,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S, /* compute local column nnz of the offd part */ hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - /* RL: make sure measure_offd is ready before issuing 
GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + /* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */ + hypre_ForceSyncComputeStream(hypre_handle()); + } /* send local column nnz of the offd part to neighbors */ comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd, @@ -382,9 +386,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */ real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -577,9 +584,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -598,9 +608,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in CF_marker_diag, int_send_buf); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, diff --git a/src/parcsr_ls/par_indepset_device.c b/src/parcsr_ls/par_indepset_device.c index 1ba2150826..bd7938fcdf 100644 
--- a/src/parcsr_ls/par_indepset_device.c +++ b/src/parcsr_ls/par_indepset_device.c @@ -179,10 +179,11 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S, /*-------------------------------------------------------------------- * Exchange boundary data for IS_marker: send external IS to internal *-------------------------------------------------------------------*/ -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg, HYPRE_MEMORY_DEVICE, IS_marker_offd, diff --git a/src/parcsr_ls/par_interp_device.c b/src/parcsr_ls/par_interp_device.c index 13fd7a8b29..416bfb78b9 100644 --- a/src/parcsr_ls/par_interp_device.c +++ b/src/parcsr_ls/par_interp_device.c @@ -163,9 +163,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -195,9 +198,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -1151,9 +1157,12 @@ 
hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1218,9 +1227,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data, diff --git a/src/parcsr_ls/par_lr_interp_device.c b/src/parcsr_ls/par_lr_interp_device.c index bf537faed8..1f1bb04871 100644 --- a/src/parcsr_ls/par_lr_interp_device.c +++ b/src/parcsr_ls/par_lr_interp_device.c @@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready 
before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_lr_restr_device.c b/src/parcsr_ls/par_lr_restr_device.c index d90712bab6..2c86d6854f 100644 --- a/src/parcsr_ls/par_lr_restr_device.c +++ b/src/parcsr_ls/par_lr_restr_device.c @@ -284,9 +284,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i, diff --git a/src/parcsr_ls/par_mod_multi_interp_device.c b/src/parcsr_ls/par_mod_multi_interp_device.c index 5eead8d5c0..f2a43e7e06 100644 --- a/src/parcsr_ls/par_mod_multi_interp_device.c +++ b/src/parcsr_ls/par_mod_multi_interp_device.c @@ -384,9 +384,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE); @@ -419,9 +422,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure 
int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */ @@ -573,9 +579,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1098,9 +1107,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, @@ -1395,9 +1407,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, diff --git a/src/parcsr_ls/par_strength_device.c b/src/parcsr_ls/par_strength_device.c index 9d5d2ec140..d28ad3ece5 100644 --- a/src/parcsr_ls/par_strength_device.c +++ 
b/src/parcsr_ls/par_strength_device.c @@ -138,9 +138,12 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index 32e810207a..f36b9f3485 100644 --- a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -412,9 +412,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1595,9 +1598,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_mv/par_csr_matop_device.c b/src/parcsr_mv/par_csr_matop_device.c index 428910fec1..d063bfe440 100644 --- a/src/parcsr_mv/par_csr_matop_device.c +++ 
b/src/parcsr_mv/par_csr_matop_device.c @@ -837,10 +837,11 @@ hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A, NULL, &comm_pkg_j); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* init communication */ /* ja */ @@ -1488,9 +1489,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif if (!hypre_ParCSRMatrixCommPkg(A)) @@ -1924,9 +1928,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A, #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* A_diag = diag(ld) * A_diag * diag(rd) */ diff --git a/src/parcsr_mv/par_csr_matvec_device.c b/src/parcsr_mv/par_csr_matvec_device.c index 66bd3229ba..1636ef253a 100644 --- a/src/parcsr_mv/par_csr_matvec_device.c +++ b/src/parcsr_mv/par_csr_matvec_device.c @@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime(); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { 
+ hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* when using GPUs, start local matvec first in order to overlap with communication */ @@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, } } -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* when using GPUs, start local matvec first in order to overlap with communication */ if (diagT) diff --git a/src/parcsr_mv/par_csr_triplemat_device.c b/src/parcsr_mv/par_csr_triplemat_device.c index b9677cfc72..13071f0fcd 100644 --- a/src/parcsr_mv/par_csr_triplemat_device.c +++ b/src/parcsr_mv/par_csr_triplemat_device.c @@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; @@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index d18c9660ab..617a531f7e 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -962,38 +962,41 @@ 
hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) { -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - hypre_ForceSyncComputeStream(hypre_handle()); - - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; -#else - memory_location_mpi = HYPRE_MEMORY_HOST; + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); - send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) + send_buffers_mpi = send_buffers; + recv_buffers_mpi = recv_buffers; + } + else { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_sends; i++) + memory_location_mpi = HYPRE_MEMORY_HOST; + + send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); + if (num_sends > 0) { - send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); + size = hypre_CommPkgSendBufsize(comm_pkg); + send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_sends; i++) + { + send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); + } + hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, + memory_location); } - hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_recvs; i++) + recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); + if (num_recvs > 0) { - recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + 
(recv_buffers[i] - recv_buffers[i - 1]); + size = hypre_CommPkgRecvBufsize(comm_pkg); + recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_recvs; i++) + { + recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); + } } } -#endif } else #endif @@ -1151,9 +1154,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_MemoryLocation memory_location_mpi = memory_location; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#if !defined(HYPRE_WITH_GPU_AWARE_MPI) - memory_location_mpi = HYPRE_MEMORY_HOST; -#endif + if (!hypre_GetGpuAwareMPI()) + { + memory_location_mpi = HYPRE_MEMORY_HOST; + } #endif /*-------------------------------------------------------------------- diff --git a/src/utilities/HYPRE_handle.c b/src/utilities/HYPRE_handle.c index 9be97512ce..2a12657b68 100644 --- a/src/utilities/HYPRE_handle.c +++ b/src/utilities/HYPRE_handle.c @@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand ) return hypre_SetUseGpuRand(use_gpu_rand); } +/*-------------------------------------------------------------------------- + * HYPRE_SetGPUAwareMPI + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ + return hypre_SetGpuAwareMPI(use_gpu_aware_mpi); +} diff --git a/src/utilities/HYPRE_utilities.h b/src/utilities/HYPRE_utilities.h index 1ce387a26e..a92fa078f1 100644 --- a/src/utilities/HYPRE_utilities.h +++ b/src/utilities/HYPRE_utilities.h @@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor ); #define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor) HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor ); HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand ); +HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); /*-------------------------------------------------------------------------- * Base 
objects diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index 5a407a075a..be34a112f4 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -79,6 +79,12 @@ hypre_DeviceDataCreate() hypre_DeviceDataCubUvmAllocator(data) = NULL; #endif +#if defined(HYPRE_WITH_GPU_AWARE_MPI) + hypre_DeviceDataUseGpuAwareMPI(data) = 1; +#else + hypre_DeviceDataUseGpuAwareMPI(data) = 0; +#endif + return data; } diff --git a/src/utilities/handle.c b/src/utilities/handle.c index 7e69666cb5..d9057ffe23 100644 --- a/src/utilities/handle.c +++ b/src/utilities/handle.c @@ -146,3 +146,22 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func) #endif return hypre_error_flag; } + +HYPRE_Int +hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi; +#endif + return hypre_error_flag; +} + +HYPRE_Int +hypre_GetGpuAwareMPI() +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + return hypre_HandleUseGpuAwareMPI(hypre_handle()); +#else + return 0; +#endif +} diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 44973dedcd..88b16782d6 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -29,6 +29,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -71,6 +76,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) 
hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 2e77b7788b..2e1bc44144 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From dd212da42be4ccbbe7108c4eeebe2c584bee427e Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 12:33:12 -0800 Subject: [PATCH 17/90] make gpu aware mpi runtime option --- src/parcsr_ls/ame.c | 7 ++- src/parcsr_ls/ams.c | 7 ++- src/parcsr_ls/par_2s_interp_device.c | 21 +++++--- src/parcsr_ls/par_coarsen_device.c | 37 ++++++++----- src/parcsr_ls/par_indepset_device.c | 7 +-- src/parcsr_ls/par_interp_device.c | 28 +++++++--- src/parcsr_ls/par_lr_interp_device.c | 14 +++-- src/parcsr_ls/par_lr_restr_device.c | 7 ++- src/parcsr_ls/par_mod_multi_interp_device.c | 35 +++++++++---- src/parcsr_ls/par_strength_device.c | 7 ++- src/parcsr_mv/par_csr_fffc_device.c | 14 +++-- src/parcsr_mv/par_csr_matop_device.c | 21 +++++--- src/parcsr_mv/par_csr_matvec_device.c | 14 +++-- src/parcsr_mv/par_csr_triplemat_device.c | 14 +++-- src/struct_mv/struct_communication.c | 58 +++++++++++---------- src/utilities/HYPRE_handle.c | 8 +++ src/utilities/HYPRE_utilities.h | 1 + src/utilities/device_utils.c | 6 +++ src/utilities/handle.c | 19 +++++++ src/utilities/handle.h | 6 +++ src/utilities/protos.h | 2 + 21 files changed, 235 insertions(+), 98 deletions(-) diff --git a/src/parcsr_ls/ame.c b/src/parcsr_ls/ame.c index 
e587dac417..8d90f9c8b9 100644 --- a/src/parcsr_ls/ame.c +++ b/src/parcsr_ls/ame.c @@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver) int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index 12594b6559..b40a767271 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -595,9 +595,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/par_2s_interp_device.c b/src/parcsr_ls/par_2s_interp_device.c index 0b6db66e81..680f560c75 100644 --- a/src/parcsr_ls/par_2s_interp_device.c +++ b/src/parcsr_ls/par_2s_interp_device.c @@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -423,9 +426,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && 
defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_coarsen_device.c b/src/parcsr_ls/par_coarsen_device.c index f9b008b114..42253bcd2c 100644 --- a/src/parcsr_ls/par_coarsen_device.c +++ b/src/parcsr_ls/par_coarsen_device.c @@ -169,9 +169,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, (HYPRE_Int *) send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, @@ -254,10 +257,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S, /* compute local column nnz of the offd part */ hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - /* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + /* RL: make sure 
measure_offd is ready before issuing GPU-GPU MPI */ + hypre_ForceSyncComputeStream(hypre_handle()); + } /* send local column nnz of the offd part to neighbors */ comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd, @@ -381,9 +385,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */ real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -576,9 +583,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -597,9 +607,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in CF_marker_diag, int_send_buf); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, diff --git a/src/parcsr_ls/par_indepset_device.c b/src/parcsr_ls/par_indepset_device.c index 1ba2150826..bd7938fcdf 100644 --- a/src/parcsr_ls/par_indepset_device.c +++ b/src/parcsr_ls/par_indepset_device.c @@ -179,10 +179,11 @@ 
hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S, /*-------------------------------------------------------------------- * Exchange boundary data for IS_marker: send external IS to internal *-------------------------------------------------------------------*/ -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg, HYPRE_MEMORY_DEVICE, IS_marker_offd, diff --git a/src/parcsr_ls/par_interp_device.c b/src/parcsr_ls/par_interp_device.c index 67528b6e47..0fa03f5b66 100644 --- a/src/parcsr_ls/par_interp_device.c +++ b/src/parcsr_ls/par_interp_device.c @@ -162,9 +162,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -194,9 +197,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -1149,9 +1155,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if 
defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1216,9 +1225,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data, diff --git a/src/parcsr_ls/par_lr_interp_device.c b/src/parcsr_ls/par_lr_interp_device.c index bf537faed8..1f1bb04871 100644 --- a/src/parcsr_ls/par_lr_interp_device.c +++ b/src/parcsr_ls/par_lr_interp_device.c @@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if 
(hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_lr_restr_device.c b/src/parcsr_ls/par_lr_restr_device.c index 33aae87221..edca5b180f 100644 --- a/src/parcsr_ls/par_lr_restr_device.c +++ b/src/parcsr_ls/par_lr_restr_device.c @@ -283,9 +283,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i, diff --git a/src/parcsr_ls/par_mod_multi_interp_device.c b/src/parcsr_ls/par_mod_multi_interp_device.c index cda32dd3f5..034fbc15c1 100644 --- a/src/parcsr_ls/par_mod_multi_interp_device.c +++ b/src/parcsr_ls/par_mod_multi_interp_device.c @@ -383,9 +383,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE); @@ -418,9 +421,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - 
hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */ @@ -572,9 +578,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1096,9 +1105,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, @@ -1392,9 +1404,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, diff --git a/src/parcsr_ls/par_strength_device.c b/src/parcsr_ls/par_strength_device.c index 9d5d2ec140..d28ad3ece5 100644 --- a/src/parcsr_ls/par_strength_device.c +++ b/src/parcsr_ls/par_strength_device.c @@ -138,9 +138,12 @@ 
hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index dbd7dd133d..5a472b2557 100644 --- a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -411,9 +411,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1593,9 +1596,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_mv/par_csr_matop_device.c b/src/parcsr_mv/par_csr_matop_device.c index a97d11cc43..9da1176763 100644 --- a/src/parcsr_mv/par_csr_matop_device.c +++ b/src/parcsr_mv/par_csr_matop_device.c @@ -837,10 +837,11 @@ 
hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A, NULL, &comm_pkg_j); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* init communication */ /* ja */ @@ -1491,9 +1492,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif if (!hypre_ParCSRMatrixCommPkg(A)) @@ -1927,9 +1931,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A, #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* A_diag = diag(ld) * A_diag * diag(rd) */ diff --git a/src/parcsr_mv/par_csr_matvec_device.c b/src/parcsr_mv/par_csr_matvec_device.c index 66bd3229ba..1636ef253a 100644 --- a/src/parcsr_mv/par_csr_matvec_device.c +++ b/src/parcsr_mv/par_csr_matvec_device.c @@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime(); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* 
when using GPUs, start local matvec first in order to overlap with communication */ @@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, } } -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* when using GPUs, start local matvec first in order to overlap with communication */ if (diagT) diff --git a/src/parcsr_mv/par_csr_triplemat_device.c b/src/parcsr_mv/par_csr_triplemat_device.c index b9677cfc72..13071f0fcd 100644 --- a/src/parcsr_mv/par_csr_triplemat_device.c +++ b/src/parcsr_mv/par_csr_triplemat_device.c @@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; @@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 600df83685..1d2660252e 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -963,38 +963,41 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, #if 
defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) { -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - hypre_ForceSyncComputeStream(hypre_handle()); - - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; -#else - memory_location_mpi = HYPRE_MEMORY_HOST; + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); - send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) + send_buffers_mpi = send_buffers; + recv_buffers_mpi = recv_buffers; + } + else { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_sends; i++) + memory_location_mpi = HYPRE_MEMORY_HOST; + + send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); + if (num_sends > 0) { - send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); + size = hypre_CommPkgSendBufsize(comm_pkg); + send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_sends; i++) + { + send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); + } + hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, + memory_location); } - hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_recvs; i++) + recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); + if (num_recvs > 0) { - recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); + size = 
hypre_CommPkgRecvBufsize(comm_pkg); + recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_recvs; i++) + { + recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); + } } } -#endif } else #endif @@ -1152,9 +1155,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_MemoryLocation memory_location_mpi = memory_location; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#if !defined(HYPRE_WITH_GPU_AWARE_MPI) - memory_location_mpi = HYPRE_MEMORY_HOST; -#endif + if (!hypre_GetGpuAwareMPI()) + { + memory_location_mpi = HYPRE_MEMORY_HOST; + } #endif /*-------------------------------------------------------------------- diff --git a/src/utilities/HYPRE_handle.c b/src/utilities/HYPRE_handle.c index 9be97512ce..2a12657b68 100644 --- a/src/utilities/HYPRE_handle.c +++ b/src/utilities/HYPRE_handle.c @@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand ) return hypre_SetUseGpuRand(use_gpu_rand); } +/*-------------------------------------------------------------------------- + * HYPRE_SetGPUAwareMPI + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ + return hypre_SetGpuAwareMPI(use_gpu_aware_mpi); +} diff --git a/src/utilities/HYPRE_utilities.h b/src/utilities/HYPRE_utilities.h index 1ce387a26e..a92fa078f1 100644 --- a/src/utilities/HYPRE_utilities.h +++ b/src/utilities/HYPRE_utilities.h @@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor ); #define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor) HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor ); HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand ); +HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); /*-------------------------------------------------------------------------- * Base objects diff --git a/src/utilities/device_utils.c 
b/src/utilities/device_utils.c index d223976750..79db698a7a 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -79,6 +79,12 @@ hypre_DeviceDataCreate() hypre_DeviceDataCubUvmAllocator(data) = NULL; #endif +#if defined(HYPRE_WITH_GPU_AWARE_MPI) + hypre_DeviceDataUseGpuAwareMPI(data) = 1; +#else + hypre_DeviceDataUseGpuAwareMPI(data) = 0; +#endif + return data; } diff --git a/src/utilities/handle.c b/src/utilities/handle.c index 792a17b034..1dfe61e136 100644 --- a/src/utilities/handle.c +++ b/src/utilities/handle.c @@ -182,3 +182,22 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func) return hypre_error_flag; } + +HYPRE_Int +hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi; +#endif + return hypre_error_flag; +} + +HYPRE_Int +hypre_GetGpuAwareMPI() +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + return hypre_HandleUseGpuAwareMPI(hypre_handle()); +#else + return 0; +#endif +} diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 44973dedcd..88b16782d6 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -29,6 +29,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -71,6 +76,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define 
hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 2e77b7788b..2e1bc44144 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From 9a7dc2fee02c4600601d665ad99269dfffc07389 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 12:58:08 -0800 Subject: [PATCH 18/90] bug fix and add test options --- src/test/ij.c | 9 +++++++++ src/test/sstruct.c | 9 +++++++++ src/test/struct.c | 8 ++++++++ src/utilities/device_utils.c | 6 ------ src/utilities/general.c | 6 ++++++ 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/test/ij.c b/src/test/ij.c index 3ac0feafc8..1b8613c110 100644 --- a/src/test/ij.c +++ b/src/test/ij.c @@ -499,6 +499,8 @@ main( hypre_int argc, char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0}; #endif + HYPRE_Int gpu_aware_mpi = 0; + /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); @@ -1597,6 +1599,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2716,6 +2723,8 @@ main( hypre_int argc, HYPRE_SetUseGpuRand(use_curand); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /*----------------------------------------------------------- * Set up matrix *-----------------------------------------------------------*/ diff --git a/src/test/sstruct.c b/src/test/sstruct.c index 
2f5282423e..d1ad10963a 100644 --- a/src/test/sstruct.c +++ b/src/test/sstruct.c @@ -2460,6 +2460,8 @@ main( hypre_int argc, global_data.memory_location = memory_location; + HYPRE_Int gpu_aware_mpi = 0; + /*----------------------------------------------------------- * Initialize some stuff *-----------------------------------------------------------*/ @@ -2902,6 +2904,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2923,6 +2930,8 @@ main( hypre_int argc, HYPRE_SetSpGemmUseVendor(spgemm_use_vendor); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + if ( solver_id == 39 && lobpcgFlag ) { solver_id = 10; diff --git a/src/test/struct.c b/src/test/struct.c index 41d2b84854..c31c8db29c 100644 --- a/src/test/struct.c +++ b/src/test/struct.c @@ -188,6 +188,7 @@ main( hypre_int argc, HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE; HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE; #endif + HYPRE_Int gpu_aware_mpi = 0; //HYPRE_Int device_level = -2; @@ -565,6 +566,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } /* end lobpcg */ else { @@ -583,6 +589,8 @@ main( hypre_int argc, /* default execution policy */ HYPRE_SetExecutionPolicy(default_exec_policy); + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /* begin lobpcg */ if ( solver_id == 0 && lobpcgFlag ) diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index be34a112f4..5a407a075a 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -79,12 +79,6 @@ hypre_DeviceDataCreate() hypre_DeviceDataCubUvmAllocator(data) = NULL; #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - 
hypre_DeviceDataUseGpuAwareMPI(data) = 1; -#else - hypre_DeviceDataUseGpuAwareMPI(data) = 0; -#endif - return data; } diff --git a/src/utilities/general.c b/src/utilities/general.c index 5947eaed3f..632a7098a7 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -44,6 +44,12 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif +#if defined(HYPRE_WITH_GPU_AWARE_MPI) + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; +#else + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; +#endif + return hypre_handle_; } From d69a8a88dcb368bdab4cbd018885a513a638d219 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 12:58:08 -0800 Subject: [PATCH 19/90] bug fix and add test options --- src/test/ij.c | 9 +++++++++ src/test/sstruct.c | 9 +++++++++ src/test/struct.c | 8 ++++++++ src/utilities/device_utils.c | 6 ------ src/utilities/general.c | 6 ++++++ 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/test/ij.c b/src/test/ij.c index 5e5fe3842c..0ee26e3f7d 100644 --- a/src/test/ij.c +++ b/src/test/ij.c @@ -499,6 +499,8 @@ main( hypre_int argc, char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0}; #endif + HYPRE_Int gpu_aware_mpi = 0; + /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); @@ -1597,6 +1599,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2716,6 +2723,8 @@ main( hypre_int argc, HYPRE_SetUseGpuRand(use_curand); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /*----------------------------------------------------------- * Set up matrix *-----------------------------------------------------------*/ diff --git a/src/test/sstruct.c b/src/test/sstruct.c index fc7c186d57..542c45206f 100644 --- a/src/test/sstruct.c +++ b/src/test/sstruct.c @@ -2460,6 +2460,8 @@ main( hypre_int 
argc, global_data.memory_location = memory_location; + HYPRE_Int gpu_aware_mpi = 0; + /*----------------------------------------------------------- * Initialize some stuff *-----------------------------------------------------------*/ @@ -2903,6 +2905,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2924,6 +2931,8 @@ main( hypre_int argc, HYPRE_SetSpGemmUseVendor(spgemm_use_vendor); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + if ( solver_id == 39 && lobpcgFlag ) { solver_id = 10; diff --git a/src/test/struct.c b/src/test/struct.c index 4295f4aa0a..a192142884 100644 --- a/src/test/struct.c +++ b/src/test/struct.c @@ -188,6 +188,7 @@ main( hypre_int argc, HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE; HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE; #endif + HYPRE_Int gpu_aware_mpi = 0; //HYPRE_Int device_level = -2; @@ -566,6 +567,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } /* end lobpcg */ else { @@ -584,6 +590,8 @@ main( hypre_int argc, /* default execution policy */ HYPRE_SetExecutionPolicy(default_exec_policy); + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /* begin lobpcg */ if ( solver_id == 0 && lobpcgFlag ) diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index 79db698a7a..d223976750 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -79,12 +79,6 @@ hypre_DeviceDataCreate() hypre_DeviceDataCubUvmAllocator(data) = NULL; #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - hypre_DeviceDataUseGpuAwareMPI(data) = 1; -#else - hypre_DeviceDataUseGpuAwareMPI(data) = 0; -#endif - return data; } diff 
--git a/src/utilities/general.c b/src/utilities/general.c index fd1795fb66..764a98cdaa 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -44,6 +44,12 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif +#if defined(HYPRE_WITH_GPU_AWARE_MPI) + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; +#else + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; +#endif + return hypre_handle_; } From 43f4ca8cf8db49e6da7c03b3c555917588812566 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 12 Dec 2023 12:59:55 -0800 Subject: [PATCH 20/90] run header --- src/utilities/_hypre_utilities.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 513623a8db..d0edeff2ca 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1779,6 +1779,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -1821,6 +1826,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) @@ -2373,6 +2379,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int 
hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From 98260a1fa55c60f8c796047e21367108eedca6ba Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 12 Dec 2023 13:27:05 -0800 Subject: [PATCH 21/90] Update src/utilities/handle.c Co-authored-by: Victor A. P. Magri <50467563+victorapm@users.noreply.github.com> --- src/utilities/handle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utilities/handle.c b/src/utilities/handle.c index 1dfe61e136..edb6019ecc 100644 --- a/src/utilities/handle.c +++ b/src/utilities/handle.c @@ -188,6 +188,8 @@ hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) { #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi; +#else + HYPRE_UNUSED_VAR(use_gpu_aware_mpi); #endif return hypre_error_flag; } From ed55acc82564bd4fa38eaa5c0932948f49cdb00f Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 12 Dec 2023 13:27:38 -0800 Subject: [PATCH 22/90] bug fix --- src/utilities/general.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utilities/general.c b/src/utilities/general.c index 764a98cdaa..16491b04c5 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -44,10 +44,12 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) #if defined(HYPRE_WITH_GPU_AWARE_MPI) hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; #else hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; +#endif #endif return hypre_handle_; From 655aac938a55c14ba7f3b8c3b0981aa56494d0c7 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 14:00:36 -0800 Subject: [PATCH 23/90] run header --- src/utilities/_hypre_utilities.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 987c72eab3..a91a56dab7 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1818,6 +1818,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -1860,6 +1865,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) @@ -2412,6 +2418,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From 3897f3325be7f54a21f69f897450474b2d4bedab Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 14:01:26 -0800 Subject: [PATCH 24/90] add gpu_mpi to the macro --- src/utilities/mpistubs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 33fcf54307..56df2e3f57 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1287,8 +1287,9 @@ hypre_MPI_Irecv( void *buf, { \ HYPRE_Int i; \ HYPRE_DTYPE *data = 
(HYPRE_DTYPE *) buf; \ - HYPRE_Int ntot = displs[num]; \ - HYPRE_Int host_buf = memory_location != hypre_MEMORY_HOST; \ + HYPRE_Int ntot = displs[num]; \ + HYPRE_Int gpu_mpi = hypre_GetGpuAwareMPI(); \ + HYPRE_Int host_buf = !gpu_mpi && memory_location != hypre_MEMORY_HOST; \ if (host_buf) \ { \ data = _hypre_TAlloc(HYPRE_DTYPE, ntot, hypre_MEMORY_HOST); \ From 5e7373bbe3487b313b55bf918f1331e95a1ad664 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 12 Dec 2023 20:02:04 -0800 Subject: [PATCH 25/90] fix compile errors --- src/utilities/_hypre_utilities.h | 2 +- src/utilities/handle.c | 2 +- src/utilities/protos.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index d0edeff2ca..faa40bedcf 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -2380,7 +2380,7 @@ HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); -HYPRE_Int hypre_GetGpuAwareMPI(); +HYPRE_Int hypre_GetGpuAwareMPI(void); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); diff --git a/src/utilities/handle.c b/src/utilities/handle.c index edb6019ecc..faeb9f48c8 100644 --- a/src/utilities/handle.c +++ b/src/utilities/handle.c @@ -195,7 +195,7 @@ hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) } HYPRE_Int -hypre_GetGpuAwareMPI() +hypre_GetGpuAwareMPI(void) { #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) return hypre_HandleUseGpuAwareMPI(hypre_handle()); diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 2e1bc44144..33414521e2 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -390,7 +390,7 @@ HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int 
hypre_SetUserDeviceMfree(GPUMfreeFunc func); HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); -HYPRE_Int hypre_GetGpuAwareMPI(); +HYPRE_Int hypre_GetGpuAwareMPI(void); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From 4f2e63fbf8e8a94a9ec1ff862b118234cc5c7663 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 12 Dec 2023 21:17:53 -0800 Subject: [PATCH 26/90] update headers --- src/utilities/_hypre_utilities.h | 4 ++++ src/utilities/mpistubs.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index c94b1ce47c..818fdd2588 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1354,6 +1354,10 @@ HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #ifdef __cplusplus } #endif diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 384e4c3d45..74ffdd803f 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -379,6 +379,10 @@ HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request 
*requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #ifdef __cplusplus } #endif From 01b2abfdd7ee7a919d50a1dc4b08119941c2ce62 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 12 Dec 2023 21:21:44 -0800 Subject: [PATCH 27/90] run header --- src/utilities/_hypre_utilities.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 452e100286..b7614a0b11 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -2432,11 +2432,7 @@ HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); -<<<<<<< HEAD -HYPRE_Int hypre_GetGpuAwareMPI(); -======= HYPRE_Int hypre_GetGpuAwareMPI(void); ->>>>>>> 5e7373bbe3487b313b55bf918f1331e95a1ad664 /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); From 4845d70b414ef30f42bf759cda354f2dee004d7f Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 14 Dec 2023 09:04:22 -0800 Subject: [PATCH 28/90] persistent mpi --- src/parcsr_ls/par_relax.c | 10 +- src/parcsr_mv/_hypre_parcsr_mv.h | 179 ++++++++--- src/parcsr_mv/par_csr_communication.c | 447 +++++++++----------------- src/parcsr_mv/par_csr_communication.h | 159 ++++++--- 
src/parcsr_mv/par_csr_matop.c | 18 +- src/parcsr_mv/par_csr_matvec.c | 34 +- src/parcsr_mv/protos.h | 20 +- src/utilities/_hypre_utilities.h | 36 ++- src/utilities/mpistubs.c | 261 +++++++++------ src/utilities/mpistubs.h | 36 ++- 10 files changed, 657 insertions(+), 543 deletions(-) diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index e41f10fdaf..7817470bf7 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -789,9 +789,9 @@ hypre_BoomerAMGRelaxHybridGaussSeidel_core( hypre_ParCSRMatrix *A, num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); #if defined(HYPRE_USING_PERSISTENT_COMM) - persistent_comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); - v_buf_data = (HYPRE_Real *) hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); - v_ext_data = (HYPRE_Real *) hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + persistent_comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + v_buf_data = (HYPRE_Real *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); + v_ext_data = (HYPRE_Real *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); #else v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), @@ -815,13 +815,13 @@ hypre_BoomerAMGRelaxHybridGaussSeidel_core( hypre_ParCSRMatrix *A, #endif #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, HYPRE_MEMORY_HOST, v_buf_data); + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, v_buf_data, v_ext_data); #endif #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, HYPRE_MEMORY_HOST, v_ext_data); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h 
b/src/parcsr_mv/_hypre_parcsr_mv.h index 2e7986f5ae..8a37970081 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -28,7 +28,6 @@ extern "C" { * Structure containing information for doing communications *--------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM typedef enum CommPkgJobType { HYPRE_COMM_PKG_JOB_COMPLEX = 0, @@ -37,9 +36,89 @@ typedef enum CommPkgJobType HYPRE_COMM_PKG_JOB_INT_TRANSPOSE, HYPRE_COMM_PKG_JOB_BIGINT, HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE, - NUM_OF_COMM_PKG_JOB_TYPE, + NUM_OF_COMM_PKG_JOB_TYPE } CommPkgJobType; -#endif + +static inline CommPkgJobType +hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) +{ + CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + switch (job) + { + case 1: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + break; + case 2: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; + break; + case 11: + job_type = HYPRE_COMM_PKG_JOB_INT; + break; + case 12: + job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; + break; + case 21: + job_type = HYPRE_COMM_PKG_JOB_BIGINT; + break; + case 22: + job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; + break; + } + + return job_type; +} + +static inline hypre_MPI_Datatype +hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) +{ + hypre_MPI_Datatype dtype = HYPRE_MPI_COMPLEX; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + dtype = HYPRE_MPI_COMPLEX; + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + dtype = HYPRE_MPI_INT; + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + dtype = HYPRE_MPI_BIG_INT; + break; + default: + break; + } + + return dtype; +} + +static inline HYPRE_Int +hypre_ParCSRCommHandleGetDataTypeSize(HYPRE_Int job) +{ + HYPRE_Int nbytes = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case 
HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + nbytes = (HYPRE_Int) sizeof(HYPRE_Complex); + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + nbytes = (HYPRE_Int) sizeof(HYPRE_Int); + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + nbytes = (HYPRE_Int) sizeof(HYPRE_BigInt); + break; + default: + break; + } + + return nbytes; +} /*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle @@ -55,6 +134,17 @@ typedef struct hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; +/*-------------------------------------------------------------------------- + * Accessor macros: hypre_ParCSRCommHandle + *--------------------------------------------------------------------------*/ + +#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) + typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; typedef struct _hypre_ParCSRCommPkg @@ -72,8 +162,8 @@ typedef struct _hypre_ParCSRCommPkg /* remote communication information */ hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; -#ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) /* temporary memory for matvec. cudaMalloc is expensive. 
alloc once and reuse */ @@ -87,31 +177,33 @@ typedef struct _hypre_ParCSRCommPkg * Accessor macros: hypre_ParCSRCommPkg *--------------------------------------------------------------------------*/ -#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) -#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) -#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) -#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) -#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) -#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) -#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) -#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) -#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) -#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) -#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) -#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) -#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) -#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) -#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) -#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) -#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) -#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) -#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) -#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) +#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) +#define hypre_ParCSRCommPkgNumSends(comm_pkg) 
(comm_pkg -> num_sends) +#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) +#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) +#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) +#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) +#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) +#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) +#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) +#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) +#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) +#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) +#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) +#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) +#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) +#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) +#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) +#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) +#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgPersistentCommHandles(comm_pkg) (comm_pkg -> persistent_comm_handles) +#define hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg,i) (comm_pkg -> persistent_comm_handles[i]) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) -#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) -#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) +#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) +#define 
hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) +#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) #endif static inline void @@ -139,17 +231,6 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) #endif } -/*-------------------------------------------------------------------------- - * Accessor macros: hypre_ParCSRCommHandle - *--------------------------------------------------------------------------*/ - -#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) - #endif /* HYPRE_PAR_CSR_COMMUNICATION_HEADER */ /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other @@ -1008,15 +1089,17 @@ HYPRE_Int hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, HYPRE HYPRE_Int hypre_ParCSRCommPkgCreateMatrixE( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); #ifdef HYPRE_USING_PERSISTENT_COMM -hypre_ParCSRPersistentCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -hypre_ParCSRPersistentCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -void hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRPersistentCommHandle *comm_handle); -void hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, void *send_data); -void hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRPersistentCommHandle *comm_handle, - 
HYPRE_MemoryLocation recv_memory_location, void *recv_data); +hypre_ParCSRCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); +HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); +HYPRE_Int hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 1eb126646d..6a7aebb57e 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -7,35 +7,60 @@ #include "_hypre_parcsr_mv.h" -/*==========================================================================*/ +/*--------------------------------------------------------------------------------------- + * hypre_ParCSR(Persistent)CommHandleCreate sets up a communication handle, + * posts receives and initiates sends. It always requires num_sends, + * num_recvs, recv_procs and send_procs to be set in comm_pkg. + * There are different options for job: + * job = 1 : is used to initialize communication exchange for the parts + * of vector needed to perform a Matvec, it requires send_data + * and recv_data to be doubles, recv_vec_starts and + * send_map_starts need to be set in comm_pkg. + * job = 2 : is used to initialize communication exchange for the parts + * of vector needed to perform a MatvecT, it requires send_data + * and recv_data to be doubles, recv_vec_starts and + * send_map_starts need to be set in comm_pkg. 
+ * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), + * requires send_data and recv_data to be HYPRE_BigInts + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), + * requires send_data and recv_data to be HYPRE_BigInts + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * default: ignores send_data and recv_data, requires send_mpi_types + * and recv_mpi_types to be set in comm_pkg. + * datatypes need to point to absolute + * addresses, e.g. generated using hypre_MPI_Address. 
+ *-------------------------------------------------------------------------------------*/ #ifdef HYPRE_USING_PERSISTENT_COMM -static CommPkgJobType getJobTypeOf(HYPRE_Int job) + +HYPRE_Int +hypre_ParCSRPersistentCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location, + HYPRE_Int num_send_elems, + HYPRE_Int num_recv_elems, + HYPRE_Int size_of_elem, + hypre_ParCSRCommHandle *comm_handle ) { - CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; - switch (job) + if (!hypre_ParCSRCommHandleSendData(comm_handle)) { - case 1: - job_type = HYPRE_COMM_PKG_JOB_COMPLEX; - break; - case 2: - job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; - break; - case 11: - job_type = HYPRE_COMM_PKG_JOB_INT; - break; - case 12: - job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; - break; - case 21: - job_type = HYPRE_COMM_PKG_JOB_BIGINT; - break; - case 22: - job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; - break; - } // switch (job) + hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * size_of_elem, + send_memory_location); + } + + if (!hypre_ParCSRCommHandleRecvData(comm_handle)) + { + hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * size_of_elem, + recv_memory_location); + } - return job_type; + return hypre_error_flag; } /*------------------------------------------------------------------ @@ -45,195 +70,81 @@ static CommPkgJobType getJobTypeOf(HYPRE_Int job) * allocated and CommHandle owns the buffer *------------------------------------------------------------------*/ -hypre_ParCSRPersistentCommHandle* -hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg ) +hypre_ParCSRCommHandle* +hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location ) { - HYPRE_Int i; - size_t num_bytes_send, num_bytes_recv; - - 
hypre_ParCSRPersistentCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRPersistentCommHandle, 1, - HYPRE_MEMORY_HOST); - - CommPkgJobType job_type = getJobTypeOf(job); - - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - - HYPRE_Int num_requests = num_sends + num_recvs; - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; - - void *send_buff = NULL, *recv_buff = NULL; - - switch (job_type) + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + + switch (hypre_ParCSRCommHandleGetJobType(job)) { case HYPRE_COMM_PKG_JOB_COMPLEX: - num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, 
i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, requests + num_recvs + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, hcomm, requests + num_sends + i ); - } - break; - case HYPRE_COMM_PKG_JOB_INT: - num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - 
num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, requests + num_recvs + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, 
i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, hcomm, requests + num_sends + i ); - } - break; - case HYPRE_COMM_PKG_JOB_BIGINT: - num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, hcomm, requests + num_recvs + i); - } + { + hypre_ParCSRPersistentCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + 
hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, hcomm, requests); + + hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, hcomm, requests + num_recvs); + break; + } + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, hcomm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; + { + hypre_ParCSRPersistentCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, hcomm, requests); + + 
hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, hcomm, requests + num_sends); - hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, hcomm, requests + num_sends + i); - } break; - default: - hypre_assert(1 == 0); + + default: break; - } // switch (job_type) + } + } - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_buff; - hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_buff; - hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_bytes_send; - hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_bytes_recv; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; return ( comm_handle ); } @@ -242,58 +153,49 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm * hypre_ParCSRCommPkgGetPersistentCommHandle *------------------------------------------------------------------*/ -hypre_ParCSRPersistentCommHandle* -hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg ) +hypre_ParCSRCommHandle* +hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location ) { - CommPkgJobType type = getJobTypeOf(job); - if (!comm_pkg->persistent_comm_handles[type]) + CommPkgJobType type = hypre_ParCSRCommHandleGetJobType(job); + + if (!hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type)) { /* data is owned by persistent comm handle */ - comm_pkg->persistent_comm_handles[type] = - hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg); + hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type) = + hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg, send_memory_location, recv_memory_location); } - return 
comm_pkg->persistent_comm_handles[type]; + return hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type); } /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleDestroy *------------------------------------------------------------------*/ -void -hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRPersistentCommHandle *comm_handle ) +HYPRE_Int +hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { if (comm_handle) { - hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - hypre_TFree(comm_handle->requests, HYPRE_MEMORY_HOST); - + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); } + + return hypre_error_flag; } /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleStart *------------------------------------------------------------------*/ -void -hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, - void *send_data ) +HYPRE_Int +hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) { - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location; - if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) { - hypre_TMemcpy( hypre_ParCSRCommHandleSendDataBuffer(comm_handle), - send_data, - char, - hypre_ParCSRCommHandleNumSendBytes(comm_handle), - HYPRE_MEMORY_HOST, - send_memory_location ); - HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle), hypre_ParCSRCommHandleRequests(comm_handle)); if (hypre_MPI_SUCCESS != ret) @@ -302,20 +204,17 @@ hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRPersistentCommHandle *comm_ha /*hypre_printf("MPI error %d in %s 
(%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ } } + + return hypre_error_flag; } /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleWait *------------------------------------------------------------------*/ -void -hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation recv_memory_location, - void *recv_data ) +HYPRE_Int +hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) { - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location; - if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) { HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), @@ -326,14 +225,9 @@ hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRPersistentCommHandle *comm_han hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n"); /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ } - - hypre_TMemcpy(recv_data, - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), - char, - hypre_ParCSRCommHandleNumRecvBytes(comm_handle), - recv_memory_location, - HYPRE_MEMORY_HOST); } + + return hypre_error_flag; } #endif // HYPRE_USING_PERSISTENT_COMM @@ -365,57 +259,23 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, { hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_v2"); - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_ParCSRCommHandle *comm_handle; - HYPRE_Int num_requests = num_sends + num_recvs; - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = 
hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); hypre_MPI_CommMPI_SendLocation(hcomm) = hypre_GetActualMemLocation(send_memory_location); hypre_MPI_CommMPI_RecvLocation(hcomm) = hypre_GetActualMemLocation(recv_memory_location); - hypre_MPI_Datatype mpi_dtype = job <= 2 ? HYPRE_MPI_COMPLEX : - job <= 12 ? HYPRE_MPI_INT : - HYPRE_MPI_BIG_INT; - - /*-------------------------------------------------------------------- - * hypre_Initialize sets up a communication handle, - * posts receives and initiates sends. It always requires num_sends, - * num_recvs, recv_procs and send_procs to be set in comm_pkg. - * There are different options for job: - * job = 1 : is used to initialize communication exchange for the parts - * of vector needed to perform a Matvec, it requires send_data - * and recv_data to be doubles, recv_vec_starts and - * send_map_starts need to be set in comm_pkg. - * job = 2 : is used to initialize communication exchange for the parts - * of vector needed to perform a MatvecT, it requires send_data - * and recv_data to be doubles, recv_vec_starts and - * send_map_starts need to be set in comm_pkg. - * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. 
- * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * default: ignores send_data and recv_data, requires send_mpi_types - * and recv_mpi_types to be set in comm_pkg. - * datatypes need to point to absolute - * addresses, e.g. generated using hypre_MPI_Address . - *--------------------------------------------------------------------*/ - - switch (job) + switch (hypre_ParCSRCommHandleGetJobType(job)) { - case 1: - case 11: - case 21: + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_BIGINT: { hypre_MPI_Irecv_Multiple(recv_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), @@ -432,9 +292,9 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, break; } - case 2: - case 12: - case 22: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: { hypre_MPI_Irecv_Multiple(recv_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), @@ -449,14 +309,15 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, 0, hcomm, requests + num_sends); break; + + default: + break; } } /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ - comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; hypre_ParCSRCommHandleSendData(comm_handle) = send_data; hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 
6dd979d252..fb03f5de3b 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -13,7 +13,6 @@ * Structure containing information for doing communications *--------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM typedef enum CommPkgJobType { HYPRE_COMM_PKG_JOB_COMPLEX = 0, @@ -22,9 +21,89 @@ typedef enum CommPkgJobType HYPRE_COMM_PKG_JOB_INT_TRANSPOSE, HYPRE_COMM_PKG_JOB_BIGINT, HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE, - NUM_OF_COMM_PKG_JOB_TYPE, + NUM_OF_COMM_PKG_JOB_TYPE } CommPkgJobType; -#endif + +static inline CommPkgJobType +hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) +{ + CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + switch (job) + { + case 1: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + break; + case 2: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; + break; + case 11: + job_type = HYPRE_COMM_PKG_JOB_INT; + break; + case 12: + job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; + break; + case 21: + job_type = HYPRE_COMM_PKG_JOB_BIGINT; + break; + case 22: + job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; + break; + } + + return job_type; +} + +static inline hypre_MPI_Datatype +hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) +{ + hypre_MPI_Datatype dtype = HYPRE_MPI_COMPLEX; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + dtype = HYPRE_MPI_COMPLEX; + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + dtype = HYPRE_MPI_INT; + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + dtype = HYPRE_MPI_BIG_INT; + break; + default: + break; + } + + return dtype; +} + +static inline HYPRE_Int +hypre_ParCSRCommHandleGetDataTypeSize(HYPRE_Int job) +{ + HYPRE_Int nbytes = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + 
nbytes = (HYPRE_Int) sizeof(HYPRE_Complex); + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + nbytes = (HYPRE_Int) sizeof(HYPRE_Int); + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + nbytes = (HYPRE_Int) sizeof(HYPRE_BigInt); + break; + default: + break; + } + + return nbytes; +} /*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle @@ -40,6 +119,17 @@ typedef struct hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; +/*-------------------------------------------------------------------------- + * Accessor macros: hypre_ParCSRCommHandle + *--------------------------------------------------------------------------*/ + +#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) + typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; typedef struct _hypre_ParCSRCommPkg @@ -57,8 +147,8 @@ typedef struct _hypre_ParCSRCommPkg /* remote communication information */ hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; -#ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) /* temporary memory for matvec. cudaMalloc is expensive. 
alloc once and reuse */ @@ -72,31 +162,33 @@ typedef struct _hypre_ParCSRCommPkg * Accessor macros: hypre_ParCSRCommPkg *--------------------------------------------------------------------------*/ -#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) -#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) -#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) -#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) -#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) -#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) -#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) -#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) -#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) -#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) -#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) -#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) -#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) -#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) -#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) -#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) -#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) -#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) -#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) -#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) +#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) +#define hypre_ParCSRCommPkgNumSends(comm_pkg) 
(comm_pkg -> num_sends) +#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) +#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) +#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) +#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) +#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) +#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) +#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) +#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) +#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) +#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) +#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) +#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) +#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) +#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) +#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) +#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) +#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgPersistentCommHandles(comm_pkg) (comm_pkg -> persistent_comm_handles) +#define hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg,i) (comm_pkg -> persistent_comm_handles[i]) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) -#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) -#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) +#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) +#define 
hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) +#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) #endif static inline void @@ -124,15 +216,4 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) #endif } -/*-------------------------------------------------------------------------- - * Accessor macros: hypre_ParCSRCommHandle - *--------------------------------------------------------------------------*/ - -#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) - #endif /* HYPRE_PAR_CSR_COMMUNICATION_HEADER */ diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 2c886dc2db..d66b9f0808 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6291,7 +6291,6 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Local variables */ HYPRE_Int i; hypre_Vector *rdbuf; - HYPRE_Complex *recv_rdbuf_data; HYPRE_Complex *send_rdbuf_data; /*--------------------------------------------------------------------- @@ -6313,23 +6312,20 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - hypre_VectorData(rdbuf) = (HYPRE_Complex *) - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); + hypre_VectorData(rdbuf) = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); 
hypre_SeqVectorSetDataOwner(rdbuf, 0); #else hypre_ParCSRCommHandle *comm_handle; -#endif - - /* Initialize rdbuf */ hypre_SeqVectorInitialize_v2(rdbuf, HYPRE_MEMORY_HOST); - recv_rdbuf_data = hypre_VectorData(rdbuf); + HYPRE_Complex *recv_rdbuf_data = hypre_VectorData(rdbuf); +#endif /* Allocate send buffer for rdbuf */ #if defined(HYPRE_USING_PERSISTENT_COMM) - send_rdbuf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + send_rdbuf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); #else send_rdbuf_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], HYPRE_MEMORY_HOST); #endif @@ -6345,7 +6341,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication starts */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleStart(comm_handle, HYPRE_MEMORY_HOST, send_rdbuf_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -6362,7 +6358,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(comm_handle, HYPRE_MEMORY_HOST, recv_rdbuf_data); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/par_csr_matvec.c b/src/parcsr_mv/par_csr_matvec.c index fcf23d7788..61fb76e8e8 100644 --- a/src/parcsr_mv/par_csr_matvec.c +++ b/src/parcsr_mv/par_csr_matvec.c @@ -49,7 +49,6 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, HYPRE_Int idxstride = hypre_VectorIndexStride(x_local); HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local); HYPRE_Complex *x_local_data = hypre_VectorData(x_local); - HYPRE_Complex *x_tmp_data; HYPRE_Complex *x_buf_data; HYPRE_ANNOTATE_FUNC_BEGIN; @@ -123,7 +122,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, #if 
defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); #else hypre_ParCSRCommHandle *comm_handle; #endif @@ -134,21 +133,18 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_VectorData(x_tmp) = (HYPRE_Complex *) - hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + hypre_VectorData(x_tmp) = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); hypre_SeqVectorSetDataOwner(x_tmp, 0); #endif hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_HOST); - x_tmp_data = hypre_VectorData(x_tmp); /*--------------------------------------------------------------------- * Allocate data send buffer *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); - + x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); #else x_buf_data = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), @@ -179,12 +175,11 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication starts */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, - HYPRE_MEMORY_HOST, x_buf_data); + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_HOST, x_buf_data, - HYPRE_MEMORY_HOST, x_tmp_data); + HYPRE_MEMORY_HOST, hypre_VectorData(x_tmp)); #endif #ifdef HYPRE_PROFILE @@ -200,7 +195,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ 
#ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, HYPRE_MEMORY_HOST, x_tmp_data); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -306,7 +301,6 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, HYPRE_BigInt x_size = hypre_ParVectorGlobalSize(x); HYPRE_BigInt y_size = hypre_ParVectorGlobalSize(y); - HYPRE_Complex *y_tmp_data; HYPRE_Complex *y_buf_data; HYPRE_Complex *y_local_data = hypre_VectorData(y_local); HYPRE_Int idxstride = hypre_VectorIndexStride(y_local); @@ -384,7 +378,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); #else hypre_ParCSRCommHandle *comm_handle; #endif @@ -395,20 +389,18 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_VectorData(y_tmp) = (HYPRE_Complex *) - hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); + hypre_VectorData(y_tmp) = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); hypre_SeqVectorSetDataOwner(y_tmp, 0); #endif hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_HOST); - y_tmp_data = hypre_VectorData(y_tmp); /*--------------------------------------------------------------------- * Allocate receive data buffer *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); #else y_buf_data = hypre_TAlloc(HYPRE_Complex, @@ -440,11 +432,10 @@ 
hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, HYPRE_MEMORY_HOST, y_tmp_data); - + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, - HYPRE_MEMORY_HOST, y_tmp_data, + HYPRE_MEMORY_HOST, hypre_VectorData(y_tmp), HYPRE_MEMORY_HOST, y_buf_data ); #endif @@ -469,8 +460,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, - HYPRE_MEMORY_HOST, y_buf_data); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index db451bc922..f049c5abf4 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -365,15 +365,17 @@ HYPRE_Int hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, HYPRE HYPRE_Int hypre_ParCSRCommPkgCreateMatrixE( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); #ifdef HYPRE_USING_PERSISTENT_COMM -hypre_ParCSRPersistentCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -hypre_ParCSRPersistentCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -void hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRPersistentCommHandle *comm_handle); -void hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, void *send_data); -void hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation recv_memory_location, void *recv_data); +hypre_ParCSRCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, + hypre_ParCSRCommPkg 
*comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); +HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); +HYPRE_Int hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index b7614a0b11..dea6b2a4f4 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1181,16 +1181,28 @@ typedef struct typedef MPI_Group hypre_MPI_Group; +#define HYPRE_MPI_REQUEST_FREE 1 +#define HYPRE_MPI_REQUEST_COPY 2 + typedef struct { - MPI_Request mpi_request; - void *post_action; + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_Request_Action; + +#define hypre_MPI_Request_ActionCount(action) ((action) -> count) +#define hypre_MPI_Request_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_Request_ActionData(action) ((action) -> data) + +typedef struct +{ + MPI_Request mpi_request; + hypre_MPI_Request_Action action[2]; } hypre_MPI_Request; -#define HYPRE_MPI_REQUEST_FREE 1 -#define HYPRE_MPI_REQUEST_COPY 2 -#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestPostAction(request) ((request).post_action) +#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) +#define hypre_MPI_RequestAction(request, i) ((request).action[i]) typedef MPI_Datatype hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; @@ -1338,12 +1350,12 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int 
comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); -HYPRE_Int hypre_MPI_RequestSetPostActionCopy(void *dest, hypre_MemoryLocation dest_location, - void *src, hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestSetPostActionFree(void *ptr, hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation ptr_location, + hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 56df2e3f57..32f66d5727 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -780,118 +780,132 @@ hypre_MPI_RequestFromMPI_Request(MPI_Request request) } HYPRE_Int -hypre_MPI_RequestSetPostActionFree(void *ptr, - hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request) +hypre_MPI_RequestSetActionFree(HYPRE_Int i, + void *ptr, + hypre_MemoryLocation ptr_location, + hypre_MPI_Request *request) { - hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + HYPRE_Int action_id = HYPRE_MPI_REQUEST_FREE; + hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); HYPRE_Int nb = sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation); - char *data = hypre_CTAlloc(char, nb, HYPRE_MEMORY_HOST); - 
hypre_MPI_RequestPostAction(*request) = (void *) data; + HYPRE_Int data_size = hypre_MPI_Request_ActionDataSize(action); - HYPRE_Int action_id = HYPRE_MPI_REQUEST_FREE; + hypre_MPI_Request_ActionCount(action) ++; + hypre_MPI_Request_ActionDataSize(action) = data_size + nb; + hypre_MPI_Request_ActionData(action) = hypre_TReAlloc(hypre_MPI_Request_ActionData(action), + char, + hypre_MPI_Request_ActionDataSize(action), + HYPRE_MEMORY_HOST); + char *data = hypre_MPI_Request_ActionData(action) + data_size; hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(HYPRE_Int); - // hypre_TMemcpy(data, &ptr, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(void *); - // hypre_TMemcpy(data, &ptr_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + + hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); return hypre_error_flag; } HYPRE_Int -hypre_MPI_RequestSetPostActionCopy(void *dest, - hypre_MemoryLocation dest_location, - void *src, - hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, - hypre_MPI_Request *request) +hypre_MPI_RequestSetActionCopy(HYPRE_Int i, + void *dest, + hypre_MemoryLocation dest_location, + void *src, + hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, + hypre_MPI_Request *request) { - hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + HYPRE_Int action_id = HYPRE_MPI_REQUEST_COPY; + hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); HYPRE_Int nb = 2 * (sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation)); - char * data = hypre_CTAlloc(char, nb, HYPRE_MEMORY_HOST); - hypre_MPI_RequestPostAction(*request) = (void *) data; + HYPRE_Int data_size = hypre_MPI_Request_ActionDataSize(action); - HYPRE_Int action_id = HYPRE_MPI_REQUEST_COPY; + hypre_MPI_Request_ActionCount(action) ++; + 
hypre_MPI_Request_ActionDataSize(action) = data_size + nb; + hypre_MPI_Request_ActionData(action) = hypre_TReAlloc(hypre_MPI_Request_ActionData(action), + char, + hypre_MPI_Request_ActionDataSize(action), + HYPRE_MEMORY_HOST); + char *data = hypre_MPI_Request_ActionData(action) + data_size; hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(HYPRE_Int); - // hypre_TMemcpy(data, &num_bytes, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(HYPRE_Int); - // hypre_TMemcpy(data, &dest, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(void *); - // hypre_TMemcpy(data, &src, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(void *); - // hypre_TMemcpy(data, &dest_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(hypre_MemoryLocation); - // hypre_TMemcpy(data, &src_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + + hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); return hypre_error_flag; } HYPRE_Int -hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request) +hypre_MPI_RequestProcessAction(HYPRE_Int i, + hypre_MPI_Request *request) { - if (!hypre_MPI_RequestPostAction(*request)) - { - return hypre_error_flag; - } - - char *data = hypre_MPI_RequestPostAction(*request); - HYPRE_Int action_id; + hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); + HYPRE_Int count = hypre_MPI_Request_ActionCount(action); + char *data = hypre_MPI_Request_ActionData(action); + HYPRE_Int k; - hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - - if (action_id == HYPRE_MPI_REQUEST_FREE) - { - void *ptr; - hypre_MemoryLocation ptr_location; - - hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - // - 
hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - // action! - _hypre_TFree(ptr, ptr_location); - } - else if (action_id == HYPRE_MPI_REQUEST_COPY) + for (k = 0; k < count; k ++) { - void *dest, *src; - HYPRE_Int num_bytes; - hypre_MemoryLocation dest_location, src_location; + HYPRE_Int action_id; - hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(HYPRE_Int); - // - hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - // - hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - // - hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - // - hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - // action! - hypre_GpuProfilingPushRange("MPI-H2D"); - _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); - hypre_GpuProfilingPopRange(); - _hypre_TFree(src, src_location); + + if (action_id == HYPRE_MPI_REQUEST_FREE) + { + void *ptr; + hypre_MemoryLocation ptr_location; + hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! 
+ _hypre_TFree(ptr, ptr_location); + } + else if (action_id == HYPRE_MPI_REQUEST_COPY) + { + void *dest, *src; + HYPRE_Int num_bytes; + hypre_MemoryLocation dest_location, src_location; + hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! + hypre_GpuProfilingPushRange("MPI-H2D"); + _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); + hypre_GpuProfilingPopRange(); + } } - hypre_TFree(hypre_MPI_RequestPostAction(*request), HYPRE_MEMORY_HOST); + hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); + + hypre_MPI_Request_ActionCount(action) = 0; + hypre_MPI_Request_ActionDataSize(action) = 0; + hypre_TFree(hypre_MPI_Request_ActionData(action), HYPRE_MEMORY_HOST); return hypre_error_flag; } @@ -1278,8 +1292,20 @@ hypre_MPI_Irecv( void *buf, &hypre_MPI_RequestMPI_Request(*request)); } -#define TYPE_MACRO_SEND 0 -#define TYPE_MACRO_RECV 1 +#define TYPE_MACRO_SEND 0 +#define TYPE_MACRO_RECV 1 +#define TYPE_MACRO_SEND_INIT 2 + +static inline HYPRE_Int +hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) +{ +#if defined(HYPRE_USING_GPU) + return !hypre_GetGpuAwareMPI() && memory_location != hypre_MEMORY_HOST; +#else + /* RL: can return 1 for debugging purpose */ + return 1; +#endif +} #define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ { \ @@ -1288,8 +1314,7 @@ hypre_MPI_Irecv( void *buf, HYPRE_Int i; \ HYPRE_DTYPE 
*data = (HYPRE_DTYPE *) buf; \ HYPRE_Int ntot = displs[num]; \ - HYPRE_Int gpu_mpi = hypre_GetGpuAwareMPI(); \ - HYPRE_Int host_buf = !gpu_mpi && memory_location != hypre_MEMORY_HOST; \ + HYPRE_Int host_buf = hypre_MPINeedHostBuffer(memory_location); \ if (host_buf) \ { \ data = _hypre_TAlloc(HYPRE_DTYPE, ntot, hypre_MEMORY_HOST); \ @@ -1310,30 +1335,41 @@ hypre_MPI_Irecv( void *buf, ip, tag, hypre_MPI_CommMPI_Comm(comm), \ &hypre_MPI_RequestMPI_Request(requests[i])); \ } \ - if (host_buf) \ + if (num && host_buf) \ { \ - /* register post action in the first request */ \ + /* register pre/post action in the first request */ \ if (SEND_RECV == TYPE_MACRO_SEND) \ { \ - if (num) \ - { \ - hypre_MPI_RequestSetPostActionFree(data, \ - hypre_MEMORY_HOST, \ - &requests[0]); \ - } \ + hypre_MPI_RequestSetActionFree(1, data, \ + hypre_MEMORY_HOST, \ + &requests[0]); \ + } \ + else if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ + { \ + hypre_MPI_RequestSetActionCopy(0, buf, \ + memory_location, \ + data, \ + hypre_MEMORY_HOST, \ + ntot * sizeof(HYPRE_DTYPE), \ + &requests[0]); \ + \ + hypre_MPI_RequestSetActionFree(1, data, \ + hypre_MEMORY_HOST, \ + &requests[0]); \ + \ } \ else if (SEND_RECV == TYPE_MACRO_RECV) \ { \ - if (num) \ - { \ - HYPRE_Int num_bytes = ntot * sizeof(HYPRE_DTYPE); \ - hypre_MPI_RequestSetPostActionCopy(buf, \ - memory_location, \ - data, \ - hypre_MEMORY_HOST, \ - num_bytes, \ - &requests[0]); \ - } \ + hypre_MPI_RequestSetActionCopy(1, buf, \ + memory_location, \ + data, \ + hypre_MEMORY_HOST, \ + ntot * sizeof(HYPRE_DTYPE), \ + &requests[0]); \ + \ + hypre_MPI_RequestSetActionFree(1, data, \ + hypre_MEMORY_HOST, \ + &requests[0]); \ } \ } \ return hypre_error_flag; \ @@ -1395,6 +1431,26 @@ hypre_MPI_Send_init( void *buf, &hypre_MPI_RequestMPI_Request(*request)); } +HYPRE_Int +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + 
hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) +{ + hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); + + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, @@ -1410,6 +1466,26 @@ hypre_MPI_Recv_init( void *buf, &hypre_MPI_RequestMPI_Request(*request)); } +HYPRE_Int +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) +{ + hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); + + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, @@ -1435,6 +1511,7 @@ hypre_MPI_Startall( HYPRE_Int count, for (i = 0; i < count; i++) { array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); + hypre_MPI_RequestProcessAction(0, &array_of_requests[i]); } ierr = (HYPRE_Int) MPI_Startall((hypre_int)count, array_of_mpi_requests); @@ -1544,7 +1621,7 @@ hypre_MPI_Waitall( HYPRE_Int count, for (i = 0; i < count; i++) { hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; - hypre_MPI_RequestProcessPostAction(&array_of_requests[i]); + hypre_MPI_RequestProcessAction(1, &array_of_requests[i]); } hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 74ffdd803f..1df3374d29 100644 --- a/src/utilities/mpistubs.h +++ 
b/src/utilities/mpistubs.h @@ -206,16 +206,28 @@ typedef struct typedef MPI_Group hypre_MPI_Group; +#define HYPRE_MPI_REQUEST_FREE 1 +#define HYPRE_MPI_REQUEST_COPY 2 + typedef struct { - MPI_Request mpi_request; - void *post_action; + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_Request_Action; + +#define hypre_MPI_Request_ActionCount(action) ((action) -> count) +#define hypre_MPI_Request_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_Request_ActionData(action) ((action) -> data) + +typedef struct +{ + MPI_Request mpi_request; + hypre_MPI_Request_Action action[2]; } hypre_MPI_Request; -#define HYPRE_MPI_REQUEST_FREE 1 -#define HYPRE_MPI_REQUEST_COPY 2 -#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestPostAction(request) ((request).post_action) +#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) +#define hypre_MPI_RequestAction(request, i) ((request).action[i]) typedef MPI_Datatype hypre_MPI_Datatype; typedef MPI_Status hypre_MPI_Status; @@ -363,12 +375,12 @@ HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int comm hypre_MPI_Op *op ); hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); -HYPRE_Int hypre_MPI_RequestSetPostActionCopy(void *dest, hypre_MemoryLocation dest_location, - void *src, hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestSetPostActionFree(void *ptr, hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestProcessPostAction(hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation 
ptr_location, + hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, MPI_Comm *newcomm); From 3a5f2ae19552a76d99c9b2a004e85ffab7004be2 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 14 Dec 2023 18:00:07 -0800 Subject: [PATCH 29/90] persistent mpi --- src/utilities/general.c | 1 + src/utilities/handle.h | 3 + src/utilities/mpistubs.c | 116 +++++++++++++++++++++++++-------------- src/utilities/mpistubs.h | 20 ++++--- 4 files changed, 91 insertions(+), 49 deletions(-) diff --git a/src/utilities/general.c b/src/utilities/general.c index 16491b04c5..2f53b0baae 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -50,6 +50,7 @@ hypre_HandleCreate(void) #else hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; #endif + hypre_HandleMPIHostBufferLocation(hypre_handle_) = hypre_MEMORY_HOST; #endif return hypre_handle_; diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 88b16782d6..efe490ac86 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -34,6 +34,8 @@ typedef struct HYPRE_Int use_gpu_aware_mpi; #endif + hypre_MemoryLocation mpi_host_buffer_location; + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -77,6 +79,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define 
hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 32f66d5727..22a87e1830 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -779,6 +779,21 @@ hypre_MPI_RequestFromMPI_Request(MPI_Request request) return hrequest; } +HYPRE_Int +hypre_MPI_RequestClear(hypre_MPI_Request *request) +{ + HYPRE_Int i; + for (i = 0; i < 2; i++) + { + hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); + hypre_MPI_Request_ActionCount(action) = 0; + hypre_MPI_Request_ActionDataSize(action) = 0; + hypre_TFree(hypre_MPI_Request_ActionData(action), HYPRE_MEMORY_HOST); + } + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, @@ -903,10 +918,6 @@ hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); - hypre_MPI_Request_ActionCount(action) = 0; - hypre_MPI_Request_ActionDataSize(action) = 0; - hypre_TFree(hypre_MPI_Request_ActionData(action), HYPRE_MEMORY_HOST); - return hypre_error_flag; } @@ -1295,43 +1306,44 @@ hypre_MPI_Irecv( void *buf, #define TYPE_MACRO_SEND 0 #define TYPE_MACRO_RECV 1 #define TYPE_MACRO_SEND_INIT 2 +#define TYPE_MACRO_RECV_INIT 3 -static inline HYPRE_Int -hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) -{ -#if defined(HYPRE_USING_GPU) - return !hypre_GetGpuAwareMPI() && memory_location != hypre_MEMORY_HOST; -#else - /* RL: can return 1 for debugging purpose */ - return 1; -#endif -} - -#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ +#define P_TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE, PBUF) \ { \ if (datatype == HYPRE_MPI_DTYPE) \ { \ HYPRE_Int i; \ - HYPRE_DTYPE *data = (HYPRE_DTYPE *) buf; \ + HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) buf; \ HYPRE_Int ntot = displs[num]; \ HYPRE_Int host_buf = 
hypre_MPINeedHostBuffer(memory_location); \ + hypre_MemoryLocation host_buf_location = \ + hypre_HandleMPIHostBufferLocation(hypre_handle()); \ if (host_buf) \ { \ - data = _hypre_TAlloc(HYPRE_DTYPE, ntot, hypre_MEMORY_HOST); \ if (SEND_RECV == TYPE_MACRO_SEND) \ { \ + _buf = _hypre_TAlloc(HYPRE_DTYPE, ntot, host_buf_location); \ hypre_GpuProfilingPushRange("MPI-D2H"); \ - _hypre_TMemcpy(data, buf, HYPRE_DTYPE, ntot, \ - hypre_MEMORY_HOST, memory_location); \ + _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ + host_buf_location, memory_location); \ hypre_GpuProfilingPopRange(); \ } \ + else if (SEND_RECV == TYPE_MACRO_RECV) \ + { \ + _buf = _hypre_TAlloc(HYPRE_DTYPE, ntot, host_buf_location); \ + } \ + else if (SEND_RECV == TYPE_MACRO_SEND_INIT || \ + SEND_RECV == TYPE_MACRO_RECV_INIT) \ + { \ + _buf = PBUF; \ + } \ } \ for (i = 0; i < num; i++) \ { \ HYPRE_Int ip = procs[i]; \ HYPRE_Int start = displs[i]; \ HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ - MPI_CMD(data + start, len, HYPRE_MPI_DTYPE, \ + MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, \ ip, tag, hypre_MPI_CommMPI_Comm(comm), \ &hypre_MPI_RequestMPI_Request(requests[i])); \ } \ @@ -1340,35 +1352,39 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) /* register pre/post action in the first request */ \ if (SEND_RECV == TYPE_MACRO_SEND) \ { \ - hypre_MPI_RequestSetActionFree(1, data, \ - hypre_MEMORY_HOST, \ + hypre_MPI_RequestSetActionFree(1, _buf, \ + host_buf_location, \ &requests[0]); \ } \ else if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ { \ - hypre_MPI_RequestSetActionCopy(0, buf, \ + hypre_MPI_RequestSetActionCopy(0, _buf, \ + host_buf_location, \ + buf, \ memory_location, \ - data, \ - hypre_MEMORY_HOST, \ ntot * sizeof(HYPRE_DTYPE), \ &requests[0]); \ - \ - hypre_MPI_RequestSetActionFree(1, data, \ - hypre_MEMORY_HOST, \ - &requests[0]); \ - \ } \ else if (SEND_RECV == TYPE_MACRO_RECV) \ { \ hypre_MPI_RequestSetActionCopy(1, buf, \ memory_location, \ - data, \ - 
hypre_MEMORY_HOST, \ + _buf, \ + host_buf_location, \ ntot * sizeof(HYPRE_DTYPE), \ &requests[0]); \ \ - hypre_MPI_RequestSetActionFree(1, data, \ - hypre_MEMORY_HOST, \ + hypre_MPI_RequestSetActionFree(1, _buf, \ + host_buf_location, \ + &requests[0]); \ + } \ + else if (SEND_RECV == TYPE_MACRO_RECV_INIT) \ + { \ + hypre_MPI_RequestSetActionCopy(1, buf, \ + memory_location, \ + _buf, \ + host_buf_location, \ + ntot * sizeof(HYPRE_DTYPE), \ &requests[0]); \ } \ } \ @@ -1376,6 +1392,9 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) } \ } +#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ + P_TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE, NULL) + HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, @@ -1433,6 +1452,7 @@ hypre_MPI_Send_init( void *buf, HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, + void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, @@ -1444,9 +1464,9 @@ hypre_MPI_Send_init_Multiple( void *buf, { hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX, pbuf); + P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT, pbuf); + P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT, pbuf); return hypre_error_flag; } @@ -1468,6 +1488,7 @@ hypre_MPI_Recv_init( void *buf, HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, + void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, @@ -1479,9 +1500,9 @@ hypre_MPI_Recv_init_Multiple( void *buf, { hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_Complex, 
HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX, pbuf); + P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Int, HYPRE_MPI_INT, pbuf); + P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT, pbuf); return hypre_error_flag; } @@ -1822,4 +1843,17 @@ hypre_MPI_Info_free( hypre_MPI_Info *info ) } #endif +HYPRE_Int +hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) +{ +#if defined(HYPRE_USING_GPU) + return !hypre_GetGpuAwareMPI() && + memory_location != hypre_MEMORY_HOST && + memory_location != hypre_MEMORY_HOST_PINNED; +#else + /* RL: can return 1 for debugging purpose */ + return 1; +#endif +} + #endif diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 1df3374d29..ab309cea89 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -381,20 +381,24 @@ HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLo HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation ptr_location, hypre_MPI_Request *request); HYPRE_Int hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm); -HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); -#endif +HYPRE_Int hypre_MPI_RequestClear(hypre_MPI_Request *request); +HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request 
*requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); +HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); +#endif + #ifdef __cplusplus } #endif From 4392afe83c20cda3d22c2cf9a31a296dff1a7630 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 14 Dec 2023 18:01:43 -0800 Subject: [PATCH 30/90] add persistent buffer in par_csr_comm --- src/parcsr_mv/_hypre_parcsr_mv.h | 8 ++++++ src/parcsr_mv/par_csr_communication.c | 40 +++++++++++++++++++++++++-- src/parcsr_mv/par_csr_communication.h | 7 +++++ src/parcsr_mv/protos.h | 1 + src/utilities/_hypre_utilities.h | 23 +++++++++------ 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 8a37970081..0f187e15d5 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -130,6 +130,11 @@ typedef struct struct _hypre_ParCSRCommPkg 
*comm_pkg; void *send_data; void *recv_data; +#if defined(HYPRE_USING_PERSISTENT_COMM) + /* persistent HOST buffer */ + void *send_buffer; + void *recv_buffer; +#endif HYPRE_Int num_requests; hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; @@ -141,6 +146,8 @@ typedef struct #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) +#define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) @@ -992,6 +999,7 @@ HYPRE_Int hypre_ParCSRFindExtendCommPkg(MPI_Comm comm, HYPRE_BigInt global_num_c HYPRE_BigInt first_col_diag, HYPRE_Int num_cols_diag, HYPRE_BigInt *col_starts, hypre_IJAssumedPart *apart, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg **extend_comm_pkg); +HYPRE_Int hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ); /* par_csr_matop.c */ HYPRE_Int hypre_ParCSRMatrixScale(hypre_ParCSRMatrix *A, HYPRE_Complex scalar); diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 6a7aebb57e..628232d3c4 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -38,7 +38,7 @@ * addresses, e.g. generated using hypre_MPI_Address . 
*-------------------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM +#if defined(HYPRE_USING_PERSISTENT_COMM) HYPRE_Int hypre_ParCSRPersistentCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location, @@ -60,6 +60,20 @@ hypre_ParCSRPersistentCommHandleCreateBuffer( HYPRE_MemoryLocation send_memor recv_memory_location); } + if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && + hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(send_memory_location))) + { + hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, + hypre_HandleMPIHostBufferLocation(hypre_handle())); + } + + if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle) && + hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(recv_memory_location))) + { + hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, + hypre_HandleMPIHostBufferLocation(hypre_handle())); + } + return hypre_error_flag; } @@ -97,6 +111,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + hypre_ParCSRCommHandleRecvBuffer(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ -104,6 +119,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, 0, hcomm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + hypre_ParCSRCommHandleSendBuffer(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -123,6 +139,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + hypre_ParCSRCommHandleRecvBuffer(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -130,6 +147,7 @@ 
hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, 0, hcomm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + hypre_ParCSRCommHandleSendBuffer(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ -171,6 +189,22 @@ hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, return hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type); } +HYPRE_Int +hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ) +{ + if (comm_handle) + { + HYPRE_Int i; + for (i = 0; i < hypre_ParCSRCommHandleNumRequests(comm_handle); i++) + { + hypre_MPI_RequestClear(&hypre_ParCSRCommHandleRequest(comm_handle, i)); + } + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + } + + return hypre_error_flag; +} + /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleDestroy *------------------------------------------------------------------*/ @@ -180,7 +214,7 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { if (comm_handle) { - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_ParCSRCommHandleDestroyRequests(comm_handle); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); } @@ -353,7 +387,7 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_TFree(status0, HYPRE_MEMORY_HOST); } - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_ParCSRCommHandleDestroyRequests(comm_handle); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); hypre_GpuProfilingPopRange(); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index fb03f5de3b..60a5167951 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -115,6 +115,11 @@ typedef struct struct _hypre_ParCSRCommPkg *comm_pkg; void *send_data; void *recv_data; +#if 
defined(HYPRE_USING_PERSISTENT_COMM) + /* persistent HOST buffer */ + void *send_buffer; + void *recv_buffer; +#endif HYPRE_Int num_requests; hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; @@ -126,6 +131,8 @@ typedef struct #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) +#define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index f049c5abf4..30808f202a 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -268,6 +268,7 @@ HYPRE_Int hypre_ParCSRFindExtendCommPkg(MPI_Comm comm, HYPRE_BigInt global_num_c HYPRE_BigInt first_col_diag, HYPRE_Int num_cols_diag, HYPRE_BigInt *col_starts, hypre_IJAssumedPart *apart, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg **extend_comm_pkg); +HYPRE_Int hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ); /* par_csr_matop.c */ HYPRE_Int hypre_ParCSRMatrixScale(hypre_ParCSRMatrix *A, HYPRE_Complex scalar); diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index dea6b2a4f4..14c854e0c5 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1356,20 +1356,24 @@ HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLo HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation ptr_location, hypre_MPI_Request *request); HYPRE_Int 
hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm); -HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); -#endif +HYPRE_Int hypre_MPI_RequestClear(hypre_MPI_Request *request); +HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); +HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); +#endif + #ifdef __cplusplus } #endif @@ -1848,6 
+1852,8 @@ typedef struct HYPRE_Int use_gpu_aware_mpi; #endif + hypre_MemoryLocation mpi_host_buffer_location; + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -1891,6 +1897,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) From f8ebeb321cf4031bc024c8a928ae93b1f8037ae8 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 15 Dec 2023 13:24:43 -0800 Subject: [PATCH 31/90] fix memory leaks in persist mpi --- src/parcsr_mv/_hypre_parcsr_mv.h | 10 ++++- src/parcsr_mv/par_csr_communication.c | 56 ++++++++++++++++----------- src/parcsr_mv/par_csr_communication.h | 8 ++-- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 0f187e15d5..b323edd0c0 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -130,10 +130,14 @@ typedef struct struct _hypre_ParCSRCommPkg *comm_pkg; void *send_data; void *recv_data; -#if defined(HYPRE_USING_PERSISTENT_COMM) - /* persistent HOST buffer */ +#if 1 || defined(HYPRE_USING_GPU) + /* HOST buffers for non-GPU-aware MPIs */ void *send_buffer; void *recv_buffer; +#endif +#if defined(HYPRE_USING_PERSISTENT_COMM) + HYPRE_MemoryLocation send_location; + HYPRE_MemoryLocation recv_location; #endif HYPRE_Int num_requests; hypre_MPI_Request *requests; @@ -148,6 +152,8 @@ typedef struct #define 
hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) #define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) #define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) +#define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) +#define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 628232d3c4..63ad710ce7 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -38,15 +38,13 @@ * addresses, e.g. generated using hypre_MPI_Address . *-------------------------------------------------------------------------------------*/ -#if defined(HYPRE_USING_PERSISTENT_COMM) - HYPRE_Int -hypre_ParCSRPersistentCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location, - HYPRE_MemoryLocation recv_memory_location, - HYPRE_Int num_send_elems, - HYPRE_Int num_recv_elems, - HYPRE_Int size_of_elem, - hypre_ParCSRCommHandle *comm_handle ) +hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location, + HYPRE_Int num_send_elems, + HYPRE_Int num_recv_elems, + HYPRE_Int size_of_elem, + hypre_ParCSRCommHandle *comm_handle ) { if (!hypre_ParCSRCommHandleSendData(comm_handle)) { @@ -77,6 +75,8 @@ hypre_ParCSRPersistentCommHandleCreateBuffer( HYPRE_MemoryLocation send_memor return hypre_error_flag; } +#if defined(HYPRE_USING_PERSISTENT_COMM) + /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleCreate * @@ -105,10 +105,10 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int 
job, case HYPRE_COMM_PKG_JOB_INT: case HYPRE_COMM_PKG_JOB_BIGINT: { - hypre_ParCSRPersistentCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvBuffer(comm_handle), @@ -133,10 +133,10 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: { - hypre_ParCSRPersistentCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvBuffer(comm_handle), @@ -161,8 +161,11 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, } } - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + 
hypre_ParCSRCommHandleRequests(comm_handle) = requests; return ( comm_handle ); } @@ -195,6 +198,7 @@ hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ) if (comm_handle) { HYPRE_Int i; + for (i = 0; i < hypre_ParCSRCommHandleNumRequests(comm_handle); i++) { hypre_MPI_RequestClear(&hypre_ParCSRCommHandleRequest(comm_handle, i)); @@ -214,6 +218,10 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { if (comm_handle) { + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); + _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_HandleMPIHostBufferLocation(hypre_handle())); + _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_HandleMPIHostBufferLocation(hypre_handle())); hypre_ParCSRCommHandleDestroyRequests(comm_handle); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); } @@ -352,11 +360,13 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, * set up comm_handle and return *--------------------------------------------------------------------*/ - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; hypre_GpuProfilingPopRange(); diff --git 
a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 60a5167951..3d37f8fa8e 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -115,11 +115,11 @@ typedef struct struct _hypre_ParCSRCommPkg *comm_pkg; void *send_data; void *recv_data; -#if defined(HYPRE_USING_PERSISTENT_COMM) - /* persistent HOST buffer */ + /* HOST buffers for non-GPU-aware MPIs */ void *send_buffer; void *recv_buffer; -#endif + HYPRE_MemoryLocation send_location; + HYPRE_MemoryLocation recv_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; @@ -133,6 +133,8 @@ typedef struct #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) #define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) #define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) +#define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) +#define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) From b5dbb026d19c8ea46bcccfa5d3cd6480aead5b8c Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 17 Dec 2023 11:47:46 -0800 Subject: [PATCH 32/90] unifying persistent --- src/parcsr_ls/par_relax.c | 2 +- src/parcsr_mv/_hypre_parcsr_mv.h | 12 +- src/parcsr_mv/par_csr_communication.h | 6 +- src/parcsr_mv/par_csr_matop.c | 2 +- src/parcsr_mv/par_csr_matvec.c | 4 +- src/parcsr_mv/protos.h | 2 +- src/utilities/_hypre_utilities.h | 24 ++-- src/utilities/mpistubs.c | 162 +++++++++++--------------- src/utilities/mpistubs.h | 24 ++-- 9 files changed, 115 insertions(+), 123 deletions(-) diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index 
7817470bf7..d5bf386203 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -821,7 +821,7 @@ hypre_BoomerAMGRelaxHybridGaussSeidel_core( hypre_ParCSRMatrix *A, #endif #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); + hypre_ParCSRCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index b323edd0c0..d332fa88e1 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -130,15 +130,13 @@ typedef struct struct _hypre_ParCSRCommPkg *comm_pkg; void *send_data; void *recv_data; -#if 1 || defined(HYPRE_USING_GPU) - /* HOST buffers for non-GPU-aware MPIs */ + /* send/recv buffers to copy to/from */ void *send_buffer; void *recv_buffer; -#endif -#if defined(HYPRE_USING_PERSISTENT_COMM) HYPRE_MemoryLocation send_location; HYPRE_MemoryLocation recv_location; -#endif + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; @@ -154,6 +152,8 @@ typedef struct #define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) #define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) +#define hypre_ParCSRCommHandleSendBufferLocation(comm_handle) (comm_handle -> send_buffer_location) +#define hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) (comm_handle -> recv_buffer_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) @@ -978,6 +978,7 @@ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, 
void *send_data_in, HYPRE_MemoryLocation recv_memory_location, void *recv_data_in ); +HYPRE_Int hypre_ParCSRCommHandleWait(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRCommHandleDestroy ( hypre_ParCSRCommHandle *comm_handle ); void hypre_ParCSRCommPkgCreate_core ( MPI_Comm comm, HYPRE_BigInt *col_map_offd, HYPRE_BigInt first_col_diag, HYPRE_BigInt *col_starts, HYPRE_Int num_cols_diag, @@ -1113,7 +1114,6 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job HYPRE_MemoryLocation recv_memory_location); HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); -HYPRE_Int hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 3d37f8fa8e..ff7f25bf65 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -115,11 +115,13 @@ typedef struct struct _hypre_ParCSRCommPkg *comm_pkg; void *send_data; void *recv_data; - /* HOST buffers for non-GPU-aware MPIs */ + /* send/recv buffers to copy to/from */ void *send_buffer; void *recv_buffer; HYPRE_MemoryLocation send_location; HYPRE_MemoryLocation recv_location; + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; } hypre_ParCSRCommHandle; @@ -135,6 +137,8 @@ typedef struct #define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) #define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) +#define hypre_ParCSRCommHandleSendBufferLocation(comm_handle) (comm_handle -> send_buffer_location) +#define 
hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) (comm_handle -> recv_buffer_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index d66b9f0808..d5545425b8 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6358,7 +6358,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(comm_handle); + hypre_ParCSRCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/par_csr_matvec.c b/src/parcsr_mv/par_csr_matvec.c index 61fb76e8e8..8f0257165b 100644 --- a/src/parcsr_mv/par_csr_matvec.c +++ b/src/parcsr_mv/par_csr_matvec.c @@ -195,7 +195,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); + hypre_ParCSRCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -460,7 +460,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); + hypre_ParCSRCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 30808f202a..5b317ec8ff 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -241,6 +241,7 @@ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, void *send_data_in, HYPRE_MemoryLocation recv_memory_location, void 
*recv_data_in ); +HYPRE_Int hypre_ParCSRCommHandleWait(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRCommHandleDestroy ( hypre_ParCSRCommHandle *comm_handle ); void hypre_ParCSRCommPkgCreate_core ( MPI_Comm comm, HYPRE_BigInt *col_map_offd, HYPRE_BigInt first_col_diag, HYPRE_BigInt *col_starts, HYPRE_Int num_cols_diag, @@ -376,7 +377,6 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job HYPRE_MemoryLocation recv_memory_location); HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); -HYPRE_Int hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 14c854e0c5..41dcdfd347 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1170,14 +1170,22 @@ typedef HYPRE_Int hypre_MPI_Info; typedef struct { - MPI_Comm mpi_comm; - hypre_MemoryLocation send_location; - hypre_MemoryLocation recv_location; + MPI_Comm mpi_comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + void *send_copy; + void *recv_copy; + hypre_MemoryLocation send_copy_location; + hypre_MemoryLocation recv_copy_location; } hypre_MPI_Comm; -#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) -#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) -#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) +#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) +#define hypre_MPI_CommMPI_SendCopy(comm) ((comm).send_copy) +#define hypre_MPI_CommMPI_RecvCopy(comm) ((comm).recv_copy) +#define 
hypre_MPI_CommMPI_SendCopyLocation(comm) ((comm).send_copy_location) +#define hypre_MPI_CommMPI_RecvCopyLocation(comm) ((comm).recv_copy_location) typedef MPI_Group hypre_MPI_Group; @@ -1362,9 +1370,9 @@ HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 22a87e1830..17d5c9fec1 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1308,92 +1308,66 @@ hypre_MPI_Irecv( void *buf, #define TYPE_MACRO_SEND_INIT 2 #define TYPE_MACRO_RECV_INIT 3 -#define P_TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE, PBUF) \ -{ \ - if (datatype == HYPRE_MPI_DTYPE) \ - { \ - HYPRE_Int i; \ - HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) buf; \ - HYPRE_Int ntot = displs[num]; \ - HYPRE_Int host_buf = hypre_MPINeedHostBuffer(memory_location); \ - hypre_MemoryLocation host_buf_location = \ - 
hypre_HandleMPIHostBufferLocation(hypre_handle()); \ - if (host_buf) \ - { \ - if (SEND_RECV == TYPE_MACRO_SEND) \ - { \ - _buf = _hypre_TAlloc(HYPRE_DTYPE, ntot, host_buf_location); \ - hypre_GpuProfilingPushRange("MPI-D2H"); \ - _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ - host_buf_location, memory_location); \ - hypre_GpuProfilingPopRange(); \ - } \ - else if (SEND_RECV == TYPE_MACRO_RECV) \ - { \ - _buf = _hypre_TAlloc(HYPRE_DTYPE, ntot, host_buf_location); \ - } \ - else if (SEND_RECV == TYPE_MACRO_SEND_INIT || \ - SEND_RECV == TYPE_MACRO_RECV_INIT) \ - { \ - _buf = PBUF; \ - } \ - } \ - for (i = 0; i < num; i++) \ - { \ - HYPRE_Int ip = procs[i]; \ - HYPRE_Int start = displs[i]; \ - HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ - MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, \ - ip, tag, hypre_MPI_CommMPI_Comm(comm), \ - &hypre_MPI_RequestMPI_Request(requests[i])); \ - } \ - if (num && host_buf) \ - { \ - /* register pre/post action in the first request */ \ - if (SEND_RECV == TYPE_MACRO_SEND) \ - { \ - hypre_MPI_RequestSetActionFree(1, _buf, \ - host_buf_location, \ - &requests[0]); \ - } \ - else if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ - { \ - hypre_MPI_RequestSetActionCopy(0, _buf, \ - host_buf_location, \ - buf, \ - memory_location, \ - ntot * sizeof(HYPRE_DTYPE), \ - &requests[0]); \ - } \ - else if (SEND_RECV == TYPE_MACRO_RECV) \ - { \ - hypre_MPI_RequestSetActionCopy(1, buf, \ - memory_location, \ - _buf, \ - host_buf_location, \ - ntot * sizeof(HYPRE_DTYPE), \ - &requests[0]); \ - \ - hypre_MPI_RequestSetActionFree(1, _buf, \ - host_buf_location, \ - &requests[0]); \ - } \ - else if (SEND_RECV == TYPE_MACRO_RECV_INIT) \ - { \ - hypre_MPI_RequestSetActionCopy(1, buf, \ - memory_location, \ - _buf, \ - host_buf_location, \ - ntot * sizeof(HYPRE_DTYPE), \ - &requests[0]); \ - } \ - } \ - return hypre_error_flag; \ - } \ -} - -#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ - P_TYPE_MACRO(MPI_CMD, SEND_RECV, 
HYPRE_DTYPE, HYPRE_MPI_DTYPE, NULL) +#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ +{ \ + if (datatype == HYPRE_MPI_DTYPE) \ + { \ + if (!num) \ + { \ + return hypre_error_flag; \ + } \ + HYPRE_Int i, ntot = displs[num]; \ + void *cbuf = NULL; \ + if (SEND_RECV == TYPE_MACRO_SEND || SEND_RECV == TYPE_MACRO_SEND_INIT) \ + { \ + cbuf = hypre_MPI_CommMPI_SendCopy(comm); \ + } \ + else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ + { \ + cbuf = hypre_MPI_CommMPI_RecvCopy(comm); \ + } \ + HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) (cbuf ? cbuf : buf); \ + if (SEND_RECV == TYPE_MACRO_SEND && _buf != buf) \ + { \ + hypre_GpuProfilingPushRange("MPI-D2H"); \ + _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ + hypre_MPI_CommMPI_SendCopyLocation(comm), memory_location); \ + hypre_GpuProfilingPopRange(); \ + } \ + for (i = 0; i < num; i++) \ + { \ + HYPRE_Int ip = procs[i]; \ + HYPRE_Int start = displs[i]; \ + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ + MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, \ + ip, tag, hypre_MPI_CommMPI_Comm(comm), \ + &hypre_MPI_RequestMPI_Request(requests[i])); \ + } \ + if (_buf != buf) \ + { \ + /* register pre/post action in the first request */ \ + if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ + { \ + hypre_MPI_RequestSetActionCopy(0, _buf, \ + hypre_MPI_CommMPI_SendCopyLocation(comm), \ + buf, \ + memory_location, \ + ntot * sizeof(HYPRE_DTYPE), \ + &requests[0]); \ + } \ + else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ + { \ + hypre_MPI_RequestSetActionCopy(1, buf, \ + memory_location, \ + _buf, \ + hypre_MPI_CommMPI_RecvCopyLocation(comm), \ + ntot * sizeof(HYPRE_DTYPE), \ + &requests[0]); \ + } \ + } \ + return hypre_error_flag; \ + } \ +} HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, @@ -1452,7 +1426,6 @@ hypre_MPI_Send_init( void *buf, HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, - void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, 
@@ -1464,9 +1437,9 @@ hypre_MPI_Send_init_Multiple( void *buf, { hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); - P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX, pbuf); - P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT, pbuf); - P_TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT, pbuf); + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); return hypre_error_flag; } @@ -1488,7 +1461,6 @@ hypre_MPI_Recv_init( void *buf, HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, - void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, @@ -1500,9 +1472,9 @@ hypre_MPI_Recv_init_Multiple( void *buf, { hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); - P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX, pbuf); - P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Int, HYPRE_MPI_INT, pbuf); - P_TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT, pbuf); + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Int, HYPRE_MPI_INT); + TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); return hypre_error_flag; } @@ -1851,7 +1823,7 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) memory_location != hypre_MEMORY_HOST && memory_location != hypre_MEMORY_HOST_PINNED; #else - /* RL: can return 1 for debugging purpose */ + /* RL: return 1 for debugging purpose */ return 1; #endif } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index ab309cea89..6055965d41 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -195,14 
+195,22 @@ typedef HYPRE_Int hypre_MPI_Info; typedef struct { - MPI_Comm mpi_comm; - hypre_MemoryLocation send_location; - hypre_MemoryLocation recv_location; + MPI_Comm mpi_comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + void *send_copy; + void *recv_copy; + hypre_MemoryLocation send_copy_location; + hypre_MemoryLocation recv_copy_location; } hypre_MPI_Comm; -#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) -#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) -#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) +#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) +#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) +#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) +#define hypre_MPI_CommMPI_SendCopy(comm) ((comm).send_copy) +#define hypre_MPI_CommMPI_RecvCopy(comm) ((comm).recv_copy) +#define hypre_MPI_CommMPI_SendCopyLocation(comm) ((comm).send_copy_location) +#define hypre_MPI_CommMPI_RecvCopyLocation(comm) ((comm).recv_copy_location) typedef MPI_Group hypre_MPI_Group; @@ -387,9 +395,9 @@ HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); -HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, void *pbuf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +HYPRE_Int 
hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) From d7c2188cd5be73705c82610021d1233ca16d6100 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 17 Dec 2023 12:14:47 -0800 Subject: [PATCH 33/90] refactoring parcsr comm persistent --- src/parcsr_mv/par_csr_communication.c | 145 +++++++++++++++----------- 1 file changed, 83 insertions(+), 62 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 63ad710ce7..4ac47cd1d8 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -46,6 +46,7 @@ hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location HYPRE_Int size_of_elem, hypre_ParCSRCommHandle *comm_handle ) { +#if defined(HYPRE_USING_PERSISTENT_COMM) if (!hypre_ParCSRCommHandleSendData(comm_handle)) { hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * size_of_elem, @@ -57,19 +58,39 @@ hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_memory_location); } +#endif if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(send_memory_location))) { - hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, - hypre_HandleMPIHostBufferLocation(hypre_handle())); + hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); + hypre_ParCSRCommHandleSendBufferLocation(comm_handle) = location; + hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, location); } if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle) && 
hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(recv_memory_location))) { - hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, - hypre_HandleMPIHostBufferLocation(hypre_handle())); + hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); + hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) = location; + hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, location); + } + + return hypre_error_flag; +} + +HYPRE_Int +hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ) +{ + if (comm_handle) + { + HYPRE_Int i; + + for (i = 0; i < hypre_ParCSRCommHandleNumRequests(comm_handle); i++) + { + hypre_MPI_RequestClear(&hypre_ParCSRCommHandleRequest(comm_handle, i)); + } + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); } return hypre_error_flag; @@ -111,7 +132,6 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - hypre_ParCSRCommHandleRecvBuffer(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ -119,7 +139,6 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, 0, hcomm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - hypre_ParCSRCommHandleSendBuffer(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -139,7 +158,6 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - hypre_ParCSRCommHandleRecvBuffer(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -147,7 +165,6 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, 0, hcomm, requests); 
hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - hypre_ParCSRCommHandleSendBuffer(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ -192,23 +209,6 @@ hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, return hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type); } -HYPRE_Int -hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ) -{ - if (comm_handle) - { - HYPRE_Int i; - - for (i = 0; i < hypre_ParCSRCommHandleNumRequests(comm_handle); i++) - { - hypre_MPI_RequestClear(&hypre_ParCSRCommHandleRequest(comm_handle, i)); - } - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); - } - - return hypre_error_flag; -} - /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleDestroy *------------------------------------------------------------------*/ @@ -216,16 +216,18 @@ hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ) HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { - if (comm_handle) + if (!comm_handle) { - hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); - _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_HandleMPIHostBufferLocation(hypre_handle())); - _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_HandleMPIHostBufferLocation(hypre_handle())); - hypre_ParCSRCommHandleDestroyRequests(comm_handle); - hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); + return hypre_error_flag; } + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); + 
_hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_ParCSRCommHandleDestroyRequests(comm_handle); + hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); + return hypre_error_flag; } @@ -250,27 +252,6 @@ hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) return hypre_error_flag; } -/*------------------------------------------------------------------ - * hypre_ParCSRPersistentCommHandleWait - *------------------------------------------------------------------*/ - -HYPRE_Int -hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) -{ - if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) - { - HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), - hypre_ParCSRCommHandleRequests(comm_handle), - hypre_MPI_STATUSES_IGNORE); - if (hypre_MPI_SUCCESS != ret) - { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n"); - /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ - } - } - - return hypre_error_flag; -} #endif // HYPRE_USING_PERSISTENT_COMM /*------------------------------------------------------------------ @@ -319,6 +300,16 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT: case HYPRE_COMM_PKG_JOB_BIGINT: { + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + + hypre_MPI_CommMPI_SendCopy(hcomm) = hypre_ParCSRCommHandleSendBuffer(comm_handle); + hypre_MPI_CommMPI_RecvCopy(hcomm) = hypre_ParCSRCommHandleRecvBuffer(comm_handle); + hypre_MPI_CommMPI_SendCopyLocation(hcomm) = hypre_ParCSRCommHandleSendBufferLocation(comm_handle); + 
hypre_MPI_CommMPI_RecvCopyLocation(hcomm) = hypre_ParCSRCommHandleRecvBufferLocation(comm_handle); + hypre_MPI_Irecv_Multiple(recv_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ -338,6 +329,16 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: { + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, + hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + + hypre_MPI_CommMPI_SendCopy(hcomm) = hypre_ParCSRCommHandleSendBuffer(comm_handle); + hypre_MPI_CommMPI_RecvCopy(hcomm) = hypre_ParCSRCommHandleRecvBuffer(comm_handle); + hypre_MPI_CommMPI_SendCopyLocation(hcomm) = hypre_ParCSRCommHandleSendBufferLocation(comm_handle); + hypre_MPI_CommMPI_RecvCopyLocation(hcomm) = hypre_ParCSRCommHandleRecvBufferLocation(comm_handle); + hypre_MPI_Irecv_Multiple(recv_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -374,29 +375,49 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, } /*------------------------------------------------------------------ - * hypre_ParCSRCommHandleDestroy + * hypre_ParCSRCommHandleWait *------------------------------------------------------------------*/ HYPRE_Int -hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) +hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) { if ( comm_handle == NULL ) { return hypre_error_flag; } + if (hypre_ParCSRCommHandleNumRequests(comm_handle)) + { + HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), + hypre_ParCSRCommHandleRequests(comm_handle), + hypre_MPI_STATUSES_IGNORE); + if (hypre_MPI_SUCCESS != ret) + { + char errmsg[256]; + hypre_sprintf(errmsg, "MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, 
errmsg); + } + } + + return hypre_error_flag; +} + +/*------------------------------------------------------------------ + * hypre_ParCSRCommHandleDestroy + *------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) +{ hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleDestroy"); - if (hypre_ParCSRCommHandleNumRequests(comm_handle)) + if (!comm_handle) { - hypre_MPI_Status *status0; - status0 = hypre_CTAlloc(hypre_MPI_Status, - hypre_ParCSRCommHandleNumRequests(comm_handle), HYPRE_MEMORY_HOST); - hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), - hypre_ParCSRCommHandleRequests(comm_handle), status0); - hypre_TFree(status0, HYPRE_MEMORY_HOST); + return hypre_error_flag; } + hypre_ParCSRCommHandleWait(comm_handle); + hypre_ParCSRCommHandleDestroyRequests(comm_handle); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); From 9e5d0c9c2456b65e1e6f5d01d7363c6329ba8636 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 18 Dec 2023 13:27:01 -0800 Subject: [PATCH 34/90] fix memory leaks --- src/parcsr_mv/par_csr_communication.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 4ac47cd1d8..2cc11175df 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -418,6 +418,9 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_ParCSRCommHandleWait(comm_handle); + _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_ParCSRCommHandleDestroyRequests(comm_handle); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); From df37bf91dd63fe1fed1e9ee8fc9278aa74529238 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 21 Jun 2024 19:04:02 -0700 Subject: 
[PATCH 35/90] revert hypre_MPI_comm struct --- src/IJ_mv/HYPRE_IJMatrix.c | 5 +- src/IJ_mv/HYPRE_IJVector.c | 5 +- src/IJ_mv/IJMatrix_parcsr.c | 5 +- src/IJ_mv/IJVector_parcsr.c | 3 +- src/distributed_ls/Euclid/Euclid_dh.c | 7 +- src/distributed_ls/Euclid/ExternalRows_dh.c | 40 +- src/distributed_ls/Euclid/Factor_dh.c | 25 +- src/distributed_ls/Euclid/Mat_dh.c | 27 +- src/distributed_ls/Euclid/SubdomainGraph_dh.c | 36 +- src/distributed_ls/Euclid/TimeLog_dh.c | 5 +- src/distributed_ls/Euclid/blas_dh.c | 6 +- src/distributed_ls/Euclid/mat_dh_private.c | 31 +- src/distributed_ls/ParaSails/ConjGrad.c | 3 +- src/distributed_ls/ParaSails/DiagScale.c | 12 +- src/distributed_ls/ParaSails/FGmres.c | 3 +- src/distributed_ls/ParaSails/LoadBal.c | 19 +- src/distributed_ls/ParaSails/Matrix.c | 37 +- src/distributed_ls/ParaSails/ParaSails.c | 57 +-- .../HYPRE_DistributedMatrixPilutSolver.c | 5 +- src/distributed_ls/pilut/comm.c | 19 +- src/distributed_ls/pilut/parilut.c | 16 +- src/distributed_ls/pilut/serilut.c | 3 +- src/distributed_ls/pilut/trifactor.c | 24 +- src/parcsr_block_mv/par_csr_block_comm.c | 9 +- src/parcsr_block_mv/par_csr_block_interp.c | 15 +- src/parcsr_block_mv/par_csr_block_matrix.c | 6 +- .../par_csr_block_rap_communication.c | 9 +- src/parcsr_ls/amg_hybrid.c | 3 +- src/parcsr_ls/ams.c | 8 +- src/parcsr_ls/gen_redcs_mat.c | 47 +- src/parcsr_ls/par_2s_interp.c | 10 +- src/parcsr_ls/par_amg_setup.c | 5 +- src/parcsr_ls/par_amgdd_helpers.c | 40 +- src/parcsr_ls/par_amgdd_setup.c | 13 +- src/parcsr_ls/par_amgdd_solve.c | 5 +- src/parcsr_ls/par_cgc_coarsen.c | 15 +- src/parcsr_ls/par_coarse_parms.c | 3 +- src/parcsr_ls/par_coarsen.c | 6 +- src/parcsr_ls/par_cr.c | 11 +- src/parcsr_ls/par_gauss_elim.c | 8 +- src/parcsr_ls/par_gsmg.c | 3 +- src/parcsr_ls/par_ilu.c | 6 +- src/parcsr_ls/par_ilu_setup.c | 70 ++- src/parcsr_ls/par_interp.c | 15 +- src/parcsr_ls/par_lr_interp.c | 18 +- src/parcsr_ls/par_lr_restr.c | 6 +- src/parcsr_ls/par_mgr.c | 32 +- 
src/parcsr_ls/par_mgr_coarsen.c | 3 +- src/parcsr_ls/par_mgr_setup.c | 3 +- src/parcsr_ls/par_mod_lr_interp.c | 9 +- src/parcsr_ls/par_mod_multi_interp.c | 21 +- src/parcsr_ls/par_multi_interp.c | 7 +- src/parcsr_ls/par_rap_communication.c | 18 +- src/parcsr_ls/par_relax.c | 10 +- src/parcsr_ls/par_relax_more.c | 3 +- src/parcsr_ls/par_restr.c | 3 +- src/parcsr_ls/par_scaled_matnorm.c | 3 +- src/parcsr_ls/par_stats.c | 5 +- src/parcsr_ls/par_strength.c | 8 +- src/parcsr_ls/par_sv_interp.c | 3 +- src/parcsr_ls/par_sv_interp_ln.c | 3 +- src/parcsr_ls/partial.c | 15 +- src/parcsr_mv/HYPRE_parcsr_matrix.c | 16 +- src/parcsr_mv/communicationT.c | 9 +- src/parcsr_mv/gen_fffc.c | 39 +- src/parcsr_mv/par_csr_assumed_part.c | 5 +- src/parcsr_mv/par_csr_bool_matrix.c | 18 +- src/parcsr_mv/par_csr_communication.c | 47 +- src/parcsr_mv/par_csr_matop.c | 443 ++++++++++++++++-- src/parcsr_mv/par_csr_matrix.c | 54 +-- src/parcsr_mv/par_csr_matrix_stats.c | 7 +- src/parcsr_mv/par_vector.c | 27 +- src/parcsr_mv/par_vector_batched.c | 6 +- src/sstruct_ls/maxwell_TV_setup.c | 3 +- src/sstruct_ls/sstruct_sharedDOFComm.c | 9 +- src/sstruct_mv/HYPRE_sstruct_graph.c | 4 +- src/sstruct_mv/sstruct_grid.c | 5 +- src/struct_ls/pfmg_setup.c | 5 +- src/struct_mv/assumed_part.c | 6 +- src/struct_mv/box_manager.c | 16 +- src/struct_mv/struct_communication.c | 5 +- src/struct_mv/struct_grid.c | 10 +- src/struct_mv/struct_innerprod.c | 3 +- src/test/ij.c | 10 +- src/test/sstruct.c | 4 +- src/utilities/_hypre_utilities.h | 34 +- src/utilities/exchange_data.c | 26 +- src/utilities/mpistubs.c | 94 ++-- src/utilities/mpistubs.h | 34 +- src/utilities/timing.c | 12 +- 90 files changed, 976 insertions(+), 845 deletions(-) diff --git a/src/IJ_mv/HYPRE_IJMatrix.c b/src/IJ_mv/HYPRE_IJMatrix.c index a38b815ef7..21e4c8c598 100644 --- a/src/IJ_mv/HYPRE_IJMatrix.c +++ b/src/IJ_mv/HYPRE_IJMatrix.c @@ -48,7 +48,6 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); 
hypre_MPI_Comm_rank(comm, &myid); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (ilower > iupper + 1 || ilower < 0) { @@ -92,7 +91,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, info[0] = ilower; info[1] = jlower; } - hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, 0, hcomm); + hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, 0, comm); row0 = info[0]; col0 = info[1]; @@ -102,7 +101,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, info[0] = iupper; info[1] = jupper; } - hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(info, 2, HYPRE_MPI_BIG_INT, num_procs - 1, comm); rowN = info[0]; colN = info[1]; diff --git a/src/IJ_mv/HYPRE_IJVector.c b/src/IJ_mv/HYPRE_IJVector.c index 58145c9efe..c65cac9fe0 100644 --- a/src/IJ_mv/HYPRE_IJVector.c +++ b/src/IJ_mv/HYPRE_IJVector.c @@ -39,7 +39,6 @@ HYPRE_IJVectorCreate( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (jlower > jupper + 1 || jlower < 0) { @@ -61,13 +60,13 @@ HYPRE_IJVectorCreate( MPI_Comm comm, { row0 = jlower; } - hypre_MPI_Bcast(&row0, 1, HYPRE_MPI_BIG_INT, 0, hcomm); + hypre_MPI_Bcast(&row0, 1, HYPRE_MPI_BIG_INT, 0, comm); /* proc (num_procs-1) has the last row */ if (my_id == (num_procs - 1)) { rowN = jupper; } - hypre_MPI_Bcast(&rowN, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&rowN, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); hypre_IJVectorGlobalFirstRow(vec) = row0; hypre_IJVectorGlobalNumRows(vec) = rowN - row0 + 1; diff --git a/src/IJ_mv/IJMatrix_parcsr.c b/src/IJ_mv/IJMatrix_parcsr.c index f4935eebb8..61b999b94c 100644 --- a/src/IJ_mv/IJMatrix_parcsr.c +++ b/src/IJ_mv/IJMatrix_parcsr.c @@ -2562,7 +2562,6 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) //HYPRE_Int row_len; HYPRE_Int max_num_threads; HYPRE_Int aux_flag, aux_flag_global; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_ANNOTATE_FUNC_BEGIN; @@ -2576,7 +2575,7 @@ 
hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) { aux_flag = 1; } - hypre_MPI_Allreduce(&aux_flag, &aux_flag_global, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&aux_flag, &aux_flag_global, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (aux_flag_global && (!aux_flag)) { hypre_MPI_Comm_rank(comm, &my_id); @@ -2634,7 +2633,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) }*/ off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); hypre_MPI_Allreduce(&off_proc_i_indx, &offd_proc_elmts, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); if (offd_proc_elmts) { max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); diff --git a/src/IJ_mv/IJVector_parcsr.c b/src/IJ_mv/IJVector_parcsr.c index 876223e113..80cf583dad 100644 --- a/src/IJ_mv/IJVector_parcsr.c +++ b/src/IJ_mv/IJVector_parcsr.c @@ -607,7 +607,6 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); MPI_Comm comm = hypre_IJVectorComm(vector); HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!par_vector) { @@ -628,7 +627,7 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) HYPRE_Complex *off_proc_data; current_num_elmts = hypre_AuxParVectorCurrentOffProcElmts(aux_vector); hypre_MPI_Allreduce(&current_num_elmts, &off_proc_elmts, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); if (off_proc_elmts) { max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(aux_vector); diff --git a/src/distributed_ls/Euclid/Euclid_dh.c b/src/distributed_ls/Euclid/Euclid_dh.c index 4b25e929c6..1c16c8d03b 100644 --- a/src/distributed_ls/Euclid/Euclid_dh.c +++ b/src/distributed_ls/Euclid/Euclid_dh.c @@ -407,8 +407,7 @@ void compute_rho_private(Euclid_dh ctx) bufGlobal[1] = bufLocal[1]; bufGlobal[2] = bufLocal[2]; } else { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - 
hypre_MPI_Reduce(bufLocal, bufGlobal, 3, hypre_MPI_REAL, hypre_MPI_SUM, 0, hcomm); + hypre_MPI_Reduce(bufLocal, bufGlobal, 3, hypre_MPI_REAL, hypre_MPI_SUM, 0, comm_dh); } if (myid_dh == 0) { @@ -886,9 +885,7 @@ void reduce_timings_private(Euclid_dh ctx) HYPRE_Real bufOUT[TIMING_BINS]; hypre_TMemcpy(bufOUT, ctx->timing, HYPRE_Real, TIMING_BINS, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Reduce(bufOUT, ctx->timing, TIMING_BINS, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + hypre_MPI_Reduce(bufOUT, ctx->timing, TIMING_BINS, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); } ctx->timingsWereReduced = true; diff --git a/src/distributed_ls/Euclid/ExternalRows_dh.c b/src/distributed_ls/Euclid/ExternalRows_dh.c index f9943bf774..8e6d558186 100644 --- a/src/distributed_ls/Euclid/ExternalRows_dh.c +++ b/src/distributed_ls/Euclid/ExternalRows_dh.c @@ -187,14 +187,12 @@ void rcv_ext_storage_private(ExternalRows_dh er) if (logFile != NULL && er->debug) debug = true; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - /* get number of rows, and total nonzeros, that each lo-nabor will send */ for (i=0; ireq1+i); - hypre_MPI_Irecv(rcv_nz_counts+i, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, hcomm, er->req2+i); - } + hypre_MPI_Irecv(rcv_row_counts+i, 1, HYPRE_MPI_INT, nabor, ROW_CT_TAG, comm_dh, er->req1+i); + hypre_MPI_Irecv(rcv_nz_counts+i, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, comm_dh, er->req2+i); + } hypre_MPI_Waitall(loCount, er->req1, er->status); hypre_MPI_Waitall(loCount, er->req2, er->status); @@ -211,8 +209,8 @@ void rcv_ext_storage_private(ExternalRows_dh er) HYPRE_Int nabor = loNabors[i]; lengths[i] = (HYPRE_Int*)MALLOC_DH(nz*sizeof(HYPRE_Int)); CHECK_V_ERROR; numbers[i] = (HYPRE_Int*)MALLOC_DH(nz*sizeof(HYPRE_Int)); CHECK_V_ERROR; - hypre_MPI_Irecv(lengths[i], nz, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, hcomm, er->req1+i); - hypre_MPI_Irecv(numbers[i], nz, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, hcomm, 
er->req2+i); + hypre_MPI_Irecv(lengths[i], nz, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, comm_dh, er->req1+i); + hypre_MPI_Irecv(numbers[i], nz, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, comm_dh, er->req2+i); } hypre_MPI_Waitall(loCount, er->req1, er->status); hypre_MPI_Waitall(loCount, er->req2, er->status); @@ -307,16 +305,14 @@ void rcv_external_rows_private(ExternalRows_dh er) HYPRE_Int *extRowCval = er->cvalExt, *extRowFill = er->fillExt; HYPRE_Real *extRowAval = er->avalExt; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - /* start receives of external rows */ nz = 0; for (i=0; ireq1+i); - hypre_MPI_Irecv(extRowFill+offset, nz, HYPRE_MPI_INT, nabor, FILL_TAG, hcomm, er->req2+i); - hypre_MPI_Irecv(extRowAval+offset, nz, hypre_MPI_REAL, nabor, AVAL_TAG, hcomm, er->req3+i); + hypre_MPI_Irecv(extRowCval+offset, nz, HYPRE_MPI_INT, nabor, CVAL_TAG, comm_dh, er->req1+i); + hypre_MPI_Irecv(extRowFill+offset, nz, HYPRE_MPI_INT, nabor, FILL_TAG, comm_dh, er->req2+i); + hypre_MPI_Irecv(extRowAval+offset, nz, hypre_MPI_REAL, nabor, AVAL_TAG, comm_dh, er->req3+i); offset += nz; } @@ -447,14 +443,12 @@ void send_ext_storage_private(ExternalRows_dh er) hypre_fprintf(logFile, "EXR send_ext_storage_private:: nz Count = %i\n", nz); } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - /* send number of rows, and total nonzeros, to higher ordered nabors */ for (i=0; ireq1+i); - hypre_MPI_Isend(&nz, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, hcomm, er->req2+i); - } + hypre_MPI_Isend(&rowCount, 1, HYPRE_MPI_INT, nabor, ROW_CT_TAG, comm_dh, er->req1+i); + hypre_MPI_Isend(&nz, 1, HYPRE_MPI_INT, nabor, NZ_CT_TAG, comm_dh, er->req2+i); + } /* set up array for global row numbers */ for (i=0, j=first_bdry; jreq3+i); - hypre_MPI_Isend(nzCounts, rowCount, HYPRE_MPI_INT, nabor, ROW_LENGTH_TAG, hcomm, er->req4+i); + hypre_MPI_Isend(nzNumbers, rowCount, HYPRE_MPI_INT, nabor, ROW_NUMBER_TAG, comm_dh, er->req3+i); + hypre_MPI_Isend(nzCounts, rowCount, HYPRE_MPI_INT, nabor, 
ROW_LENGTH_TAG, comm_dh, er->req4+i); } END_FUNC_DH @@ -533,14 +527,12 @@ void send_external_rows_private(ExternalRows_dh er) } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - /* start sends to higher-ordred nabors */ for (i=0; icval_req+i); - hypre_MPI_Isend(fillSend, nz, HYPRE_MPI_INT, nabor, FILL_TAG, hcomm, er->fill_req+i); - hypre_MPI_Isend(avalSend, nz, hypre_MPI_REAL, nabor, AVAL_TAG, hcomm, er->aval_req+i); + hypre_MPI_Isend(cvalSend, nz, HYPRE_MPI_INT, nabor, CVAL_TAG, comm_dh, er->cval_req+i); + hypre_MPI_Isend(fillSend, nz, HYPRE_MPI_INT, nabor, FILL_TAG, comm_dh, er->fill_req+i); + hypre_MPI_Isend(avalSend, nz, hypre_MPI_REAL, nabor, AVAL_TAG, comm_dh, er->aval_req+i); } END_FUNC_DH } diff --git a/src/distributed_ls/Euclid/Factor_dh.c b/src/distributed_ls/Euclid/Factor_dh.c index c789f80e75..c520662f92 100644 --- a/src/distributed_ls/Euclid/Factor_dh.c +++ b/src/distributed_ls/Euclid/Factor_dh.c @@ -158,8 +158,7 @@ HYPRE_Int Factor_dhReadNz(Factor_dh mat) START_FUNC_DH HYPRE_Int ierr, retval = mat->rp[mat->m]; HYPRE_Int nz = retval; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); CHECK_MPI_ERROR(ierr); + ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm_dh); CHECK_MPI_ERROR(ierr); END_FUNC_VAL(retval) } @@ -372,13 +371,12 @@ static HYPRE_Int setup_receives_private(Factor_dh mat, HYPRE_Int *beg_rows, HYPR receive; this matching receive will be started later, in setup_sends_private. 
*/ - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Isend(reqind+i, j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); + hypre_MPI_Isend(reqind+i, j-i, HYPRE_MPI_INT, this_pe, 444, comm_dh, &request); hypre_MPI_Request_free(&request); /* set up persistent comms for receiving the values from this_pe */ hypre_MPI_Recv_init(recvBuf+i, j-i, hypre_MPI_REAL, this_pe, 555, - hcomm, req+num_recv); + comm_dh, req+num_recv); ++num_recv; } @@ -404,7 +402,6 @@ static void setup_sends_private(Factor_dh mat, HYPRE_Int *inlist, HYPRE_Real *sendBuf; HYPRE_Int myidNEW = o2n_subdomain[myid_dh]; HYPRE_Int count; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); if (debug) { hypre_fprintf(logFile, "FACT \nSTARTING: setup_sends_private\n"); @@ -452,11 +449,11 @@ static void setup_sends_private(Factor_dh mat, HYPRE_Int *inlist, /* matching receive, for list of unknowns that will be sent, during the triangular solves, from ourselves to P_i */ - hypre_MPI_Irecv(rcvBuf, inlist[i], HYPRE_MPI_INT, i, 444, hcomm, requests+count); + hypre_MPI_Irecv(rcvBuf, inlist[i], HYPRE_MPI_INT, i, 444, comm_dh, requests+count); ++count; /* Set up the send */ - hypre_MPI_Send_init(sendBuf, inlist[i], hypre_MPI_REAL, i, 555, hcomm, sendReq); + hypre_MPI_Send_init(sendBuf, inlist[i], hypre_MPI_REAL, i, 555, comm_dh, sendReq); } } @@ -559,8 +556,7 @@ void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg) outlist, debug); CHECK_V_ERROR; } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, comm_dh); /* At this point, inlist[j] contains the number of indices that this processor must send to P_j. 
Processors next need to exchange the actual lists of required indices; this is done @@ -1128,8 +1124,7 @@ HYPRE_Real Factor_dhMaxPivotInverse(Factor_dh mat) if (np_dh == 1) { minGlobal = min; } else { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Reduce(&min, &minGlobal, 1, hypre_MPI_REAL, hypre_MPI_MIN, 0, hcomm); + hypre_MPI_Reduce(&min, &minGlobal, 1, hypre_MPI_REAL, hypre_MPI_MIN, 0, comm_dh); } if (minGlobal == 0) { @@ -1156,8 +1151,7 @@ HYPRE_Real Factor_dhMaxValue(Factor_dh mat) if (np_dh == 1) { maxGlobal = max; } else { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); } END_FUNC_VAL(maxGlobal) } @@ -1187,8 +1181,7 @@ HYPRE_Real Factor_dhCondEst(Factor_dh mat, Euclid_dh ctx) if (np_dh == 1) { maxGlobal = max; } else { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + hypre_MPI_Reduce(&max, &maxGlobal, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, comm_dh); } END_FUNC_VAL(maxGlobal) } diff --git a/src/distributed_ls/Euclid/Mat_dh.c b/src/distributed_ls/Euclid/Mat_dh.c index 5da0143db8..741497a1ca 100644 --- a/src/distributed_ls/Euclid/Mat_dh.c +++ b/src/distributed_ls/Euclid/Mat_dh.c @@ -153,7 +153,6 @@ void Mat_dhMatVecSetup(Mat_dh mat) HYPRE_Int firstLocal = mat->beg_row; HYPRE_Int lastLocal = firstLocal+m; HYPRE_Int *beg_rows, *end_rows; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); mat->recv_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; mat->send_req = (hypre_MPI_Request *)MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; @@ -165,9 +164,11 @@ void Mat_dhMatVecSetup(Mat_dh mat) beg_rows[0] = 0; end_rows[0] = m; } else { - ierr = hypre_MPI_Allgather(&firstLocal, 1, HYPRE_MPI_INT, beg_rows, 1, 
HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Allgather(&firstLocal, 1, HYPRE_MPI_INT, beg_rows, 1, HYPRE_MPI_INT, comm_dh); - ierr = hypre_MPI_Allgather(&lastLocal, 1, HYPRE_MPI_INT, end_rows, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); + CHECK_MPI_V_ERROR(ierr); + + ierr = hypre_MPI_Allgather(&lastLocal, 1, HYPRE_MPI_INT, end_rows, 1, HYPRE_MPI_INT, comm_dh); CHECK_MPI_V_ERROR(ierr); } outlist = (HYPRE_Int *)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -188,7 +189,7 @@ void Mat_dhMatVecSetup(Mat_dh mat) if (np_dh == 1) { /* this is for debugging purposes in some of the drivers */ inlist[0] = outlist[0]; } else { - ierr = hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, comm_dh); CHECK_MPI_V_ERROR(ierr); } setup_matvec_sends_private(mat, inlist); CHECK_V_ERROR; @@ -221,7 +222,6 @@ void setup_matvec_receives_private(Mat_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *e HYPRE_Int ierr, i, j, this_pe; hypre_MPI_Request request; HYPRE_Int m = mat->m; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); mat->num_recv = 0; @@ -242,14 +242,14 @@ void setup_matvec_receives_private(Mat_dh mat, HYPRE_Int *beg_rows, HYPRE_Int *e } /* Request rows in reqind[i..j-1] */ - ierr = hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); CHECK_MPI_V_ERROR(ierr); + ierr = hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm_dh, &request); CHECK_MPI_V_ERROR(ierr); ierr = hypre_MPI_Request_free(&request); CHECK_MPI_V_ERROR(ierr); /* Count of number of number of indices needed from this_pe */ outlist[this_pe] = j-i; ierr = hypre_MPI_Recv_init(&mat->recvbuf[i+m], j-i, hypre_MPI_REAL, this_pe, 555, - hcomm, &mat->recv_req[mat->num_recv]); CHECK_MPI_V_ERROR(ierr); + comm_dh, &mat->recv_req[mat->num_recv]); CHECK_MPI_V_ERROR(ierr); mat->num_recv++; mat->recvlen += j-i; /* only 
used for statistical reporting */ @@ -267,7 +267,6 @@ void setup_matvec_sends_private(Mat_dh mat, HYPRE_Int *inlist) HYPRE_Int ierr, i, j, sendlen, first = mat->beg_row; hypre_MPI_Request *requests; hypre_MPI_Status *statuses; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); requests = (hypre_MPI_Request *) MALLOC_DH(np_dh * sizeof(hypre_MPI_Request)); CHECK_V_ERROR; statuses = (hypre_MPI_Status *) MALLOC_DH(np_dh * sizeof(hypre_MPI_Status)); CHECK_V_ERROR; @@ -284,10 +283,10 @@ void setup_matvec_sends_private(Mat_dh mat, HYPRE_Int *inlist) for (i=0; isendind[j], inlist[i], HYPRE_MPI_INT, i, 444, hcomm, + ierr = hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm_dh, &requests[mat->num_send]); CHECK_MPI_V_ERROR(ierr); /* Set up the send */ - ierr = hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, hcomm, + ierr = hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm_dh, &mat->send_req[mat->num_send]); CHECK_MPI_V_ERROR(ierr); mat->num_send++; @@ -541,8 +540,7 @@ HYPRE_Int Mat_dhReadNz(Mat_dh mat) START_FUNC_DH HYPRE_Int ierr, retval = mat->rp[mat->m]; HYPRE_Int nz = retval; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); CHECK_MPI_ERROR(ierr); + ierr = hypre_MPI_Allreduce(&nz, &retval, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm_dh); CHECK_MPI_ERROR(ierr); END_FUNC_VAL(retval) } @@ -600,9 +598,8 @@ void Mat_dhReduceTiming(Mat_dh mat) if (mat->time[MATVEC_MPI_TIME]) { mat->time[MATVEC_RATIO] = mat->time[MATVEC_TIME] / mat->time[MATVEC_MPI_TIME]; } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Allreduce(mat->time, mat->time_min, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MIN, hcomm); - hypre_MPI_Allreduce(mat->time, mat->time_max, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(mat->time, mat->time_min, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MIN, 
comm_dh); + hypre_MPI_Allreduce(mat->time, mat->time_max, MAT_DH_BINS, hypre_MPI_REAL, hypre_MPI_MAX, comm_dh); END_FUNC_DH } diff --git a/src/distributed_ls/Euclid/SubdomainGraph_dh.c b/src/distributed_ls/Euclid/SubdomainGraph_dh.c index b92699bcce..190f9bc532 100644 --- a/src/distributed_ls/Euclid/SubdomainGraph_dh.c +++ b/src/distributed_ls/Euclid/SubdomainGraph_dh.c @@ -467,7 +467,6 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) HYPRE_Int m, n, beg_row; bool symmetric; HYPRE_Real t1; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); symmetric = Parser_dhHasSwitch(parser_dh, "-sym"); CHECK_V_ERROR; if (Parser_dhHasSwitch(parser_dh, "-makeSymmetric")) { @@ -495,8 +494,8 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) * At this point, beg_rowP[] is a copy of beg_row[]) *-------------------------------------------------------------*/ if (!bj) { - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, s->beg_row, 1, HYPRE_MPI_INT, hcomm); - hypre_MPI_Allgather(&m, 1, HYPRE_MPI_INT, s->row_count, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, s->beg_row, 1, HYPRE_MPI_INT, comm_dh); + hypre_MPI_Allgather(&m, 1, HYPRE_MPI_INT, s->row_count, 1, HYPRE_MPI_INT, comm_dh); hypre_TMemcpy(s->beg_rowP, s->beg_row, HYPRE_Int, np_dh, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); } else { s->beg_row[myid_dh] = beg_row; @@ -549,7 +548,7 @@ void init_mpi_private(SubdomainGraph_dh s, HYPRE_Int blocks, bool bj, void *A) } /* exchange number of boundary rows with all neighbors */ - hypre_MPI_Allgather(&bdryCount, 1, HYPRE_MPI_INT, s->bdry_count, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&bdryCount, 1, HYPRE_MPI_INT, s->bdry_count, 1, HYPRE_MPI_INT, comm_dh); /* form local permutation */ idx = 0; @@ -650,7 +649,6 @@ void SubdomainGraph_dhExchangePerms(SubdomainGraph_dh s) HYPRE_Int myFirstBdry = m - myBdryCount; HYPRE_Int *n2o_row = s->n2o_row; Hash_i_dh n2o_table, o2n_table; - 
hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); if (logFile != NULL && s->debug) debug = true; @@ -706,14 +704,15 @@ void SubdomainGraph_dhExchangePerms(SubdomainGraph_dh s) HYPRE_Int *buf = recvBuf + naborIdx[i]; HYPRE_Int ct = 2*bdryNodeCounts[nabr]; - hypre_MPI_Isend(sendBuf, 2*myBdryCount, HYPRE_MPI_INT, nabr, 444, hcomm, &(send_req[i])); + + hypre_MPI_Isend(sendBuf, 2*myBdryCount, HYPRE_MPI_INT, nabr, 444, comm_dh, &(send_req[i])); if (debug) { hypre_fprintf(logFile , "SUBG sending %i elts to %i\n", 2*myBdryCount, nabr); fflush(logFile); } - hypre_MPI_Irecv(buf, ct, HYPRE_MPI_INT, nabr, 444, hcomm, &(recv_req[i])); + hypre_MPI_Irecv(buf, ct, HYPRE_MPI_INT, nabr, 444, comm_dh, &(recv_req[i])); if (debug) { hypre_fprintf(logFile, "SUBG receiving %i elts from %i\n", ct, nabr); @@ -766,20 +765,19 @@ void form_subdomaingraph_mpi_private(SubdomainGraph_dh s) HYPRE_Int i, j, nz, *adj, *ptrs = s->ptrs; hypre_MPI_Request *recvReqs = NULL, sendReq; hypre_MPI_Status *statuses = NULL, status; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); /* all processors tell root how many nabors they have */ if (myid_dh == 0) { idxAll = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; } - hypre_MPI_Gather(&nct, 1, HYPRE_MPI_INT, idxAll, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Gather(&nct, 1, HYPRE_MPI_INT, idxAll, 1, HYPRE_MPI_INT, 0, comm_dh); /* root counts edges in graph, and broacasts to all */ if (myid_dh == 0) { nz = 0; for (i=0; iallNabors, *myNabors; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); myNabors = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(np_dh*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -994,7 +991,7 @@ hypre_fprintf(stderr, "\n"); */ /* find out who my neighbors are that I cannot discern locally */ - hypre_MPI_Alltoall(marker, 1, HYPRE_MPI_INT, nabors, 1, HYPRE_MPI_INT, hcomm); CHECK_V_ERROR; + hypre_MPI_Alltoall(marker, 1, HYPRE_MPI_INT, nabors, 1, HYPRE_MPI_INT, 
comm_dh); CHECK_V_ERROR; /* add in neighbors that I know about from scanning my adjacency lists */ for (i=0; idesc[t->last], "========== totals, and reset ==========\n"); t->last += 1; - hypre_MPI_Allreduce(t->time, timeMax, t->last, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); - hypre_MPI_Allreduce(t->time, timeMin, t->last, hypre_MPI_REAL, hypre_MPI_MIN, hcomm); + hypre_MPI_Allreduce(t->time, timeMax, t->last, hypre_MPI_REAL, hypre_MPI_MAX, comm_dh); + hypre_MPI_Allreduce(t->time, timeMin, t->last, hypre_MPI_REAL, hypre_MPI_MIN, comm_dh); wasSummed = true; } diff --git a/src/distributed_ls/Euclid/blas_dh.c b/src/distributed_ls/Euclid/blas_dh.c index c4f9feeffd..a75cbc5d26 100644 --- a/src/distributed_ls/Euclid/blas_dh.c +++ b/src/distributed_ls/Euclid/blas_dh.c @@ -114,8 +114,7 @@ HYPRE_Real InnerProd(HYPRE_Int n, HYPRE_Real *x, HYPRE_Real *y) } if (np_dh > 1) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm_dh); } else { result = local_result; } @@ -141,8 +140,7 @@ HYPRE_Real Norm2(HYPRE_Int n, HYPRE_Real *x) } if (np_dh > 1) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm_dh); } else { result = local_result; } diff --git a/src/distributed_ls/Euclid/mat_dh_private.c b/src/distributed_ls/Euclid/mat_dh_private.c index ddf9133578..6e53cc45d5 100644 --- a/src/distributed_ls/Euclid/mat_dh_private.c +++ b/src/distributed_ls/Euclid/mat_dh_private.c @@ -1030,7 +1030,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) /* broadcast number of rows to all processors */ if (myid_dh == 0) m = A->m; - hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); 
+ hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); /* broadcast number of nonzeros in each row to all processors */ rowLengths = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1042,8 +1042,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) rowLengths[i] = tmp[i+1] - tmp[i]; } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, comm_dh); /* partition matrix */ if (myid_dh == 0) { @@ -1063,7 +1062,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) } /* broadcast partitiioning information to all processors */ - hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, comm_dh); /* allocate storage for local portion of matrix */ mat_par_read_allocate_private(&B, m, rowLengths, rowToBlock); CHECK_V_ERROR; @@ -1084,8 +1083,8 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, hcomm, send_req+2*i); - hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, hcomm, send_req+2*i+1); + hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, comm_dh, send_req+2*i); + hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, comm_dh, send_req+2*i+1); } } @@ -1107,8 +1106,8 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, hcomm, rcv_req+2*i); - hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, hcomm, rcv_req+2*i+1); + hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, comm_dh, rcv_req+2*i); + hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, comm_dh, rcv_req+2*i+1); } } @@ -1155,7 +1154,7 @@ void partition_and_distribute_private(Mat_dh A, 
Mat_dh *Bout) /* broadcast number of rows to all processors */ if (myid_dh == 0) m = A->m; - hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(&m, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); /* broadcast number of nonzeros in each row to all processors */ rowLengths = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1165,9 +1164,7 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) rowLengths[i] = tmp[i+1] - tmp[i]; } } - - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm_dh); - hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(rowLengths, m, HYPRE_MPI_INT, 0, comm_dh); /* partition matrix */ rowToBlock = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; @@ -1178,7 +1175,7 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) } /* broadcast partitiioning information to all processors */ - hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(rowToBlock, m, HYPRE_MPI_INT, 0, comm_dh); /* allocate storage for local portion of matrix */ mat_par_read_allocate_private(&B, m, rowLengths, rowToBlock); CHECK_V_ERROR; @@ -1199,8 +1196,8 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, hcomm, send_req+2*i); - hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, hcomm, send_req+2*i+1); + hypre_MPI_Isend(cval+rp[i], count, HYPRE_MPI_INT, owner, CVAL_TAG, comm_dh, send_req+2*i); + hypre_MPI_Isend(aval+rp[i], count, hypre_MPI_REAL, owner, AVAL_TAG, comm_dh, send_req+2*i+1); } } @@ -1222,8 +1219,8 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) SET_V_ERROR(msgBuf_dh); } - hypre_MPI_Irecv(cval+rp[i], count, HYPRE_MPI_INT, 0, CVAL_TAG, hcomm, rcv_req+2*i); - hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, hcomm, rcv_req+2*i+1); + hypre_MPI_Irecv(cval+rp[i], count, 
HYPRE_MPI_INT, 0, CVAL_TAG, comm_dh, rcv_req+2*i); + hypre_MPI_Irecv(aval+rp[i], count, hypre_MPI_REAL, 0, AVAL_TAG, comm_dh, rcv_req+2*i+1); } } diff --git a/src/distributed_ls/ParaSails/ConjGrad.c b/src/distributed_ls/ParaSails/ConjGrad.c index 659f869cd3..0ef71b36fc 100644 --- a/src/distributed_ls/ParaSails/ConjGrad.c +++ b/src/distributed_ls/ParaSails/ConjGrad.c @@ -21,12 +21,11 @@ static HYPRE_Real InnerProd(HYPRE_Int n, HYPRE_Real *x, HYPRE_Real *y, MPI_Comm comm) { HYPRE_Real local_result, result; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int one = 1; local_result = hypre_ddot(&n, x, &one, y, &one); - hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_result, &result, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); return result; } diff --git a/src/distributed_ls/ParaSails/DiagScale.c b/src/distributed_ls/ParaSails/DiagScale.c index 24efb44258..a347cf6bd9 100644 --- a/src/distributed_ls/ParaSails/DiagScale.c +++ b/src/distributed_ls/ParaSails/DiagScale.c @@ -48,7 +48,6 @@ static void ExchangeDiagEntries(MPI_Comm comm, Matrix *mat, HYPRE_Int reqlen, { hypre_MPI_Request request; HYPRE_Int i, j, this_pe; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_shell_sort(reqlen, reqind); @@ -70,11 +69,11 @@ static void ExchangeDiagEntries(MPI_Comm comm, Matrix *mat, HYPRE_Int reqlen, /* Post receive for diagonal values */ hypre_MPI_Irecv(&diags[i], j-i, hypre_MPI_REAL, this_pe, DIAG_VALS_TAG, - hcomm, &requests[*num_requests]); + comm, &requests[*num_requests]); /* Request rows in reqind[i..j-1] */ hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, DIAG_INDS_TAG, - hcomm, &request); + comm, &request); hypre_MPI_Request_free(&request); (*num_requests)++; @@ -100,14 +99,13 @@ static void ExchangeDiagEntriesServer(MPI_Comm comm, Matrix *mat, HYPRE_Int *recvbuf; HYPRE_Real *sendbuf; HYPRE_Int i, j, source, count; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
/* recvbuf contains requested indices */ /* sendbuf contains corresponding diagonal entries */ for (i=0; ibeg_row - 1; /* imaginary end of previous block */ @@ -186,7 +184,7 @@ void LoadBalDonorSend(MPI_Comm comm, Matrix *mat, Numbering *numb, } hypre_MPI_Isend(donor_data[i].buffer, buflen, HYPRE_MPI_INT, donor_data[i].pe, - LOADBAL_REQ_TAG, hcomm, &request[i]); + LOADBAL_REQ_TAG, comm, &request[i]); } *local_beg_row = send_end_row + 1; @@ -206,17 +204,16 @@ void LoadBalRecipRecv(MPI_Comm comm, Numbering *numb, HYPRE_Int *buffer, *bufferp; HYPRE_Int beg_row, end_row; HYPRE_Int len; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); for (i=0; ibeg_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); mat->end_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, hcomm); - hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, comm); mat->num_recv = 0; mat->num_send = 0; @@ -237,8 +236,6 @@ HYPRE_Int MatrixRowPe(Matrix *mat, HYPRE_Int row) HYPRE_Int MatrixNnz(Matrix *mat) { HYPRE_Int num_local, i, total, alltotal; - MPI_Comm comm = mat->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_local = mat->end_row - mat->beg_row + 1; @@ -246,7 +243,7 @@ HYPRE_Int MatrixNnz(Matrix *mat) for (i=0; ilens[i]; - hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, mat->comm); return alltotal; } @@ -318,7 +315,6 @@ static void MatrixReadMaster(Matrix *mat, char *filename) hypre_MPI_Comm_size(mat->comm, &npes); hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); file = fopen(filename, "r"); 
hypre_assert(file != NULL); @@ -348,7 +344,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) { hypre_MPI_Wait(&request, &status); outbuf = offset; - hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, hcomm, &request); + hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, comm, &request); curr_proc++; } offset = ftell(file); @@ -425,7 +421,6 @@ static void MatrixReadMaster(Matrix *mat, char *filename) static void MatrixReadSlave(Matrix *mat, char *filename) { MPI_Comm comm = mat->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status status; HYPRE_Int mype; FILE *file; @@ -446,7 +441,7 @@ static void MatrixReadSlave(Matrix *mat, char *filename) hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, hcomm, &status); + hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, comm, &status); time0 = hypre_MPI_Wtime(); ret = fseek(file, offset, SEEK_SET); @@ -538,13 +533,12 @@ void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) hypre_MPI_Comm_size(mat->comm, &npes); hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(mat->comm); num_local = mat->end_row - mat->beg_row + 1; if (mype != 0) { - hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, hcomm, &status); + hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, mat->comm, &status); return; } @@ -583,7 +577,7 @@ void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) else hypre_fscanf(file, "%lf", &buffer[i]); - hypre_MPI_Send(buffer, num_local, hypre_MPI_REAL, pe, 0, hcomm); + hypre_MPI_Send(buffer, num_local, hypre_MPI_REAL, pe, 0, mat->comm); } hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -598,7 +592,6 @@ static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPR HYPRE_Int i, j, this_pe, mype; hypre_MPI_Request request; MPI_Comm comm = mat->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; 
hypre_MPI_Comm_rank(comm, &mype); @@ -625,17 +618,17 @@ static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPR } /* Request rows in reqind[i..j-1] */ - hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, hcomm, &request); + hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm, &request); hypre_MPI_Request_free(&request); /* Count of number of number of indices needed from this_pe */ outlist[this_pe] = j-i; hypre_MPI_Recv_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 555, - hcomm, &mat->recv_req[mat->num_recv]); + comm, &mat->recv_req[mat->num_recv]); hypre_MPI_Send_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 666, - hcomm, &mat->send_req2[mat->num_recv]); + comm, &mat->send_req2[mat->num_recv]); mat->num_recv++; } @@ -652,7 +645,6 @@ static void SetupSends(Matrix *mat, HYPRE_Int *inlist) hypre_MPI_Request *requests; hypre_MPI_Status *statuses; MPI_Comm comm = mat->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); @@ -679,15 +671,15 @@ static void SetupSends(Matrix *mat, HYPRE_Int *inlist) if (inlist[i] != 0) { /* Post receive for the actual indices */ - hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, hcomm, + hypre_MPI_Irecv(&mat->sendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm, &requests[mat->num_send]); /* Set up the send */ - hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, hcomm, + hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm, &mat->send_req[mat->num_send]); /* Set up the receive for the transpose */ - hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, hcomm, + hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, comm, &mat->recv_req2[mat->num_send]); mat->num_send++; @@ -719,7 +711,6 @@ void MatrixComplete(Matrix *mat) hypre_MPI_Comm_rank(mat->comm, &mype); 
hypre_MPI_Comm_size(mat->comm, &npes); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(mat->comm); mat->recv_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); mat->send_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); @@ -736,7 +727,7 @@ void MatrixComplete(Matrix *mat) SetupReceives(mat, mat->numb->num_ind - mat->numb->num_loc, &mat->numb->local_to_global[mat->numb->num_loc], outlist); - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, mat->comm); SetupSends(mat, inlist); diff --git a/src/distributed_ls/ParaSails/ParaSails.c b/src/distributed_ls/ParaSails/ParaSails.c index 808852e89f..e09236d417 100644 --- a/src/distributed_ls/ParaSails/ParaSails.c +++ b/src/distributed_ls/ParaSails/ParaSails.c @@ -63,11 +63,10 @@ HYPRE_Int FindNumReplies(MPI_Comm comm, HYPRE_Int *replies_list) hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); replies_list2 = hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); - hypre_MPI_Allreduce(replies_list, replies_list2, npes, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(replies_list, replies_list2, npes, HYPRE_MPI_INT, hypre_MPI_SUM, comm); num_replies = replies_list2[mype]; hypre_TFree(replies_list2,HYPRE_MEMORY_HOST); @@ -99,7 +98,6 @@ static void SendRequests(MPI_Comm comm, HYPRE_Int tag, Matrix *mat, HYPRE_Int re { hypre_MPI_Request request; HYPRE_Int i, j, this_pe; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_shell_sort(reqlen, reqind); @@ -121,7 +119,7 @@ static void SendRequests(MPI_Comm comm, HYPRE_Int tag, Matrix *mat, HYPRE_Int re /* Request rows in reqind[i..j-1] */ hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, tag, - hcomm, &request); + comm, &request); hypre_MPI_Request_free(&request); (*num_requests)++; @@ -150,10 +148,9 @@ static void SendRequests(MPI_Comm comm, 
HYPRE_Int tag, Matrix *mat, HYPRE_Int re static void ReceiveRequest(MPI_Comm comm, HYPRE_Int *source, HYPRE_Int tag, HYPRE_Int **buffer, HYPRE_Int *buflen, HYPRE_Int *count) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status status; - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, tag, hcomm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, tag, comm, &status); *source = status.hypre_MPI_SOURCE; hypre_MPI_Get_count(&status, HYPRE_MPI_INT, count); @@ -164,7 +161,7 @@ static void ReceiveRequest(MPI_Comm comm, HYPRE_Int *source, HYPRE_Int tag, HYPR *buffer = hypre_TAlloc(HYPRE_Int, *buflen , HYPRE_MEMORY_HOST); } - hypre_MPI_Recv(*buffer, *count, HYPRE_MPI_INT, *source, tag, hcomm, &status); + hypre_MPI_Recv(*buffer, *count, HYPRE_MPI_INT, *source, tag, comm, &status); } /*-------------------------------------------------------------------------- @@ -193,7 +190,6 @@ static void SendReplyPrunedRows(MPI_Comm comm, Numbering *numb, HYPRE_Int sendbacksize, j; HYPRE_Int len, *ind, *indbuf, *indbufp; HYPRE_Int temp; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Determine the size of the integer message we need to send back */ sendbacksize = count+1; /* length of header part */ @@ -230,7 +226,7 @@ static void SendReplyPrunedRows(MPI_Comm comm, Numbering *numb, } hypre_MPI_Isend(indbuf, indbufp-indbuf, HYPRE_MPI_INT, dest, ROW_REPI_TAG, - hcomm, request); + comm, request); } /*-------------------------------------------------------------------------- @@ -249,16 +245,15 @@ static void ReceiveReplyPrunedRows(MPI_Comm comm, Numbering *numb, hypre_MPI_Status status; HYPRE_Int source, count; HYPRE_Int len, *ind, num_rows, *row_nums, j; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Don't know the size of reply, so use probe and get count */ - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, hcomm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, comm, &status); source = status.hypre_MPI_SOURCE; 
hypre_MPI_Get_count(&status, HYPRE_MPI_INT, &count); /* Allocate space in stored rows data structure */ ind = PrunedRowsAlloc(pruned_rows, count); - hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, hcomm, &status); + hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, comm, &status); /* Parse the message */ num_rows = *ind++; /* number of rows */ @@ -309,7 +304,6 @@ static void SendReplyStoredRows(MPI_Comm comm, Numbering *numb, HYPRE_Int len, *ind, *indbuf, *indbufp; HYPRE_Real *val, *valbuf, *valbufp; HYPRE_Int temp; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Determine the size of the integer message we need to send back */ sendbacksize = count+1; /* length of header part */ @@ -351,12 +345,12 @@ static void SendReplyStoredRows(MPI_Comm comm, Numbering *numb, } hypre_MPI_Isend(indbuf, indbufp-indbuf, HYPRE_MPI_INT, dest, ROW_REPI_TAG, - hcomm, request); + comm, request); hypre_MPI_Request_free(request); hypre_MPI_Isend(valbuf, valbufp-valbuf, hypre_MPI_REAL, dest, ROW_REPV_TAG, - hcomm, request); + comm, request); } /*-------------------------------------------------------------------------- @@ -374,18 +368,17 @@ static void ReceiveReplyStoredRows(MPI_Comm comm, Numbering *numb, HYPRE_Int source, count; HYPRE_Int len, *ind, num_rows, *row_nums, j; HYPRE_Real *val; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Don't know the size of reply, so use probe and get count */ - hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, hcomm, &status); + hypre_MPI_Probe(hypre_MPI_ANY_SOURCE, ROW_REPI_TAG, comm, &status); source = status.hypre_MPI_SOURCE; hypre_MPI_Get_count(&status, HYPRE_MPI_INT, &count); /* Allocate space in stored rows data structure */ ind = StoredRowsAllocInd(stored_rows, count); - hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, hcomm, &status); + hypre_MPI_Recv(ind, count, HYPRE_MPI_INT, source, ROW_REPI_TAG, comm, &status); val = StoredRowsAllocVal(stored_rows, count); - 
hypre_MPI_Recv(val, count, hypre_MPI_REAL, source, ROW_REPV_TAG, hcomm, &status); + hypre_MPI_Recv(val, count, hypre_MPI_REAL, source, ROW_REPV_TAG, comm, &status); /* Parse the message */ num_rows = *ind++; /* number of rows */ @@ -1414,7 +1407,6 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, HYPRE_Real *val; HYPRE_Real localsum = 0.0, sum; HYPRE_Real temp; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Buffer for storing the values in each row when computing the i-th smallest element - buffer will grow if necessary */ @@ -1450,7 +1442,7 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, } /* Find the average across all processors */ - hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); hypre_MPI_Comm_size(comm, &npes); hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -1469,7 +1461,6 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, HYPRE_Real *val; HYPRE_Real localsum = 0.0, sum; HYPRE_Real temp = 1.0; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Buffer for storing the values in each row when computing the i-th smallest element - buffer will grow if necessary */ @@ -1507,7 +1498,7 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, } /* Find the average across all processors */ - hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); hypre_MPI_Comm_size(comm, &npes); hypre_TFree(buffer,HYPRE_MEMORY_HOST); @@ -1652,13 +1643,12 @@ ParaSails *ParaSailsCreate(MPI_Comm comm, HYPRE_Int beg_row, HYPRE_Int end_row, ps->end_row = end_row; hypre_MPI_Comm_size(comm, &npes); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); ps->beg_rows = hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); ps->end_rows = 
hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, ps->beg_rows, 1, HYPRE_MPI_INT, hcomm); - hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, ps->end_rows, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, ps->beg_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, ps->end_rows, 1, HYPRE_MPI_INT, comm); return ps; } @@ -1792,7 +1782,6 @@ HYPRE_Int ParaSailsSetupValues(ParaSails *ps, Matrix *A, HYPRE_Real filter) HYPRE_Int i; HYPRE_Real time0, time1; MPI_Comm comm = ps->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int error = 0, error_sum; time0 = hypre_MPI_Wtime(); @@ -1857,7 +1846,7 @@ HYPRE_Int ParaSailsSetupValues(ParaSails *ps, Matrix *A, HYPRE_Real filter) LoadBalReturn(load_bal, ps->comm, ps->M); /* check if there was an error in computing the approximate inverse */ - hypre_MPI_Allreduce(&error, &error_sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&error, &error_sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (error_sum != 0) { hypre_printf("Hypre-ParaSails detected a problem. 
The input matrix\n"); @@ -1990,7 +1979,6 @@ HYPRE_Real ParaSailsStatsPattern(ParaSails *ps, Matrix *A) HYPRE_Int n, nnzm, nnza; MPI_Comm comm = ps->comm; HYPRE_Real max_pattern_time, max_cost, ave_cost; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); @@ -2004,9 +1992,9 @@ HYPRE_Real ParaSailsStatsPattern(ParaSails *ps, Matrix *A) } hypre_MPI_Allreduce(&ps->setup_pattern_time, &max_pattern_time, - 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); - hypre_MPI_Allreduce(&ps->cost, &max_cost, 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); - hypre_MPI_Allreduce(&ps->cost, &ave_cost, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm); + 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&ps->cost, &max_cost, 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&ps->cost, &ave_cost, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); ave_cost = ave_cost / (HYPRE_Real) npes; if (mype) @@ -2043,7 +2031,6 @@ void ParaSailsStatsValues(ParaSails *ps, Matrix *A) hypre_MPI_Comm_rank(comm, &mype); hypre_MPI_Comm_size(comm, &npes); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); nnzm = MatrixNnz(ps->M); nnza = MatrixNnz(A); @@ -2054,13 +2041,13 @@ void ParaSailsStatsValues(ParaSails *ps, Matrix *A) } hypre_MPI_Allreduce(&ps->setup_values_time, &max_values_time, - 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + 1, hypre_MPI_REAL, hypre_MPI_MAX, comm); if (!mype) setup_times = hypre_TAlloc(HYPRE_Real, npes , HYPRE_MEMORY_HOST); temp = ps->setup_pattern_time + ps->setup_values_time; - hypre_MPI_Gather(&temp, 1, hypre_MPI_REAL, setup_times, 1, hypre_MPI_REAL, 0, hcomm); + hypre_MPI_Gather(&temp, 1, hypre_MPI_REAL, setup_times, 1, hypre_MPI_REAL, 0, comm); if (mype) return; diff --git a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c index deeaf10dce..d300e3ba8c 100644 --- a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c 
+++ b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c @@ -369,9 +369,8 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS rowdist = DataDistTypeRowdist( hypre_DistributedMatrixPilutSolverDataDist( solver ) ); - MPI_Comm comm = hypre_DistributedMatrixPilutSolverComm(solver); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allgather( &start, 1, HYPRE_MPI_INT, rowdist, 1, HYPRE_MPI_INT, hcomm ); + hypre_MPI_Allgather( &start, 1, HYPRE_MPI_INT, rowdist, 1, HYPRE_MPI_INT, + hypre_DistributedMatrixPilutSolverComm(solver) ); rowdist[ nprocs ] = n; diff --git a/src/distributed_ls/pilut/comm.c b/src/distributed_ls/pilut/comm.c index 136c03f70b..cca3302ccb 100644 --- a/src/distributed_ls/pilut/comm.c +++ b/src/distributed_ls/pilut/comm.c @@ -34,8 +34,7 @@ HYPRE_Int hypre_GlobalSEMax(HYPRE_Int value, MPI_Comm hypre_MPI_Context ) { HYPRE_Int max; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm ); + hypre_MPI_Allreduce( &value, &max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hypre_MPI_Context ); return max; } @@ -47,8 +46,7 @@ HYPRE_Int hypre_GlobalSEMax(HYPRE_Int value, MPI_Comm hypre_MPI_Context ) HYPRE_Int hypre_GlobalSEMin(HYPRE_Int value, MPI_Comm hypre_MPI_Context) { HYPRE_Int min; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &min, 1, HYPRE_MPI_INT, hypre_MPI_MIN, hcomm ); + hypre_MPI_Allreduce( &value, &min, 1, HYPRE_MPI_INT, hypre_MPI_MIN, hypre_MPI_Context ); return min; } @@ -59,8 +57,8 @@ HYPRE_Int hypre_GlobalSEMin(HYPRE_Int value, MPI_Comm hypre_MPI_Context) HYPRE_Int hypre_GlobalSESum(HYPRE_Int value, MPI_Comm hypre_MPI_Context) { HYPRE_Int sum; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm ); + + hypre_MPI_Allreduce( &value, &sum, 1, HYPRE_MPI_INT, 
hypre_MPI_SUM, hypre_MPI_Context ); return sum; } @@ -71,8 +69,7 @@ HYPRE_Int hypre_GlobalSESum(HYPRE_Int value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSEMaxDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real max; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &max, 1, hypre_MPI_REAL, hypre_MPI_MAX, hcomm ); + hypre_MPI_Allreduce( &value, &max, 1, hypre_MPI_REAL, hypre_MPI_MAX, hypre_MPI_Context ); return max; } @@ -83,8 +80,7 @@ HYPRE_Real hypre_GlobalSEMaxDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSEMinDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real min; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &min, 1, hypre_MPI_REAL, hypre_MPI_MIN, hcomm ); + hypre_MPI_Allreduce( &value, &min, 1, hypre_MPI_REAL, hypre_MPI_MIN, hypre_MPI_Context ); return min; } @@ -95,8 +91,7 @@ HYPRE_Real hypre_GlobalSEMinDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) HYPRE_Real hypre_GlobalSESumDouble(HYPRE_Real value, MPI_Comm hypre_MPI_Context) { HYPRE_Real sum; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_Context); - hypre_MPI_Allreduce( &value, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hcomm ); + hypre_MPI_Allreduce( &value, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, hypre_MPI_Context ); return sum; } diff --git a/src/distributed_ls/pilut/parilut.c b/src/distributed_ls/pilut/parilut.c index dbb9d4b49f..81ae3959db 100644 --- a/src/distributed_ls/pilut/parilut.c +++ b/src/distributed_ls/pilut/parilut.c @@ -196,7 +196,6 @@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * HYPRE_Int *rrowind, *rnbrptr, *rnbrind, *srowind, *snbrind, *snbrptr; hypre_MPI_Status Status ; hypre_MPI_Request *index_requests; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(pilut_comm); #ifdef HYPRE_DEBUG hypre_PrintLine("hypre_ComputeCommInfo", globals); @@ -277,7 +276,7 @@ 
void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * pilu_send[rnbrind[i]] = rnbrptr[i+1]-rnbrptr[i]; /* The # of rows I need */ hypre_MPI_Alltoall( pilu_send, 1, HYPRE_MPI_INT, - pilu_recv, 1, HYPRE_MPI_INT, hcomm ); + pilu_recv, 1, HYPRE_MPI_INT, pilut_comm ); nsend = 0; snnbr = 0; @@ -309,12 +308,12 @@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * /* issue asynchronous recieves */ for (i=0; i 0 ) { /* Something to recv */ hypre_MPI_Irecv( raddr[i]+rdone[i], rnum[i], hypre_MPI_REAL, - rpes[i], TAG, hcomm, &receive_requests[i] ); + rpes[i], TAG, pilut_comm, &receive_requests[i] ); rdone[i] += rnum[i] ; } @@ -133,7 +132,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR gatherbuf[l] = lx[sindex[j]]; hypre_MPI_Send( gatherbuf, l, hypre_MPI_REAL, - spes[i], TAG, hcomm ); + spes[i], TAG, pilut_comm ); auxsptr[i] = j; } @@ -206,7 +205,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR for (i=0; i 0 ) { /* Something to recv */ hypre_MPI_Irecv( raddr[i]+rdone[i], rnum[i], hypre_MPI_REAL, - rpes[i], TAG, hcomm, &receive_requests[ i ] ); + rpes[i], TAG, pilut_comm, &receive_requests[ i ] ); rdone[i] += rnum[i] ; } @@ -219,7 +218,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR gatherbuf[l] = ux[sindex[j]]; hypre_MPI_Send( gatherbuf, l, hypre_MPI_REAL, - spes[i], TAG, hcomm ); + spes[i], TAG, pilut_comm ); auxsptr[i] = j; } @@ -346,7 +345,6 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, hypre_MPI_Status Status; hypre_MPI_Request *receive_requests; hypre_MPI_Datatype MyColType_rnbr; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(pilut_comm); /* data common to L and U */ lnrows = ddist->ddist_lnrows; @@ -409,7 +407,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, TriSolveComm->rnbrpes = rnbrpes ; hypre_MPI_Alltoall( petotal, 1, 
HYPRE_MPI_INT, - lu_recv, 1, HYPRE_MPI_INT, hcomm ); + lu_recv, 1, HYPRE_MPI_INT, pilut_comm ); /* Determine to how many processors you will be sending data */ snbrpes = 0; @@ -461,7 +459,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, /* Start asynchronous receives */ for (i=0; i 0) { hypre_MPI_Send( rind+k, petotal[i], HYPRE_MPI_INT , - i, TAG_SetUp_rind, hcomm ); + i, TAG_SetUp_rind, pilut_comm ); /* recv info for hypre_LDUSolve */ raddr[rnbrpes] = x + k + lnrows; @@ -520,7 +518,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, for (i=0; i 0) { hypre_MPI_Irecv( rind+k, petotal[i], HYPRE_MPI_INT, - i, TAG_SetUp_reord, hcomm, &receive_requests[i] ); + i, TAG_SetUp_reord, pilut_comm, &receive_requests[i] ); k += petotal[i]; } } @@ -528,7 +526,7 @@ void hypre_SetUpFactor(DataDistType *ddist, FactorMatType *ldu, HYPRE_Int maxnz, /* Write them back to the processors that send them to me */ for (i=0; irnum+i, 1, MyColType_rnbr, - rpes[i], TAG_SetUp_rnum, hcomm, &Status ); + rpes[i], TAG_SetUp_rnum, pilut_comm, &Status ); } hypre_MPI_Type_free( &MyColType_rnbr ); diff --git a/src/parcsr_block_mv/par_csr_block_comm.c b/src/parcsr_block_mv/par_csr_block_comm.c index 27954dc253..af5c08f791 100644 --- a/src/parcsr_block_mv/par_csr_block_comm.c +++ b/src/parcsr_block_mv/par_csr_block_comm.c @@ -46,7 +46,6 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); j = 0; @@ -61,7 +60,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, vec_len = (hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start) * bnnz; hypre_MPI_Irecv(&d_recv_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_sends; i++) { @@ -70,7 +69,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, 
(hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start) * bnnz; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Isend(&d_send_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); } break; } @@ -84,7 +83,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, (hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start) * bnnz; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); hypre_MPI_Irecv(&d_recv_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -93,7 +92,7 @@ hypre_ParCSRBlockCommHandleCreate(HYPRE_Int job, vec_len = (hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start) * bnnz; hypre_MPI_Isend(&d_send_data[vec_start * bnnz], vec_len, - HYPRE_MPI_COMPLEX, ip, 0, hcomm, &requests[j++]); + HYPRE_MPI_COMPLEX, ip, 0, comm, &requests[j++]); } break; } diff --git a/src/parcsr_block_mv/par_csr_block_interp.c b/src/parcsr_block_mv/par_csr_block_interp.c index 516be92db7..c06b9c05b5 100644 --- a/src/parcsr_block_mv/par_csr_block_interp.c +++ b/src/parcsr_block_mv/par_csr_block_interp.c @@ -141,7 +141,6 @@ hypre_BoomerAMGBuildBlockInterp( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* num_threads = hypre_NumThreads(); */ num_threads = 1; @@ -152,7 +151,7 @@ hypre_BoomerAMGBuildBlockInterp( hypre_ParCSRBlockMatrix *A, my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1714,7 +1713,6 @@ 
hypre_BoomerAMGBuildBlockInterpDiag( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); if (num_functions > 1) @@ -1724,7 +1722,7 @@ hypre_BoomerAMGBuildBlockInterpDiag( hypre_ParCSRBlockMatrix *A, my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2842,7 +2840,6 @@ hypre_BoomerAMGBuildBlockInterpRV( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); if (num_functions > 1) @@ -2852,7 +2849,7 @@ hypre_BoomerAMGBuildBlockInterpRV( hypre_ParCSRBlockMatrix *A, my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -3901,12 +3898,11 @@ hypre_BoomerAMGBuildBlockInterpRV2( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, 
HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -4917,7 +4913,6 @@ hypre_BoomerAMGBuildBlockDirInterp( hypre_ParCSRBlockMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); if (num_functions > 1) @@ -4927,7 +4922,7 @@ hypre_BoomerAMGBuildBlockDirInterp( hypre_ParCSRBlockMatrix *A, my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_block_mv/par_csr_block_matrix.c b/src/parcsr_block_mv/par_csr_block_matrix.c index 31b0f280f3..8e2820f7d9 100644 --- a/src/parcsr_block_mv/par_csr_block_matrix.c +++ b/src/parcsr_block_mv/par_csr_block_matrix.c @@ -163,7 +163,6 @@ HYPRE_Int hypre_ParCSRBlockMatrixSetNumNonzeros( hypre_ParCSRBlockMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); @@ -175,7 +174,7 @@ hypre_ParCSRBlockMatrixSetNumNonzeros( hypre_ParCSRBlockMatrix *matrix) local_num_nonzeros = (HYPRE_BigInt)(diag_i[local_num_rows] + offd_i[local_num_rows]); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); hypre_ParCSRBlockMatrixNumNonzeros(matrix) = total_num_nonzeros; return ierr; @@ -189,7 +188,6 @@ HYPRE_Int 
hypre_ParCSRBlockMatrixSetDNumNonzeros( hypre_ParCSRBlockMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); @@ -201,7 +199,7 @@ hypre_ParCSRBlockMatrixSetDNumNonzeros( hypre_ParCSRBlockMatrix *matrix) local_num_nonzeros = (HYPRE_Real) diag_i[local_num_rows] + (HYPRE_Real) offd_i[local_num_rows]; hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRBlockMatrixDNumNonzeros(matrix) = total_num_nonzeros; return ierr; diff --git a/src/parcsr_block_mv/par_csr_block_rap_communication.c b/src/parcsr_block_mv/par_csr_block_rap_communication.c index 317f249003..5c6ef6886c 100644 --- a/src/parcsr_block_mv/par_csr_block_rap_communication.c +++ b/src/parcsr_block_mv/par_csr_block_rap_communication.c @@ -57,7 +57,6 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------------------------------------------------------------------- * determine num_recvs, recv_procs and recv_vec_starts for RT @@ -121,11 +120,11 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, j = 0; for (i = 0; i < num_sends_A; i++) - hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, + hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, &requests[j++]); for (i = 0; i < num_recvs_A; i++) - hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, + hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, comm, &requests[j++]); hypre_MPI_Waitall(num_requests, requests, status); @@ -169,7 
+168,7 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, vec_start = send_map_starts_RT[i]; vec_len = send_map_starts_RT[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs_RT[i], 0, hcomm, &requests[j++]); + send_procs_RT[i], 0, comm, &requests[j++]); } for (i = 0; i < num_recvs_RT; i++) @@ -177,7 +176,7 @@ hypre_GetCommPkgBlockRTFromCommPkgBlockA( hypre_ParCSRBlockMatrix *RT, vec_start = recv_vec_starts_RT[i]; vec_len = recv_vec_starts_RT[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd_RT[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs_RT[i], 0, hcomm, &requests[j++]); + recv_procs_RT[i], 0, comm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); diff --git a/src/parcsr_ls/amg_hybrid.c b/src/parcsr_ls/amg_hybrid.c index 28ea3a5fcd..5c509cf37e 100644 --- a/src/parcsr_ls/amg_hybrid.c +++ b/src/parcsr_ls/amg_hybrid.c @@ -1581,9 +1581,8 @@ hypre_AMGHybridGetSetupSolveTime( void *AMGhybrid_vdata, t[3] = AMGhybrid_data->solve_time2; MPI_Comm comm = AMGhybrid_data->comm; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allreduce(t, time, 4, hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(t, time, 4, hypre_MPI_REAL, hypre_MPI_MAX, comm); return hypre_error_flag; } diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index c19ec09c13..89cce32481 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -2938,7 +2938,6 @@ hypre_AMSSetup(void *solver, ams_data -> A = A; MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Modifications for problems with zero-conductivity regions */ if (ams_data -> interior_nodes) @@ -3124,7 +3123,7 @@ hypre_AMSSetup(void *solver, } lfactor *= 1e-10; /* scaling factor: max|A_ij|*1e-10 */ - hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); } 
hypre_ParCSRMatrixAdd(factor, A, 1.0, B, &C); @@ -4382,15 +4381,14 @@ hypre_AMSFEISetup(void *solver, HYPRE_Real *x_data, *y_data, *z_data; MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt vert_part[2], num_global_vert; HYPRE_BigInt vert_start, vert_end; HYPRE_BigInt big_local_vert = (HYPRE_BigInt) num_local_vert; /* Find the processor partitioning of the vertices */ - hypre_MPI_Scan(&big_local_vert, &vert_part[1], 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_local_vert, &vert_part[1], 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); vert_part[0] = vert_part[1] - big_local_vert; - hypre_MPI_Allreduce(&big_local_vert, &num_global_vert, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_local_vert, &num_global_vert, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* Construct hypre parallel vectors for the vertex coordinates */ x_coord = hypre_ParVectorCreate(comm, num_global_vert, vert_part); diff --git a/src/parcsr_ls/gen_redcs_mat.c b/src/parcsr_ls/gen_redcs_mat.c index b7bb528385..c85a4020b4 100644 --- a/src/parcsr_ls/gen_redcs_mat.c +++ b/src/parcsr_ls/gen_redcs_mat.c @@ -91,7 +91,6 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, HYPRE_BigInt row_starts[2]; hypre_GenerateSubComm(comm, num_rows, &new_comm); - hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); /*hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; @@ -125,11 +124,11 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, if (redundant) { - hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); + hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); } else { - hypre_MPI_Gather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, hnew_comm); + hypre_MPI_Gather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm); } /* alloc space in seq data structure only for participating procs*/ @@ 
-211,11 +210,11 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, if (redundant) { hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, - displs, HYPRE_MPI_INT, hnew_comm ); + displs, HYPRE_MPI_INT, new_comm ); if (num_functions > 1) { hypre_MPI_Allgatherv ( hypre_IntArrayData(dof_func_array[level]), num_rows, HYPRE_MPI_INT, - seq_dof_func, info, displs, HYPRE_MPI_INT, hnew_comm ); + seq_dof_func, info, displs, HYPRE_MPI_INT, new_comm ); HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func); } } @@ -223,14 +222,14 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, { if (A_seq_i) hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, - displs, HYPRE_MPI_INT, 0, hnew_comm ); + displs, HYPRE_MPI_INT, 0, new_comm ); else hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, A_seq_i, info, - displs, HYPRE_MPI_INT, 0, hnew_comm ); + displs, HYPRE_MPI_INT, 0, new_comm ); if (num_functions > 1) { hypre_MPI_Gatherv ( hypre_IntArrayData(dof_func_array[level]), num_rows, HYPRE_MPI_INT, - seq_dof_func, info, displs, HYPRE_MPI_INT, 0, hnew_comm ); + seq_dof_func, info, displs, HYPRE_MPI_INT, 0, new_comm ); if (my_id == 0) { HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func); } } } @@ -268,21 +267,21 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, { hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, - HYPRE_MPI_INT, hnew_comm ); + HYPRE_MPI_INT, new_comm ); hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL, A_seq_data, info, displs2, - HYPRE_MPI_REAL, hnew_comm ); + HYPRE_MPI_REAL, new_comm ); } else { hypre_MPI_Gatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, - HYPRE_MPI_INT, 0, hnew_comm ); + HYPRE_MPI_INT, 0, new_comm ); hypre_MPI_Gatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL, A_seq_data, info, displs2, - HYPRE_MPI_REAL, 0, hnew_comm ); + HYPRE_MPI_REAL, 0, new_comm ); } hypre_TFree(info, HYPRE_MEMORY_HOST); @@ -359,7 +358,6 @@ 
hypre_seqAMGCycle( hypre_ParAMGData *amg_data, /* Acquire seq data */ MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data); - hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data); hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data); hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data); @@ -401,11 +399,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, local_info = nf; if (redundant) { - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); + hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); } else { - hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, hnew_comm); + hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm); } if (redundant || my_id == 0) @@ -427,11 +425,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, if (redundant) hypre_MPI_Allgatherv ( f_data, nf, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, hnew_comm ); + HYPRE_MPI_REAL, new_comm ); else hypre_MPI_Gatherv ( f_data, nf, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, 0, hnew_comm ); + HYPRE_MPI_REAL, 0, new_comm ); if (redundant || my_id == 0) { @@ -444,14 +442,14 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, { hypre_MPI_Allgatherv ( u_data, n, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, hnew_comm ); + HYPRE_MPI_REAL, new_comm ); hypre_TFree(displs, HYPRE_MEMORY_HOST); hypre_TFree(info, HYPRE_MEMORY_HOST); } else hypre_MPI_Gatherv ( u_data, n, HYPRE_MPI_REAL, recv_buf, info, displs, - HYPRE_MPI_REAL, 0, hnew_comm ); + HYPRE_MPI_REAL, 0, new_comm ); /* clean up */ if (redundant || my_id == 0) @@ -481,11 +479,11 @@ hypre_seqAMGCycle( hypre_ParAMGData *amg_data, } hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL, - u_data, n, HYPRE_MPI_REAL, 0, hnew_comm ); + u_data, n, HYPRE_MPI_REAL, 0, new_comm ); /*if (my_id == 0) 
local_data = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse)); hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL, - f_data, n, HYPRE_MPI_REAL, 0, hnew_comm );*/ + f_data, n, HYPRE_MPI_REAL, 0, new_comm );*/ if (my_id == 0) { hypre_TFree(displs, HYPRE_MEMORY_HOST); } hypre_TFree(info, HYPRE_MEMORY_HOST); } @@ -512,7 +510,6 @@ hypre_GenerateSubComm(MPI_Comm comm, HYPRE_Int *list_len; hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (participate) { @@ -523,7 +520,7 @@ hypre_GenerateSubComm(MPI_Comm comm, my_info = 0; } - hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (new_num_procs == 0) { @@ -541,7 +538,7 @@ hypre_GenerateSubComm(MPI_Comm comm, { my_info = my_id; } - hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); } else { @@ -563,7 +560,7 @@ hypre_GenerateSubComm(MPI_Comm comm, hypre_MPI_Op_create((hypre_MPI_User_function *)hypre_merge_lists, 0, &hypre_MPI_MERGE); - hypre_MPI_Allreduce(info, ranks, list_len[0], HYPRE_MPI_INT, hypre_MPI_MERGE, hcomm); + hypre_MPI_Allreduce(info, ranks, list_len[0], HYPRE_MPI_INT, hypre_MPI_MERGE, comm); hypre_MPI_Op_free (&hypre_MPI_MERGE); diff --git a/src/parcsr_ls/par_2s_interp.c b/src/parcsr_ls/par_2s_interp.c index 66f4c4a735..05594b709e 100644 --- a/src/parcsr_ls/par_2s_interp.c +++ b/src/parcsr_ls/par_2s_interp.c @@ -130,12 +130,11 @@ hypre_BoomerAMGBuildModPartialExtInterpHost( hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } if (my_id == (num_procs - 1)) { total_old_global_cpts = num_old_cpts_global[1]; } - 
hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; n_old_Cpts = num_old_cpts_global[1] - num_old_cpts_global[0]; @@ -776,12 +775,11 @@ hypre_BoomerAMGBuildModPartialExtPEInterpHost( hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } if (my_id == (num_procs - 1)) { total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; n_old_Cpts = num_old_cpts_global[1] - num_old_cpts_global[0]; diff --git a/src/parcsr_ls/par_amg_setup.c b/src/parcsr_ls/par_amg_setup.c index 93167a4944..4b8525abbf 100644 --- a/src/parcsr_ls/par_amg_setup.c +++ b/src/parcsr_ls/par_amg_setup.c @@ -237,7 +237,6 @@ hypre_BoomerAMGSetup( void *amg_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*A_new = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixDiag(A), 1.e-16); hypre_CSRMatrixPrint(A_new, "Atestnew"); */ @@ -1642,7 +1641,7 @@ hypre_BoomerAMGSetup( void *amg_vdata, { coarse_size = coarse_pnts_global[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&coarse_size, 1, 
HYPRE_MPI_BIG_INT, num_procs - 1, comm); /* if no coarse-grid, stop coarsening, and set the * coarsest solve to be a single sweep of default smoother or smoother set by user */ @@ -2095,7 +2094,7 @@ hypre_BoomerAMGSetup( void *amg_vdata, { coarse_size = coarse_pnts_global[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else /* no aggressive coarsening */ { diff --git a/src/parcsr_ls/par_amgdd_helpers.c b/src/parcsr_ls/par_amgdd_helpers.c index 4037560650..26645f35c9 100644 --- a/src/parcsr_ls/par_amgdd_helpers.c +++ b/src/parcsr_ls/par_amgdd_helpers.c @@ -317,9 +317,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level], HYPRE_MEMORY_HOST); for (i = 0; i < hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; i++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 6, hcomm, + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 6, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } HYPRE_Int *send_sizes = hypre_CTAlloc(HYPRE_Int, @@ -333,9 +332,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, send_sizes[i]++; } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 6, hcomm, + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 6, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } @@ -358,9 +356,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, for (i = 0; i < hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; i++) { recv_buffers[i] = hypre_CTAlloc(HYPRE_Int, recv_sizes[i], HYPRE_MEMORY_HOST); - hypre_MPI_Comm hcomm = 
hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(recv_buffers[i], recv_sizes[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 7, hcomm, + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 7, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } // Setup and send the send buffers @@ -378,9 +375,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(send_buffers[i], send_sizes[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 7, hcomm, + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 7, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } @@ -452,9 +448,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, request_cnt = 0; for (i = 0; i < csr_num_sends; i++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[i]), 1, HYPRE_MPI_INT, hypre_ParCSRCommPkgSendProc(commPkg, i), 4, - hcomm, &(requests[request_cnt++])); + hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } for (i = 0; i < csr_num_recvs; i++) { @@ -466,9 +461,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, send_sizes[i] += 2 + 2 * num_req_dofs[i][j]; } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[i]), 1, HYPRE_MPI_INT, hypre_ParCSRCommPkgRecvProc(commPkg, i), 4, - hcomm, &(requests[request_cnt++])); + hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } // Wait on the recv sizes, then free and re-allocate the requests and statuses @@ -484,9 +478,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, for (i = 0; i < csr_num_sends; i++) { recv_buffers[i] = hypre_CTAlloc(HYPRE_Int, recv_sizes[i], HYPRE_MEMORY_HOST); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(recv_buffers[i], recv_sizes[i], HYPRE_MPI_INT, 
hypre_ParCSRCommPkgSendProc(commPkg, - i), 5, hcomm, &(requests[request_cnt++])); + i), 5, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } // Setup the send buffer and post the sends @@ -509,9 +502,8 @@ hypre_BoomerAMGDD_FindNeighborProcessors( hypre_ParCSRMatrix *A, } } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(send_buffers[i], send_sizes[i], HYPRE_MPI_INT, hypre_ParCSRCommPkgRecvProc(commPkg, - i), 5, hcomm, &(requests[request_cnt++])); + i), 5, hypre_MPI_COMM_WORLD, &(requests[request_cnt++])); } // Free the req dof info for (i = 0; i < csr_num_recvs; i++) @@ -2634,16 +2626,14 @@ hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data for (proc = 0; proc < num_recv_procs; proc++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Irecv(&(recv_sizes[2 * proc]), 2, HYPRE_MPI_INT, recv_procs[proc], 1, - hcomm, + hypre_MPI_COMM_WORLD, &(size_requests[request_cnt++])); } for (proc = 0; proc < num_send_procs; proc++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(&(send_sizes[2 * proc]), 2, HYPRE_MPI_INT, send_procs[proc], 1, - hcomm, + hypre_MPI_COMM_WORLD, &(size_requests[request_cnt++])); } @@ -2827,11 +2817,10 @@ hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data for (proc = 0; proc < num_send_procs; proc++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); hypre_MPI_Isend(int_send_buffers[proc], send_sizes[2 * proc], HYPRE_MPI_INT, send_procs[proc], 2, - hcomm, &(buf_requests[request_cnt++])); + hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); hypre_MPI_Isend(complex_send_buffers[proc], send_sizes[2 * proc + 1], HYPRE_MPI_COMPLEX, - send_procs[proc], 3, hcomm, &(buf_requests[request_cnt++])); + send_procs[proc], 3, hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); } // Wait on buffer sizes @@ -2840,14 +2829,13 @@ 
hypre_BoomerAMGDD_CommunicateRemainingMatrixInfo( hypre_ParAMGDDData* amgdd_data // Allocate and post recvs for (proc = 0; proc < num_recv_procs; proc++) { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD); int_recv_buffers[proc] = hypre_CTAlloc(HYPRE_Int, recv_sizes[2 * proc], HYPRE_MEMORY_HOST); complex_recv_buffers[proc] = hypre_CTAlloc(HYPRE_Complex, recv_sizes[2 * proc + 1], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(int_recv_buffers[proc], recv_sizes[2 * proc], HYPRE_MPI_INT, recv_procs[proc], 2, - hcomm, &(buf_requests[request_cnt++])); + hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); hypre_MPI_Irecv(complex_recv_buffers[proc], recv_sizes[2 * proc + 1], HYPRE_MPI_COMPLEX, - recv_procs[proc], 3, hcomm, &(buf_requests[request_cnt++])); + recv_procs[proc], 3, hypre_MPI_COMM_WORLD, &(buf_requests[request_cnt++])); } // Wait on buffers diff --git a/src/parcsr_ls/par_amgdd_setup.c b/src/parcsr_ls/par_amgdd_setup.c index 8a497f0bcd..a844073843 100644 --- a/src/parcsr_ls/par_amgdd_setup.c +++ b/src/parcsr_ls/par_amgdd_setup.c @@ -168,7 +168,6 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (level = num_levels - 1; level >= amgdd_start_level; level--) { comm = hypre_ParCSRMatrixComm(A_array[level]); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_send_procs = hypre_AMGDDCommPkgNumSendProcs(compGridCommPkg)[level]; num_recv_procs = hypre_AMGDDCommPkgNumRecvProcs(compGridCommPkg)[level]; num_requests = num_send_procs + num_recv_procs; @@ -193,7 +192,7 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (i = 0; i < num_recv_procs; i++) { hypre_MPI_Irecv(&(recv_buffer_size[level][i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 0, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 0, comm, &(requests[request_counter++])); } } @@ -215,7 +214,7 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, for (i = 0; i < num_send_procs; i++) { 
hypre_MPI_Isend(&(send_buffer_size[level][i]), 1, HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 0, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 0, comm, &(requests[request_counter++])); } } @@ -228,13 +227,13 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, { recv_buffer[i] = hypre_CTAlloc(HYPRE_Int, recv_buffer_size[level][i], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(recv_buffer[i], recv_buffer_size[level][i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 1, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 1, comm, &(requests[request_counter++])); } for (i = 0; i < num_send_procs; i++) { hypre_MPI_Isend(send_buffer[i], send_buffer_size[level][i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 1, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 1, comm, &(requests[request_counter++])); } // Wait for buffers to be received @@ -271,14 +270,14 @@ hypre_BoomerAMGDDSetup( void *amgdd_vdata, { send_flag_buffer[i] = hypre_CTAlloc(HYPRE_Int, send_flag_buffer_size[i], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(send_flag_buffer[i], send_flag_buffer_size[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 2, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 2, comm, &(requests[request_counter++])); } // send the recv_map_send_buffer's for (i = 0; i < num_recv_procs; i++) { hypre_MPI_Isend(recv_map_send_buffer[i], recv_map_send_buffer_size[i], HYPRE_MPI_INT, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 2, hcomm, &(requests[request_counter++])); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 2, comm, &(requests[request_counter++])); } // wait for maps to be received diff --git a/src/parcsr_ls/par_amgdd_solve.c b/src/parcsr_ls/par_amgdd_solve.c index 
edb986cca6..cb5c5d092a 100644 --- a/src/parcsr_ls/par_amgdd_solve.c +++ b/src/parcsr_ls/par_amgdd_solve.c @@ -415,7 +415,6 @@ hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) { // Get some communication info comm = hypre_ParCSRMatrixComm(A_array[level]); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs); if (num_procs > 1) @@ -438,7 +437,7 @@ hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) recv_buffer_size = hypre_AMGDDCommPkgRecvBufferSize(compGridCommPkg)[level][i]; recv_buffers[i] = hypre_CTAlloc(HYPRE_Complex, recv_buffer_size, HYPRE_MEMORY_HOST); hypre_MPI_Irecv(recv_buffers[i], recv_buffer_size, HYPRE_MPI_COMPLEX, - hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 3, hcomm, &requests[request_counter++]); + hypre_AMGDDCommPkgRecvProcs(compGridCommPkg)[level][i], 3, comm, &requests[request_counter++]); } for (i = 0; i < num_sends; i++) @@ -446,7 +445,7 @@ hypre_BoomerAMGDD_ResidualCommunication( hypre_ParAMGDDData *amgdd_data ) send_buffer_size = hypre_AMGDDCommPkgSendBufferSize(compGridCommPkg)[level][i]; send_buffers[i] = hypre_BoomerAMGDD_PackResidualBuffer(compGrid, compGridCommPkg, level, i); hypre_MPI_Isend(send_buffers[i], send_buffer_size, HYPRE_MPI_COMPLEX, - hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 3, hcomm, &requests[request_counter++]); + hypre_AMGDDCommPkgSendProcs(compGridCommPkg)[level][i], 3, comm, &requests[request_counter++]); } // wait for buffers to be received diff --git a/src/parcsr_ls/par_cgc_coarsen.c b/src/parcsr_ls/par_cgc_coarsen.c index 72ab3acf0c..b4a59737c0 100644 --- a/src/parcsr_ls/par_cgc_coarsen.c +++ b/src/parcsr_ls/par_cgc_coarsen.c @@ -669,7 +669,6 @@ HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S, HYPRE_Int numbero hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if 0 if (!mpirank) @@ -707,7 +706,7 @@ 
HYPRE_Int hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S, HYPRE_Int numbero anyway, here it is: */ HYPRE_Int nlocal = vertexrange[1] - vertexrange[0]; vertexrange_all = hypre_CTAlloc(HYPRE_Int, mpisize + 1, HYPRE_MEMORY_HOST); - hypre_MPI_Allgather (&nlocal, 1, HYPRE_MPI_INT, vertexrange_all + 1, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather (&nlocal, 1, HYPRE_MPI_INT, vertexrange_all + 1, 1, HYPRE_MPI_INT, comm); vertexrange_all[0] = 0; for (j = 2; j <= mpisize; j++) { vertexrange_all[j] += vertexrange_all[j - 1]; } } @@ -833,7 +832,6 @@ HYPRE_Int hypre_AmgCGCPrepare (hypre_ParCSRMatrix *S, HYPRE_Int nlocal, HYPRE_In hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -847,7 +845,7 @@ HYPRE_Int hypre_AmgCGCPrepare (hypre_ParCSRMatrix *S, HYPRE_Int nlocal, HYPRE_In HYPRE_Int scan_recv; vertexrange = hypre_CTAlloc(HYPRE_Int, 2, HYPRE_MEMORY_HOST); - hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); /* first point in my range */ vertexrange[0] = scan_recv - nlocal; /* first point in next proc's range */ @@ -960,7 +958,6 @@ hypre_AmgCGCGraphAssemble(hypre_ParCSRMatrix *S, hypre_MPI_Comm_size (comm, &mpisize); hypre_MPI_Comm_rank (comm, &mpirank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* determine neighbor processors */ num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg); @@ -986,10 +983,10 @@ hypre_AmgCGCGraphAssemble(hypre_ParCSRMatrix *S, for (i = 0; i < num_recvs; i++) { - hypre_MPI_Irecv (pointrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_pointrange, hcomm, + hypre_MPI_Irecv (pointrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_pointrange, comm, &recvrequest[2 * i]); hypre_MPI_Irecv (vertexrange_nonlocal + 2 * i, 2, HYPRE_MPI_INT, recv_procs[i], tag_vertexrange, - hcomm, + comm, &recvrequest[2 * i + 1]); } 
for (i = 0; i < num_sends; i++) @@ -998,9 +995,9 @@ hypre_AmgCGCGraphAssemble(hypre_ParCSRMatrix *S, int_buf_data[2 * i + 1] = pointrange_end; int_buf_data2[2 * i] = vertexrange_start; int_buf_data2[2 * i + 1] = vertexrange_end; - hypre_MPI_Isend (int_buf_data + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_pointrange, hcomm, + hypre_MPI_Isend (int_buf_data + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_pointrange, comm, &sendrequest[2 * i]); - hypre_MPI_Isend (int_buf_data2 + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_vertexrange, hcomm, + hypre_MPI_Isend (int_buf_data2 + 2 * i, 2, HYPRE_MPI_INT, send_procs[i], tag_vertexrange, comm, &sendrequest[2 * i + 1]); } hypre_MPI_Waitall (2 * (num_sends + num_recvs), sendrequest, hypre_MPI_STATUSES_IGNORE); diff --git a/src/parcsr_ls/par_coarse_parms.c b/src/parcsr_ls/par_coarse_parms.c index d284024291..22b5f2dc0f 100644 --- a/src/parcsr_ls/par_coarse_parms.c +++ b/src/parcsr_ls/par_coarse_parms.c @@ -87,8 +87,7 @@ hypre_BoomerAMGCoarseParmsHost(MPI_Comm comm, { HYPRE_BigInt scan_recv; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* first point in my range */ coarse_pnts_global[0] = scan_recv - local_coarse_size; diff --git a/src/parcsr_ls/par_coarsen.c b/src/parcsr_ls/par_coarsen.c index 6cee0438f2..d2cafbb8d4 100644 --- a/src/parcsr_ls/par_coarsen.c +++ b/src/parcsr_ls/par_coarsen.c @@ -167,7 +167,6 @@ hypre_BoomerAMGCoarsen( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -552,7 +551,7 @@ hypre_BoomerAMGCoarsen( hypre_ParCSRMatrix *S, *------------------------------------------------*/ big_graph_size = (HYPRE_BigInt) graph_size; - 
hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (global_graph_size == 0) { @@ -2190,7 +2189,6 @@ hypre_BoomerAMGCoarsenPMISHost( hypre_ParCSRMatrix *S, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -2473,7 +2471,7 @@ hypre_BoomerAMGCoarsenPMISHost( hypre_ParCSRMatrix *S, big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* if (my_id == 0) { hypre_printf("graph size %b\n", global_graph_size); } */ diff --git a/src/parcsr_ls/par_cr.c b/src/parcsr_ls/par_cr.c index d13f5afadc..9d038d36f2 100644 --- a/src/parcsr_ls/par_cr.c +++ b/src/parcsr_ls/par_cr.c @@ -1694,7 +1694,6 @@ hypre_BoomerAMGIndepPMIS( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -1960,7 +1959,7 @@ hypre_BoomerAMGIndepPMIS( hypre_ParCSRMatrix *S, HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (global_graph_size == 0) { @@ -2313,7 +2312,6 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S, if (debug_flag == 3) { wall_time = time_getWallclockSeconds(); } hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, 
&my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -2573,7 +2571,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S, HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (global_graph_size == 0) { @@ -2905,7 +2903,6 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); @@ -3197,7 +3194,7 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, } } - hypre_MPI_Allreduce(&local_max, &global_max, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&local_max, &global_max, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); if (num_functions == 1) /*if(CRaddCpoints == 0)*/ { @@ -3342,7 +3339,7 @@ hypre_BoomerAMGCoarsenCR( hypre_ParCSRMatrix *A, } } nstages += 1; - hypre_MPI_Allreduce(&num_coarse, &global_nc, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&num_coarse, &global_nc, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); } else { diff --git a/src/parcsr_ls/par_gauss_elim.c b/src/parcsr_ls/par_gauss_elim.c index d0ce55570e..4c15f708a6 100644 --- a/src/parcsr_ls/par_gauss_elim.c +++ b/src/parcsr_ls/par_gauss_elim.c @@ -154,7 +154,6 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, /* Generate sub communicator - processes that have nonzero num_rows */ hypre_GenerateSubComm(comm, num_rows, &new_comm); hypre_ParAMGDataNewComm(amg_data) = new_comm; - hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); if (num_rows) { @@ -177,7 +176,7 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, displs = &comm_info[new_num_procs]; hypre_ParAMGDataCommInfo(amg_data) = comm_info; - 
hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hnew_comm); + hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm); displs[0] = 0; mat_displs[0] = 0; @@ -216,7 +215,7 @@ hypre_GaussElimSetup(hypre_ParAMGData *amg_data, } hypre_MPI_Allgatherv(A_mat_local, A_mat_local_size, HYPRE_MPI_REAL, A_mat, mat_info, - mat_displs, HYPRE_MPI_REAL, hnew_comm); + mat_displs, HYPRE_MPI_REAL, new_comm); /* Set dense matrix - We store it in row-major format when using hypre's internal Gaussian Elimination or in column-major format if using LAPACK solvers */ @@ -459,7 +458,6 @@ hypre_GaussElimSolve(hypre_ParAMGData *amg_data, HYPRE_Int *displs, *info; HYPRE_Int new_num_procs; - hypre_MPI_Comm hnew_comm = hypre_MPI_CommFromMPI_Comm(new_comm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_GS_ELIM_SOLVE] -= hypre_MPI_Wtime(); @@ -535,7 +533,7 @@ hypre_GaussElimSolve(hypre_ParAMGData *amg_data, /* TODO (VPM): Add GPU-aware MPI support to buffers */ hypre_MPI_Allgatherv(f_data_h, num_rows, HYPRE_MPI_REAL, b_data_h, - info, displs, HYPRE_MPI_REAL, hnew_comm); + info, displs, HYPRE_MPI_REAL, new_comm); if (f_data_h != f_data) { diff --git a/src/parcsr_ls/par_gsmg.c b/src/parcsr_ls/par_gsmg.c index 5af8f376dd..da9e3af3ca 100644 --- a/src/parcsr_ls/par_gsmg.c +++ b/src/parcsr_ls/par_gsmg.c @@ -286,8 +286,7 @@ hypre_ParCSRMatrixChooseThresh(hypre_ParCSRMatrix *S) } } - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allreduce(&minimax, &minmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, hcomm); + hypre_MPI_Allreduce(&minimax, &minmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm); return minmin; } diff --git a/src/parcsr_ls/par_ilu.c b/src/parcsr_ls/par_ilu.c index 4268647830..5842349707 100644 --- a/src/parcsr_ls/par_ilu.c +++ b/src/parcsr_ls/par_ilu.c @@ -3807,7 +3807,6 @@ hypre_ParCSRMatrixNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) HYPRE_Real local_norm = 0.0; HYPRE_Real global_norm; MPI_Comm comm = 
hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); @@ -3821,7 +3820,7 @@ hypre_ParCSRMatrixNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) local_norm += global_norm * global_norm; /* do communication to get global total sum */ - hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); *norm_io = hypre_sqrt(global_norm); @@ -3844,7 +3843,6 @@ hypre_ParCSRMatrixResNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) HYPRE_Real local_norm = 0.0; HYPRE_Real global_norm; MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); @@ -3860,7 +3858,7 @@ hypre_ParCSRMatrixResNormFro(hypre_ParCSRMatrix *A, HYPRE_Real *norm_io) local_norm += global_norm * global_norm; /* do communication to get global total sum */ - hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_norm, &global_norm, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); *norm_io = hypre_sqrt(global_norm); return hypre_error_flag; diff --git a/src/parcsr_ls/par_ilu_setup.c b/src/parcsr_ls/par_ilu_setup.c index 80b709b04b..00726b0780 100644 --- a/src/parcsr_ls/par_ilu_setup.c +++ b/src/parcsr_ls/par_ilu_setup.c @@ -137,7 +137,6 @@ hypre_ILUSetup( void *ilu_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if defined(HYPRE_USING_GPU) hypre_CSRMatrixDestroy(matALU_d); matALU_d = NULL; @@ -977,7 +976,7 @@ hypre_ILUSetup( void *ilu_vdata, HYPRE_BigInt global_start, S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt) m; - hypre_MPI_Allreduce(&big_m, 
&S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (S_total_rows > 0) { @@ -991,7 +990,7 @@ hypre_ILUSetup( void *ilu_vdata, hypre_ParCSRMatrixRowStarts(matA)); hypre_ParVectorInitialize(Ytemp); - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; @@ -1158,7 +1157,7 @@ hypre_ILUSetup( void *ilu_vdata, { nnzBEF += hypre_CSRMatrixNumNonzeros(matF_d); } - hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); @@ -1186,7 +1185,7 @@ hypre_ILUSetup( void *ilu_vdata, { nnzBEF += hypre_CSRMatrixNumNonzeros(matF_d); } - hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&nnzBEF, &nnzG, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); @@ -1215,7 +1214,7 @@ hypre_ILUSetup( void *ilu_vdata, /* borrow i for local nnz of S */ nnzS_offd_local = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matS)); hypre_MPI_Allreduce(&nnzS_offd_local, &nnzS_offd, 1, HYPRE_MPI_REAL, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); nnzS = nnzS * hypre_ParILUDataOperatorComplexity(schur_precond_ilu) + nnzS_offd; break; @@ -1851,7 +1850,6 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int test_opt) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int *rperm = NULL; HYPRE_Int m = n - nLU; HYPRE_Int i; @@ -1972,13 +1970,13 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &S_total_rows, 
1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (S_total_rows > 0) { { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; } @@ -2245,7 +2243,6 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* setup if not yet built */ @@ -2662,7 +2659,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* only form when total_rows > 0 */ if (total_rows > 0) @@ -2671,7 +2668,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -2783,7 +2780,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -2810,7 +2807,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; 
- hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); @@ -3398,7 +3395,6 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* set Comm_Pkg if not yet built */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { @@ -3659,7 +3655,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* only form when total_rows > 0 */ if ( total_rows > 0 ) { @@ -3667,7 +3663,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -3784,7 +3780,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -3810,7 +3806,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + 
hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free */ @@ -3991,7 +3987,6 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* create if not yet built */ if (!comm_pkg) @@ -4532,7 +4527,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, * Check if we need to create Schur complement */ HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* only form when total_rows > 0 */ if ( total_rows > 0 ) @@ -4541,7 +4536,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - m; col_starts[1] = global_start; } @@ -4660,7 +4655,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -4687,7 +4682,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free working array */ @@ 
-4918,7 +4913,6 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, * get communication stuffs first */ hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* Setup if not yet built */ @@ -5409,12 +5403,12 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; - hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ -5443,7 +5437,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -5470,7 +5464,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); @@ -6142,7 +6136,6 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, /* communication */ hypre_ParCSRCommPkg *comm_pkg; hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* reverse permutation 
array */ HYPRE_Int *rperm; @@ -6530,11 +6523,11 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, */ HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); /* need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ -6562,7 +6555,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -6588,7 +6581,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free */ @@ -6728,7 +6721,6 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, * setup communication stuffs first */ hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); comm_pkg = hypre_ParCSRMatrixCommPkg(A); /* create if not yet built */ if (!comm_pkg) @@ -7474,11 +7466,11 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, */ HYPRE_BigInt big_total_rows = (HYPRE_BigInt)total_rows; hypre_MPI_Allreduce(&big_total_rows, &global_num_rows, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); /* need to get new 
column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_total_rows, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - total_rows; col_starts[1] = global_start; } @@ -7508,7 +7500,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; matU = hypre_ParCSRMatrixCreate( comm, @@ -7535,7 +7527,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, } /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); - hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; /* free working array */ diff --git a/src/parcsr_ls/par_interp.c b/src/parcsr_ls/par_interp.c index 0ef0b7177e..6decad9644 100644 --- a/src/parcsr_ls/par_interp.c +++ b/src/parcsr_ls/par_interp.c @@ -127,12 +127,11 @@ hypre_BoomerAMGBuildInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1095,13 +1094,12 @@ 
hypre_BoomerAMGBuildInterpHE( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1968,12 +1966,11 @@ hypre_BoomerAMGBuildDirInterpHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2766,13 +2763,12 @@ hypre_BoomerAMGBuildInterpModUnk( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -3927,11 +3923,10 @@ hypre_BoomerAMGBuildInterpOnePntHost( 
hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_lr_interp.c b/src/parcsr_ls/par_lr_interp.c index 79bee76f99..7ca6380a7e 100644 --- a/src/parcsr_ls/par_lr_interp.c +++ b/src/parcsr_ls/par_lr_interp.c @@ -145,11 +145,10 @@ hypre_BoomerAMGBuildStdInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -1147,11 +1146,10 @@ hypre_BoomerAMGBuildExtPIInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -2046,11 +2044,10 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -3036,11 +3033,10 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -3955,11 +3951,10 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -4891,14 +4886,13 @@ hypre_BoomerAMGBuildExtInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { diff --git a/src/parcsr_ls/par_lr_restr.c b/src/parcsr_ls/par_lr_restr.c index a783a6d885..1c7ce2a7c3 100644 --- a/src/parcsr_ls/par_lr_restr.c +++ 
b/src/parcsr_ls/par_lr_restr.c @@ -168,7 +168,6 @@ hypre_BoomerAMGBuildRestrDist2AIR( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------- global number of C points and my start position */ /*my_first_cpt = num_cpts_global[0];*/ @@ -176,7 +175,7 @@ hypre_BoomerAMGBuildRestrDist2AIR( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -1717,7 +1716,6 @@ hypre_BoomerAMGBuildRestrNeumannAIRHost( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location_R = hypre_ParCSRMatrixMemoryLocation(A); @@ -1727,7 +1725,7 @@ hypre_BoomerAMGBuildRestrNeumannAIRHost( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_mgr.c b/src/parcsr_ls/par_mgr.c index 784be6366c..ed3a5c77b8 100644 --- a/src/parcsr_ls/par_mgr.c +++ b/src/parcsr_ls/par_mgr.c @@ -1398,7 +1398,6 @@ hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); nfpoints = 0; for (i = 0; i < A_nr_of_rows; i++) @@ -1466,7 +1465,7 @@ hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, { 
nC_global = num_cpts_global[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } /* Construct P from matrix product W_diag */ @@ -1629,7 +1628,6 @@ hypre_MGRBuildP( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -1637,7 +1635,7 @@ hypre_MGRBuildP( hypre_ParCSRMatrix *A, //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2209,7 +2207,6 @@ hypre_MGRBuildPDRS( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -2217,7 +2214,7 @@ hypre_MGRBuildPDRS( hypre_ParCSRMatrix *A, //my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -2748,7 +2745,6 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); // Count total F-points // Also setup F to C column 
map @@ -2779,7 +2775,7 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_row_cpts_global[0], num_row_cpts_global[1]); if (my_id == (num_procs - 1)) { total_global_row_cpts = num_row_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /* get the number of coarse rows */ hypre_IntArrayData(wrap_cf) = f_marker; @@ -2793,7 +2789,7 @@ hypre_MGRGetAcfCPR(hypre_ParCSRMatrix *A, //hypre_printf("my_id = %d, cpts_this = %d, cpts_next = %d\n", my_id, num_col_fpts_global[0], num_col_fpts_global[1]); if (my_id == (num_procs - 1)) { total_global_col_fpts = num_col_fpts_global[1]; } - hypre_MPI_Bcast(&total_global_col_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_col_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); // First pass: count the nnz of A_CF jj_counter = 0; @@ -3503,10 +3499,9 @@ hypre_MGRBuildInterpApproximateInverse(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*----------------------------------------------------------------------- * Allocate arrays. 
@@ -4799,7 +4794,6 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Sanity check */ if ((num_rows_A > 0) && (num_rows_A < blk_size)) @@ -4837,7 +4831,7 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, if (CF_marker) { num_rows_big = (HYPRE_BigInt) B_diag_num_rows; - hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* first point in my range */ row_starts_B[0] = scan_recv - num_rows_big; @@ -4848,7 +4842,7 @@ hypre_ParCSRMatrixBlockDiagMatrixHost( hypre_ParCSRMatrix *A, { num_rows_B = row_starts_B[1]; } - hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { @@ -6076,7 +6070,6 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); //num_threads = hypre_NumThreads(); // Temporary fix, disable threading // TODO: enable threading @@ -6095,7 +6088,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, // my_first_row_cpt = num_row_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_row_cpts = num_row_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_row_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /* get the number of coarse rows */ hypre_IntArrayData(wrap_cf) = col_cf_marker; @@ -6108,7 +6101,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A, // my_first_col_cpt = num_col_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_col_cpts = num_col_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_col_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + 
hypre_MPI_Bcast(&total_global_col_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns @@ -6702,7 +6695,6 @@ hypre_MGRDataPrint(void *mgr_vdata) /* Get rank ID */ comm = hypre_ParCSRMatrixComm(par_A); hypre_MPI_Comm_rank(comm, &myid); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Create new "ls_" folder (info_path) */ if (((print_level & HYPRE_MGR_PRINT_INFO_PARAMS) || @@ -6720,7 +6712,7 @@ hypre_MGRDataPrint(void *mgr_vdata) hypre_CreateNextDirOfSequence(topdir, "ls_", &info_path); info_path_length = strlen(info_path) + 1; } - hypre_MPI_Bcast(&info_path_length, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(&info_path_length, 1, HYPRE_MPI_INT, 0, comm); if (info_path_length > 0) { @@ -6734,7 +6726,7 @@ hypre_MGRDataPrint(void *mgr_vdata) hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unable to create info path!"); return hypre_error_flag; } - hypre_MPI_Bcast(info_path, info_path_length, hypre_MPI_CHAR, 0, hcomm); + hypre_MPI_Bcast(info_path, info_path_length, hypre_MPI_CHAR, 0, comm); /* Save info_path */ (mgr_data -> info_path) = info_path; diff --git a/src/parcsr_ls/par_mgr_coarsen.c b/src/parcsr_ls/par_mgr_coarsen.c index 2011f638b0..8d4c6edb72 100644 --- a/src/parcsr_ls/par_mgr_coarsen.c +++ b/src/parcsr_ls/par_mgr_coarsen.c @@ -40,8 +40,7 @@ hypre_MGRCoarseParms(MPI_Comm comm, /* Scan global starts */ sbuffer_send[0] = (HYPRE_BigInt) num_cpts; sbuffer_send[1] = (HYPRE_BigInt) num_fpts; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Scan(&sbuffer_send, &sbuffer_recv, 2, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&sbuffer_send, &sbuffer_recv, 2, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* First points in next processor's range */ row_starts_cpts[1] = sbuffer_recv[0]; diff --git a/src/parcsr_ls/par_mgr_setup.c b/src/parcsr_ls/par_mgr_setup.c index cd0cab53c9..ac789fb3e2 100644 --- 
a/src/parcsr_ls/par_mgr_setup.c +++ b/src/parcsr_ls/par_mgr_setup.c @@ -2017,7 +2017,6 @@ hypre_MGRSetupFrelaxVcycleData( void *mgr_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); local_size = hypre_ParCSRMatrixNumRows(A); @@ -2186,7 +2185,7 @@ hypre_MGRSetupFrelaxVcycleData( void *mgr_vdata, { coarse_size = coarse_pnts_global_lvl[1]; } - hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&coarse_size, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (coarse_size == 0) // stop coarsening { diff --git a/src/parcsr_ls/par_mod_lr_interp.c b/src/parcsr_ls/par_mod_lr_interp.c index 987a7c3683..7fc998942c 100644 --- a/src/parcsr_ls/par_mod_lr_interp.c +++ b/src/parcsr_ls/par_mod_lr_interp.c @@ -119,10 +119,9 @@ hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF); @@ -702,10 +701,9 @@ hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, 
num_cpts_global, S, &As_FC, &As_FF); @@ -1366,10 +1364,9 @@ hypre_BoomerAMGBuildModExtPEInterpHost(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); n_Cpts = num_cpts_global[1] - num_cpts_global[0]; hypre_ParCSRMatrixGenerateFFFCHost(A, CF_marker, num_cpts_global, S, &As_FC, &As_FF); diff --git a/src/parcsr_ls/par_mod_multi_interp.c b/src/parcsr_ls/par_mod_multi_interp.c index a0e1e60c02..1280ba36ba 100644 --- a/src/parcsr_ls/par_mod_multi_interp.c +++ b/src/parcsr_ls/par_mod_multi_interp.c @@ -98,7 +98,6 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -106,7 +105,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { @@ -200,7 +199,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, current_pass = 1; num_passes = 1; /* color points according to pass number */ - hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); while (global_remaining > 0) { HYPRE_Int remaining_pts = (HYPRE_Int) remaining; @@ -266,7 +265,7 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A, hypre_ParCSRCommHandleDestroy(comm_handle); } old_global_remaining = 
global_remaining; - hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&remaining, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* if the number of remaining points does not change, we have a situation of isolated areas of * fine points that are not connected to any C-points, and the pass generation process breaks * down. Those points can be ignored, i.e. the corresponding rows in P will just be 0 @@ -597,7 +596,6 @@ hypre_GenerateMultipassPi( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* define P matrices */ @@ -625,15 +623,15 @@ hypre_GenerateMultipassPi( hypre_ParCSRMatrix *A, HYPRE_BigInt big_Fpts; big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { @@ -924,7 +922,6 @@ hypre_GenerateMultiPi( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* define P matrices */ @@ -952,15 +949,15 @@ hypre_GenerateMultiPi( hypre_ParCSRMatrix *A, HYPRE_BigInt big_Fpts; big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + 
hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { diff --git a/src/parcsr_ls/par_multi_interp.c b/src/parcsr_ls/par_multi_interp.c index 10342ba7b6..eba0f98daf 100644 --- a/src/parcsr_ls/par_multi_interp.c +++ b/src/parcsr_ls/par_multi_interp.c @@ -206,12 +206,11 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /* total_global_cpts = 0; */ if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -528,7 +527,7 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, pass = 2; local_pass_array_size = (HYPRE_BigInt)(pass_array_size - cnt); hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); while (global_pass_array_size && pass < max_num_passes) { for (i = pass_array_size - 1; i > cnt - 1; i--) @@ -569,7 +568,7 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A, local_pass_array_size = (HYPRE_BigInt)(pass_array_size - cnt); hypre_MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); index = 0; for 
(i = 0; i < num_sends; i++) { diff --git a/src/parcsr_ls/par_rap_communication.c b/src/parcsr_ls/par_rap_communication.c index 89345e5049..073ad96654 100644 --- a/src/parcsr_ls/par_rap_communication.c +++ b/src/parcsr_ls/par_rap_communication.c @@ -62,7 +62,6 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------------------------------------------------------------------- * determine num_recvs, recv_procs and recv_vec_starts for RT @@ -178,11 +177,11 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, j = 0; for (i = 0; i < num_sends_A; i++) - hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, + hypre_MPI_Irecv(&change_array[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, &requests[j++]); for (i = 0; i < num_recvs_A; i++) - hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, + hypre_MPI_Isend(&proc_mark[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, comm, &requests[j++]); hypre_MPI_Waitall(num_requests, requests, status); @@ -228,7 +227,7 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, vec_start = send_map_starts_RT[i]; vec_len = send_map_starts_RT[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs_RT[i], 0, hcomm, &requests[j++]); + send_procs_RT[i], 0, comm, &requests[j++]); } for (i = 0; i < num_recvs_RT; i++) @@ -236,7 +235,7 @@ hypre_GetCommPkgRTFromCommPkgA( hypre_ParCSRMatrix *RT, vec_start = recv_vec_starts_RT[i]; vec_len = recv_vec_starts_RT[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd_RT[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs_RT[i], 0, hcomm, &requests[j++]); + recv_procs_RT[i], 0, comm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -279,7 +278,6 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu HYPRE_BigInt 
*col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_BigInt *send_big_elmts = NULL; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------------------------------------------------------------------- * generate send_map_starts and send_map_elmts @@ -291,14 +289,14 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu j = 0; for (i = 0; i < num_sends; i++) { - hypre_MPI_Irecv(&send_map_starts[i + 1], 1, HYPRE_MPI_INT, send_procs[i], 0, hcomm, + hypre_MPI_Irecv(&send_map_starts[i + 1], 1, HYPRE_MPI_INT, send_procs[i], 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { vec_len = recv_vec_starts[i + 1] - recv_vec_starts[i]; - hypre_MPI_Isend(&vec_len, 1, HYPRE_MPI_INT, recv_procs[i], 0, hcomm, &requests[j++]); + hypre_MPI_Isend(&vec_len, 1, HYPRE_MPI_INT, recv_procs[i], 0, comm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -318,7 +316,7 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu vec_start = send_map_starts[i]; vec_len = send_map_starts[i + 1] - vec_start; hypre_MPI_Irecv(&send_big_elmts[vec_start], vec_len, HYPRE_MPI_BIG_INT, - send_procs[i], 0, hcomm, &requests[j++]); + send_procs[i], 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) @@ -326,7 +324,7 @@ hypre_GenerateSendMapAndCommPkg(MPI_Comm comm, HYPRE_Int num_sends, HYPRE_Int nu vec_start = recv_vec_starts[i]; vec_len = recv_vec_starts[i + 1] - vec_start; hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT, - recv_procs[i], 0, hcomm, &requests[j++]); + recv_procs[i], 0, comm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index d5bf386203..9754fce97d 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -417,7 +417,6 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, 
&num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -453,7 +452,7 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - hcomm, &requests[jr++]); + comm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -469,7 +468,7 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - hcomm, &requests[jr++]); + comm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } @@ -566,7 +565,6 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -623,7 +621,7 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - hcomm, &requests[jr++]); + comm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -639,7 +637,7 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, - hcomm, &requests[jr++]); + comm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } diff --git a/src/parcsr_ls/par_relax_more.c b/src/parcsr_ls/par_relax_more.c index 42fb317e31..3d337a5571 100644 --- a/src/parcsr_ls/par_relax_more.c +++ b/src/parcsr_ls/par_relax_more.c @@ -50,7 +50,6 @@ hypre_ParCSRMaxEigEstimateHost( 
hypre_ParCSRMatrix *A, /* matrix to relax HYPRE_Real send_buf[2], recv_buf[2]; MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); @@ -106,7 +105,7 @@ hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A, /* matrix to relax /* get e_min e_max across procs */ hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, - hcomm); + comm); e_min = -recv_buf[0]; e_max = recv_buf[1]; diff --git a/src/parcsr_ls/par_restr.c b/src/parcsr_ls/par_restr.c index b967cf93ed..ba1cabd500 100644 --- a/src/parcsr_ls/par_restr.c +++ b/src/parcsr_ls/par_restr.c @@ -113,7 +113,6 @@ hypre_BoomerAMGBuildRestrAIR( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*-------------- global number of C points and my start position */ /*my_first_cpt = num_cpts_global[0];*/ @@ -121,7 +120,7 @@ hypre_BoomerAMGBuildRestrAIR( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /*------------------------------------------------------------------- * Get the CF_marker data for the off-processor columns diff --git a/src/parcsr_ls/par_scaled_matnorm.c b/src/parcsr_ls/par_scaled_matnorm.c index b8af4a944a..d4e24e569b 100644 --- a/src/parcsr_ls/par_scaled_matnorm.c +++ b/src/parcsr_ls/par_scaled_matnorm.c @@ -23,7 +23,6 @@ hypre_ParCSRMatrixScaledNorm( hypre_ParCSRMatrix *A, HYPRE_Real *scnorm) hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *diag_i = 
hypre_CSRMatrixI(diag); HYPRE_Int *diag_j = hypre_CSRMatrixJ(diag); @@ -117,7 +116,7 @@ hypre_ParCSRMatrixScaledNorm( hypre_ParCSRMatrix *A, HYPRE_Real *scnorm) } } - hypre_MPI_Allreduce(&max_row_sum, &mat_norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&max_row_sum, &mat_norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); hypre_ParVectorDestroy(dinvsqrt); hypre_SeqVectorDestroy(sum); diff --git a/src/parcsr_ls/par_stats.c b/src/parcsr_ls/par_stats.c index 94d3a43ecc..c31fdd76db 100644 --- a/src/parcsr_ls/par_stats.c +++ b/src/parcsr_ls/par_stats.c @@ -155,7 +155,6 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); @@ -657,7 +656,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; - hypre_MPI_Reduce(send_buff, gather_buff, 4, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + hypre_MPI_Reduce(send_buff, gather_buff, 4, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); if (my_id == 0) { @@ -953,7 +952,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, send_buff[4] = - min_weight; send_buff[5] = max_weight; - hypre_MPI_Reduce(send_buff, gather_buff, 6, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + hypre_MPI_Reduce(send_buff, gather_buff, 6, HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); if (my_id == 0) { diff --git a/src/parcsr_ls/par_strength.c b/src/parcsr_ls/par_strength.c index 3d40f34470..dc667cdfef 100644 --- a/src/parcsr_ls/par_strength.c +++ b/src/parcsr_ls/par_strength.c @@ -1539,7 +1539,6 @@ hypre_BoomerAMGCreateSCommPkg(hypre_ParCSRMatrix *A, HYPRE_Int **col_offd_S_to_A_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Status *status; hypre_MPI_Request *requests; hypre_ParCSRCommPkg *comm_pkg_A = hypre_ParCSRMatrixCommPkg(A); @@ -1690,12 
+1689,12 @@ hypre_BoomerAMGCreateSCommPkg(hypre_ParCSRMatrix *A, j = 0; for (i = 0; i < num_sends_A; i++) { - hypre_MPI_Irecv(&send_change[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, hcomm, &requests[j++]); + hypre_MPI_Irecv(&send_change[i], 1, HYPRE_MPI_INT, send_procs_A[i], 0, comm, &requests[j++]); } for (i = 0; i < num_recvs_A; i++) { - hypre_MPI_Isend(&recv_change[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, hcomm, &requests[j++]); + hypre_MPI_Isend(&recv_change[i], 1, HYPRE_MPI_INT, recv_procs_A[i], 0, comm, &requests[j++]); } status = hypre_CTAlloc(hypre_MPI_Status, j, HYPRE_MEMORY_HOST); @@ -1891,12 +1890,11 @@ hypre_BoomerAMGCreate2ndSHost( hypre_ParCSRMatrix *S, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = coarse_row_starts[0]; my_last_cpt = coarse_row_starts[1] - 1; if (my_id == (num_procs - 1)) { global_num_coarse = coarse_row_starts[1]; } - hypre_MPI_Bcast(&global_num_coarse, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&global_num_coarse, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (num_cols_offd_S) { diff --git a/src/parcsr_ls/par_sv_interp.c b/src/parcsr_ls/par_sv_interp.c index f65689e16c..50759176e4 100644 --- a/src/parcsr_ls/par_sv_interp.c +++ b/src/parcsr_ls/par_sv_interp.c @@ -356,7 +356,6 @@ hypre_BoomerAMG_GMExpandInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if SV_DEBUG { @@ -1407,7 +1406,7 @@ hypre_BoomerAMG_GMExpandInterp( hypre_ParCSRMatrix *A, new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf; if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; } - hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else /* not first level */ { diff --git a/src/parcsr_ls/par_sv_interp_ln.c 
b/src/parcsr_ls/par_sv_interp_ln.c index 87916350de..336fd2e2ba 100644 --- a/src/parcsr_ls/par_sv_interp_ln.c +++ b/src/parcsr_ls/par_sv_interp_ln.c @@ -223,7 +223,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if SV_DEBUG @@ -2485,7 +2484,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { g_nc = new_col_starts[1]; } - hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else /* not first level */ { diff --git a/src/parcsr_ls/partial.c b/src/parcsr_ls/partial.c index 15812883d9..3bd452322c 100644 --- a/src/parcsr_ls/partial.c +++ b/src/parcsr_ls/partial.c @@ -137,7 +137,6 @@ hypre_BoomerAMGBuildPartialExtPIInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); max_num_threads = hypre_NumThreads(); my_first_cpt = num_cpts_global[0]; @@ -149,8 +148,8 @@ hypre_BoomerAMGBuildPartialExtPIInterp(hypre_ParCSRMatrix *A, total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -1008,7 +1007,6 @@ hypre_BoomerAMGBuildPartialStdInterp(hypre_ParCSRMatrix *A, /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /*my_first_old_cpt = num_old_cpts_global[0];*/ @@ -1020,8 +1018,8 @@ 
hypre_BoomerAMGBuildPartialStdInterp(hypre_ParCSRMatrix *A, total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -2015,7 +2013,6 @@ hypre_BoomerAMGBuildPartialExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker /* BEGIN */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; /*my_first_old_cpt = num_old_cpts_global[0];*/ @@ -2026,8 +2023,8 @@ hypre_BoomerAMGBuildPartialExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker total_global_cpts = num_cpts_global[1]; total_old_global_cpts = num_old_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_old_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { diff --git a/src/parcsr_mv/HYPRE_parcsr_matrix.c b/src/parcsr_mv/HYPRE_parcsr_matrix.c index bd04b4a699..1595425dba 100644 --- a/src/parcsr_mv/HYPRE_parcsr_matrix.c +++ b/src/parcsr_mv/HYPRE_parcsr_matrix.c @@ -147,8 +147,8 @@ HYPRE_ParCSRMatrixGetRowPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); - hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm_size(hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix), + &num_procs); row_starts = hypre_ParCSRMatrixRowStarts((hypre_ParCSRMatrix *) matrix); if (!row_starts) { 
return -1; } row_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); @@ -169,6 +169,7 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, HYPRE_Int all_procs, HYPRE_BigInt **row_partitioning_ptr ) { + MPI_Comm comm; HYPRE_Int my_id; HYPRE_BigInt *row_partitioning = NULL; @@ -178,8 +179,7 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); hypre_MPI_Comm_rank(comm, &my_id); HYPRE_Int num_procs; @@ -195,12 +195,12 @@ HYPRE_ParCSRMatrixGetGlobalRowPartitioning( HYPRE_ParCSRMatrix matrix, if (all_procs) { hypre_MPI_Allgather(&row_start, 1, HYPRE_MPI_BIG_INT, row_partitioning, - 1, HYPRE_MPI_BIG_INT, hcomm); + 1, HYPRE_MPI_BIG_INT, comm); } else { hypre_MPI_Gather(&row_start, 1, HYPRE_MPI_BIG_INT, row_partitioning, - 1, HYPRE_MPI_BIG_INT, 0, hcomm); + 1, HYPRE_MPI_BIG_INT, 0, comm); } if (my_id == 0 || all_procs) @@ -230,8 +230,8 @@ HYPRE_ParCSRMatrixGetColPartitioning( HYPRE_ParCSRMatrix matrix, return hypre_error_flag; } - MPI_Comm comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix); - hypre_MPI_Comm_size(comm, &num_procs); + hypre_MPI_Comm_size(hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) matrix), + &num_procs); col_starts = hypre_ParCSRMatrixColStarts((hypre_ParCSRMatrix *) matrix); if (!col_starts) { return -1; } col_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/communicationT.c b/src/parcsr_mv/communicationT.c index 33623b2586..f9436d88ac 100644 --- a/src/parcsr_mv/communicationT.c +++ b/src/parcsr_mv/communicationT.c @@ -169,7 +169,6 @@ hypre_MatTCommPkgCreate_core ( HYPRE_BigInt col, kc; HYPRE_Int * recv_sz_buf; HYPRE_Int * row_marker; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); 
hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -237,7 +236,7 @@ hypre_MatTCommPkgCreate_core ( num_recvs = num_procs - 1; local_info = num_procs + num_cols_offd + num_cols_diag; - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); /* ---------------------------------------------------------------------- * generate information to be send: tmp contains for each recv_proc: @@ -277,7 +276,7 @@ hypre_MatTCommPkgCreate_core ( hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_BIG_INT, - hcomm); + comm); /* ---------------------------------------------------------------------- * determine send_procs and actual elements to be send (in send_map_elmts) @@ -426,7 +425,7 @@ hypre_MatTCommPkgCreate_core ( /* scatter-gather num_sends, to set up the size for the main comm. step */ i = 3 * num_sends; - hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, hcomm ); + hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm ); displs[0] = 0; for ( p = 0; p < num_procs; ++p ) { @@ -445,7 +444,7 @@ hypre_MatTCommPkgCreate_core ( }; hypre_MPI_Allgatherv( send_buf, 3 * num_sends, HYPRE_MPI_INT, - recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, hcomm); + recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm); recv_vec_starts[0] = 0; j2 = 0; j = 0; diff --git a/src/parcsr_mv/gen_fffc.c b/src/parcsr_mv/gen_fffc.c index 8fc8ff6520..62772e663b 100644 --- a/src/parcsr_mv/gen_fffc.c +++ b/src/parcsr_mv/gen_fffc.c @@ -22,8 +22,7 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); HYPRE_MemoryLocation memory_location_P = 
hypre_ParCSRMatrixMemoryLocation(A); if (!hypre_ParCSRMatrixCommPkg(A)) { @@ -171,15 +170,15 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, n_Fpts = fpt_array[num_threads]; big_Fpts = n_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); fpts_starts[0] = fpts_starts[1] - big_Fpts; if (my_id == num_procs - 1) { total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier @@ -492,6 +491,10 @@ hypre_ParCSRMatrixGenerateFFFCHost( hypre_ParCSRMatrix *A, * hypre_ParCSRMatrixGenerateFFFC * * Generate AFF or AFC + * + * TODO (VPM): build the communication package of the resulting matrices + * (A_FF and A_FC) from the communication package of the original matrix + * without doing MPI calls. 
*--------------------------------------------------------------------------*/ HYPRE_Int @@ -532,8 +535,7 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -689,8 +691,8 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, big_Fpts = n_Fpts; big_new_Fpts = n_new_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); - hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); fpts_starts[0] = fpts_starts[1] - big_Fpts; new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts; if (my_id == num_procs - 1) @@ -699,9 +701,9 @@ hypre_ParCSRMatrixGenerateFFFC3( hypre_ParCSRMatrix *A, total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier @@ -1074,8 +1076,7 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **A_FF_ptr, HYPRE_Real 
**D_lambda_ptr) { - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -1230,9 +1231,9 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, big_Fpts = n_Fpts; big_new_Fpts = n_new_Fpts; - hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_MPI_Scan(&big_new_Fpts, new_fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); fpts_starts[0] = fpts_starts[1] - big_Fpts; new_fpts_starts[0] = new_fpts_starts[1] - big_new_Fpts; if (my_id == num_procs - 1) @@ -1241,9 +1242,9 @@ hypre_ParCSRMatrixGenerateFFFCD3( hypre_ParCSRMatrix *A, total_global_fpts = fpts_starts[1]; total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_new_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } #ifdef HYPRE_USING_OPENMP #pragma omp barrier diff --git a/src/parcsr_mv/par_csr_assumed_part.c b/src/parcsr_mv/par_csr_assumed_part.c index c5a06aaf0b..7356168d2a 100644 --- a/src/parcsr_mv/par_csr_assumed_part.c +++ b/src/parcsr_mv/par_csr_assumed_part.c @@ -49,7 +49,6 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt hypre_MPI_Request *requests; hypre_MPI_Status status0, *statuses; - hypre_MPI_Comm 
hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_ANNOTATE_FUNC_BEGIN; @@ -180,7 +179,7 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt for (i = 0; i < contact_list_length; i++) { hypre_MPI_Isend(&CONTACT(i, 1), 2, HYPRE_MPI_BIG_INT, CONTACT(i, 0), flag1, - hcomm, &requests[i]); + comm, &requests[i]); /*hypre_MPI_COMM_WORLD, &requests[i]);*/ } @@ -262,7 +261,7 @@ hypre_LocateAssumedPartition(MPI_Comm comm, HYPRE_BigInt row_start, HYPRE_BigInt while (rows_found != locate_row_count) { hypre_MPI_Recv( tmp_range, 2, HYPRE_MPI_BIG_INT, hypre_MPI_ANY_SOURCE, - flag1, hcomm, &status0); + flag1, comm, &status0); /*flag1 , hypre_MPI_COMM_WORLD, &status0);*/ if (part->length == part->storage_length) diff --git a/src/parcsr_mv/par_csr_bool_matrix.c b/src/parcsr_mv/par_csr_bool_matrix.c index 296dfc0043..45657bb631 100644 --- a/src/parcsr_mv/par_csr_bool_matrix.c +++ b/src/parcsr_mv/par_csr_bool_matrix.c @@ -351,7 +351,6 @@ HYPRE_Int hypre_ParCSRBooleanMatrixInitialize( hypre_ParCSRBooleanMatrix *matrix HYPRE_Int hypre_ParCSRBooleanMatrixSetNNZ( hypre_ParCSRBooleanMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRBooleanMatrix *diag = hypre_ParCSRBooleanMatrix_Get_Diag(matrix); HYPRE_Int *diag_i = hypre_CSRBooleanMatrix_Get_I(diag); hypre_CSRBooleanMatrix *offd = hypre_ParCSRBooleanMatrix_Get_Offd(matrix); @@ -363,7 +362,7 @@ HYPRE_Int hypre_ParCSRBooleanMatrixSetNNZ( hypre_ParCSRBooleanMatrix *matrix) local_num_nonzeros = diag_i[local_num_rows] + offd_i[local_num_rows]; hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); hypre_ParCSRBooleanMatrix_Get_NNZ(matrix) = total_num_nonzeros; return ierr; } @@ -648,8 +647,7 @@ HYPRE_Int hypre_ParCSRBooleanMatrixGetLocalRange(hypre_ParCSRBooleanMatrix *matr HYPRE_Int ierr = 0; HYPRE_Int my_id; - MPI_Comm comm = 
hypre_ParCSRBooleanMatrix_Get_Comm(matrix); - hypre_MPI_Comm_rank(comm, &my_id ); + hypre_MPI_Comm_rank( hypre_ParCSRBooleanMatrix_Get_Comm(matrix), &my_id ); *row_start = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix)[ my_id ]; *row_end = hypre_ParCSRBooleanMatrix_Get_RowStarts(matrix)[ my_id + 1 ] - 1; @@ -691,8 +689,7 @@ HYPRE_Int hypre_ParCSRBooleanMatrixGetRow(hypre_ParCSRBooleanMatrix *mat, if (hypre_ParCSRBooleanMatrix_Get_Getrowactive(mat)) { return (-1); } - MPI_Comm comm = hypre_ParCSRBooleanMatrix_Get_Comm(mat); - hypre_MPI_Comm_rank(comm, &my_id); + hypre_MPI_Comm_rank( hypre_ParCSRBooleanMatrix_Get_Comm(mat), &my_id ); hypre_ParCSRBooleanMatrix_Get_Getrowactive(mat) = 1; @@ -852,7 +849,6 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm, HYPRE_Int i, j, ind; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); @@ -863,7 +859,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm, a_i = hypre_CSRBooleanMatrix_Get_I(A); a_j = hypre_CSRBooleanMatrix_Get_J(A); } - hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, hcomm); + hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm); global_num_rows = global_data[0]; global_num_cols = global_data[1]; @@ -890,7 +886,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm, local_num_nonzeros[num_procs - 1] = a_i[(HYPRE_Int)global_num_rows] - a_i[(HYPRE_Int)row_starts[num_procs - 1]]; } - hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm); if (my_id == 0) { num_nonzeros = local_num_nonzeros[0]; } @@ -909,7 +905,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm, &a_i[(HYPRE_Int)row_starts[i]], &a_j[ind], &csr_matrix_datatypes[i]); - hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, hcomm, + 
hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm, &requests[j++]); hypre_MPI_Type_free(&csr_matrix_datatypes[i]); } @@ -928,7 +924,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm, hypre_CSRBooleanMatrix_Get_I(local_A), hypre_CSRBooleanMatrix_Get_J(local_A), csr_matrix_datatypes); - hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, hcomm, &status0); + hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0); hypre_MPI_Type_free(csr_matrix_datatypes); } diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 2cc11175df..dd95406333 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -114,7 +114,6 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_requests = num_sends + num_recvs; hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); @@ -136,14 +135,14 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, hcomm, requests); + 0, comm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, hcomm, requests + num_recvs); + 0, comm, requests + num_recvs); break; } @@ -162,14 +161,14 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, hcomm, requests); + 0, comm, requests); 
hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, hcomm, requests + num_sends); + 0, comm, requests + num_sends); break; @@ -285,14 +284,13 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_requests = num_sends + num_recvs; hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_MPI_CommMPI_SendLocation(hcomm) = hypre_GetActualMemLocation(send_memory_location); - hypre_MPI_CommMPI_RecvLocation(hcomm) = hypre_GetActualMemLocation(recv_memory_location); + hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); + hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); switch (hypre_ParCSRCommHandleGetJobType(job)) { @@ -305,22 +303,22 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); - hypre_MPI_CommMPI_SendCopy(hcomm) = hypre_ParCSRCommHandleSendBuffer(comm_handle); - hypre_MPI_CommMPI_RecvCopy(hcomm) = hypre_ParCSRCommHandleRecvBuffer(comm_handle); - hypre_MPI_CommMPI_SendCopyLocation(hcomm) = hypre_ParCSRCommHandleSendBufferLocation(comm_handle); - hypre_MPI_CommMPI_RecvCopyLocation(hcomm) = hypre_ParCSRCommHandleRecvBufferLocation(comm_handle); + hypre_MPICommSetSendCopy(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetRecvCopy(comm, 
hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetSendCopyLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + hypre_MPICommSetRecvCopyLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_MPI_Irecv_Multiple(recv_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, hcomm, requests); + 0, comm, requests); hypre_MPI_Isend_Multiple(send_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, hcomm, requests + num_recvs); + 0, comm, requests + num_recvs); break; } @@ -334,22 +332,22 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); - hypre_MPI_CommMPI_SendCopy(hcomm) = hypre_ParCSRCommHandleSendBuffer(comm_handle); - hypre_MPI_CommMPI_RecvCopy(hcomm) = hypre_ParCSRCommHandleRecvBuffer(comm_handle); - hypre_MPI_CommMPI_SendCopyLocation(hcomm) = hypre_ParCSRCommHandleSendBufferLocation(comm_handle); - hypre_MPI_CommMPI_RecvCopyLocation(hcomm) = hypre_ParCSRCommHandleRecvBufferLocation(comm_handle); + hypre_MPICommSetSendCopy(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetRecvCopy(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetSendCopyLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + hypre_MPICommSetRecvCopyLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_MPI_Irecv_Multiple(recv_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, hcomm, requests); + 0, comm, requests); hypre_MPI_Isend_Multiple(send_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, hcomm, requests + num_sends); + 0, comm, requests + num_sends); break; @@ -473,7 
+471,6 @@ hypre_ParCSRCommPkgCreate_core( hypre_MPI_Request *requests = NULL; hypre_MPI_Status *status = NULL; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -534,7 +531,7 @@ hypre_ParCSRCommPkgCreate_core( local_info = 2 * num_recvs; - hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hcomm); + hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); /* ---------------------------------------------------------------------- * generate information to be sent: tmp contains for each recv_proc: @@ -569,7 +566,7 @@ hypre_ParCSRCommPkgCreate_core( } hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_INT, recv_buf, info, - displs, HYPRE_MPI_INT, hcomm); + displs, HYPRE_MPI_INT, comm); /* ---------------------------------------------------------------------- * determine num_sends and number of elements to be sent @@ -630,7 +627,7 @@ hypre_ParCSRCommPkgCreate_core( vec_len = send_map_starts[i + 1] - vec_start; ip = send_procs[i]; hypre_MPI_Irecv(&big_buf_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); + ip, 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { @@ -638,7 +635,7 @@ hypre_ParCSRCommPkgCreate_core( vec_len = recv_vec_starts[i + 1] - vec_start; ip = recv_procs[i]; hypre_MPI_Isend(&col_map_offd[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, hcomm, &requests[j++]); + ip, 0, comm, &requests[j++]); } if (num_requests) diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index d5545425b8..8de1dc9a29 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -2347,7 +2347,6 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, /* fetch the communication information from */ comm = hypre_ParCSRMatrixComm(G_csr); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); 
comm_pkg = hypre_ParCSRMatrixCommPkg(G_csr); @@ -2388,7 +2387,7 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, pgraph_i = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); recv_cnts = hypre_TAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&n_proc_array, 1, HYPRE_MPI_INT, recv_cnts, 1, - HYPRE_MPI_INT, hcomm); + HYPRE_MPI_INT, comm); pgraph_i[0] = 0; for (i = 1; i <= nprocs; i++) { @@ -2396,7 +2395,7 @@ hypre_ParCSRMatrixGenSpanningTree( hypre_ParCSRMatrix *G_csr, } pgraph_j = hypre_TAlloc(HYPRE_Int, pgraph_i[nprocs], HYPRE_MEMORY_HOST); hypre_MPI_Allgatherv(proc_array, n_proc_array, HYPRE_MPI_INT, pgraph_j, - recv_cnts, pgraph_i, HYPRE_MPI_INT, hcomm); + recv_cnts, pgraph_i, HYPRE_MPI_INT, comm); hypre_TFree(recv_cnts, HYPRE_MEMORY_HOST); /* BFS on the processor graph to determine parent and children */ @@ -2553,7 +2552,6 @@ void hypre_ParCSRMatrixExtractSubmatrices( hypre_ParCSRMatrix *A_csr, A_diag_j = hypre_CSRMatrixJ(A_diag); A_diag_a = hypre_CSRMatrixData(A_diag); comm = hypre_ParCSRMatrixComm(A_csr); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); if (nprocs > 1) @@ -2569,7 +2567,7 @@ void hypre_ParCSRMatrixExtractSubmatrices( hypre_ParCSRMatrix *A_csr, proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1, - HYPRE_MPI_INT, hcomm); + HYPRE_MPI_INT, comm); k = 0; for (i = 0; i < nprocs; i++) { @@ -2919,7 +2917,6 @@ void hypre_ParCSRMatrixExtractRowSubmatrices( hypre_ParCSRMatrix *A_csr, A_offd_i = hypre_CSRMatrixI(A_offd); A_offd_j = hypre_CSRMatrixJ(A_offd); comm = hypre_ParCSRMatrixComm(A_csr); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_rank(comm, &mypid); hypre_MPI_Comm_size(comm, &nprocs); @@ -2930,7 +2927,7 @@ void 
hypre_ParCSRMatrixExtractRowSubmatrices( hypre_ParCSRMatrix *A_csr, proc_offsets1 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); proc_offsets2 = hypre_TAlloc(HYPRE_Int, (nprocs + 1), HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&nindices, 1, HYPRE_MPI_INT, proc_offsets1, 1, - HYPRE_MPI_INT, hcomm); + HYPRE_MPI_INT, comm); k = 0; for (i = 0; i < nprocs; i++) { @@ -4188,7 +4185,8 @@ hypre_ParvecBdiagInvScal( hypre_ParVector *b, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); - HYPRE_Int i, j, s, block_start, block_end; + HYPRE_Int i, j, s; + HYPRE_BigInt block_start, block_end; HYPRE_BigInt nrow_global = hypre_ParVectorGlobalSize(b); HYPRE_BigInt first_row = hypre_ParVectorFirstIndex(b); HYPRE_BigInt last_row = hypre_ParVectorLastIndex(b); @@ -4220,7 +4218,8 @@ hypre_ParvecBdiagInvScal( hypre_ParVector *b, hypre_ParCSRCommHandle *comm_handle; hypre_ParVector *bnew = hypre_ParVectorCreate( hypre_ParVectorComm(b), - hypre_ParVectorGlobalSize(b), hypre_ParVectorPartitioning(b) ); + hypre_ParVectorGlobalSize(b), + hypre_ParVectorPartitioning(b) ); hypre_ParVectorInitialize(bnew); hypre_Vector *bnew_local = hypre_ParVectorLocalVector(bnew); HYPRE_Complex *bnew_local_data = hypre_VectorData(bnew_local); @@ -5432,14 +5431,13 @@ HYPRE_Real hypre_ParCSRMatrixFnorm( hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Real f_diag, f_offd, local_result, result; f_diag = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixDiag(A)); f_offd = hypre_CSRMatrixFnorm(hypre_ParCSRMatrixOffd(A)); local_result = f_diag * f_diag + f_offd * f_offd; - hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); return hypre_sqrt(result); } @@ -5457,7 +5455,6 @@ hypre_ParCSRMatrixInfNorm( hypre_ParCSRMatrix *A, HYPRE_Real *norm ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); - 
hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* diag part of A */ hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); @@ -5528,7 +5525,7 @@ hypre_ParCSRMatrixInfNorm( hypre_ParCSRMatrix *A, } #endif - hypre_MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); return hypre_error_flag; } @@ -5692,6 +5689,8 @@ hypre_ExchangeExternalRowsWait(void *vrequest) * * extract submatrix A_{FF}, A_{FC}, A_{CF} or A_{CC} * char job[2] = "FF", "FC", "CF" or "CC" + * + * TODO (VPM): Can we do the same with hypre_ParCSRMatrixGenerateFFFC? *--------------------------------------------------------------------------*/ HYPRE_Int @@ -5703,7 +5702,6 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, HYPRE_Real strength_thresh) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; @@ -5761,7 +5759,7 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, { total_global_cpts = cpts_starts[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); nc_local = (HYPRE_Int)(cpts_starts[1] - cpts_starts[0]); } @@ -5777,13 +5775,13 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, } } big_nf_local = (HYPRE_BigInt) nf_local; - hypre_MPI_Scan(&big_nf_local, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_nf_local, fpts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); fpts_starts[0] = fpts_starts[1] - nf_local; if (my_id == num_procs - 1) { total_global_fpts = fpts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } if (row_set == -1 
&& col_set == -1) @@ -5850,7 +5848,8 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, hypre_assert(k == B_ncol_local); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - send_buf_data = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + send_buf_data = hypre_TAlloc(HYPRE_BigInt, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST); k = 0; for (i = 0; i < num_sends; i++) @@ -5994,7 +5993,7 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, } for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) { - HYPRE_Int j1 = sub_idx_offd[A_offd_j[j]]; + HYPRE_Int j1 = (HYPRE_Int) sub_idx_offd[A_offd_j[j]]; if ((j1 != -1) && (hypre_cabs(A_offd_a[j]) > (strength_thresh * maxel))) { hypre_assert(j1 >= 0 && j1 < num_cols_B_offd); @@ -6082,6 +6081,7 @@ hypre_ParCSRMatrixDropSmallEntriesHost( hypre_ParCSRMatrix *A, HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); HYPRE_Int nrow_local = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int my_id, num_procs; + /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -6224,26 +6224,20 @@ hypre_ParCSRMatrixDropSmallEntries( hypre_ParCSRMatrix *A, return hypre_error_flag; } - hypre_GpuProfilingPushRange("ParCSRMatrixDropSmallEntries"); - - HYPRE_Int ierr = 0; - #if defined(HYPRE_USING_GPU) HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) ); if (exec == HYPRE_EXEC_DEVICE) { - ierr = hypre_ParCSRMatrixDropSmallEntriesDevice(A, tol, type); + hypre_ParCSRMatrixDropSmallEntriesDevice(A, tol, type); } else #endif { - ierr = hypre_ParCSRMatrixDropSmallEntriesHost(A, tol, type); + hypre_ParCSRMatrixDropSmallEntriesHost(A, tol, type); } - hypre_GpuProfilingPopRange(); - - return ierr; + return hypre_error_flag; } /*-------------------------------------------------------------------------- @@ -6314,7 +6308,8 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, 
hypre_ParCSRPersistentCommHandle *comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - hypre_VectorData(rdbuf) = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + hypre_VectorData(rdbuf) = (HYPRE_Complex *) + hypre_ParCSRCommHandleRecvData(comm_handle); hypre_SeqVectorSetDataOwner(rdbuf, 0); #else @@ -6579,7 +6574,7 @@ hypre_ParCSRDiagScaleVectorHost( hypre_ParCSRMatrix *par_A, } /*-------------------------------------------------------------------------- - * HYPRE_ParCSRDiagScaleVector + * hypre_ParCSRDiagScaleVector *--------------------------------------------------------------------------*/ HYPRE_Int @@ -6587,8 +6582,6 @@ hypre_ParCSRDiagScaleVector( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_y, hypre_ParVector *par_x ) { - hypre_GpuProfilingPushRange("hypre_ParCSRDiagScaleVector"); - /* Local Matrix and Vectors */ hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(par_A); hypre_Vector *x = hypre_ParVectorLocalVector(par_x); @@ -6658,7 +6651,391 @@ hypre_ParCSRDiagScaleVector( hypre_ParCSRMatrix *par_A, hypre_ParCSRDiagScaleVectorHost(par_A, par_y, par_x); } - hypre_GpuProfilingPopRange(); + return hypre_error_flag; +} + +#if 0 +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixBlockColSumHost + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, + hypre_DenseBlockMatrix *B ) +{ + /* ParCSRMatrix A */ + HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); + + /* A_diag */ + hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); + HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); + HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); + HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); + HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); + + /* A_offd */ + hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); + 
HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); + HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); + HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); + HYPRE_Int num_rows_offd_A = hypre_CSRMatrixNumRows(A_offd); + HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); + + /* Output vector variables */ + HYPRE_Int num_cols_block_B = hypre_DenseBlockMatrixNumColsBlock(B); + + /* Local variables */ + HYPRE_Int i, j, col; + HYPRE_Int ib, ir, jr; + HYPRE_Complex *recv_data; + HYPRE_Complex *send_data; + + /* Communication variables */ + hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int *send_map_elmts; + HYPRE_Int *send_map_starts; +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandle *comm_handle; +#else + hypre_ParCSRCommHandle *comm_handle; +#endif + + /* Update commpkg offsets */ + hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, 1, 0, 1); + send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); + send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + + /* Allocate the recv and send buffers */ +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + send_data = hypre_Memset((void *) send_data, 0, + (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), + memory_location); +#else + send_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd_A, memory_location); + recv_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], memory_location); +#endif + + /* Pack send data */ + for (i = 0; i < num_rows_offd_A; i++) + { + for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) + { + col = A_offd_j[j]; + send_data[col] += A_offd_data[j]; + } + } + + /* Non-blocking communication starts */ +#if 
defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + +#else + comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, + memory_location, send_data, + memory_location, recv_data); +#endif + + /* Overlapped local computation. */ + for (i = 0; i < num_rows_diag_A; i++) + { + ir = i % num_cols_block_B; + for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) + { + col = A_diag_j[j]; + ib = col / num_cols_block_B; + jr = col % num_cols_block_B; + + hypre_DenseBlockMatrixDataBIJ(B, ib, ir, jr) += A_diag_data[j]; + } + } + + /* Non-blocking communication ends */ +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); +#else + hypre_ParCSRCommHandleDestroy(comm_handle); +#endif + + /* Unpack recv data */ + for (i = send_map_starts[0]; i < send_map_starts[num_sends]; i++) + { + col = send_map_elmts[i]; + ib = col / num_cols_block_B; + ir = col % num_cols_block_B; + jr = i % num_cols_block_B; + + hypre_DenseBlockMatrixDataBIJ(B, ib, ir, jr) += recv_data[i]; + } + + /* Free memory */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) + hypre_TFree(send_data, memory_location); + hypre_TFree(recv_data, memory_location); +#endif return hypre_error_flag; } + +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixBlockColSum + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixBlockColSum( hypre_ParCSRMatrix *A, + HYPRE_Int row_major, + HYPRE_Int num_rows_block, + HYPRE_Int num_cols_block, + hypre_DenseBlockMatrix **B_ptr ) +{ + HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); + HYPRE_BigInt num_rows_A = hypre_ParCSRMatrixGlobalNumRows(A); + HYPRE_BigInt num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); + + hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); + HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); + HYPRE_Int 
num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag); + + hypre_DenseBlockMatrix *B; + + /*--------------------------------------------- + * Sanity checks + *---------------------------------------------*/ + + if (num_rows_block < 1 || num_cols_block < 1) + { + *B_ptr = NULL; + return hypre_error_flag; + } + + if (num_rows_A % ((HYPRE_BigInt) num_rows_block)) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, + "Global number of rows is not divisible by the block dimension"); + return hypre_error_flag; + } + + if (num_cols_A % ((HYPRE_BigInt) num_cols_block)) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, + "Global number of columns is not divisible by the block dimension"); + return hypre_error_flag; + } + + HYPRE_ANNOTATE_FUNC_BEGIN; + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + + /*--------------------------------------------- + * Compute block column sum matrix + *---------------------------------------------*/ + + /* Create output matrix */ + B = hypre_DenseBlockMatrixCreate(row_major, + num_rows_diag_A, num_cols_diag_A, + num_rows_block, num_cols_block); + + /* Initialize the output matrix */ + hypre_DenseBlockMatrixInitializeOn(B, memory_location); + +#if defined(HYPRE_USING_GPU) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(memory_location); + + if (exec == HYPRE_EXEC_DEVICE) + { + /* TODO (VPM): hypre_ParCSRMatrixColSumReduceDevice */ + hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_HOST); + hypre_ParCSRMatrixBlockColSumHost(A, B); + hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_DEVICE); + hypre_DenseBlockMatrixMigrate(B, HYPRE_MEMORY_DEVICE); + } + else +#endif + { + hypre_ParCSRMatrixBlockColSumHost(A, B); + } + + /* Set output pointer */ + *B_ptr = B; + + HYPRE_ANNOTATE_FUNC_END; + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixColSumHost + *--------------------------------------------------------------------------*/ + +HYPRE_Int
+hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, + hypre_ParVector *b ) +{ + /* ParCSRMatrix A */ + HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); + + /* A_diag */ + hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); + HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); + HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); + HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); + HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); + + /* A_offd */ + hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); + HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); + HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); + HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); + HYPRE_Int num_rows_offd_A = hypre_CSRMatrixNumRows(A_offd); + HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); + + /* Local variables */ + HYPRE_Int i, j, col; + HYPRE_Complex *recv_data; + HYPRE_Complex *send_data; + + /* Communication variables */ + hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int *send_map_elmts; + HYPRE_Int *send_map_starts; +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandle *comm_handle; +#else + hypre_ParCSRCommHandle *comm_handle; +#endif + + /* Update commpkg offsets */ + hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, 1, 0, 1); + send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); + send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + + /* Allocate the recv and send buffers */ +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + send_data = hypre_Memset((void *) send_data, 0, + (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), + memory_location); +#else 
+ send_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd_A, memory_location); + recv_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], memory_location); +#endif + + /* Pack send data */ + for (i = 0; i < num_rows_offd_A; i++) + { + for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) + { + col = A_offd_j[j]; + send_data[col] += A_offd_data[j]; + } + } + + /* Non-blocking communication starts */ +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + +#else + comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, + memory_location, send_data, + memory_location, recv_data); +#endif + + /* Overlapped local computation. */ + for (i = 0; i < num_rows_diag_A; i++) + { + for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) + { + col = A_diag_j[j]; + hypre_ParVectorEntryI(b, col) += A_diag_data[j]; + } + } + + /* Non-blocking communication ends */ +#if defined(HYPRE_USING_PERSISTENT_COMM) + hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); +#else + hypre_ParCSRCommHandleDestroy(comm_handle); +#endif + + /* Unpack recv data */ + for (i = send_map_starts[0]; i < send_map_starts[num_sends]; i++) + { + col = send_map_elmts[i]; + hypre_ParVectorEntryI(b, col) += recv_data[i]; + } + + /* Free memory */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) + hypre_TFree(send_data, memory_location); + hypre_TFree(recv_data, memory_location); +#endif + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_ParCSRMatrixColSum + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixColSum( hypre_ParCSRMatrix *A, + hypre_ParVector **b_ptr ) +{ + MPI_Comm comm = hypre_ParCSRMatrixComm(A); + HYPRE_BigInt global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A); + HYPRE_BigInt *col_starts = hypre_ParCSRMatrixColStarts(A); + HYPRE_MemoryLocation memory_location = 
hypre_ParCSRMatrixMemoryLocation(A); + + hypre_ParVector *b; + + HYPRE_ANNOTATE_FUNC_BEGIN; + if (!hypre_ParCSRMatrixCommPkg(A)) + { + hypre_MatvecCommPkgCreate(A); + } + + /* Create output vector */ + b = hypre_ParVectorCreate(comm, global_num_cols, col_starts); + + /* Initialize the output vector */ + hypre_ParVectorInitialize_v2(b, memory_location); + + /*--------------------------------------------- + * Compute column sum vector + *---------------------------------------------*/ + +#if defined(HYPRE_USING_GPU) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(memory_location); + + if (exec == HYPRE_EXEC_DEVICE) + { + /* TODO (VPM): hypre_ParCSRMatrixColSumDevice */ + hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_HOST); + hypre_ParVectorMigrate(b, HYPRE_MEMORY_HOST); + hypre_ParCSRMatrixColSumHost(A, b); + hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_DEVICE); + hypre_ParVectorMigrate(b, HYPRE_MEMORY_DEVICE); + } + else +#endif + { + hypre_ParCSRMatrixColSumHost(A, b); + } + + /* Set output pointer */ + *b_ptr = b; + + HYPRE_ANNOTATE_FUNC_END; + + return hypre_error_flag; +} +#endif + diff --git a/src/parcsr_mv/par_csr_matrix.c b/src/parcsr_mv/par_csr_matrix.c index be3857c903..9dc774d3fa 100644 --- a/src/parcsr_mv/par_csr_matrix.c +++ b/src/parcsr_mv/par_csr_matrix.c @@ -325,7 +325,6 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f } comm = hypre_ParCSRMatrixComm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); diag = hypre_ParCSRMatrixDiag(matrix); offd = hypre_ParCSRMatrixOffd(matrix); @@ -342,7 +341,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f hypre_CSRMatrixNumNonzeros(offd) ); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); hypre_ParCSRMatrixNumNonzeros(matrix) = total_num_nonzeros; } @@ -354,7 +353,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, const char* f 
hypre_CSRMatrixNumNonzeros(offd) ); hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_ParCSRMatrixDNumNonzeros(matrix) = total_num_nonzeros; } @@ -394,7 +393,6 @@ HYPRE_Int hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix ) { MPI_Comm comm = hypre_ParCSRMatrixComm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix); HYPRE_Int *rownnz_diag = hypre_CSRMatrixRownnz(diag); @@ -429,7 +427,7 @@ hypre_ParCSRMatrixSetNumRownnz( hypre_ParCSRMatrix *matrix ) local_num_rownnz += (HYPRE_BigInt) ((num_rownnz_diag - i) + (num_rownnz_offd - j)); hypre_MPI_Allreduce(&local_num_rownnz, &global_num_rownnz, 1, - HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_ParCSRMatrixGlobalNumRownnz(matrix) = global_num_rownnz; @@ -1612,7 +1610,6 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); total_size = 4; if (my_id == 0) @@ -1686,7 +1683,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, A_i = hypre_CSRMatrixI(A); A_j = hypre_CSRMatrixJ(A); } - hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, hcomm); + hypre_MPI_Bcast(global_data, 3, HYPRE_MPI_BIG_INT, 0, comm); global_num_rows = global_data[0]; global_num_cols = global_data[1]; global_size = global_data[2]; @@ -1699,29 +1696,29 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, { send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &row_starts[1], 1, HYPRE_MPI_BIG_INT, 
0, comm); send_start = 4 + (num_procs + 1); hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); send_start = 5 + (num_procs + 1); hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); } else if ((global_data[3] == 0) || (global_data[3] == 1)) { send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &row_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &row_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); if (global_data[3] == 0) { @@ -1733,11 +1730,11 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, { send_start = 4; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &col_starts[0], 1, HYPRE_MPI_BIG_INT, 0, comm); send_start = 5; hypre_MPI_Scatter(&global_data[send_start], 1, HYPRE_MPI_BIG_INT, - &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, hcomm); + &col_starts[1], 1, HYPRE_MPI_BIG_INT, 0, comm); } } hypre_TFree(global_data, HYPRE_MEMORY_HOST); @@ -1771,8 +1768,8 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, } //num_nonzeros_proc[num_procs-1] = A_i[(HYPRE_Int)global_num_rows] - A_i[(HYPRE_Int)row_starts[num_procs-1]]; } - hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, hcomm); - hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Scatter(num_rows_proc, 1, HYPRE_MPI_INT, &num_rows, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Scatter(num_nonzeros_proc, 1, HYPRE_MPI_INT, &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm); /* RL: this is not correct: (HYPRE_Int) global_num_cols */ local_A = 
hypre_CSRMatrixCreate(num_rows, (HYPRE_Int) global_num_cols, num_nonzeros); @@ -1792,7 +1789,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, &A_i[(HYPRE_Int) global_row_starts[i]], &A_j[ind], &csr_matrix_datatypes[i]); - hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, hcomm, + hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm, &requests[i - 1]); hypre_MPI_Type_free(&csr_matrix_datatypes[i]); } @@ -1826,7 +1823,7 @@ hypre_CSRMatrixToParCSRMatrix( MPI_Comm comm, hypre_CSRMatrixI(local_A), hypre_CSRMatrixJ(local_A), &csr_matrix_datatypes[0]); - hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, hcomm, &status0); + hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0], 0, 0, comm, &status0); hypre_MPI_Type_free(csr_matrix_datatypes); } @@ -2169,7 +2166,6 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Clone input matrix to host memory */ par_temp = hypre_ParCSRMatrixClone_v2(par_matrix, 1, HYPRE_MEMORY_HOST); @@ -2236,11 +2232,11 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, if (local_num_rows) { /* look for a message from processor 0 */ - hypre_MPI_Probe(0, tag1, hcomm, &status1); + hypre_MPI_Probe(0, tag1, comm, &status1); hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count); send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST); - hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, hcomm, &status1); + hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1); /* now unpack */ num_types = send_info[0]; @@ -2308,7 +2304,7 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, for (i = start; i < num_types; i++) { hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1, - hcomm, &requests[i - start]); + comm, &requests[i - start]); } hypre_MPI_Waitall(num_types 
- start, requests, status); @@ -2350,13 +2346,13 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, proc_id = used_procs[i]; vec_len = (HYPRE_Int)(new_vec_starts[i + 1] - new_vec_starts[i]); hypre_MPI_Irecv(&matrix_i[new_vec_starts[i] + 1], vec_len, HYPRE_MPI_INT, - proc_id, tag2, hcomm, &requests[j++]); + proc_id, tag2, comm, &requests[j++]); } for (i = 0; i < num_types; i++) { proc_id = used_procs[i]; hypre_MPI_Isend(&local_matrix_i[1], local_num_rows, HYPRE_MPI_INT, - proc_id, tag2, hcomm, &requests[j++]); + proc_id, tag2, comm, &requests[j++]); } hypre_MPI_Waitall(j, requests, status); @@ -2389,17 +2385,17 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, start_index = matrix_i[(HYPRE_Int)new_vec_starts[i]]; num_data = matrix_i[(HYPRE_Int)new_vec_starts[i + 1]] - start_index; hypre_MPI_Irecv(&matrix_data[start_index], num_data, HYPRE_MPI_COMPLEX, - used_procs[i], tag1, hcomm, &requests[j++]); + used_procs[i], tag1, comm, &requests[j++]); hypre_MPI_Irecv(&matrix_j[start_index], num_data, HYPRE_MPI_INT, - used_procs[i], tag3, hcomm, &requests[j++]); + used_procs[i], tag3, comm, &requests[j++]); } local_num_nonzeros = local_matrix_i[local_num_rows]; for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_matrix_data, local_num_nonzeros, HYPRE_MPI_COMPLEX, - used_procs[i], tag1, hcomm, &requests[j++]); + used_procs[i], tag1, comm, &requests[j++]); hypre_MPI_Isend(local_matrix_j, local_num_nonzeros, HYPRE_MPI_INT, - used_procs[i], tag3, hcomm, &requests[j++]); + used_procs[i], tag3, comm, &requests[j++]); } hypre_MPI_Waitall(num_requests, requests, status); diff --git a/src/parcsr_mv/par_csr_matrix_stats.c b/src/parcsr_mv/par_csr_matrix_stats.c index e892f7a846..7925d1a203 100644 --- a/src/parcsr_mv/par_csr_matrix_stats.c +++ b/src/parcsr_mv/par_csr_matrix_stats.c @@ -347,7 +347,6 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, /* We assume all MPI communicators are equal */ comm = 
hypre_ParCSRMatrixComm(matrices[0]); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Allocate MPI buffers */ recvbuffer = hypre_CTAlloc(HYPRE_Real, 4 * num_matrices, HYPRE_MEMORY_HOST); @@ -389,7 +388,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 4 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_MAX, 0, hcomm); + HYPRE_MPI_REAL, hypre_MPI_MAX, 0, comm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) @@ -420,7 +419,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 3 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_SUM, 0, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, 0, comm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) @@ -467,7 +466,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, } hypre_MPI_Reduce(sendbuffer, recvbuffer, 2 * num_matrices, - HYPRE_MPI_REAL, hypre_MPI_SUM, 0, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, 0, comm); /* Unpack MPI buffers */ for (i = 0; i < num_matrices; i++) diff --git a/src/parcsr_mv/par_vector.c b/src/parcsr_mv/par_vector.c index 0e77ba677f..79a90eb76f 100644 --- a/src/parcsr_mv/par_vector.c +++ b/src/parcsr_mv/par_vector.c @@ -498,7 +498,6 @@ hypre_ParVectorInnerProd( hypre_ParVector *x, hypre_ParVector *y ) { MPI_Comm comm = hypre_ParVectorComm(x); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); @@ -509,7 +508,7 @@ hypre_ParVectorInnerProd( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] -= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(&local_result, &result, 1, HYPRE_MPI_REAL, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif @@ -585,7 +584,6 @@ hypre_VectorToParVector ( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); 
hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == 0) { @@ -595,9 +593,9 @@ hypre_VectorToParVector ( MPI_Comm comm, global_vecstride = hypre_VectorVectorStride(v); } - hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, hcomm); - hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, hcomm); - hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm); + hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm); if (num_vectors == 1) { @@ -618,7 +616,7 @@ hypre_VectorToParVector ( MPI_Comm comm, global_vec_starts = hypre_CTAlloc(HYPRE_BigInt, num_procs + 1, HYPRE_MEMORY_HOST); } hypre_MPI_Gather(&first_index, 1, HYPRE_MPI_BIG_INT, global_vec_starts, - 1, HYPRE_MPI_BIG_INT, 0, hcomm); + 1, HYPRE_MPI_BIG_INT, 0, comm); if (my_id == 0) { global_vec_starts[num_procs] = hypre_ParVectorGlobalSize(par_vector); @@ -642,7 +640,7 @@ hypre_VectorToParVector ( MPI_Comm comm, { hypre_MPI_Isend( &v_data[(HYPRE_Int) global_vec_starts[p]] + j * global_vecstride, (HYPRE_Int)(global_vec_starts[p + 1] - global_vec_starts[p]), - HYPRE_MPI_COMPLEX, p, 0, hcomm, &requests[k++] ); + HYPRE_MPI_COMPLEX, p, 0, comm, &requests[k++] ); } if (num_vectors == 1) { @@ -669,7 +667,7 @@ hypre_VectorToParVector ( MPI_Comm comm, { for ( j = 0; j < num_vectors; ++j ) hypre_MPI_Recv( local_data + j * vecstride, local_size, HYPRE_MPI_COMPLEX, - 0, 0, hcomm, &status0 ); + 0, 0, comm, &status0 ); } if (global_vec_starts) @@ -741,7 +739,6 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); local_size = (HYPRE_Int)(last_index - first_index + 1); if (hypre_GetActualMemLocation(hypre_ParVectorMemoryLocation(par_v)) != @@ -808,11 +805,11 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector 
*par_v, if (local_size) { /* look for a message from processor 0 */ - hypre_MPI_Probe(0, tag1, hcomm, &status1); + hypre_MPI_Probe(0, tag1, comm, &status1); hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count); send_info = hypre_CTAlloc(HYPRE_Int, count, HYPRE_MEMORY_HOST); - hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, hcomm, &status1); + hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1); /* now unpack */ num_types = send_info[0]; @@ -879,7 +876,7 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, for (i = start; i < num_types; i++) { hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], - tag1, hcomm, &requests[i - start]); + tag1, comm, &requests[i - start]); } hypre_MPI_Waitall(num_types - start, requests, status); @@ -926,12 +923,12 @@ hypre_ParVectorToVectorAll_v2( hypre_ParVector *par_v, { vec_len = (HYPRE_Int) (new_vec_starts[i + 1] - new_vec_starts[i]); hypre_MPI_Irecv(&vector_data[(HYPRE_Int)new_vec_starts[i]], num_vectors * vec_len, - HYPRE_MPI_COMPLEX, used_procs[i], tag2, hcomm, &requests[j++]); + HYPRE_MPI_COMPLEX, used_procs[i], tag2, comm, &requests[j++]); } for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_data, num_vectors * local_size, HYPRE_MPI_COMPLEX, - used_procs[i], tag2, hcomm, &requests[j++]); + used_procs[i], tag2, comm, &requests[j++]); } hypre_MPI_Waitall(num_requests, requests, status); diff --git a/src/parcsr_mv/par_vector_batched.c b/src/parcsr_mv/par_vector_batched.c index 9e20f4bbd4..18dc8f8008 100644 --- a/src/parcsr_mv/par_vector_batched.c +++ b/src/parcsr_mv/par_vector_batched.c @@ -53,7 +53,6 @@ hypre_ParVectorMassInnerProd( hypre_ParVector *x, HYPRE_Real *result ) { MPI_Comm comm = hypre_ParVectorComm(x); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); HYPRE_Real *local_result; HYPRE_Int i; @@ -73,7 +72,7 @@ hypre_ParVectorMassInnerProd( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] 
-= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(local_result, result, k, HYPRE_MPI_REAL, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif @@ -98,7 +97,6 @@ hypre_ParVectorMassDotpTwo ( hypre_ParVector *x, HYPRE_Real *result_y ) { MPI_Comm comm = hypre_ParVectorComm(x); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Real *local_result, *result; @@ -121,7 +119,7 @@ hypre_ParVectorMassDotpTwo ( hypre_ParVector *x, hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] -= hypre_MPI_Wtime(); #endif hypre_MPI_Allreduce(local_result, result, 2 * k, HYPRE_MPI_REAL, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_ALL_REDUCE] += hypre_MPI_Wtime(); #endif diff --git a/src/sstruct_ls/maxwell_TV_setup.c b/src/sstruct_ls/maxwell_TV_setup.c index 08efa08ed4..0a9e76a055 100644 --- a/src/sstruct_ls/maxwell_TV_setup.c +++ b/src/sstruct_ls/maxwell_TV_setup.c @@ -871,9 +871,8 @@ hypre_MaxwellTV_Setup(void *maxwell_vdata, lev_nboxes = 0; MPI_Comm comm = hypre_SStructGridComm(egrid_l[l + 1]); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Allreduce(&nboxes, &lev_nboxes, 1, HYPRE_MPI_INT, hypre_MPI_SUM, - hcomm); + comm); if (lev_nboxes) /* there were coarsen boxes */ { diff --git a/src/sstruct_ls/sstruct_sharedDOFComm.c b/src/sstruct_ls/sstruct_sharedDOFComm.c index 0610c8495e..35b2978c31 100644 --- a/src/sstruct_ls/sstruct_sharedDOFComm.c +++ b/src/sstruct_ls/sstruct_sharedDOFComm.c @@ -154,7 +154,6 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, hypre_MPI_Comm_rank(A_comm, &myproc); hypre_MPI_Comm_size(grid_comm, &nprocs); - hypre_MPI_Comm hgrid_comm = hypre_MPI_CommFromMPI_Comm(grid_comm); start_rank = hypre_ParCSRMatrixFirstRowIndex(A); end_rank = 
hypre_ParCSRMatrixLastRowIndex(A); @@ -783,7 +782,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, { rbuffer_RowsNcols[proc] = hypre_TAlloc(HYPRE_Int, 2 * RecvFromProcs[proc], HYPRE_MEMORY_HOST); hypre_MPI_Irecv(rbuffer_RowsNcols[proc], 2 * RecvFromProcs[proc], HYPRE_MPI_INT, - proc, 0, hgrid_comm, &requests[j++]); + proc, 0, grid_comm, &requests[j++]); } /* if (RecvFromProcs[proc]) */ } /* for (proc= 0; proc< nprocs; proc++) */ @@ -793,7 +792,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (tot_nsendRowsNcols[proc]) { hypre_MPI_Isend(send_RowsNcols[proc], tot_nsendRowsNcols[proc], HYPRE_MPI_INT, proc, - 0, hgrid_comm, &requests[j++]); + 0, grid_comm, &requests[j++]); } } @@ -837,7 +836,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (RecvFromProcs[proc]) { hypre_MPI_Irecv(rbuffer_ColsData[proc], 2 * send_RowsNcols_alloc[proc], HYPRE_MPI_REAL, - proc, 1, hgrid_comm, &requests[j++]); + proc, 1, grid_comm, &requests[j++]); } /* if (RecvFromProcs[proc]) */ } /* for (proc= 0; proc< nprocs; proc++) */ @@ -846,7 +845,7 @@ hypre_SStructSharedDOF_ParcsrMatRowsComm( hypre_SStructGrid *grid, if (tot_sendColsData[proc]) { hypre_MPI_Isend(vals[proc], tot_sendColsData[proc], HYPRE_MPI_REAL, proc, - 1, hgrid_comm, &requests[j++]); + 1, grid_comm, &requests[j++]); } } diff --git a/src/sstruct_mv/HYPRE_sstruct_graph.c b/src/sstruct_mv/HYPRE_sstruct_graph.c index f5bd343f25..5691325e7d 100644 --- a/src/sstruct_mv/HYPRE_sstruct_graph.c +++ b/src/sstruct_mv/HYPRE_sstruct_graph.c @@ -391,10 +391,8 @@ HYPRE_SStructGraphAssemble( HYPRE_SStructGraph graph ) /* if any processor has added entries, then all need to participate */ - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allreduce(&n_add_entries, &global_n_add_entries, - 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); if (global_n_add_entries > 0 ) { diff --git a/src/sstruct_mv/sstruct_grid.c 
b/src/sstruct_mv/sstruct_grid.c index b9d1620d66..5b2601e04e 100644 --- a/src/sstruct_mv/sstruct_grid.c +++ b/src/sstruct_mv/sstruct_grid.c @@ -480,7 +480,6 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) hypre_MPI_Comm_size(comm, &nprocs); hypre_MPI_Comm_rank(comm, &myproc); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*find offset and ghost offsets */ { @@ -489,7 +488,7 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) /* offsets */ hypre_MPI_Scan( - &local_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + &local_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); /* first point in my range */ offsets[0] = scan_recv - local_size; /* first point in next proc's range */ @@ -499,7 +498,7 @@ hypre_SStructGridAssembleBoxManagers( hypre_SStructGrid *grid ) /* ghost offsets */ hypre_MPI_Scan( - &ghlocal_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hcomm); + &ghlocal_size, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); /* first point in my range */ ghoffsets[0] = scan_recv - ghlocal_size; /* first point in next proc's range */ diff --git a/src/struct_ls/pfmg_setup.c b/src/struct_ls/pfmg_setup.c index 2315291cec..caead8c63a 100644 --- a/src/struct_ls/pfmg_setup.c +++ b/src/struct_ls/pfmg_setup.c @@ -774,7 +774,6 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, HYPRE_Int i, d; MPI_Comm comm = hypre_StructMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*---------------------------------------------------------- * Initialize some things @@ -847,13 +846,13 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, tcxyz[1] = cxyz[1]; tcxyz[2] = cxyz[2]; hypre_MPI_Allreduce(tcxyz, cxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - hcomm); + comm); tcxyz[0] = sqcxyz[0]; tcxyz[1] = sqcxyz[1]; tcxyz[2] = sqcxyz[2]; hypre_MPI_Allreduce(tcxyz, sqcxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - hcomm); + comm); for (d = 0; d < 3; d++) { diff --git a/src/struct_mv/assumed_part.c 
b/src/struct_mv/assumed_part.c index eeeaa807cb..a7badb373a 100644 --- a/src/struct_mv/assumed_part.c +++ b/src/struct_mv/assumed_part.c @@ -287,7 +287,6 @@ hypre_APGetAllBoxesInRegions( hypre_BoxArray *region_array, HYPRE_Real *send_buf_vol; HYPRE_Real *vol_array; HYPRE_Real *dbl_vol_and_count; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); count_array = *p_count_array; vol_array = *p_vol_array; @@ -313,7 +312,7 @@ hypre_APGetAllBoxesInRegions( hypre_BoxArray *region_array, } hypre_MPI_Allreduce(send_buf_vol, dbl_vol_and_count, num_regions * 2, - HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, comm); /* Unpack */ for (i = 0; i < num_regions; i++) @@ -352,7 +351,6 @@ hypre_APShrinkRegions( hypre_BoxArray *region_array, hypre_Box *my_box, *result_box, *grow_box, *region; hypre_Index grow_index, imin, imax; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); ndim = hypre_BoxArrayNDim(my_box_array); ndim2 = 2 * ndim; @@ -460,7 +458,7 @@ hypre_APShrinkRegions( hypre_BoxArray *region_array, /* Do an Allreduce on size and volume to get the global information */ hypre_MPI_Allreduce(indices, recvbuf, num_regions * ndim2, HYPRE_MPI_INT, - hypre_MPI_MIN, hcomm); + hypre_MPI_MIN, comm); /* Unpack the "shrunk" regions */ /* For each region */ diff --git a/src/struct_mv/box_manager.c b/src/struct_mv/box_manager.c index f40a9d8ada..b0ac4cf6c5 100644 --- a/src/struct_mv/box_manager.c +++ b/src/struct_mv/box_manager.c @@ -273,14 +273,13 @@ hypre_BoxManGetGlobalIsGatherCalled( hypre_BoxManager *manager, HYPRE_Int nprocs; hypre_MPI_Comm_size(comm, &nprocs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); loc_is_gather = hypre_BoxManIsGatherCalled(manager); if (nprocs > 1) { hypre_MPI_Allreduce(&loc_is_gather, is_gather, 1, HYPRE_MPI_INT, - hypre_MPI_LOR, hcomm); + hypre_MPI_LOR, comm); } else /* just one proc */ { @@ -1160,7 +1159,6 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) /* initilize */ hypre_MPI_Comm_rank(comm, 
&myid); hypre_MPI_Comm_size(comm, &nprocs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); gather_regions = hypre_BoxManGatherRegions(manager); nentries = hypre_BoxManNEntries(manager); @@ -1178,7 +1176,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) { is_gather = hypre_BoxManIsGatherCalled(manager); hypre_MPI_Allreduce(&is_gather, &global_is_gather, 1, HYPRE_MPI_INT, - hypre_MPI_LOR, hcomm); + hypre_MPI_LOR, comm); } else /* just one proc */ { @@ -1319,7 +1317,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) sendbuf2[1] = (HYPRE_Real) num_my_entries; hypre_MPI_Allreduce(&sendbuf2, &recvbuf2, 2, HYPRE_MPI_REAL, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); global_volume = recvbuf2[0]; global_num_boxes = (HYPRE_Int) recvbuf2[1]; @@ -1427,7 +1425,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) send_statbuf[2] = num_my_entries; hypre_MPI_Allreduce(send_statbuf, statbuf, 3, HYPRE_MPI_INT, - hypre_MPI_MAX, hcomm); + hypre_MPI_MAX, comm); //max_proc_count = statbuf[0]; @@ -1775,7 +1773,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) recv_counts = hypre_CTAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&send_count_bytes, 1, HYPRE_MPI_INT, - recv_counts, 1, HYPRE_MPI_INT, hcomm); + recv_counts, 1, HYPRE_MPI_INT, comm); displs = hypre_CTAlloc(HYPRE_Int, nprocs, HYPRE_MEMORY_HOST); displs[0] = 0; @@ -1844,7 +1842,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) /* now send_buf is ready to go! 
*/ hypre_MPI_Allgatherv(send_buf, send_count_bytes, hypre_MPI_BYTE, - recv_buf, recv_counts, displs, hypre_MPI_BYTE, hcomm); + recv_buf, recv_counts, displs, hypre_MPI_BYTE, comm); /* unpack recv_buf into entries - let's just unpack them all into the entries table - this way they will already be sorted - so we set @@ -2205,7 +2203,7 @@ hypre_BoxManAssemble( hypre_BoxManager *manager ) if (global_num_boxes == nentries) { all_known = 1; } hypre_MPI_Allreduce(&all_known, &global_all_known, 1, HYPRE_MPI_INT, - hypre_MPI_LAND, hcomm); + hypre_MPI_LAND, comm); hypre_BoxManAllGlobalKnown(manager) = global_all_known; } diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 8c330449bd..1d2660252e 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -831,7 +831,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_CommPkgComm(comm_pkg); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_Int num_requests; hypre_MPI_Request *requests; @@ -1039,7 +1038,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Irecv(recv_buffers_mpi[i], hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, hcomm, &requests[j++]); + tag, comm, &requests[j++]); if ( hypre_CommPkgFirstComm(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); @@ -1054,7 +1053,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Isend(send_buffers_mpi[i], hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, hcomm, &requests[j++]); + tag, comm, &requests[j++]); if ( hypre_CommPkgFirstComm(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); diff --git a/src/struct_mv/struct_grid.c 
b/src/struct_mv/struct_grid.c index 521b17aa29..254a47faea 100644 --- a/src/struct_mv/struct_grid.c +++ b/src/struct_mv/struct_grid.c @@ -276,7 +276,6 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* has the box manager been created? */ if (boxman == NULL) @@ -376,7 +375,7 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) HYPRE_BigInt big_size = (HYPRE_BigInt)size; hypre_MPI_Allreduce(&big_size, &global_size, 1, HYPRE_MPI_BIG_INT, - hypre_MPI_SUM, hcomm); + hypre_MPI_SUM, comm); hypre_StructGridGlobalSize(grid) = global_size; /* TO DO: this HYPRE_Int * could overflow! (used * to calc flops) */ @@ -437,7 +436,7 @@ hypre_StructGridAssemble( hypre_StructGrid *grid ) sendbuf6[d + ndim] = -hypre_BoxIMaxD(bounding_box, d); } hypre_MPI_Allreduce(sendbuf6, recvbuf6, 2 * ndim, HYPRE_MPI_INT, - hypre_MPI_MIN, hcomm); + hypre_MPI_MIN, comm); /* unpack buffer */ for (d = 0; d < ndim; d++) { @@ -594,7 +593,6 @@ hypre_GatherAllBoxes(MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_all_procs); hypre_MPI_Comm_rank(comm, &my_rank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* compute recvcounts and displs */ item_size = 2 * ndim + 1; @@ -602,7 +600,7 @@ hypre_GatherAllBoxes(MPI_Comm comm, recvcounts = hypre_TAlloc(HYPRE_Int, num_all_procs, HYPRE_MEMORY_HOST); displs = hypre_TAlloc(HYPRE_Int, num_all_procs, HYPRE_MEMORY_HOST); hypre_MPI_Allgather(&sendcount, 1, HYPRE_MPI_INT, - recvcounts, 1, HYPRE_MPI_INT, hcomm); + recvcounts, 1, HYPRE_MPI_INT, comm); displs[0] = 0; recvbuf_size = recvcounts[0]; for (p = 1; p < num_all_procs; p++) @@ -631,7 +629,7 @@ hypre_GatherAllBoxes(MPI_Comm comm, /* get global grid info */ hypre_MPI_Allgatherv(sendbuf, sendcount, HYPRE_MPI_INT, - recvbuf, recvcounts, displs, HYPRE_MPI_INT, hcomm); + recvbuf, recvcounts, displs, HYPRE_MPI_INT, comm); /* sort recvbuf by process rank? 
*/ diff --git a/src/struct_mv/struct_innerprod.c b/src/struct_mv/struct_innerprod.c index d7ec309ef2..303047c833 100644 --- a/src/struct_mv/struct_innerprod.c +++ b/src/struct_mv/struct_innerprod.c @@ -41,7 +41,6 @@ hypre_StructInnerProd( hypre_StructVector *x, HYPRE_Int i; MPI_Comm comm = hypre_StructVectorComm(x); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); #if 0 //defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) const HYPRE_Int data_location = hypre_StructGridDataLocation(hypre_StructVectorGrid(y)); @@ -102,7 +101,7 @@ hypre_StructInnerProd( hypre_StructVector *x, process_result = (HYPRE_Real) local_result; hypre_MPI_Allreduce(&process_result, &final_innerprod_result, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, hcomm); + HYPRE_MPI_REAL, hypre_MPI_SUM, comm); hypre_IncFLOPCount(2 * hypre_StructVectorGlobalSize(x)); diff --git a/src/test/ij.c b/src/test/ij.c index 6ad4a3a39b..0ee26e3f7d 100644 --- a/src/test/ij.c +++ b/src/test/ij.c @@ -10157,7 +10157,6 @@ BuildFuncsFromOneFile( HYPRE_Int argc, comm = hypre_MPI_COMM_WORLD; hypre_MPI_Comm_rank(comm, &myid ); hypre_MPI_Comm_size(comm, &num_procs ); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------- * Parse command line @@ -10211,7 +10210,7 @@ BuildFuncsFromOneFile( HYPRE_Int argc, { hypre_MPI_Isend(&dof_func[partitioning[i]], (partitioning[i + 1] - partitioning[i]), - HYPRE_MPI_INT, i, 0, hcomm, &requests[i - 1]); + HYPRE_MPI_INT, i, 0, comm, &requests[i - 1]); } for (i = 0; i < local_size; i++) { @@ -10223,7 +10222,7 @@ BuildFuncsFromOneFile( HYPRE_Int argc, } else { - hypre_MPI_Recv(dof_func_local, local_size, HYPRE_MPI_INT, 0, 0, hcomm, &status0); + hypre_MPI_Recv(dof_func_local, local_size, HYPRE_MPI_INT, 0, 0, comm, &status0); } *dof_func_ptr = dof_func_local; @@ -10330,7 +10329,6 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, *-----------------------------------------------------------*/ hypre_MPI_Comm_rank(comm, &myid); 
hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------- * Parse command line @@ -10405,7 +10403,7 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, displs[proc + 1] = displs[proc] + send_counts[proc]; } } - hypre_MPI_Scatter(send_counts, 1, HYPRE_MPI_INT, size, 1, HYPRE_MPI_INT, 0, hcomm); + hypre_MPI_Scatter(send_counts, 1, HYPRE_MPI_INT, size, 1, HYPRE_MPI_INT, 0, comm); if (myid == 0) { @@ -10428,7 +10426,7 @@ BuildBigArrayFromOneFile( HYPRE_Int argc, array = hypre_CTAlloc(HYPRE_BigInt, *size, HYPRE_MEMORY_HOST); hypre_MPI_Scatterv(send_buffer, send_counts, displs, HYPRE_MPI_BIG_INT, - array, *size, HYPRE_MPI_BIG_INT, 0, hcomm); + array, *size, HYPRE_MPI_BIG_INT, 0, comm); *array_ptr = array; /* Free memory */ diff --git a/src/test/sstruct.c b/src/test/sstruct.c index a645ed10eb..542c45206f 100644 --- a/src/test/sstruct.c +++ b/src/test/sstruct.c @@ -462,11 +462,11 @@ ReadData( char *filename, } } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 41dcdfd347..8e19d1c194 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1168,24 +1168,21 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs to do casting of HYPRE_Int and hypre_int correctly 
*****************************************************************************/ -typedef struct -{ - MPI_Comm mpi_comm; - hypre_MemoryLocation send_location; - hypre_MemoryLocation recv_location; - void *send_copy; - void *recv_copy; - hypre_MemoryLocation send_copy_location; - hypre_MemoryLocation recv_copy_location; -} hypre_MPI_Comm; - -#define hypre_MPI_CommMPI_Comm(comm) ((comm).mpi_comm) -#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) -#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) -#define hypre_MPI_CommMPI_SendCopy(comm) ((comm).send_copy) -#define hypre_MPI_CommMPI_RecvCopy(comm) ((comm).recv_copy) -#define hypre_MPI_CommMPI_SendCopyLocation(comm) ((comm).send_copy_location) -#define hypre_MPI_CommMPI_RecvCopyLocation(comm) ((comm).recv_copy_location) +typedef MPI_Comm hypre_MPI_Comm; + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendCopy(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvCopy(hypre_MPI_Comm comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendCopy(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvCopy(hypre_MPI_Comm comm, void*); typedef MPI_Group hypre_MPI_Group; @@ -1356,7 +1353,6 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op 
*op ); -hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, void *src, hypre_MemoryLocation src_location, diff --git a/src/utilities/exchange_data.c b/src/utilities/exchange_data.c index b668227bbc..933da2ba64 100644 --- a/src/utilities/exchange_data.c +++ b/src/utilities/exchange_data.c @@ -200,7 +200,6 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, const HYPRE_Int term_tag = 1004 * rnum; const HYPRE_Int post_tag = 1006 * rnum; - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_MPI_Comm_size(comm, &num_procs ); hypre_MPI_Comm_rank(comm, &myid ); @@ -265,7 +264,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, hypre_MPI_Irecv(response_ptrs[i], max_response_total_bytes, hypre_MPI_BYTE, contact_proc_list[i], - response_tag, hcomm, &response_requests[i]); + response_tag, comm, &response_requests[i]); } /* send out contact messages */ @@ -276,7 +275,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, size = contact_send_buf_starts[i + 1] - contact_send_buf_starts[i] ; hypre_MPI_Isend(contact_ptrs[i], size * contact_obj_size, hypre_MPI_BYTE, contact_proc_list[i], - contact_tag, hcomm, &contact_requests[i]); + contact_tag, comm, &contact_requests[i]); /* start_ptr += (size*contact_obj_size); */ start_ptr = (void *) ((char *) start_ptr + (size * contact_obj_size)); } @@ -303,12 +302,11 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, for (i = 0; i < tree -> num_child; i++) { - hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, (tree -> child_id)[i], term_tag, hcomm, + hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, (tree -> child_id)[i], term_tag, comm, &term_requests[i]); } terminate = 0; - children_complete = 0; } else if (num_procs == 1 && num_contacts > 0) /* added 11/08 */ @@ -323,7 +321,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, while (!terminate) { /* did I receive any contact 
messages? */ - hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, hcomm, + hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm, &contact_flag, &status); while (contact_flag) @@ -349,7 +347,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, /* this must be blocking - can't fill recv without the buffer*/ hypre_MPI_Recv(recv_contact_buf, contact_size * contact_obj_size, - hypre_MPI_BYTE, proc, contact_tag, hcomm, &fill_status); + hypre_MPI_BYTE, proc, contact_tag, comm, &fill_status); response_obj->fill_response(recv_contact_buf, contact_size, proc, response_obj, comm, &send_response_buf, @@ -390,7 +388,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, hypre_MPI_Isend(post_array[post_array_size], size, hypre_MPI_BYTE, proc, post_tag, /*hypre_MPI_COMM_WORLD, */ - hcomm, + comm, &post_send_requests[post_array_size]); post_array_size++; @@ -407,12 +405,12 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, /*send the block of data that includes the overhead */ /* this is a blocking send - the recv has already been posted */ hypre_MPI_Send(send_response_buf, max_response_total_bytes, - hypre_MPI_BYTE, proc, response_tag, hcomm); + hypre_MPI_BYTE, proc, response_tag, comm); /*--------------------------------------------------------------*/ /* look for any more contact messages*/ - hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, hcomm, + hypre_MPI_Iprobe(hypre_MPI_ANY_SOURCE, contact_tag, comm, &contact_flag, &status); } @@ -439,10 +437,10 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, if (children_complete & (myid > 0)) /*root does not have a parent*/ { hypre_MPI_Isend(NULL, 0, HYPRE_MPI_INT, tree -> parent_id, term_tag, - hcomm, &request_parent); + comm, &request_parent); hypre_MPI_Irecv(NULL, 0, HYPRE_MPI_INT, tree -> parent_id, term_tag, - hcomm, &term_request1); + comm, &term_request1); } } else /*have we gotten a term message from our parent? 
*/ @@ -463,7 +461,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, { /*a blocking send - recv has been posted already*/ hypre_MPI_Send(NULL, 0, HYPRE_MPI_INT, (tree -> child_id)[i], - term_tag, hcomm); + term_tag, comm); } } } @@ -533,7 +531,7 @@ hypre_DataExchangeList(HYPRE_Int num_contacts, post_ptrs[count] = index_ptr; hypre_MPI_Irecv(post_ptrs[count], size, hypre_MPI_BYTE, contact_proc_list[i], post_tag, - hcomm, &post_recv_requests[count]); + comm, &post_recv_requests[count]); count++; /* index_ptr+=size;*/ index_ptr = (void *) ((char *) index_ptr + size); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 17d5c9fec1..d1af4fab0c 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -759,16 +759,6 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) #else -hypre_MPI_Comm -hypre_MPI_CommFromMPI_Comm(MPI_Comm comm) -{ - hypre_MPI_Comm hcomm; - hypre_Memset(&hcomm, 0, sizeof(hypre_MPI_Comm), HYPRE_MEMORY_HOST); - hypre_MPI_CommMPI_Comm(hcomm) = comm; - - return hcomm; -} - hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request) { @@ -1078,8 +1068,7 @@ hypre_MPI_Alltoall( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Alltoall(sendbuf, (hypre_int)sendcount, sendtype, - recvbuf, (hypre_int)recvcount, recvtype, - hypre_MPI_CommMPI_Comm(comm)); + recvbuf, (hypre_int)recvcount, recvtype, comm); } HYPRE_Int @@ -1092,8 +1081,7 @@ hypre_MPI_Allgather( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Allgather(sendbuf, (hypre_int)sendcount, sendtype, - recvbuf, (hypre_int)recvcount, recvtype, - hypre_MPI_CommMPI_Comm(comm)); + recvbuf, (hypre_int)recvcount, recvtype, comm); } HYPRE_Int @@ -1110,7 +1098,7 @@ hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); + MPI_Comm_size(comm, &csize); mpi_recvcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); mpi_displs = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); 
for (i = 0; i < csize; i++) @@ -1120,7 +1108,7 @@ hypre_MPI_Allgatherv( void *sendbuf, } ierr = (HYPRE_Int) MPI_Allgatherv(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, mpi_recvcounts, mpi_displs, - recvtype, hypre_MPI_CommMPI_Comm(comm)); + recvtype, comm); hypre_TFree(mpi_recvcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -1139,7 +1127,7 @@ hypre_MPI_Gather( void *sendbuf, { return (HYPRE_Int) MPI_Gather(sendbuf, (hypre_int) sendcount, sendtype, recvbuf, (hypre_int) recvcount, recvtype, - (hypre_int)root, hypre_MPI_CommMPI_Comm(comm)); + (hypre_int)root, comm); } HYPRE_Int @@ -1159,8 +1147,8 @@ hypre_MPI_Gatherv(void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); - MPI_Comm_rank(hypre_MPI_CommMPI_Comm(comm), &croot); + MPI_Comm_size(comm, &csize); + MPI_Comm_rank(comm, &croot); if (croot == (hypre_int) root) { mpi_recvcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); @@ -1173,7 +1161,7 @@ hypre_MPI_Gatherv(void *sendbuf, } ierr = (HYPRE_Int) MPI_Gatherv(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, mpi_recvcounts, mpi_displs, - recvtype, (hypre_int) root, hypre_MPI_CommMPI_Comm(comm)); + recvtype, (hypre_int) root, comm); hypre_TFree(mpi_recvcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -1192,7 +1180,7 @@ hypre_MPI_Scatter( void *sendbuf, { return (HYPRE_Int) MPI_Scatter(sendbuf, (hypre_int)sendcount, sendtype, recvbuf, (hypre_int)recvcount, recvtype, - (hypre_int)root, hypre_MPI_CommMPI_Comm(comm)); + (hypre_int)root, comm); } HYPRE_Int @@ -1212,8 +1200,8 @@ hypre_MPI_Scatterv(void *sendbuf, HYPRE_Int i; HYPRE_Int ierr; - MPI_Comm_size(hypre_MPI_CommMPI_Comm(comm), &csize); - MPI_Comm_rank(hypre_MPI_CommMPI_Comm(comm), &croot); + MPI_Comm_size(comm, &csize); + MPI_Comm_rank(comm, &croot); if (croot == (hypre_int) root) { mpi_sendcounts = hypre_TAlloc(hypre_int, csize, HYPRE_MEMORY_HOST); @@ -1226,8 +1214,7 @@ hypre_MPI_Scatterv(void *sendbuf, 
} ierr = (HYPRE_Int) MPI_Scatterv(sendbuf, mpi_sendcounts, mpi_displs, sendtype, recvbuf, (hypre_int) recvcount, - recvtype, (hypre_int) root, - hypre_MPI_CommMPI_Comm(comm)); + recvtype, (hypre_int) root, comm); hypre_TFree(mpi_sendcounts, HYPRE_MEMORY_HOST); hypre_TFree(mpi_displs, HYPRE_MEMORY_HOST); @@ -1242,8 +1229,7 @@ hypre_MPI_Bcast( void *buffer, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Bcast(buffer, (hypre_int)count, datatype, - (hypre_int)root, - hypre_MPI_CommMPI_Comm(comm)); + (hypre_int)root, comm); } HYPRE_Int @@ -1255,8 +1241,7 @@ hypre_MPI_Send( void *buf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Send(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm)); + (hypre_int)dest, (hypre_int)tag, comm); } HYPRE_Int @@ -1269,8 +1254,7 @@ hypre_MPI_Recv( void *buf, hypre_MPI_Status *status ) { return (HYPRE_Int) MPI_Recv(buf, (hypre_int)count, datatype, - (hypre_int)source, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), status); + (hypre_int)source, (hypre_int)tag, comm, status); } HYPRE_Int @@ -1283,8 +1267,7 @@ hypre_MPI_Isend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Isend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + (hypre_int)dest, (hypre_int)tag, comm, &hypre_MPI_RequestMPI_Request(*request)); } @@ -1298,8 +1281,7 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irecv(buf, (hypre_int)count, datatype, - (hypre_int)source, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + (hypre_int)source, (hypre_int)tag, comm, &hypre_MPI_RequestMPI_Request(*request)); } @@ -1320,18 +1302,18 @@ hypre_MPI_Irecv( void *buf, void *cbuf = NULL; \ if (SEND_RECV == TYPE_MACRO_SEND || SEND_RECV == TYPE_MACRO_SEND_INIT) \ { \ - cbuf = hypre_MPI_CommMPI_SendCopy(comm); \ + cbuf = hypre_MPICommGetSendCopy(comm); \ } \ else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ { \ - cbuf 
= hypre_MPI_CommMPI_RecvCopy(comm); \ + cbuf = hypre_MPICommGetRecvCopy(comm); \ } \ HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) (cbuf ? cbuf : buf); \ if (SEND_RECV == TYPE_MACRO_SEND && _buf != buf) \ { \ hypre_GpuProfilingPushRange("MPI-D2H"); \ _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ - hypre_MPI_CommMPI_SendCopyLocation(comm), memory_location); \ + hypre_MPICommGetSendCopyLocation(comm), memory_location); \ hypre_GpuProfilingPopRange(); \ } \ for (i = 0; i < num; i++) \ @@ -1340,7 +1322,7 @@ hypre_MPI_Irecv( void *buf, HYPRE_Int start = displs[i]; \ HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, \ - ip, tag, hypre_MPI_CommMPI_Comm(comm), \ + ip, tag, comm, \ &hypre_MPI_RequestMPI_Request(requests[i])); \ } \ if (_buf != buf) \ @@ -1349,7 +1331,7 @@ hypre_MPI_Irecv( void *buf, if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ { \ hypre_MPI_RequestSetActionCopy(0, _buf, \ - hypre_MPI_CommMPI_SendCopyLocation(comm), \ + hypre_MPICommGetSendCopyLocation(comm), \ buf, \ memory_location, \ ntot * sizeof(HYPRE_DTYPE), \ @@ -1360,7 +1342,7 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_RequestSetActionCopy(1, buf, \ memory_location, \ _buf, \ - hypre_MPI_CommMPI_RecvCopyLocation(comm), \ + hypre_MPICommGetRecvCopyLocation(comm), \ ntot * sizeof(HYPRE_DTYPE), \ &requests[0]); \ } \ @@ -1380,7 +1362,7 @@ hypre_MPI_Isend_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); + hypre_MemoryLocation memory_location = hypre_MPICommGetSendLocation(comm); TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Int, HYPRE_MPI_INT); @@ -1400,7 +1382,7 @@ hypre_MPI_Irecv_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); + hypre_MemoryLocation memory_location = 
hypre_MPICommGetRecvLocation(comm); TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Int, HYPRE_MPI_INT); @@ -1419,8 +1401,7 @@ hypre_MPI_Send_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + (hypre_int)dest, (hypre_int)tag, comm, &hypre_MPI_RequestMPI_Request(*request)); } @@ -1435,7 +1416,7 @@ hypre_MPI_Send_init_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_SendLocation(comm); + hypre_MemoryLocation memory_location = hypre_MPICommGetSendLocation(comm); TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT); @@ -1454,8 +1435,7 @@ hypre_MPI_Recv_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + (hypre_int)dest, (hypre_int)tag, comm, &hypre_MPI_RequestMPI_Request(*request)); } @@ -1470,7 +1450,7 @@ hypre_MPI_Recv_init_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - hypre_MemoryLocation memory_location = hypre_MPI_CommMPI_RecvLocation(comm); + hypre_MemoryLocation memory_location = hypre_MPICommGetRecvLocation(comm); TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Int, HYPRE_MPI_INT); @@ -1489,8 +1469,7 @@ hypre_MPI_Irsend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irsend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + (hypre_int)dest, (hypre_int)tag, comm, &hypre_MPI_RequestMPI_Request(*request)); } @@ -1525,8 +1504,7 @@ hypre_MPI_Probe( HYPRE_Int source, 
hypre_MPI_Comm comm, hypre_MPI_Status *status ) { - return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), status); + return (HYPRE_Int) MPI_Probe((hypre_int)source, (hypre_int)tag, comm, status); } HYPRE_Int @@ -1538,8 +1516,7 @@ hypre_MPI_Iprobe( HYPRE_Int source, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, - hypre_MPI_CommMPI_Comm(comm), + ierr = (HYPRE_Int) MPI_Iprobe((hypre_int)source, (hypre_int)tag, comm, &mpi_flag, status); *flag = (HYPRE_Int) mpi_flag; return ierr; @@ -1664,7 +1641,7 @@ hypre_MPI_Allreduce( void *sendbuf, hypre_GpuProfilingPushRange("MPI_Allreduce"); HYPRE_Int result = MPI_Allreduce(sendbuf, recvbuf, (hypre_int)count, - datatype, op, hypre_MPI_CommMPI_Comm(comm)); + datatype, op, comm); hypre_GpuProfilingPopRange(); @@ -1681,8 +1658,7 @@ hypre_MPI_Reduce( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Reduce(sendbuf, recvbuf, (hypre_int)count, - datatype, op, (hypre_int)root, - hypre_MPI_CommMPI_Comm(comm)); + datatype, op, (hypre_int)root, comm); } HYPRE_Int @@ -1694,7 +1670,7 @@ hypre_MPI_Scan( void *sendbuf, hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Scan(sendbuf, recvbuf, (hypre_int)count, - datatype, op, hypre_MPI_CommMPI_Comm(comm)); + datatype, op, comm); } HYPRE_Int diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 6055965d41..a7c840daf8 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -193,24 +193,21 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef struct -{ - MPI_Comm mpi_comm; - hypre_MemoryLocation send_location; - hypre_MemoryLocation recv_location; - void *send_copy; - void *recv_copy; - hypre_MemoryLocation send_copy_location; - hypre_MemoryLocation recv_copy_location; -} hypre_MPI_Comm; - -#define hypre_MPI_CommMPI_Comm(comm) 
((comm).mpi_comm) -#define hypre_MPI_CommMPI_SendLocation(comm) ((comm).send_location) -#define hypre_MPI_CommMPI_RecvLocation(comm) ((comm).recv_location) -#define hypre_MPI_CommMPI_SendCopy(comm) ((comm).send_copy) -#define hypre_MPI_CommMPI_RecvCopy(comm) ((comm).recv_copy) -#define hypre_MPI_CommMPI_SendCopyLocation(comm) ((comm).send_copy_location) -#define hypre_MPI_CommMPI_RecvCopyLocation(comm) ((comm).recv_copy_location) +typedef MPI_Comm hypre_MPI_Comm; + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendCopy(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvCopy(hypre_MPI_Comm comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendCopy(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvCopy(hypre_MPI_Comm comm, void*); typedef MPI_Group hypre_MPI_Group; @@ -381,7 +378,6 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); -hypre_MPI_Comm hypre_MPI_CommFromMPI_Comm(MPI_Comm comm); hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, void *src, hypre_MemoryLocation src_location, diff --git a/src/utilities/timing.c b/src/utilities/timing.c index 521796a9c6..af64058c0a 
100644 --- a/src/utilities/timing.c +++ b/src/utilities/timing.c @@ -345,8 +345,7 @@ hypre_PrintTiming( const char *heading, return ierr; } - hypre_MPI_Comm_rank(comm, &myrank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Comm_rank(comm, &myrank ); /* print heading */ if (myrank == 0) @@ -363,9 +362,9 @@ hypre_PrintTiming( const char *heading, local_wall_time = hypre_TimingWallTime(i); local_cpu_time = hypre_TimingCPUTime(i); hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1, - hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_REAL, hypre_MPI_MAX, comm); hypre_MPI_Allreduce(&local_cpu_time, &cpu_time, 1, - hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_REAL, hypre_MPI_MAX, comm); if (myrank == 0) { @@ -423,8 +422,7 @@ hypre_GetTiming( const char *heading, return ierr; } - hypre_MPI_Comm_rank(comm, &myrank); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); + hypre_MPI_Comm_rank(comm, &myrank ); /* print heading */ if (myrank == 0) @@ -440,7 +438,7 @@ hypre_GetTiming( const char *heading, { local_wall_time = hypre_TimingWallTime(i); hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1, - hypre_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_REAL, hypre_MPI_MAX, comm); if (myrank == 0) { From 56a4f73256fb70d67ea4d255365e3a691baef657 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 21 Jun 2024 19:13:00 -0700 Subject: [PATCH 36/90] revert more hypre MPI comm --- src/IJ_mv/HYPRE_IJMatrix.c | 1 + src/IJ_mv/IJMatrix_parcsr_device.c | 3 +-- src/IJ_mv/IJVector_parcsr_device.c | 3 +-- src/parcsr_ls/par_coarse_parms_device.c | 3 +-- src/parcsr_ls/par_coarsen_device.c | 3 +-- src/parcsr_ls/par_ilu_setup_device.c | 5 ++--- src/parcsr_ls/par_interp_device.c | 6 ++---- src/parcsr_ls/par_lr_restr_device.c | 3 +-- src/parcsr_ls/par_mgr_device.c | 8 +++----- src/parcsr_ls/par_mod_multi_interp_device.c | 21 +++++++++------------ src/parcsr_ls/par_relax_more_device.c | 3 +-- src/parcsr_mv/par_csr_fffc_device.c | 10 ++++------ 
src/test/maxwell_unscaled.c | 4 ++-- src/test/sstruct_fac.c | 4 ++-- 14 files changed, 31 insertions(+), 46 deletions(-) diff --git a/src/IJ_mv/HYPRE_IJMatrix.c b/src/IJ_mv/HYPRE_IJMatrix.c index 21e4c8c598..3b7cc08b99 100644 --- a/src/IJ_mv/HYPRE_IJMatrix.c +++ b/src/IJ_mv/HYPRE_IJMatrix.c @@ -49,6 +49,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &myid); + if (ilower > iupper + 1 || ilower < 0) { hypre_error_in_arg(2); diff --git a/src/IJ_mv/IJMatrix_parcsr_device.c b/src/IJ_mv/IJMatrix_parcsr_device.c index f485b59835..09c14a7e6c 100644 --- a/src/IJ_mv/IJMatrix_parcsr_device.c +++ b/src/IJ_mv/IJMatrix_parcsr_device.c @@ -573,7 +573,6 @@ HYPRE_Int hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) { MPI_Comm comm = hypre_IJMatrixComm(matrix); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); HYPRE_BigInt row_start = row_partitioning[0]; @@ -610,7 +609,7 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) #endif HYPRE_Int nelms_off = nelms - nelms_on; HYPRE_Int nelms_off_max; - hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); /* communicate for aux off-proc and add to remote aux on-proc */ if (nelms_off_max) diff --git a/src/IJ_mv/IJVector_parcsr_device.c b/src/IJ_mv/IJVector_parcsr_device.c index c452bf5385..b61ac633cb 100644 --- a/src/IJ_mv/IJVector_parcsr_device.c +++ b/src/IJ_mv/IJVector_parcsr_device.c @@ -436,7 +436,6 @@ HYPRE_Int hypre_IJVectorAssembleParDevice(hypre_IJVector *vector) { MPI_Comm comm = hypre_IJVectorComm(vector); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); hypre_AuxParVector *aux_vector = 
(hypre_AuxParVector*) hypre_IJVectorTranslator(vector); HYPRE_BigInt *IJpartitioning = hypre_IJVectorPartitioning(vector); @@ -471,7 +470,7 @@ hypre_IJVectorAssembleParDevice(hypre_IJVector *vector) #endif HYPRE_Int nelms_off = nelms - nelms_on; HYPRE_Int nelms_off_max; - hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); /* communicate for aux off-proc and add to remote aux on-proc */ if (nelms_off_max) diff --git a/src/parcsr_ls/par_coarse_parms_device.c b/src/parcsr_ls/par_coarse_parms_device.c index 1b592336c5..b6e863870c 100644 --- a/src/parcsr_ls/par_coarse_parms_device.c +++ b/src/parcsr_ls/par_coarse_parms_device.c @@ -104,9 +104,8 @@ hypre_BoomerAMGCoarseParmsDevice(MPI_Comm comm, } { - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); HYPRE_BigInt scan_recv; - hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&local_coarse_size, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* first point in my range */ coarse_pnts_global[0] = scan_recv - local_coarse_size; diff --git a/src/parcsr_ls/par_coarsen_device.c b/src/parcsr_ls/par_coarsen_device.c index cc343c72ef..42253bcd2c 100644 --- a/src/parcsr_ls/par_coarsen_device.c +++ b/src/parcsr_ls/par_coarsen_device.c @@ -58,7 +58,6 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (!comm_pkg) { @@ -138,7 +137,7 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, big_graph_size = graph_diag_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* if 
(my_id == 0) { hypre_printf("graph size %b\n", global_graph_size); } */ diff --git a/src/parcsr_ls/par_ilu_setup_device.c b/src/parcsr_ls/par_ilu_setup_device.c index c012e49479..52c351a4b3 100644 --- a/src/parcsr_ls/par_ilu_setup_device.c +++ b/src/parcsr_ls/par_ilu_setup_device.c @@ -101,7 +101,6 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* Build the inverse permutation arrays */ if (perm_data && qperm_data) @@ -199,7 +198,7 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, /* Compute total rows in Schur block */ HYPRE_BigInt big_m = (HYPRE_BigInt) m; - hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&big_m, &total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* only form when total_rows > 0 */ if (total_rows > 0) @@ -207,7 +206,7 @@ hypre_ILUSetupILUDevice(HYPRE_Int ilu_type, /* now create S - need to get new column start */ { HYPRE_BigInt global_start; - hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); col_starts[0] = global_start - m; col_starts[1] = global_start; } diff --git a/src/parcsr_ls/par_interp_device.c b/src/parcsr_ls/par_interp_device.c index 416bfb78b9..0fa03f5b66 100644 --- a/src/parcsr_ls/par_interp_device.c +++ b/src/parcsr_ls/par_interp_device.c @@ -120,13 +120,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast( &total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast( &total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); if (!comm_pkg) { @@ -1099,11 +1098,10 @@ 
hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); my_first_cpt = num_cpts_global[0]; if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /* fine to coarse mapping */ fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); diff --git a/src/parcsr_ls/par_lr_restr_device.c b/src/parcsr_ls/par_lr_restr_device.c index 2c86d6854f..edca5b180f 100644 --- a/src/parcsr_ls/par_lr_restr_device.c +++ b/src/parcsr_ls/par_lr_restr_device.c @@ -61,14 +61,13 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /* global number of C points and my start position */ if (my_id == (num_procs - 1)) { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); /* get AFF and ACF */ hypre_ParCSRMatrix *AFF, *ACF, *Dinv, *N, *X, *X2, *Z, *Z2; diff --git a/src/parcsr_ls/par_mgr_device.c b/src/parcsr_ls/par_mgr_device.c index 3e3aca02b6..9460b0cd09 100644 --- a/src/parcsr_ls/par_mgr_device.c +++ b/src/parcsr_ls/par_mgr_device.c @@ -140,7 +140,6 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); hypre_GpuProfilingPushRange("MGRBuildP"); @@ -243,7 +242,7 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, { nC_global = num_cpts_global[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + 
hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } W_nr_of_rows = hypre_CSRMatrixNumRows(W_diag); @@ -914,7 +913,6 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); /*----------------------------------------------------------------- * Count the number of points matching point_type in CF_marker @@ -948,7 +946,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, if (CF_marker) { num_rows_big = (HYPRE_BigInt) B_diag_num_rows; - hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&num_rows_big, &scan_recv, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* first point in my range */ row_starts_B[0] = scan_recv - num_rows_big; @@ -959,7 +957,7 @@ hypre_ParCSRMatrixBlockDiagMatrixDevice( hypre_ParCSRMatrix *A, { num_rows_B = row_starts_B[1]; } - hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&num_rows_B, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { diff --git a/src/parcsr_ls/par_mod_multi_interp_device.c b/src/parcsr_ls/par_mod_multi_interp_device.c index f2a43e7e06..034fbc15c1 100644 --- a/src/parcsr_ls/par_mod_multi_interp_device.c +++ b/src/parcsr_ls/par_mod_multi_interp_device.c @@ -255,7 +255,6 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); if (num_procs > 1) { @@ -263,7 +262,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, { total_global_cpts = num_cpts_global[1]; } - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { @@ -445,7 +444,7 @@ 
hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, num_passes = 1; /* color points according to pass number */ remaining_big = remaining; - hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_GpuProfilingPopRange(); @@ -598,7 +597,7 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, HYPRE_BigInt old_global_remaining = global_remaining; remaining_big = remaining; - hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Allreduce(&remaining_big, &global_remaining, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); /* if the number of remaining points does not change, we have a situation of isolated areas of * fine points that are not connected to any C-points, and the pass generation process breaks @@ -1052,7 +1051,6 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); @@ -1062,7 +1060,7 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, { HYPRE_BigInt big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; @@ -1071,8 +1069,8 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + 
hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { @@ -1352,7 +1350,6 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, /* MPI size and rank*/ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); fine_to_coarse = hypre_TAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_DEVICE); @@ -1362,7 +1359,7 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, { HYPRE_BigInt big_Fpts = num_points; - hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&big_Fpts, f_pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); f_pts_starts[0] = f_pts_starts[1] - big_Fpts; @@ -1371,8 +1368,8 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, total_global_fpts = f_pts_starts[1]; total_global_cpts = c_pts_starts[1]; } - hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); - hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&total_global_fpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); + hypre_MPI_Bcast(&total_global_cpts, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else { diff --git a/src/parcsr_ls/par_relax_more_device.c b/src/parcsr_ls/par_relax_more_device.c index 22e785b95a..0c75d055bb 100644 --- a/src/parcsr_ls/par_relax_more_device.c +++ b/src/parcsr_ls/par_relax_more_device.c @@ -182,9 +182,8 @@ hypre_ParCSRMaxEigEstimateDevice( hypre_ParCSRMatrix *A, send_buf[1] = e_max; MPI_Comm comm = hypre_ParCSRMatrixComm(A); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); - hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, hcomm); + hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); /* return */ if ( hypre_abs(e_min) > hypre_abs(e_max) ) diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index f36b9f3485..5a472b2557 100644 --- 
a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -283,7 +283,6 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); n_local = hypre_ParCSRMatrixNumRows(A); row_starts = hypre_ParCSRMatrixRowStarts(A); @@ -292,7 +291,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, { nC_global = cpts_starts[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); nC_local = (HYPRE_Int) (cpts_starts[1] - cpts_starts[0]); fpts_starts[0] = row_starts[0] - cpts_starts[0]; fpts_starts[1] = row_starts[1] - cpts_starts[1]; @@ -321,13 +320,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_BigInt nF2_local_big = nF2_local; - hypre_MPI_Scan(&nF2_local_big, f2pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, hcomm); + hypre_MPI_Scan(&nF2_local_big, f2pts_starts + 1, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); f2pts_starts[0] = f2pts_starts[1] - nF2_local_big; if (my_id == (num_procs - 1)) { nF2_global = f2pts_starts[1]; } - hypre_MPI_Bcast(&nF2_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&nF2_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } /* map from all points (i.e, F+C) to F/C indices */ @@ -1501,7 +1500,6 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_MPI_Comm hcomm = hypre_MPI_CommFromMPI_Comm(comm); n_local = hypre_ParCSRMatrixNumRows(A); row_starts = hypre_ParCSRMatrixRowStarts(A); @@ -1518,7 +1516,7 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, { nC_global = cpts_starts[1]; } - hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs - 1, hcomm); + hypre_MPI_Bcast(&nC_global, 1, HYPRE_MPI_BIG_INT, num_procs 
- 1, comm); nC_local = (HYPRE_Int) (cpts_starts[1] - cpts_starts[0]); fpts_starts[0] = row_starts[0] - cpts_starts[0]; fpts_starts[1] = row_starts[1] - cpts_starts[1]; diff --git a/src/test/maxwell_unscaled.c b/src/test/maxwell_unscaled.c index 19b689c248..4eb8142bdd 100644 --- a/src/test/maxwell_unscaled.c +++ b/src/test/maxwell_unscaled.c @@ -275,11 +275,11 @@ ReadData( char *filename, } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure diff --git a/src/test/sstruct_fac.c b/src/test/sstruct_fac.c index 5892e48c5a..8310f51156 100644 --- a/src/test/sstruct_fac.c +++ b/src/test/sstruct_fac.c @@ -321,11 +321,11 @@ ReadData( char *filename, } /* broadcast the data size */ - hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(&sdata_size, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); /* broadcast the data */ sdata = hypre_TReAlloc(sdata, char, sdata_size, HYPRE_MEMORY_HOST); - hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_CommFromMPI_Comm(hypre_MPI_COMM_WORLD)); + hypre_MPI_Bcast(sdata, sdata_size, hypre_MPI_CHAR, 0, hypre_MPI_COMM_WORLD); /*----------------------------------------------------------- * Parse the data and fill ProblemData structure From fe6b841abeffa7453fb34b537e0e8d1ac78b50e4 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 21 Jun 2024 21:48:30 -0700 Subject: [PATCH 37/90] minor changes --- src/parcsr_ls/ams.c | 4 +--- 
src/parcsr_ls/gen_redcs_mat.c | 1 + src/parcsr_ls/par_gauss_elim.c | 1 - src/parcsr_ls/par_mgr_device.c | 1 - src/parcsr_ls/par_relax_more.c | 4 +--- src/parcsr_ls/par_relax_more_device.c | 5 ++--- src/sstruct_ls/maxwell_TV_setup.c | 3 +-- src/struct_ls/pfmg_setup.c | 6 ++---- src/struct_mv/struct_innerprod.c | 4 +--- src/utilities/device_utils.c | 2 +- src/utilities/error.c | 2 +- 11 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index 89cce32481..b40a767271 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -2937,8 +2937,6 @@ hypre_AMSSetup(void *solver, ams_data -> A = A; - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - /* Modifications for problems with zero-conductivity regions */ if (ams_data -> interior_nodes) { @@ -3123,7 +3121,7 @@ hypre_AMSSetup(void *solver, } lfactor *= 1e-10; /* scaling factor: max|A_ij|*1e-10 */ - hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(&lfactor, &factor, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_ParCSRMatrixComm(A)); } hypre_ParCSRMatrixAdd(factor, A, 1.0, B, &C); diff --git a/src/parcsr_ls/gen_redcs_mat.c b/src/parcsr_ls/gen_redcs_mat.c index c85a4020b4..a81890282c 100644 --- a/src/parcsr_ls/gen_redcs_mat.c +++ b/src/parcsr_ls/gen_redcs_mat.c @@ -92,6 +92,7 @@ hypre_seqAMGSetup( hypre_ParAMGData *amg_data, hypre_GenerateSubComm(comm, num_rows, &new_comm); + /*hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; diff --git a/src/parcsr_ls/par_gauss_elim.c b/src/parcsr_ls/par_gauss_elim.c index 4c15f708a6..3016fcd542 100644 --- a/src/parcsr_ls/par_gauss_elim.c +++ b/src/parcsr_ls/par_gauss_elim.c @@ -458,7 +458,6 @@ hypre_GaussElimSolve(hypre_ParAMGData *amg_data, HYPRE_Int *displs, *info; HYPRE_Int new_num_procs; - #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_GS_ELIM_SOLVE] -= hypre_MPI_Wtime(); #endif diff --git a/src/parcsr_ls/par_mgr_device.c 
b/src/parcsr_ls/par_mgr_device.c index 9460b0cd09..7f0d7912ed 100644 --- a/src/parcsr_ls/par_mgr_device.c +++ b/src/parcsr_ls/par_mgr_device.c @@ -140,7 +140,6 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); - hypre_GpuProfilingPushRange("MGRBuildP"); #if defined(HYPRE_USING_SYCL) diff --git a/src/parcsr_ls/par_relax_more.c b/src/parcsr_ls/par_relax_more.c index 3d337a5571..ae8433b14c 100644 --- a/src/parcsr_ls/par_relax_more.c +++ b/src/parcsr_ls/par_relax_more.c @@ -49,8 +49,6 @@ hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A, /* matrix to relax HYPRE_Real e_min = 0.0; HYPRE_Real send_buf[2], recv_buf[2]; - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); if (scale > 1) @@ -105,7 +103,7 @@ hypre_ParCSRMaxEigEstimateHost( hypre_ParCSRMatrix *A, /* matrix to relax /* get e_min e_max across procs */ hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, - comm); + hypre_ParCSRMatrixComm(A)); e_min = -recv_buf[0]; e_max = recv_buf[1]; diff --git a/src/parcsr_ls/par_relax_more_device.c b/src/parcsr_ls/par_relax_more_device.c index 0c75d055bb..5c21011ae2 100644 --- a/src/parcsr_ls/par_relax_more_device.c +++ b/src/parcsr_ls/par_relax_more_device.c @@ -181,9 +181,8 @@ hypre_ParCSRMaxEigEstimateDevice( hypre_ParCSRMatrix *A, send_buf[0] = -e_min; send_buf[1] = e_max; - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - - hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, comm); + hypre_MPI_Allreduce(send_buf, recv_buf, 2, HYPRE_MPI_REAL, hypre_MPI_MAX, + hypre_ParCSRMatrixComm(A)); /* return */ if ( hypre_abs(e_min) > hypre_abs(e_max) ) diff --git a/src/sstruct_ls/maxwell_TV_setup.c b/src/sstruct_ls/maxwell_TV_setup.c index 0a9e76a055..42e5e91cda 100644 --- a/src/sstruct_ls/maxwell_TV_setup.c +++ b/src/sstruct_ls/maxwell_TV_setup.c @@ -870,9 +870,8 @@ hypre_MaxwellTV_Setup(void 
*maxwell_vdata, } lev_nboxes = 0; - MPI_Comm comm = hypre_SStructGridComm(egrid_l[l + 1]); hypre_MPI_Allreduce(&nboxes, &lev_nboxes, 1, HYPRE_MPI_INT, hypre_MPI_SUM, - comm); + hypre_SStructGridComm(egrid_l[l + 1])); if (lev_nboxes) /* there were coarsen boxes */ { diff --git a/src/struct_ls/pfmg_setup.c b/src/struct_ls/pfmg_setup.c index caead8c63a..116475d75b 100644 --- a/src/struct_ls/pfmg_setup.c +++ b/src/struct_ls/pfmg_setup.c @@ -773,8 +773,6 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, HYPRE_Int constant_coefficient; HYPRE_Int i, d; - MPI_Comm comm = hypre_StructMatrixComm(A); - /*---------------------------------------------------------- * Initialize some things *----------------------------------------------------------*/ @@ -846,13 +844,13 @@ hypre_PFMGComputeDxyz( hypre_StructMatrix *A, tcxyz[1] = cxyz[1]; tcxyz[2] = cxyz[2]; hypre_MPI_Allreduce(tcxyz, cxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - comm); + hypre_StructMatrixComm(A)); tcxyz[0] = sqcxyz[0]; tcxyz[1] = sqcxyz[1]; tcxyz[2] = sqcxyz[2]; hypre_MPI_Allreduce(tcxyz, sqcxyz, 3, HYPRE_MPI_REAL, hypre_MPI_SUM, - comm); + hypre_StructMatrixComm(A)); for (d = 0; d < 3; d++) { diff --git a/src/struct_mv/struct_innerprod.c b/src/struct_mv/struct_innerprod.c index 303047c833..d15b5cc0b8 100644 --- a/src/struct_mv/struct_innerprod.c +++ b/src/struct_mv/struct_innerprod.c @@ -40,8 +40,6 @@ hypre_StructInnerProd( hypre_StructVector *x, HYPRE_Int ndim = hypre_StructVectorNDim(x); HYPRE_Int i; - MPI_Comm comm = hypre_StructVectorComm(x); - #if 0 //defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) const HYPRE_Int data_location = hypre_StructGridDataLocation(hypre_StructVectorGrid(y)); #endif @@ -101,7 +99,7 @@ hypre_StructInnerProd( hypre_StructVector *x, process_result = (HYPRE_Real) local_result; hypre_MPI_Allreduce(&process_result, &final_innerprod_result, 1, - HYPRE_MPI_REAL, hypre_MPI_SUM, comm); + HYPRE_MPI_REAL, hypre_MPI_SUM, hypre_StructVectorComm(x)); hypre_IncFLOPCount(2 * 
hypre_StructVectorGlobalSize(x)); diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index 69386d57db..d223976750 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -2971,7 +2971,7 @@ hypre_bind_device( HYPRE_Int device_id_in, /* device id that want to bind */ hypre_int device_id; - MPI_Comm node_comm; + hypre_MPI_Comm node_comm; hypre_MPI_Comm_split_type( comm, hypre_MPI_COMM_TYPE_SHARED, myid, hypre_MPI_INFO_NULL, &node_comm ); hypre_MPI_Comm_rank(node_comm, &myNodeid); diff --git a/src/utilities/error.c b/src/utilities/error.c index 60dcc7ea06..248873abc4 100644 --- a/src/utilities/error.c +++ b/src/utilities/error.c @@ -206,8 +206,8 @@ HYPRE_PrintErrorMessages(MPI_Comm comm) char *msg; hypre_MPI_Barrier(comm); - hypre_MPI_Comm_rank(comm, &myid); + hypre_MPI_Comm_rank(comm, &myid); for (msg = err.memory; msg < (err.memory + err.msg_sz); msg += strlen(msg) + 1) { hypre_fprintf(stderr, "%d: %s", myid, msg); From 82f1b60ce37fab8c2cd989d763a6c24776501d61 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 21 Jun 2024 22:08:47 -0700 Subject: [PATCH 38/90] fix after merge --- src/parcsr_mv/par_csr_matop.c | 259 ---------------------------------- 1 file changed, 259 deletions(-) diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index be90f9bf75..7783b85c34 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -7036,262 +7036,3 @@ hypre_ParCSRMatrixColSum( hypre_ParCSRMatrix *A, return hypre_error_flag; } - -/*-------------------------------------------------------------------------- - * hypre_ParCSRMatrixBlockColSum - *--------------------------------------------------------------------------*/ - -HYPRE_Int -hypre_ParCSRMatrixBlockColSum( hypre_ParCSRMatrix *A, - HYPRE_Int row_major, - HYPRE_Int num_rows_block, - HYPRE_Int num_cols_block, - hypre_DenseBlockMatrix **B_ptr ) -{ - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); - 
HYPRE_BigInt num_rows_A = hypre_ParCSRMatrixGlobalNumRows(A); - HYPRE_BigInt num_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); - HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag); - - hypre_DenseBlockMatrix *B; - - /*--------------------------------------------- - * Sanity checks - *---------------------------------------------*/ - - if (num_rows_block < 1 || num_cols_block < 1) - { - *B_ptr = NULL; - return hypre_error_flag; - } - - if (num_rows_A % ((HYPRE_BigInt) num_rows_block)) - { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, - "Global number of rows is not divisable by the block dimension"); - return hypre_error_flag; - } - - if (num_cols_A % ((HYPRE_BigInt) num_cols_block)) - { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, - "Global number of columns is not divisable by the block dimension"); - return hypre_error_flag; - } - - HYPRE_ANNOTATE_FUNC_BEGIN; - if (!hypre_ParCSRMatrixCommPkg(A)) - { - hypre_MatvecCommPkgCreate(A); - } - - /*--------------------------------------------- - * Compute block column sum matrix - *---------------------------------------------*/ - - /* Create output matrix */ - B = hypre_DenseBlockMatrixCreate(row_major, - num_rows_diag_A, num_cols_diag_A, - num_rows_block, num_cols_block); - - /* Initialize the output matrix */ - hypre_DenseBlockMatrixInitializeOn(B, memory_location); - -#if defined(HYPRE_USING_GPU) - HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(memory_location); - - if (exec == HYPRE_EXEC_DEVICE) - { - /* TODO (VPM): hypre_ParCSRMatrixColSumReduceDevice */ - hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_HOST); - hypre_ParCSRMatrixBlockColSumHost(A, B); - hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_DEVICE); - hypre_DenseBlockMatrixMigrate(B, HYPRE_MEMORY_DEVICE); - } - else -#endif - { - hypre_ParCSRMatrixBlockColSumHost(A, B); - } - - /* Set output pointer */ - *B_ptr = B; - - HYPRE_ANNOTATE_FUNC_END; - - 
return hypre_error_flag; -} - -/*-------------------------------------------------------------------------- - * hypre_ParCSRMatrixColSumHost - *--------------------------------------------------------------------------*/ - -HYPRE_Int -hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, - hypre_ParVector *b ) -{ - /* ParCSRMatrix A */ - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); - - /* A_diag */ - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); - - /* A_offd */ - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); - HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); - HYPRE_Int num_rows_offd_A = hypre_CSRMatrixNumRows(A_offd); - HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); - - /* Local variables */ - HYPRE_Int i, j, col; - HYPRE_Complex *recv_data; - HYPRE_Complex *send_data; - - /* Communication variables */ - hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int *send_map_elmts; - HYPRE_Int *send_map_starts; -#if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandle *comm_handle; -#else - hypre_ParCSRCommHandle *comm_handle; -#endif - - /* Update commpkg offsets */ - hypre_ParCSRCommPkgUpdateVecStarts(comm_pkg, 1, 0, 1); - send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); - send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); - - /* Allocate the recv and send buffers */ -#if defined(HYPRE_USING_PERSISTENT_COMM) - comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); - recv_data = (HYPRE_Complex *) 
hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); - send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); - send_data = hypre_Memset((void *) send_data, 0, - (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), - memory_location); -#else - send_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd_A, memory_location); - recv_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], memory_location); -#endif - - /* Pack send data */ - for (i = 0; i < num_rows_offd_A; i++) - { - for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) - { - col = A_offd_j[j]; - send_data[col] += A_offd_data[j]; - } - } - - /* Non-blocking communication starts */ -#if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); - -#else - comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, - memory_location, send_data, - memory_location, recv_data); -#endif - - /* Overlapped local computation. */ - for (i = 0; i < num_rows_diag_A; i++) - { - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - col = A_diag_j[j]; - hypre_ParVectorEntryI(b, col) += A_diag_data[j]; - } - } - - /* Non-blocking communication ends */ -#if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); -#else - hypre_ParCSRCommHandleDestroy(comm_handle); -#endif - - /* Unpack recv data */ - for (i = send_map_starts[0]; i < send_map_starts[num_sends]; i++) - { - col = send_map_elmts[i]; - hypre_ParVectorEntryI(b, col) += recv_data[i]; - } - - /* Free memory */ -#if !defined(HYPRE_USING_PERSISTENT_COMM) - hypre_TFree(send_data, memory_location); - hypre_TFree(recv_data, memory_location); -#endif - - return hypre_error_flag; -} - -/*-------------------------------------------------------------------------- - * hypre_ParCSRMatrixColSum - *--------------------------------------------------------------------------*/ - -HYPRE_Int -hypre_ParCSRMatrixColSum( hypre_ParCSRMatrix *A, 
- hypre_ParVector **b_ptr ) -{ - MPI_Comm comm = hypre_ParCSRMatrixComm(A); - HYPRE_BigInt global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A); - HYPRE_BigInt *col_starts = hypre_ParCSRMatrixColStarts(A); - HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A); - - hypre_ParVector *b; - - HYPRE_ANNOTATE_FUNC_BEGIN; - if (!hypre_ParCSRMatrixCommPkg(A)) - { - hypre_MatvecCommPkgCreate(A); - } - - /* Create output vector */ - b = hypre_ParVectorCreate(comm, global_num_cols, col_starts); - - /* Initialize the output vector */ - hypre_ParVectorInitialize_v2(b, memory_location); - - /*--------------------------------------------- - * Compute column sum vector - *---------------------------------------------*/ - -#if defined(HYPRE_USING_GPU) - HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(memory_location); - - if (exec == HYPRE_EXEC_DEVICE) - { - /* TODO (VPM): hypre_ParCSRMatrixColSumDevice */ - hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_HOST); - hypre_ParVectorMigrate(b, HYPRE_MEMORY_HOST); - hypre_ParCSRMatrixColSumHost(A, b); - hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_DEVICE); - hypre_ParVectorMigrate(b, HYPRE_MEMORY_DEVICE); - } - else -#endif - { - hypre_ParCSRMatrixColSumHost(A, b); - } - - /* Set output pointer */ - *b_ptr = b; - - HYPRE_ANNOTATE_FUNC_END; - - return hypre_error_flag; -} - From 09eb22fe90e2ac3722cbd224c6c86311317da9d0 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 21 Jun 2024 22:15:01 -0700 Subject: [PATCH 39/90] minor changes --- src/utilities/mpistubs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 023f3220bd..45d9a895e2 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -292,20 +292,20 @@ typedef MPI_User_function hypre_MPI_User_function; /* mpistubs.c */ HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( 
MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); HYPRE_Real hypre_MPI_Wtime( void ); HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, - MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); -MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); -HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, + hypre_MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); +hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + hypre_MPI_Comm * comms ); HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypre_MPI_Group *newgroup ); HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); @@ -400,8 +400,8 @@ HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *dis hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int 
split_type, HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif From 2085e3d1b1e1e68527ae0d833fd3f4a47fe7892b Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 24 Jun 2024 21:41:01 -0700 Subject: [PATCH 40/90] a working version --- src/parcsr_mv/par_csr_communication.c | 16 +-- src/utilities/_hypre_utilities.h | 57 +++++++---- src/utilities/general.c | 12 +++ src/utilities/handle.h | 17 +++- src/utilities/mpistubs.c | 138 +++++++++++++++++++++++++- src/utilities/mpistubs.h | 12 +-- 6 files changed, 211 insertions(+), 41 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index e21e6f6edd..c767df4300 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -303,10 +303,10 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); - hypre_MPICommSetSendCopy(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetRecvCopy(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetSendCopyLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPICommSetRecvCopyLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_MPI_Irecv_Multiple(recv_data, num_recvs, 
hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), @@ -332,10 +332,10 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); - hypre_MPICommSetSendCopy(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetRecvCopy(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetSendCopyLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPICommSetRecvCopyLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_MPI_Irecv_Multiple(recv_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 8841c1fd8e..d7f42fdbce 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1185,15 +1185,15 @@ hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendCopy(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvCopy(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendCopyLocation(hypre_MPI_Comm 
comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendCopy(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetRecvCopy(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); typedef MPI_Group hypre_MPI_Group; @@ -1276,20 +1276,20 @@ typedef MPI_User_function hypre_MPI_User_function; /* mpistubs.c */ HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); HYPRE_Real hypre_MPI_Wtime( void ); HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( MPI_Comm comm, hypre_MPI_Group group, - MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( MPI_Comm comm, MPI_Comm *newcomm ); -MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); -HYPRE_Int hypre_MPI_Comm_size( MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, + hypre_MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); +hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int 
hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + hypre_MPI_Comm * comms ); HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypre_MPI_Group *newgroup ); HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); @@ -1384,8 +1384,8 @@ HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *dis hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif @@ -1851,6 +1851,8 @@ extern "C++" struct hypre_DeviceData; typedef struct hypre_DeviceData hypre_DeviceData; +#define HYPRE_MAX_NUM_COMM_KEYS 8 + typedef struct { HYPRE_Int hypre_error; @@ -1863,7 +1865,10 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; - /* GPU MPI */ + /* MPI */ + hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + hypre_MemoryLocation mpi_attr_locations[hypre_NUM_MEMORY_LOCATION]; + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -1912,6 +1917,16 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + +#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) +#define 
hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) +#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) +#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) +#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) +#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) +#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) +#define hypre_HandleMPIAttrLocations(hypre_handle) ((hypre_handle) -> mpi_attr_locations) + #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) diff --git a/src/utilities/general.c b/src/utilities/general.c index 2f53b0baae..cd7904bdd1 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -44,6 +44,18 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif + HYPRE_Int i; + for (i = 0; i < HYPRE_MAX_NUM_COMM_KEYS; i++) + { + MPI_Comm_create_keyval( MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, + &hypre_HandleMPICommKeys(hypre_handle_)[i], (void *)0 ); + } + + for (i = 0; i < hypre_NUM_MEMORY_LOCATION; i++) + { + hypre_HandleMPIAttrLocations(hypre_handle_)[i] = i; + } + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) #if defined(HYPRE_WITH_GPU_AWARE_MPI) hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; diff --git a/src/utilities/handle.h b/src/utilities/handle.h index efe490ac86..aedb515fac 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -17,6 +17,8 @@ struct hypre_DeviceData; typedef struct hypre_DeviceData hypre_DeviceData; +#define HYPRE_MAX_NUM_COMM_KEYS 8 + typedef struct { HYPRE_Int hypre_error; @@ -29,7 +31,10 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int 
struct_comm_send_buffer_size; - /* GPU MPI */ + /* MPI */ + hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + hypre_MemoryLocation mpi_attr_locations[hypre_NUM_MEMORY_LOCATION]; + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -78,6 +83,16 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + +#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) +#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) +#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) +#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) +#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) +#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) +#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) +#define hypre_HandleMPIAttrLocations(hypre_handle) ((hypre_handle) -> mpi_attr_locations) + #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index d1af4fab0c..fe18e43cd1 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -759,6 +759,134 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) #else +HYPRE_Int +hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation send_location) +{ + hypre_Handle *handle = hypre_handle(); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), + &hypre_HandleMPIAttrLocations(handle)[send_location]); + return hypre_error_flag; +} + +hypre_MemoryLocation 
+hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) +{ + hypre_int flag; + hypre_MemoryLocation *atrr_val = NULL, send_location = hypre_MEMORY_UNDEFINED; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + send_location = *atrr_val; + } + return (send_location); +} + +HYPRE_Int +hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation recv_location) +{ + hypre_Handle *handle = hypre_handle(); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), + &hypre_HandleMPIAttrLocations(handle)[recv_location]); + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) +{ + hypre_int flag; + hypre_MemoryLocation *atrr_val = NULL, recv_location = hypre_MEMORY_UNDEFINED; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + recv_location = *atrr_val; + } + return (recv_location); +} + +HYPRE_Int +hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation send_buffer_location) +{ + hypre_Handle *handle = hypre_handle(); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), + &hypre_HandleMPIAttrLocations(handle)[send_buffer_location]); + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) +{ + hypre_int flag; + hypre_MemoryLocation *atrr_val = NULL, send_buffer_location = hypre_MEMORY_UNDEFINED; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBufferLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + send_buffer_location = *atrr_val; + } + return (send_buffer_location); +} + +HYPRE_Int +hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation recv_buffer_location) +{ + hypre_Handle *handle = hypre_handle(); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), + &hypre_HandleMPIAttrLocations(handle)[recv_buffer_location]); + return 
hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) +{ + hypre_int flag; + hypre_MemoryLocation *atrr_val = NULL, recv_buffer_location = hypre_MEMORY_UNDEFINED; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + recv_buffer_location = *atrr_val; + } + return (recv_buffer_location); +} + +HYPRE_Int +hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void *buffer) +{ + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), buffer); + return hypre_error_flag; +} + +void * +hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm) +{ + hypre_int flag; + void *buffer = NULL; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), &buffer, &flag); + if (!flag) + { + buffer = NULL; + } + return (buffer); +} + +HYPRE_Int +hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void *buffer) +{ + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), buffer); + return hypre_error_flag; +} + +void * +hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) +{ + hypre_int flag; + void *buffer = NULL; + MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), &buffer, &flag); + if (!flag) + { + buffer = NULL; + } + return (buffer); +} + hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request) { @@ -1302,18 +1430,18 @@ hypre_MPI_Irecv( void *buf, void *cbuf = NULL; \ if (SEND_RECV == TYPE_MACRO_SEND || SEND_RECV == TYPE_MACRO_SEND_INIT) \ { \ - cbuf = hypre_MPICommGetSendCopy(comm); \ + cbuf = hypre_MPICommGetSendBuffer(comm); \ } \ else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ { \ - cbuf = hypre_MPICommGetRecvCopy(comm); \ + cbuf = hypre_MPICommGetRecvBuffer(comm); \ } \ HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) (cbuf ? 
cbuf : buf); \ if (SEND_RECV == TYPE_MACRO_SEND && _buf != buf) \ { \ hypre_GpuProfilingPushRange("MPI-D2H"); \ _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ - hypre_MPICommGetSendCopyLocation(comm), memory_location); \ + hypre_MPICommGetSendBufferLocation(comm), memory_location); \ hypre_GpuProfilingPopRange(); \ } \ for (i = 0; i < num; i++) \ @@ -1331,7 +1459,7 @@ hypre_MPI_Irecv( void *buf, if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ { \ hypre_MPI_RequestSetActionCopy(0, _buf, \ - hypre_MPICommGetSendCopyLocation(comm), \ + hypre_MPICommGetSendBufferLocation(comm), \ buf, \ memory_location, \ ntot * sizeof(HYPRE_DTYPE), \ @@ -1342,7 +1470,7 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_RequestSetActionCopy(1, buf, \ memory_location, \ _buf, \ - hypre_MPICommGetRecvCopyLocation(comm), \ + hypre_MPICommGetRecvBufferLocation(comm), \ ntot * sizeof(HYPRE_DTYPE), \ &requests[0]); \ } \ diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 45d9a895e2..945cf1665a 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -201,15 +201,15 @@ hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendCopy(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvCopy(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvCopyLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendCopy(hypre_MPI_Comm comm, void*); 
-HYPRE_Int hypre_MPICommSetRecvCopy(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); typedef MPI_Group hypre_MPI_Group; From d0a769c1a3b2a947da7a9051b659d69d5b8ca90c Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 25 Jun 2024 10:41:27 -0700 Subject: [PATCH 41/90] another way to implement --- src/utilities/_hypre_utilities.h | 2 -- src/utilities/general.c | 5 --- src/utilities/handle.h | 2 -- src/utilities/mpistubs.c | 52 +++++++++++++++----------------- 4 files changed, 24 insertions(+), 37 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index d7f42fdbce..83675340b2 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1867,7 +1867,6 @@ typedef struct /* MPI */ hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - hypre_MemoryLocation mpi_attr_locations[hypre_NUM_MEMORY_LOCATION]; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; @@ -1925,7 +1924,6 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPIAttrLocations(hypre_handle) ((hypre_handle) -> mpi_attr_locations) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) diff --git a/src/utilities/general.c b/src/utilities/general.c index cd7904bdd1..c1d94f8f1f 100644 --- a/src/utilities/general.c +++ 
b/src/utilities/general.c @@ -51,11 +51,6 @@ hypre_HandleCreate(void) &hypre_HandleMPICommKeys(hypre_handle_)[i], (void *)0 ); } - for (i = 0; i < hypre_NUM_MEMORY_LOCATION; i++) - { - hypre_HandleMPIAttrLocations(hypre_handle_)[i] = i; - } - #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) #if defined(HYPRE_WITH_GPU_AWARE_MPI) hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; diff --git a/src/utilities/handle.h b/src/utilities/handle.h index aedb515fac..46be3ddba0 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -33,7 +33,6 @@ typedef struct /* MPI */ hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - hypre_MemoryLocation mpi_attr_locations[hypre_NUM_MEMORY_LOCATION]; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; @@ -91,7 +90,6 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPIAttrLocations(hypre_handle) ((hypre_handle) -> mpi_attr_locations) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index fe18e43cd1..61e2261cb4 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -760,91 +760,87 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) #else HYPRE_Int -hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation send_location) +hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), - &hypre_HandleMPIAttrLocations(handle)[send_location]); + 
MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), (void *) location); return hypre_error_flag; } hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) { - hypre_int flag; - hypre_MemoryLocation *atrr_val = NULL, send_location = hypre_MEMORY_UNDEFINED; + hypre_int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendLocation(hypre_handle()), &atrr_val, &flag); if (flag) { - send_location = *atrr_val; + location = (MPI_Aint) atrr_val; } - return (send_location); + return (location); } HYPRE_Int -hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation recv_location) +hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), - &hypre_HandleMPIAttrLocations(handle)[recv_location]); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), (void *) location); return hypre_error_flag; } hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) { - hypre_int flag; - hypre_MemoryLocation *atrr_val = NULL, recv_location = hypre_MEMORY_UNDEFINED; + hypre_int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvLocation(hypre_handle()), &atrr_val, &flag); if (flag) { - recv_location = *atrr_val; + location = (MPI_Aint) atrr_val; } - return (recv_location); + return (location); } HYPRE_Int -hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation send_buffer_location) +hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), - &hypre_HandleMPIAttrLocations(handle)[send_buffer_location]); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), (void 
*) location); return hypre_error_flag; } hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) { - hypre_int flag; - hypre_MemoryLocation *atrr_val = NULL, send_buffer_location = hypre_MEMORY_UNDEFINED; + hypre_int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBufferLocation(hypre_handle()), &atrr_val, &flag); if (flag) { - send_buffer_location = *atrr_val; + location = (MPI_Aint) atrr_val; } - return (send_buffer_location); + return (location); } HYPRE_Int -hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation recv_buffer_location) +hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), - &hypre_HandleMPIAttrLocations(handle)[recv_buffer_location]); + MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), (void *) location); return hypre_error_flag; } hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) { - hypre_int flag; - hypre_MemoryLocation *atrr_val = NULL, recv_buffer_location = hypre_MEMORY_UNDEFINED; + hypre_int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle()), &atrr_val, &flag); if (flag) { - recv_buffer_location = *atrr_val; + location = (MPI_Aint) atrr_val; } - return (recv_buffer_location); + return (location); } HYPRE_Int From 5c2567af5540fe9a2d2cf249bb16f8b30f81d259 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 26 Jun 2024 14:16:36 -0700 Subject: [PATCH 42/90] add comm in comm_handle --- src/parcsr_mv/_hypre_parcsr_mv.h | 2 ++ src/parcsr_mv/par_csr_communication.c | 7 ++++++- src/parcsr_mv/par_csr_communication.h | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h 
b/src/parcsr_mv/_hypre_parcsr_mv.h index 9e63f627ce..00e46b80e5 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -140,6 +140,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; + MPI_Comm comm; } hypre_ParCSRCommHandle; /*-------------------------------------------------------------------------- @@ -158,6 +159,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index c767df4300..6eb8e515b1 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -283,12 +283,15 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); + MPI_Comm comm; HYPRE_Int num_requests = num_sends + num_recvs; hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + hypre_MPI_Comm_dup(comm_orig, &comm); + hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); @@ -366,6 +369,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, 
hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleComm(comm_handle) = comm; hypre_GpuProfilingPopRange(); @@ -420,6 +424,7 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_ParCSRCommHandleDestroyRequests(comm_handle); + hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); hypre_GpuProfilingPopRange(); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index ff7f25bf65..fc93bf9451 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -124,6 +124,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; + MPI_Comm comm; } hypre_ParCSRCommHandle; /*-------------------------------------------------------------------------- @@ -142,6 +143,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; From 1cedf1317ce0f6c77648ba5fc34821a9bf464363 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 26 Jun 2024 14:39:13 -0700 Subject: [PATCH 43/90] debug for non-mpi (not finished) --- src/utilities/_hypre_utilities.h | 52 ++++-- src/utilities/general.c | 4 +- src/utilities/handle.h | 2 +- src/utilities/mpistubs.c | 279 ++++++++++++++++++++----------- src/utilities/mpistubs.h | 52 ++++-- 5 files changed, 252 insertions(+), 137 deletions(-) 
diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 83675340b2..e48cb7bc53 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1002,7 +1002,7 @@ extern hypre_MemoryTracker *_hypre_memory_tracker; extern "C" { #endif -#ifdef HYPRE_SEQUENTIAL +#if defined(HYPRE_SEQUENTIAL) /****************************************************************************** * MPI stubs to generate serial codes without mpi @@ -1117,6 +1117,9 @@ extern "C" { #define MPI_Op_create hypre_MPI_Op_create #define MPI_User_function hypre_MPI_User_function #define MPI_Info_create hypre_MPI_Info_create +#define MPI_Comm_set_attr hypre_MPI_Comm_set_attr +#define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function +#define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function /*-------------------------------------------------------------------------- * Types, etc. @@ -1128,6 +1131,8 @@ typedef HYPRE_Int hypre_MPI_Group; typedef HYPRE_Int hypre_MPI_Request; typedef HYPRE_Int hypre_MPI_Datatype; typedef void (hypre_MPI_User_function) (void); +typedef void (hypre_MPI_Comm_copy_attr_function) (void); +typedef void (hypre_MPI_Comm_delete_attr_function) (void); typedef struct { @@ -1136,7 +1141,7 @@ typedef struct } hypre_MPI_Status; typedef HYPRE_Int hypre_MPI_Op; -typedef HYPRE_Int hypre_MPI_Aint; +typedef intptr_t hypre_MPI_Aint; typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_COMM_SELF 1 @@ -1173,6 +1178,11 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_ANY_SOURCE 1 #define hypre_MPI_ANY_TAG 1 +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL + +#define hypre_MPI_RequestMPI_Request(request) (request) + #else /****************************************************************************** @@ -1180,21 +1190,6 @@ typedef HYPRE_Int hypre_MPI_Info; *****************************************************************************/ typedef MPI_Comm hypre_MPI_Comm; - 
-hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); - -HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); - typedef MPI_Group hypre_MPI_Group; #define HYPRE_MPI_REQUEST_FREE 1 @@ -1226,6 +1221,8 @@ typedef MPI_Op hypre_MPI_Op; typedef MPI_Aint hypre_MPI_Aint; typedef MPI_Info hypre_MPI_Info; typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; +typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; #define hypre_MPI_COMM_WORLD MPI_COMM_WORLD #define hypre_MPI_COMM_NULL MPI_COMM_NULL @@ -1263,6 +1260,9 @@ typedef MPI_User_function hypre_MPI_User_function; #define hypre_MPI_TAG MPI_TAG #define hypre_MPI_LAND MPI_LAND +#define hypre_MPI_COMM_NULL_COPY_FN MPI_COMM_NULL_COPY_FN +#define hypre_MPI_COMM_NULL_DELETE_FN MPI_COMM_NULL_DELETE_FN + #endif /****************************************************************************** @@ -1390,6 +1390,24 @@ HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif +HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, +hypre_MPI_Comm_delete_attr_function 
*comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); +HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); + #ifdef __cplusplus } #endif diff --git a/src/utilities/general.c b/src/utilities/general.c index c1d94f8f1f..9c33936e8d 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -47,8 +47,8 @@ hypre_HandleCreate(void) HYPRE_Int i; for (i = 0; i < HYPRE_MAX_NUM_COMM_KEYS; i++) { - MPI_Comm_create_keyval( MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, - &hypre_HandleMPICommKeys(hypre_handle_)[i], (void *)0 ); + hypre_MPI_Comm_create_keyval( hypre_MPI_COMM_NULL_COPY_FN, hypre_MPI_COMM_NULL_DELETE_FN, + &hypre_HandleMPICommKeys(hypre_handle_)[i], (void *)0 ); } #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 46be3ddba0..0114002119 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -32,7 
+32,7 @@ typedef struct HYPRE_Int struct_comm_send_buffer_size; /* MPI */ - hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 61e2261cb4..1279d79b44 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -753,134 +753,89 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) } #endif -/****************************************************************************** - * MPI stubs to do casting of HYPRE_Int and hypre_int correctly - *****************************************************************************/ - -#else - HYPRE_Int -hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) -{ - hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), (void *) location); - return hypre_error_flag; -} - -hypre_MemoryLocation -hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) +hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) { - hypre_int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (MPI_Aint) atrr_val; - } - return (location); + return (0); } HYPRE_Int -hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) -{ - hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), (void *) location); - return hypre_error_flag; -} - -hypre_MemoryLocation -hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) +hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag) { - hypre_int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - 
MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (MPI_Aint) atrr_val; - } - return (location); + return (0); } HYPRE_Int -hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, + hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, + HYPRE_Int *comm_keyval, + void *extra_state) { - hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), (void *) location); - return hypre_error_flag; + *comm_keyval = 0; + return (0); } -hypre_MemoryLocation -hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) +HYPRE_Int +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) { - hypre_int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBufferLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (MPI_Aint) atrr_val; - } - return (location); + return (0); } HYPRE_Int -hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests ) { - hypre_Handle *handle = hypre_handle(); - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), (void *) location); - return hypre_error_flag; + return (0); } -hypre_MemoryLocation -hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) -{ - hypre_int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - MPI_Comm_get_attr(comm, 
hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (MPI_Aint) atrr_val; - } - return (location); -} +/****************************************************************************** + * MPI stubs to do casting of HYPRE_Int and hypre_int correctly + *****************************************************************************/ -HYPRE_Int -hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void *buffer) -{ - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), buffer); - return hypre_error_flag; -} +#else -void * -hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm) +HYPRE_Int +hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) { - hypre_int flag; - void *buffer = NULL; - MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), &buffer, &flag); - if (!flag) - { - buffer = NULL; - } - return (buffer); + return (HYPRE_Int) MPI_Comm_set_attr(comm, (hypre_int) comm_keyval, attribute_val); } HYPRE_Int -hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void *buffer) +hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag) { - MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), buffer); - return hypre_error_flag; + hypre_int mpi_flag; + HYPRE_Int ierr; + ierr = (HYPRE_Int) MPI_Comm_get_attr(comm, (hypre_int) comm_keyval, attribute_val, &mpi_flag); + *flag = (HYPRE_Int) mpi_flag; + return ierr; } -void * -hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) +HYPRE_Int +hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, + hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, + HYPRE_Int *comm_keyval, + void *extra_state) { - hypre_int flag; - void *buffer = NULL; - MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), &buffer, &flag); - if (!flag) - { - buffer = NULL; - } - return (buffer); + hypre_int mpi_comm_keyval; + HYPRE_Int ierr; 
+ ierr = MPI_Comm_create_keyval(comm_copy_attr_fn, comm_delete_attr_fn, &mpi_comm_keyval, extra_state); + *comm_keyval = mpi_comm_keyval; + return ierr; } hypre_MPI_Request @@ -1915,6 +1870,8 @@ hypre_MPI_Info_free( hypre_MPI_Info *info ) } #endif +#endif + HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) { @@ -1928,4 +1885,126 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) #endif } -#endif +HYPRE_Int +hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), (void *) location); + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) +{ + HYPRE_Int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + location = (hypre_MPI_Aint) atrr_val; + } + return (location); +} + +HYPRE_Int +hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), (void *) location); + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) +{ + HYPRE_Int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + location = (hypre_MPI_Aint) atrr_val; + } + return (location); +} + +HYPRE_Int +hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), (void *) location); + return hypre_error_flag; +} + +hypre_MemoryLocation 
+hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) +{ + HYPRE_Int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBufferLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + location = (hypre_MPI_Aint) atrr_val; + } + return (location); +} + +HYPRE_Int +hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), (void *) location); + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) +{ + HYPRE_Int flag, *atrr_val; + hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle()), &atrr_val, &flag); + if (flag) + { + location = (hypre_MPI_Aint) atrr_val; + } + return (location); +} + +HYPRE_Int +hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void *buffer) +{ + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), buffer); + return hypre_error_flag; +} + +void * +hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm) +{ + HYPRE_Int flag; + void *buffer = NULL; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), &buffer, &flag); + if (!flag) + { + buffer = NULL; + } + return (buffer); +} + +HYPRE_Int +hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void *buffer) +{ + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), buffer); + return hypre_error_flag; +} + +void * +hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) +{ + HYPRE_Int flag; + void *buffer = NULL; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), &buffer, &flag); + if (!flag) + { + buffer = NULL; + } + return (buffer); +} diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 945cf1665a..d9a8ed76d3 
100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -18,7 +18,7 @@ extern "C" { #endif -#ifdef HYPRE_SEQUENTIAL +#if defined(HYPRE_SEQUENTIAL) /****************************************************************************** * MPI stubs to generate serial codes without mpi @@ -133,6 +133,9 @@ extern "C" { #define MPI_Op_create hypre_MPI_Op_create #define MPI_User_function hypre_MPI_User_function #define MPI_Info_create hypre_MPI_Info_create +#define MPI_Comm_set_attr hypre_MPI_Comm_set_attr +#define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function +#define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function /*-------------------------------------------------------------------------- * Types, etc. @@ -144,6 +147,8 @@ typedef HYPRE_Int hypre_MPI_Group; typedef HYPRE_Int hypre_MPI_Request; typedef HYPRE_Int hypre_MPI_Datatype; typedef void (hypre_MPI_User_function) (void); +typedef void (hypre_MPI_Comm_copy_attr_function) (void); +typedef void (hypre_MPI_Comm_delete_attr_function) (void); typedef struct { @@ -152,7 +157,7 @@ typedef struct } hypre_MPI_Status; typedef HYPRE_Int hypre_MPI_Op; -typedef HYPRE_Int hypre_MPI_Aint; +typedef intptr_t hypre_MPI_Aint; typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_COMM_SELF 1 @@ -189,6 +194,11 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_ANY_SOURCE 1 #define hypre_MPI_ANY_TAG 1 +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL + +#define hypre_MPI_RequestMPI_Request(request) (request) + #else /****************************************************************************** @@ -196,21 +206,6 @@ typedef HYPRE_Int hypre_MPI_Info; *****************************************************************************/ typedef MPI_Comm hypre_MPI_Comm; - -hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation 
hypre_MPICommGetSendCopyLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvCopyLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); - -HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); - typedef MPI_Group hypre_MPI_Group; #define HYPRE_MPI_REQUEST_FREE 1 @@ -242,6 +237,8 @@ typedef MPI_Op hypre_MPI_Op; typedef MPI_Aint hypre_MPI_Aint; typedef MPI_Info hypre_MPI_Info; typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; +typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; #define hypre_MPI_COMM_WORLD MPI_COMM_WORLD #define hypre_MPI_COMM_NULL MPI_COMM_NULL @@ -279,6 +276,9 @@ typedef MPI_User_function hypre_MPI_User_function; #define hypre_MPI_TAG MPI_TAG #define hypre_MPI_LAND MPI_LAND +#define hypre_MPI_COMM_NULL_COPY_FN MPI_COMM_NULL_COPY_FN +#define hypre_MPI_COMM_NULL_DELETE_FN MPI_COMM_NULL_DELETE_FN + #endif /****************************************************************************** @@ -406,6 +406,24 @@ HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif +HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, +hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); +HYPRE_Int 
hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); +void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); + #ifdef __cplusplus } #endif From 499547ec4b8863114bffb124665b498bd2fc75c0 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 27 Jun 2024 09:32:30 -0700 Subject: [PATCH 44/90] change mpi request back --- src/distributed_ls/Euclid/Factor_dh.c | 20 ++++++++++---------- src/distributed_ls/ParaSails/Matrix.c | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/distributed_ls/Euclid/Factor_dh.c b/src/distributed_ls/Euclid/Factor_dh.c index c520662f92..c150e78a59 100644 --- a/src/distributed_ls/Euclid/Factor_dh.c +++ b/src/distributed_ls/Euclid/Factor_dh.c @@ -74,11 +74,11 @@ void Factor_dhCreate(Factor_dh *mat) /* initialize MPI request to null */ for(i=0; irecv_reqLo[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); - tmp->recv_reqHi[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); - tmp->send_reqLo[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); - tmp->send_reqHi[i] = 
hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); - tmp->requests[i] = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + tmp->recv_reqLo[i] = hypre_MPI_REQUEST_NULL; + tmp->recv_reqHi[i] = hypre_MPI_REQUEST_NULL; + tmp->send_reqLo[i] = hypre_MPI_REQUEST_NULL; + tmp->send_reqHi[i] = hypre_MPI_REQUEST_NULL; + tmp->requests[i] = hypre_MPI_REQUEST_NULL; } /* Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */ END_FUNC_DH @@ -109,11 +109,11 @@ void Factor_dhDestroy(Factor_dh mat) /* cleanup MPI requests */ for(i=0; irecv_reqLo[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqLo[i])); - if(hypre_MPI_RequestMPI_Request(mat->recv_reqHi[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqHi[i])); - if(hypre_MPI_RequestMPI_Request(mat->send_reqLo[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqLo[i])); - if(hypre_MPI_RequestMPI_Request(mat->send_reqHi[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqHi[i])); - if(hypre_MPI_RequestMPI_Request(mat->requests[i]) != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->requests[i])); + if(mat->recv_reqLo[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqLo[i])); + if(mat->recv_reqHi[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->recv_reqHi[i])); + if(mat->send_reqLo[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqLo[i])); + if(mat->send_reqHi[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->send_reqHi[i])); + if(mat->requests[i] != hypre_MPI_REQUEST_NULL) hypre_MPI_Request_free(&(mat->requests[i])); } FREE_DH(mat); CHECK_V_ERROR; END_FUNC_DH diff --git a/src/distributed_ls/ParaSails/Matrix.c b/src/distributed_ls/ParaSails/Matrix.c index 7f41e1aa33..d2da1f60d3 100644 --- a/src/distributed_ls/ParaSails/Matrix.c +++ b/src/distributed_ls/ParaSails/Matrix.c @@ -336,7 +336,7 @@ static void MatrixReadMaster(Matrix *mat, char *filename) offset = ftell(file); hypre_fscanf(file, "%d %d %lf", 
&row, &col, &value); - request = hypre_MPI_RequestFromMPI_Request(hypre_MPI_REQUEST_NULL); + request = hypre_MPI_REQUEST_NULL; curr_proc = 1; /* proc for which we are looking for the beginning */ while (curr_proc < npes) { From c364769e03ab875c76d3bed664eb440a06832fd7 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 27 Jun 2024 09:35:07 -0700 Subject: [PATCH 45/90] code refactor --- src/parcsr_mv/_hypre_parcsr_mv.h | 2 + src/parcsr_mv/par_csr_communication.c | 56 ++-- src/parcsr_mv/par_csr_communication.h | 2 + src/utilities/_hypre_utilities.h | 75 ++--- src/utilities/mpistubs.c | 381 +++++++++----------------- src/utilities/mpistubs.h | 73 ++--- 6 files changed, 221 insertions(+), 368 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 00e46b80e5..f0745a6541 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -140,6 +140,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; + MPI_Request grequest; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -159,6 +160,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleGRequest(comm_handle) (comm_handle -> grequest) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 6eb8e515b1..edff016450 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -79,23 +79,6 @@ hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location return hypre_error_flag; } -HYPRE_Int -hypre_ParCSRCommHandleDestroyRequests( 
hypre_ParCSRCommHandle *comm_handle ) -{ - if (comm_handle) - { - HYPRE_Int i; - - for (i = 0; i < hypre_ParCSRCommHandleNumRequests(comm_handle); i++) - { - hypre_MPI_RequestClear(&hypre_ParCSRCommHandleRequest(comm_handle, i)); - } - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); - } - - return hypre_error_flag; -} - #if defined(HYPRE_USING_PERSISTENT_COMM) /*------------------------------------------------------------------ @@ -224,7 +207,7 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_ParCSRCommHandleDestroyRequests(comm_handle); + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); return hypre_error_flag; @@ -267,6 +250,15 @@ hypre_ParCSRCommHandleCreate ( HYPRE_Int job, HYPRE_MEMORY_HOST, recv_data); } +hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status) { return MPI_SUCCESS; } + +hypre_int hypre_grequest_free_fn(void *extra_state) +{ + return MPI_SUCCESS; +} + +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return MPI_SUCCESS; } + /*------------------------------------------------------------------ * hypre_ParCSRCommHandleCreate_v2 *------------------------------------------------------------------*/ @@ -286,6 +278,8 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); MPI_Comm comm; HYPRE_Int num_requests = num_sends + num_recvs; + HYPRE_Int nelem_recvs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[num_recvs]; + HYPRE_Int data_size = 
hypre_ParCSRCommHandleGetDataTypeSize(job); hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); @@ -358,6 +352,23 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, break; } } +#if 0 + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(recv_data, + hypre_GetActualMemLocation(recv_memory_location), + hypre_ParCSRCommHandleRecvBuffer(comm_handle), + hypre_ParCSRCommHandleRecvBufferLocation(comm_handle), + nelem_recvs * data_size, + &action); + + MPI_Grequest_start(hypre_grequest_noop_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, + &hypre_ParCSRCommHandleGRequest(comm_handle)); +#endif +printf("%d\n", __LINE__); + /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ @@ -401,6 +412,10 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } + //MPI_Grequest_complete(hypre_ParCSRCommHandleGRequest(comm_handle)); + + //MPI_Wait(&hypre_ParCSRCommHandleGRequest(comm_handle), MPI_STATUS_IGNORE); + return hypre_error_flag; } @@ -417,13 +432,12 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { return hypre_error_flag; } - +printf("%d\n", __LINE__); hypre_ParCSRCommHandleWait(comm_handle); _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - - hypre_ParCSRCommHandleDestroyRequests(comm_handle); + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, 
HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index fc93bf9451..1328ecdbf8 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -124,6 +124,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; + hypre_MPI_Request grequest; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -143,6 +144,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleGRequest(comm_handle) (comm_handle -> grequest) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index e48cb7bc53..925f942086 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1178,10 +1178,8 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_ANY_SOURCE 1 #define hypre_MPI_ANY_TAG 1 -#define hypre_MPI_COMM_NULL_COPY_FN NULL -#define hypre_MPI_COMM_NULL_DELETE_FN NULL - -#define hypre_MPI_RequestMPI_Request(request) (request) +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL #else @@ -1189,38 +1187,15 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; -typedef MPI_Group hypre_MPI_Group; - -#define HYPRE_MPI_REQUEST_FREE 1 -#define HYPRE_MPI_REQUEST_COPY 2 - -typedef struct -{ - HYPRE_Int count; - HYPRE_Int data_size; - char *data; -} hypre_MPI_Request_Action; - -#define hypre_MPI_Request_ActionCount(action) ((action) -> count) 
-#define hypre_MPI_Request_ActionDataSize(action) ((action) -> data_size) -#define hypre_MPI_Request_ActionData(action) ((action) -> data) - -typedef struct -{ - MPI_Request mpi_request; - hypre_MPI_Request_Action action[2]; -} hypre_MPI_Request; - -#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestAction(request, i) ((request).action[i]) - -typedef MPI_Datatype hypre_MPI_Datatype; -typedef MPI_Status hypre_MPI_Status; -typedef MPI_Op hypre_MPI_Op; -typedef MPI_Aint hypre_MPI_Aint; -typedef MPI_Info hypre_MPI_Info; -typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm hypre_MPI_Comm; +typedef MPI_Group hypre_MPI_Group; +typedef MPI_Request hypre_MPI_Request; +typedef MPI_Datatype hypre_MPI_Datatype; +typedef MPI_Status hypre_MPI_Status; +typedef MPI_Op hypre_MPI_Op; +typedef MPI_Aint hypre_MPI_Aint; +typedef MPI_Info hypre_MPI_Info; +typedef MPI_User_function hypre_MPI_User_function; typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; @@ -1269,6 +1244,20 @@ typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; * Everything below this applies to both ifdef cases above *****************************************************************************/ +#define HYPRE_MPI_GREQUEST_FREE 1 +#define HYPRE_MPI_GREQUEST_COPY 2 + +typedef struct +{ + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_GRequest_Action; + +#define hypre_MPI_GRequest_ActionCount(action) ((action) -> count) +#define hypre_MPI_GRequest_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_GRequest_ActionData(action) ((action) -> data) + /*-------------------------------------------------------------------------- * Prototypes *--------------------------------------------------------------------------*/ @@ -1365,14 +1354,10 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int 
hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); -hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); -HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, - void *src, hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestClear(hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); +HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); @@ -1884,7 +1869,7 @@ typedef struct HYPRE_Int struct_comm_send_buffer_size; /* MPI */ - hypre_int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 1279d79b44..1c32606018 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -838,158 +838,6 @@ hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr return ierr; } -hypre_MPI_Request -hypre_MPI_RequestFromMPI_Request(MPI_Request request) -{ - hypre_MPI_Request hrequest; - hypre_Memset(&hrequest, 0, sizeof(hypre_MPI_Request), 
HYPRE_MEMORY_HOST); - hypre_MPI_RequestMPI_Request(hrequest) = request; - - return hrequest; -} - -HYPRE_Int -hypre_MPI_RequestClear(hypre_MPI_Request *request) -{ - HYPRE_Int i; - for (i = 0; i < 2; i++) - { - hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); - hypre_MPI_Request_ActionCount(action) = 0; - hypre_MPI_Request_ActionDataSize(action) = 0; - hypre_TFree(hypre_MPI_Request_ActionData(action), HYPRE_MEMORY_HOST); - } - - return hypre_error_flag; -} - -HYPRE_Int -hypre_MPI_RequestSetActionFree(HYPRE_Int i, - void *ptr, - hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request) -{ - HYPRE_Int action_id = HYPRE_MPI_REQUEST_FREE; - hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); - - HYPRE_Int nb = sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation); - HYPRE_Int data_size = hypre_MPI_Request_ActionDataSize(action); - - hypre_MPI_Request_ActionCount(action) ++; - hypre_MPI_Request_ActionDataSize(action) = data_size + nb; - hypre_MPI_Request_ActionData(action) = hypre_TReAlloc(hypre_MPI_Request_ActionData(action), - char, - hypre_MPI_Request_ActionDataSize(action), - HYPRE_MEMORY_HOST); - - char *data = hypre_MPI_Request_ActionData(action) + data_size; - hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - hypre_TMemcpy(data, &ptr, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(data, &ptr_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - - hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); - - return hypre_error_flag; -} - -HYPRE_Int -hypre_MPI_RequestSetActionCopy(HYPRE_Int i, - void *dest, - hypre_MemoryLocation dest_location, - void *src, - hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, - hypre_MPI_Request *request) -{ - HYPRE_Int action_id = 
HYPRE_MPI_REQUEST_COPY; - hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); - - HYPRE_Int nb = 2 * (sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation)); - HYPRE_Int data_size = hypre_MPI_Request_ActionDataSize(action); - - hypre_MPI_Request_ActionCount(action) ++; - hypre_MPI_Request_ActionDataSize(action) = data_size + nb; - hypre_MPI_Request_ActionData(action) = hypre_TReAlloc(hypre_MPI_Request_ActionData(action), - char, - hypre_MPI_Request_ActionDataSize(action), - HYPRE_MEMORY_HOST); - - char *data = hypre_MPI_Request_ActionData(action) + data_size; - hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - hypre_TMemcpy(data, &num_bytes, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - hypre_TMemcpy(data, &dest, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(data, &src, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(data, &dest_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - hypre_TMemcpy(data, &src_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - - hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); - - return hypre_error_flag; -} - -HYPRE_Int -hypre_MPI_RequestProcessAction(HYPRE_Int i, - hypre_MPI_Request *request) -{ - hypre_MPI_Request_Action *action = &hypre_MPI_RequestAction(*request, i); - HYPRE_Int count = hypre_MPI_Request_ActionCount(action); - char *data = hypre_MPI_Request_ActionData(action); - HYPRE_Int k; - - for (k = 0; k < count; k ++) - { - HYPRE_Int action_id; - - hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - - if (action_id == HYPRE_MPI_REQUEST_FREE) - { - void *ptr; - 
hypre_MemoryLocation ptr_location; - hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - // action! - _hypre_TFree(ptr, ptr_location); - } - else if (action_id == HYPRE_MPI_REQUEST_COPY) - { - void *dest, *src; - HYPRE_Int num_bytes; - hypre_MemoryLocation dest_location, src_location; - hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(HYPRE_Int); - hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(void *); - hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - data += sizeof(hypre_MemoryLocation); - // action! 
- hypre_GpuProfilingPushRange("MPI-H2D"); - _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); - hypre_GpuProfilingPopRange(); - } - } - - hypre_assert(data == hypre_MPI_Request_ActionData(action) + hypre_MPI_Request_ActionDataSize(action)); - - return hypre_error_flag; -} - HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -1346,8 +1194,7 @@ hypre_MPI_Isend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Isend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, - &hypre_MPI_RequestMPI_Request(*request)); + (hypre_int)dest, (hypre_int)tag, comm, request); } HYPRE_Int @@ -1360,8 +1207,7 @@ hypre_MPI_Irecv( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irecv(buf, (hypre_int)count, datatype, - (hypre_int)source, (hypre_int)tag, comm, - &hypre_MPI_RequestMPI_Request(*request)); + (hypre_int)source, (hypre_int)tag, comm, request); } #define TYPE_MACRO_SEND 0 @@ -1400,31 +1246,7 @@ hypre_MPI_Irecv( void *buf, HYPRE_Int ip = procs[i]; \ HYPRE_Int start = displs[i]; \ HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; \ - MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, \ - ip, tag, comm, \ - &hypre_MPI_RequestMPI_Request(requests[i])); \ - } \ - if (_buf != buf) \ - { \ - /* register pre/post action in the first request */ \ - if (SEND_RECV == TYPE_MACRO_SEND_INIT) \ - { \ - hypre_MPI_RequestSetActionCopy(0, _buf, \ - hypre_MPICommGetSendBufferLocation(comm), \ - buf, \ - memory_location, \ - ntot * sizeof(HYPRE_DTYPE), \ - &requests[0]); \ - } \ - else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ - { \ - hypre_MPI_RequestSetActionCopy(1, buf, \ - memory_location, \ - _buf, \ - hypre_MPICommGetRecvBufferLocation(comm), \ - ntot * sizeof(HYPRE_DTYPE), \ - &requests[0]); \ - } \ + MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, ip, tag, comm, &requests[i]); \ } \ return hypre_error_flag; \ } \ @@ -1480,8 +1302,7 @@ hypre_MPI_Send_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, - &hypre_MPI_RequestMPI_Request(*request)); + (hypre_int)dest, (hypre_int)tag, comm, request); } HYPRE_Int @@ -1514,8 +1335,7 @@ hypre_MPI_Recv_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, - &hypre_MPI_RequestMPI_Request(*request)); + (hypre_int)dest, (hypre_int)tag, comm, request); } HYPRE_Int @@ -1548,33 +1368,14 @@ hypre_MPI_Irsend( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Irsend(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, - &hypre_MPI_RequestMPI_Request(*request)); + (hypre_int)dest, (hypre_int)tag, comm, request); } HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ) { - HYPRE_Int i, ierr; - MPI_Request *array_of_mpi_requests = hypre_CTAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); - - for (i = 0; i < count; i++) - { - array_of_mpi_requests[i] = 
hypre_MPI_RequestMPI_Request(array_of_requests[i]); - hypre_MPI_RequestProcessAction(0, &array_of_requests[i]); - } - - ierr = (HYPRE_Int) MPI_Startall((hypre_int)count, array_of_mpi_requests); - - for (i = 0; i < count; i++) - { - hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; - } - - hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); - - return ierr; + return (HYPRE_Int) MPI_Startall((hypre_int)count, array_of_requests); } HYPRE_Int @@ -1608,7 +1409,7 @@ hypre_MPI_Test( hypre_MPI_Request *request, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Test(&hypre_MPI_RequestMPI_Request(*request), &mpi_flag, status); + ierr = (HYPRE_Int) MPI_Test(request, &mpi_flag, status); *flag = (HYPRE_Int) mpi_flag; return ierr; } @@ -1620,25 +1421,12 @@ hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Status *array_of_statuses ) { hypre_int mpi_flag; - HYPRE_Int i, ierr; - - MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); - for (i = 0; i < count; i++) - { - array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); - } + HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Testall((hypre_int)count, array_of_mpi_requests, + ierr = (HYPRE_Int) MPI_Testall((hypre_int)count, array_of_requests, &mpi_flag, array_of_statuses); *flag = (HYPRE_Int) mpi_flag; - for (i = 0; i < count; i++) - { - hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; - } - - hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); - return ierr; } @@ -1646,7 +1434,7 @@ HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ) { - return (HYPRE_Int) MPI_Wait(&hypre_MPI_RequestMPI_Request(*request), status); + return (HYPRE_Int) MPI_Wait(request, status); } HYPRE_Int @@ -1655,26 +1443,9 @@ hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Status *array_of_statuses ) { hypre_GpuProfilingPushRange("hypre_MPI_Waitall"); - - HYPRE_Int i, ierr; - - MPI_Request 
*array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); - for (i = 0; i < count; i++) - { - array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); - } - + HYPRE_Int ierr; ierr = (HYPRE_Int) MPI_Waitall((hypre_int)count, - array_of_mpi_requests, array_of_statuses); - - for (i = 0; i < count; i++) - { - hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; - hypre_MPI_RequestProcessAction(1, &array_of_requests[i]); - } - - hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); - + array_of_requests, array_of_statuses); hypre_GpuProfilingPopRange(); return ierr; @@ -1687,25 +1458,12 @@ hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Status *status ) { hypre_int mpi_index; - HYPRE_Int i, ierr; - - MPI_Request *array_of_mpi_requests = hypre_TAlloc(MPI_Request, count, HYPRE_MEMORY_HOST); - for (i = 0; i < count; i++) - { - array_of_mpi_requests[i] = hypre_MPI_RequestMPI_Request(array_of_requests[i]); - } + HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Waitany((hypre_int)count, array_of_mpi_requests, + ierr = (HYPRE_Int) MPI_Waitany((hypre_int)count, array_of_requests, &mpi_index, status); *index = (HYPRE_Int) mpi_index; - for (i = 0; i < count; i++) - { - hypre_MPI_RequestMPI_Request(array_of_requests[i]) = array_of_mpi_requests[i]; - } - - hypre_TFree(array_of_mpi_requests, HYPRE_MEMORY_HOST); - return ierr; } @@ -1755,7 +1513,7 @@ hypre_MPI_Scan( void *sendbuf, HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ) { - return (HYPRE_Int) MPI_Request_free(&hypre_MPI_RequestMPI_Request(*request)); + return (HYPRE_Int) MPI_Request_free(request); } HYPRE_Int @@ -2008,3 +1766,110 @@ hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) } return (buffer); } + +HYPRE_Int +hypre_MPI_GRequestGetCopyAction(void *dest, + hypre_MemoryLocation dest_location, + void *src, + hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, + hypre_MPI_GRequest_Action **action_ptr) +{ + if (dest == src || num_bytes == 0) + { + 
*action_ptr = NULL; + return hypre_error_flag; + } + + HYPRE_Int action_id = HYPRE_MPI_GREQUEST_COPY; + hypre_MPI_GRequest_Action *action = hypre_CTAlloc(hypre_MPI_GRequest_Action, 1, HYPRE_MEMORY_HOST); + + HYPRE_Int nb = 2 * (sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation)); + HYPRE_Int data_size = hypre_MPI_GRequest_ActionDataSize(action); + + hypre_MPI_GRequest_ActionCount(action) ++; + hypre_MPI_GRequest_ActionDataSize(action) = data_size + nb; + hypre_MPI_GRequest_ActionData(action) = hypre_TReAlloc(hypre_MPI_GRequest_ActionData(action), + char, + hypre_MPI_GRequest_ActionDataSize(action), + HYPRE_MEMORY_HOST); + + char *data = hypre_MPI_GRequest_ActionData(action) + data_size; + hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(data, &num_bytes, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(data, &dest, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(data, &src, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(data, &dest_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + hypre_TMemcpy(data, &src_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + + hypre_assert(data == hypre_MPI_GRequest_ActionData(action) + hypre_MPI_GRequest_ActionDataSize(action)); + + *action_ptr = action; + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) +{ + if (!action) + { + return hypre_error_flag; + } + + HYPRE_Int count = hypre_MPI_GRequest_ActionCount(action); + char *data = hypre_MPI_GRequest_ActionData(action); + HYPRE_Int k; + + for (k = 0; k < count; k ++) + { + HYPRE_Int action_id; + + hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, 
HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + + if (action_id == HYPRE_MPI_GREQUEST_FREE) + { + void *ptr; + hypre_MemoryLocation ptr_location; + hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! + _hypre_TFree(ptr, ptr_location); + } + else if (action_id == HYPRE_MPI_GREQUEST_COPY) + { + void *dest, *src; + HYPRE_Int num_bytes; + hypre_MemoryLocation dest_location, src_location; + hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! 
+ hypre_GpuProfilingPushRange("MPI-H2D"); + _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); + hypre_GpuProfilingPopRange(); + } + } + + hypre_assert(data == hypre_MPI_GRequest_ActionData(action) + hypre_MPI_GRequest_ActionDataSize(action)); + + hypre_TFree(action, HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index d9a8ed76d3..7b7356c6c7 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -194,10 +194,8 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_ANY_SOURCE 1 #define hypre_MPI_ANY_TAG 1 -#define hypre_MPI_COMM_NULL_COPY_FN NULL -#define hypre_MPI_COMM_NULL_DELETE_FN NULL - -#define hypre_MPI_RequestMPI_Request(request) (request) +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL #else @@ -205,38 +203,15 @@ typedef HYPRE_Int hypre_MPI_Info; * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; -typedef MPI_Group hypre_MPI_Group; - -#define HYPRE_MPI_REQUEST_FREE 1 -#define HYPRE_MPI_REQUEST_COPY 2 - -typedef struct -{ - HYPRE_Int count; - HYPRE_Int data_size; - char *data; -} hypre_MPI_Request_Action; - -#define hypre_MPI_Request_ActionCount(action) ((action) -> count) -#define hypre_MPI_Request_ActionDataSize(action) ((action) -> data_size) -#define hypre_MPI_Request_ActionData(action) ((action) -> data) - -typedef struct -{ - MPI_Request mpi_request; - hypre_MPI_Request_Action action[2]; -} hypre_MPI_Request; - -#define hypre_MPI_RequestMPI_Request(request) ((request).mpi_request) -#define hypre_MPI_RequestAction(request, i) ((request).action[i]) - -typedef MPI_Datatype hypre_MPI_Datatype; -typedef MPI_Status hypre_MPI_Status; -typedef MPI_Op hypre_MPI_Op; -typedef MPI_Aint hypre_MPI_Aint; -typedef MPI_Info hypre_MPI_Info; -typedef MPI_User_function hypre_MPI_User_function; +typedef 
MPI_Comm hypre_MPI_Comm; +typedef MPI_Group hypre_MPI_Group; +typedef MPI_Request hypre_MPI_Request; +typedef MPI_Datatype hypre_MPI_Datatype; +typedef MPI_Status hypre_MPI_Status; +typedef MPI_Op hypre_MPI_Op; +typedef MPI_Aint hypre_MPI_Aint; +typedef MPI_Info hypre_MPI_Info; +typedef MPI_User_function hypre_MPI_User_function; typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; @@ -285,6 +260,20 @@ typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; * Everything below this applies to both ifdef cases above *****************************************************************************/ +#define HYPRE_MPI_GREQUEST_FREE 1 +#define HYPRE_MPI_GREQUEST_COPY 2 + +typedef struct +{ + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_GRequest_Action; + +#define hypre_MPI_GRequest_ActionCount(action) ((action) -> count) +#define hypre_MPI_GRequest_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_GRequest_ActionData(action) ((action) -> data) + /*-------------------------------------------------------------------------- * Prototypes *--------------------------------------------------------------------------*/ @@ -381,14 +370,10 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); -hypre_MPI_Request hypre_MPI_RequestFromMPI_Request(MPI_Request request); -HYPRE_Int hypre_MPI_RequestSetActionCopy(HYPRE_Int i, void *dest, hypre_MemoryLocation dest_location, - void *src, hypre_MemoryLocation src_location, - HYPRE_Int num_bytes, hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestSetActionFree(HYPRE_Int i, void *ptr, hypre_MemoryLocation ptr_location, - hypre_MPI_Request *request); -HYPRE_Int hypre_MPI_RequestProcessAction(HYPRE_Int i, hypre_MPI_Request *request); 
-HYPRE_Int hypre_MPI_RequestClear(hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); +HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); From bd23e900d7cce59cf06b464f682183ab4fa723b3 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 27 Jun 2024 09:37:24 -0700 Subject: [PATCH 46/90] minor changes --- src/distributed_ls/ParaSails/Matrix.c | 31 ++++++--------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/src/distributed_ls/ParaSails/Matrix.c b/src/distributed_ls/ParaSails/Matrix.c index d2da1f60d3..cc9f3622e7 100644 --- a/src/distributed_ls/ParaSails/Matrix.c +++ b/src/distributed_ls/ParaSails/Matrix.c @@ -761,14 +761,8 @@ void MatrixMatvec(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) for (i=0; isendlen; i++) mat->sendbuf[i] = x[mat->sendind[i]]; - if (mat->num_recv) - { - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - } - if (mat->num_send) - { - hypre_MPI_Startall(mat->num_send, mat->send_req); - } + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + hypre_MPI_Startall(mat->num_send, mat->send_req); /* Copy local part of x into top part of recvbuf */ for (i=0; isendlen; i++) mat->sendbuf[i] = x[mat->sendind[i]]; - if (mat->num_recv) - { - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - } - - if (mat->num_send) - { - hypre_MPI_Startall(mat->num_send, mat->send_req); - } + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + hypre_MPI_Startall(mat->num_send, mat->send_req); /* Copy local part of x into top part of recvbuf */ for (i=0; 
inum_send) - { - hypre_MPI_Startall(mat->num_send, mat->recv_req2); - } + hypre_MPI_Startall(mat->num_send, mat->recv_req2); /* initialize accumulator buffer to zero */ for (i=0; irecvlen+num_local; i++) @@ -878,10 +862,7 @@ void MatrixMatvecTrans(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) } /* Now can send nonlocal parts of solution to other procs */ - if (mat->num_recv) - { - hypre_MPI_Startall(mat->num_recv, mat->send_req2); - } + hypre_MPI_Startall(mat->num_recv, mat->send_req2); /* copy local part of solution into y */ for (i=0; i Date: Thu, 27 Jun 2024 10:48:43 -0700 Subject: [PATCH 47/90] minor changes --- src/parcsr_mv/protos.h | 1 - src/utilities/mpistubs.c | 57 ++++++++++++++++++---------------------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 6b68715c53..3a40ca2513 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -273,7 +273,6 @@ HYPRE_Int hypre_ParCSRFindExtendCommPkg(MPI_Comm comm, HYPRE_BigInt global_num_c HYPRE_BigInt first_col_diag, HYPRE_Int num_cols_diag, HYPRE_BigInt *col_starts, hypre_IJAssumedPart *apart, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg **extend_comm_pkg); -HYPRE_Int hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ); /* par_csr_matop.c */ HYPRE_Int hypre_ParCSRMatrixScale(hypre_ParCSRMatrix *A, HYPRE_Complex scalar); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 1c32606018..0a078e1416 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -14,13 +14,13 @@ * a Fortran integer and hence usually the size of hypre_int. 
****************************************************************************/ -MPI_Comm +hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ) { #ifdef HYPRE_HAVE_MPI_COMM_F2C - return (MPI_Comm) MPI_Comm_f2c(comm); + return (hypre_MPI_Comm) MPI_Comm_f2c(comm); #else - return (MPI_Comm) (size_t)comm; + return (hypre_MPI_Comm) (size_t)comm; #endif } @@ -852,7 +852,7 @@ hypre_MPI_Finalize( void ) } HYPRE_Int -hypre_MPI_Abort( MPI_Comm comm, +hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ) { return (HYPRE_Int) MPI_Abort(comm, (hypre_int)errorcode); @@ -871,28 +871,28 @@ hypre_MPI_Wtick( void ) } HYPRE_Int -hypre_MPI_Barrier( MPI_Comm comm ) +hypre_MPI_Barrier( hypre_MPI_Comm comm ) { return (HYPRE_Int) MPI_Barrier(comm); } HYPRE_Int -hypre_MPI_Comm_create( MPI_Comm comm, +hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, - MPI_Comm *newcomm ) + hypre_MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_create(comm, group, newcomm); } HYPRE_Int -hypre_MPI_Comm_dup( MPI_Comm comm, - MPI_Comm *newcomm ) +hypre_MPI_Comm_dup( hypre_MPI_Comm comm, + hypre_MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_dup(comm, newcomm); } HYPRE_Int -hypre_MPI_Comm_size( MPI_Comm comm, +hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ) { hypre_int mpi_size; @@ -903,7 +903,7 @@ hypre_MPI_Comm_size( MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_rank( MPI_Comm comm, +hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ) { hypre_int mpi_rank; @@ -914,23 +914,23 @@ hypre_MPI_Comm_rank( MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_free( MPI_Comm *comm ) +hypre_MPI_Comm_free( hypre_MPI_Comm *comm ) { return (HYPRE_Int) MPI_Comm_free(comm); } HYPRE_Int -hypre_MPI_Comm_group( MPI_Comm comm, +hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ) { return (HYPRE_Int) MPI_Comm_group(comm, group); } HYPRE_Int -hypre_MPI_Comm_split( MPI_Comm comm, - HYPRE_Int color, - HYPRE_Int key, - MPI_Comm *newcomm ) +hypre_MPI_Comm_split( hypre_MPI_Comm comm, + 
HYPRE_Int color, + HYPRE_Int key, + MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int) color, (hypre_int) key, newcomm); } @@ -1302,7 +1302,8 @@ hypre_MPI_Send_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Send_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, request); + (hypre_int)dest, (hypre_int)tag, + comm, request); } HYPRE_Int @@ -1335,7 +1336,8 @@ hypre_MPI_Recv_init( void *buf, hypre_MPI_Request *request ) { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, - (hypre_int)dest, (hypre_int)tag, comm, request); + (hypre_int)dest, (hypre_int)tag, + comm, request); } HYPRE_Int @@ -1422,11 +1424,9 @@ hypre_MPI_Testall( HYPRE_Int count, { hypre_int mpi_flag; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Testall((hypre_int)count, array_of_requests, &mpi_flag, array_of_statuses); *flag = (HYPRE_Int) mpi_flag; - return ierr; } @@ -1442,13 +1442,8 @@ hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, hypre_MPI_Status *array_of_statuses ) { - hypre_GpuProfilingPushRange("hypre_MPI_Waitall"); - HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Waitall((hypre_int)count, - array_of_requests, array_of_statuses); - hypre_GpuProfilingPopRange(); - - return ierr; + return (HYPRE_Int) MPI_Waitall((hypre_int)count, + array_of_requests, array_of_statuses); } HYPRE_Int @@ -1459,11 +1454,9 @@ hypre_MPI_Waitany( HYPRE_Int count, { hypre_int mpi_index; HYPRE_Int ierr; - ierr = (HYPRE_Int) MPI_Waitany((hypre_int)count, array_of_requests, &mpi_index, status); *index = (HYPRE_Int) mpi_index; - return ierr; } @@ -1609,8 +1602,8 @@ hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int -hypre_MPI_Comm_split_type( MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, MPI_Comm *newcomm ) +hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int 
key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_split_type(comm, split_type, key, info, newcomm ); } From 67678d4058b23049173edfa5d89fe61510c54509 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 27 Jun 2024 10:58:06 -0700 Subject: [PATCH 48/90] a small change --- src/utilities/mpistubs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 0a078e1416..4013175f45 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -930,7 +930,7 @@ HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int color, HYPRE_Int key, - MPI_Comm *newcomm ) + hypre_MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int) color, (hypre_int) key, newcomm); } From 7ecf8efe1dc9f61f949b9a6d26219a0eba732f3e Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 27 Jun 2024 15:05:21 -0700 Subject: [PATCH 49/90] fixed some issues; add hypre_ to function names --- src/parcsr_mv/_hypre_parcsr_mv.h | 3 +- src/parcsr_mv/par_csr_communication.c | 44 +++++++++++++-------------- src/utilities/_hypre_utilities.h | 15 +++++++++ src/utilities/mpistubs.c | 21 +++++++++++-- src/utilities/mpistubs.h | 15 +++++++++ 5 files changed, 71 insertions(+), 27 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index f0745a6541..de46ccd505 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -140,7 +140,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - MPI_Request grequest; + hypre_MPI_Request grequest; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -1020,7 +1020,6 @@ HYPRE_Int hypre_ParCSRFindExtendCommPkg(MPI_Comm comm, HYPRE_BigInt global_num_c HYPRE_BigInt first_col_diag, HYPRE_Int num_cols_diag, HYPRE_BigInt *col_starts, hypre_IJAssumedPart *apart, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg 
**extend_comm_pkg); -HYPRE_Int hypre_ParCSRCommHandleDestroyRequests( hypre_ParCSRCommHandle *comm_handle ); /* par_csr_matop.c */ HYPRE_Int hypre_ParCSRMatrixScale(hypre_ParCSRMatrix *A, HYPRE_Complex scalar); diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index edff016450..69787be21c 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -250,15 +250,14 @@ hypre_ParCSRCommHandleCreate ( HYPRE_Int job, HYPRE_MEMORY_HOST, recv_data); } -hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status) { return MPI_SUCCESS; } - hypre_int hypre_grequest_free_fn(void *extra_state) { - return MPI_SUCCESS; + hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; + hypre_MPI_GRequestProcessAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); + return hypre_MPI_SUCCESS; } -hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return MPI_SUCCESS; } - /*------------------------------------------------------------------ * hypre_ParCSRCommHandleCreate_v2 *------------------------------------------------------------------*/ @@ -278,11 +277,11 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); MPI_Comm comm; HYPRE_Int num_requests = num_sends + num_recvs; - HYPRE_Int nelem_recvs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[num_recvs]; HYPRE_Int data_size = hypre_ParCSRCommHandleGetDataTypeSize(job); hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_send_elems = 0, num_recv_elems = 0; hypre_MPI_Comm_dup(comm_orig, &comm); @@ -295,9 +294,10 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT: case 
HYPRE_COMM_PKG_JOB_BIGINT: { + num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); + num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), + num_send_elems, num_recv_elems, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); @@ -324,9 +324,10 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: { + num_send_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); + num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + num_send_elems, num_recv_elems, hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); @@ -352,22 +353,20 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, break; } } -#if 0 + hypre_MPI_GRequest_Action *action; hypre_MPI_GRequestGetCopyAction(recv_data, hypre_GetActualMemLocation(recv_memory_location), hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle), - nelem_recvs * data_size, + num_recv_elems * data_size, &action); - MPI_Grequest_start(hypre_grequest_noop_query_fn, - hypre_grequest_free_fn, - hypre_grequest_noop_cancel_fn, - action, - &hypre_ParCSRCommHandleGRequest(comm_handle)); -#endif -printf("%d\n", __LINE__); + hypre_MPI_Grequest_start(hypre_grequest_noop_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, + &hypre_ParCSRCommHandleGRequest(comm_handle)); 
/*-------------------------------------------------------------------- * set up comm_handle and return @@ -412,9 +411,8 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } - //MPI_Grequest_complete(hypre_ParCSRCommHandleGRequest(comm_handle)); - - //MPI_Wait(&hypre_ParCSRCommHandleGRequest(comm_handle), MPI_STATUS_IGNORE); + hypre_MPI_Grequest_complete(hypre_ParCSRCommHandleGRequest(comm_handle)); + hypre_MPI_Wait(&hypre_ParCSRCommHandleGRequest(comm_handle), MPI_STATUS_IGNORE); return hypre_error_flag; } @@ -432,7 +430,7 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { return hypre_error_flag; } -printf("%d\n", __LINE__); + hypre_ParCSRCommHandleWait(comm_handle); _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 925f942086..8d35e414e9 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1120,6 +1120,9 @@ extern "C" { #define MPI_Comm_set_attr hypre_MPI_Comm_set_attr #define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function #define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function +#define MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +#define MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +#define MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; /*-------------------------------------------------------------------------- * Types, etc. 
@@ -1133,6 +1136,9 @@ typedef HYPRE_Int hypre_MPI_Datatype; typedef void (hypre_MPI_User_function) (void); typedef void (hypre_MPI_Comm_copy_attr_function) (void); typedef void (hypre_MPI_Comm_delete_attr_function) (void); +typedef void (hypre_MPI_Grequest_query_function) (void); +typedef void (hypre_MPI_Grequest_free_function) (void); +typedef void (hypre_MPI_Grequest_cancel_function) (void); typedef struct { @@ -1198,6 +1204,9 @@ typedef MPI_Info hypre_MPI_Info; typedef MPI_User_function hypre_MPI_User_function; typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; +typedef MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +typedef MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; #define hypre_MPI_COMM_WORLD MPI_COMM_WORLD #define hypre_MPI_COMM_NULL MPI_COMM_NULL @@ -1379,6 +1388,9 @@ HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_c hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); @@ -1393,6 +1405,9 @@ HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); 
HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status); +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); + #ifdef __cplusplus } #endif diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 4013175f45..333918a5df 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -838,6 +838,22 @@ hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr return ierr; } +HYPRE_Int +hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, + hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, + void *extra_state, + hypre_MPI_Request *request) +{ + return (HYPRE_Int) MPI_Grequest_start(query_fn, free_fn, cancel_fn, extra_state, request); +} + +HYPRE_Int +hypre_MPI_Grequest_complete( hypre_MPI_Request request ) +{ + return (HYPRE_Int) MPI_Grequest_complete(request); +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -1862,7 +1878,8 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) hypre_assert(data == hypre_MPI_GRequest_ActionData(action) + hypre_MPI_GRequest_ActionDataSize(action)); - hypre_TFree(action, HYPRE_MEMORY_HOST); - return hypre_error_flag; } + +hypre_int hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status) { return hypre_MPI_SUCCESS; } +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 7b7356c6c7..0eac92a305 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -136,6 +136,9 @@ extern "C" { #define MPI_Comm_set_attr hypre_MPI_Comm_set_attr #define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function #define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function +#define MPI_Grequest_query_function 
hypre_MPI_Grequest_query_function; +#define MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +#define MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; /*-------------------------------------------------------------------------- * Types, etc. @@ -149,6 +152,9 @@ typedef HYPRE_Int hypre_MPI_Datatype; typedef void (hypre_MPI_User_function) (void); typedef void (hypre_MPI_Comm_copy_attr_function) (void); typedef void (hypre_MPI_Comm_delete_attr_function) (void); +typedef void (hypre_MPI_Grequest_query_function) (void); +typedef void (hypre_MPI_Grequest_free_function) (void); +typedef void (hypre_MPI_Grequest_cancel_function) (void); typedef struct { @@ -214,6 +220,9 @@ typedef MPI_Info hypre_MPI_Info; typedef MPI_User_function hypre_MPI_User_function; typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; +typedef MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +typedef MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; #define hypre_MPI_COMM_WORLD MPI_COMM_WORLD #define hypre_MPI_COMM_NULL MPI_COMM_NULL @@ -395,6 +404,9 @@ HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_c hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); hypre_MemoryLocation 
hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); @@ -409,6 +421,9 @@ HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status); +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); + #ifdef __cplusplus } #endif From a972b79a82b04da503cf66cad38e7c9fd711358c Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sat, 29 Jun 2024 17:34:48 -0700 Subject: [PATCH 50/90] put Grequest in mpi_multi_recv --- src/parcsr_mv/_hypre_parcsr_mv.h | 30 +----- src/parcsr_mv/par_csr_communication.c | 130 ++++++++++++-------------- src/parcsr_mv/par_csr_communication.h | 30 +----- src/utilities/_hypre_utilities.h | 9 +- src/utilities/mpistubs.c | 98 ++++++++++++++++--- src/utilities/mpistubs.h | 7 +- 6 files changed, 164 insertions(+), 140 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index de46ccd505..fe388238b1 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -95,32 +95,6 @@ hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) return dtype; } -static inline HYPRE_Int -hypre_ParCSRCommHandleGetDataTypeSize(HYPRE_Int job) -{ - HYPRE_Int nbytes = 0; - - switch (hypre_ParCSRCommHandleGetJobType(job)) - { - case HYPRE_COMM_PKG_JOB_COMPLEX: - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_Complex); - break; - case HYPRE_COMM_PKG_JOB_INT: - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_Int); - break; - case HYPRE_COMM_PKG_JOB_BIGINT: - case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_BigInt); - break; - default: - break; - } - - return nbytes; -} - 
/*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle *--------------------------------------------------------------------------*/ @@ -140,7 +114,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - hypre_MPI_Request grequest; + hypre_MPI_Request extra_request; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -160,7 +134,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleGRequest(comm_handle) (comm_handle -> grequest) +#define hypre_ParCSRCommHandleExtraRequest(comm_handle) (comm_handle -> extra_request) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 69787be21c..6f4232af2d 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -46,20 +46,6 @@ hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location HYPRE_Int size_of_elem, hypre_ParCSRCommHandle *comm_handle ) { -#if defined(HYPRE_USING_PERSISTENT_COMM) - if (!hypre_ParCSRCommHandleSendData(comm_handle)) - { - hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * size_of_elem, - send_memory_location); - } - - if (!hypre_ParCSRCommHandleRecvData(comm_handle)) - { - hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * size_of_elem, - recv_memory_location); - } -#endif - if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(send_memory_location))) { @@ -96,11 +82,16 @@ 
hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, { HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); + MPI_Comm comm; HYPRE_Int num_requests = num_sends + num_recvs; + HYPRE_Int data_size = hypre_ParCSRCommHandleGetDataTypeSize(job); hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_send_elems = 0, num_recv_elems = 0; + + hypre_MPI_Comm_dup(comm_orig, &comm); switch (hypre_ParCSRCommHandleGetJobType(job)) { @@ -108,10 +99,23 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT: case HYPRE_COMM_PKG_JOB_BIGINT: { + num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); + num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); + + if (!hypre_ParCSRCommHandleSendData(comm_handle)) + { + hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * data_size, + send_memory_location); + } + + if (!hypre_ParCSRCommHandleRecvData(comm_handle)) + { + hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * data_size, + recv_memory_location); + } + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + num_send_elems, num_recv_elems, data_size, comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), num_recvs, @@ -134,10 +138,23 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: 
case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: { + num_send_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); + num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); + + if (!hypre_ParCSRCommHandleSendData(comm_handle)) + { + hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * data_size, + send_memory_location); + } + + if (!hypre_ParCSRCommHandleRecvData(comm_handle)) + { + hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * data_size, + recv_memory_location); + } + hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + num_send_elems, num_recv_elems, data_size, comm_handle); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), num_sends, @@ -154,10 +171,10 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, 0, comm, requests + num_sends); break; + } - default: + default: break; - } } hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; @@ -250,14 +267,6 @@ hypre_ParCSRCommHandleCreate ( HYPRE_Int job, HYPRE_MEMORY_HOST, recv_data); } -hypre_int hypre_grequest_free_fn(void *extra_state) -{ - hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; - hypre_MPI_GRequestProcessAction(action); - hypre_TFree(action, HYPRE_MEMORY_HOST); - return hypre_MPI_SUCCESS; -} - /*------------------------------------------------------------------ * hypre_ParCSRCommHandleCreate_v2 *------------------------------------------------------------------*/ @@ -272,18 +281,20 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, { hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_v2"); - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = 
hypre_ParCSRCommPkgComm(comm_pkg); + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); MPI_Comm comm; - HYPRE_Int num_requests = num_sends + num_recvs; - HYPRE_Int data_size = hypre_ParCSRCommHandleGetDataTypeSize(job); - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); - hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + hypre_MPI_Request *extra_request = &hypre_ParCSRCommHandleExtraRequest(comm_handle); HYPRE_Int num_send_elems = 0, num_recv_elems = 0; + HYPRE_Int data_size; hypre_MPI_Comm_dup(comm_orig, &comm); + hypre_MPI_Type_size(mpi_dtype, &data_size); hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); @@ -297,20 +308,18 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + num_send_elems, num_recv_elems, data_size, comm_handle); - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - 
hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_MPI_Irecv_Multiple(recv_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests); + 0, comm, requests, extra_request); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); hypre_MPI_Isend_Multiple(send_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, @@ -327,20 +336,18 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, num_send_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, - hypre_ParCSRCommHandleGetDataTypeSize(job), comm_handle); + num_send_elems, num_recv_elems, data_size, comm_handle); - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_MPI_Irecv_Multiple(recv_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests); + 0, comm, requests, extra_request); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); hypre_MPI_Isend_Multiple(send_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, @@ 
-348,26 +355,12 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, 0, comm, requests + num_sends); break; + } - default: + default: break; - } } - hypre_MPI_GRequest_Action *action; - hypre_MPI_GRequestGetCopyAction(recv_data, - hypre_GetActualMemLocation(recv_memory_location), - hypre_ParCSRCommHandleRecvBuffer(comm_handle), - hypre_ParCSRCommHandleRecvBufferLocation(comm_handle), - num_recv_elems * data_size, - &action); - - hypre_MPI_Grequest_start(hypre_grequest_noop_query_fn, - hypre_grequest_free_fn, - hypre_grequest_noop_cancel_fn, - action, - &hypre_ParCSRCommHandleGRequest(comm_handle)); - /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ @@ -411,8 +404,7 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } - hypre_MPI_Grequest_complete(hypre_ParCSRCommHandleGRequest(comm_handle)); - hypre_MPI_Wait(&hypre_ParCSRCommHandleGRequest(comm_handle), MPI_STATUS_IGNORE); + hypre_MPI_Wait(&hypre_ParCSRCommHandleExtraRequest(comm_handle), MPI_STATUS_IGNORE); return hypre_error_flag; } diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 1328ecdbf8..13e42d38dd 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -79,32 +79,6 @@ hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) return dtype; } -static inline HYPRE_Int -hypre_ParCSRCommHandleGetDataTypeSize(HYPRE_Int job) -{ - HYPRE_Int nbytes = 0; - - switch (hypre_ParCSRCommHandleGetJobType(job)) - { - case HYPRE_COMM_PKG_JOB_COMPLEX: - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_Complex); - break; - case HYPRE_COMM_PKG_JOB_INT: - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_Int); - break; - case HYPRE_COMM_PKG_JOB_BIGINT: - case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - nbytes = (HYPRE_Int) sizeof(HYPRE_BigInt); - break; - 
default: - break; - } - - return nbytes; -} - /*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle *--------------------------------------------------------------------------*/ @@ -124,7 +98,7 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - hypre_MPI_Request grequest; + hypre_MPI_Request extra_request; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -144,7 +118,7 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleGRequest(comm_handle) (comm_handle -> grequest) +#define hypre_ParCSRCommHandleExtraRequest(comm_handle) (comm_handle -> extra_request) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 8d35e414e9..4ba6b75431 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1371,7 +1371,8 @@ HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, 
HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, @@ -1391,6 +1392,7 @@ HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, vo HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); +HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); @@ -1398,14 +1400,17 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); +hypre_MPI_Request * hypre_MPICommGetGRequest(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetGRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); -hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status); +hypre_int hypre_grequest_free_fn(void *extra_state); +hypre_int hypre_grequest_noop_query_fn(void *extra_state, 
hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); #ifdef __cplusplus diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 333918a5df..561d6f89d5 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -854,6 +854,16 @@ hypre_MPI_Grequest_complete( hypre_MPI_Request request ) return (HYPRE_Int) MPI_Grequest_complete(request); } +HYPRE_Int +hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) +{ + hypre_int mpi_size; + HYPRE_Int ierr; + ierr = MPI_Type_size(datatype, &mpi_size); + *size = (HYPRE_Int) mpi_size; + return ierr; +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -1279,11 +1289,35 @@ hypre_MPI_Isend_Multiple( void *buf, hypre_MPI_Comm comm, hypre_MPI_Request *requests ) { - hypre_MemoryLocation memory_location = hypre_MPICommGetSendLocation(comm); + if (!num) + { + return hypre_error_flag; + } - TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Isend, TYPE_MACRO_SEND, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + HYPRE_Int data_size; + hypre_MPI_Type_size(datatype, &data_size); + + void *cbuf = hypre_MPICommGetSendBuffer(comm); + void *sbuf = cbuf ? cbuf : buf; + if (sbuf != buf) + { + hypre_GpuProfilingPushRange("MPI-D2H"); + _hypre_TMemcpy(sbuf, + buf, + char, + displs[num] * data_size, + hypre_MPICommGetSendBufferLocation(comm), + hypre_MPICommGetSendLocation(comm)); + hypre_GpuProfilingPopRange(); + } + + HYPRE_Int i; + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; + hypre_MPI_Isend((char *) sbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + } return hypre_error_flag; } @@ -1297,13 +1331,44 @@ hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) { - hypre_MemoryLocation memory_location = hypre_MPICommGetRecvLocation(comm); + *extra_request = hypre_MPI_REQUEST_NULL; - TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Irecv, TYPE_MACRO_RECV, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size, i; + + hypre_MPI_Type_size(datatype, &data_size); + void *cbuf = hypre_MPICommGetRecvBuffer(comm); + void *rbuf = cbuf ? cbuf : buf; + + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; + hypre_MPI_Irecv((char *) rbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + } + + if (rbuf != buf) + { + hypre_MPI_GRequest_Action *action; + hypre_MemoryLocation recv_memory_location = hypre_MPICommGetRecvLocation(comm); + hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); + HYPRE_Int num_recv_elems = displs[num]; + hypre_MPI_GRequestGetCopyAction(buf, recv_memory_location, rbuf, recv_buffer_location, + num_recv_elems * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_noop_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + } return hypre_error_flag; } @@ -1881,5 +1946,16 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) return hypre_error_flag; } -hypre_int hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status) { return hypre_MPI_SUCCESS; } -hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } +hypre_int +hypre_grequest_free_fn(void *extra_state) +{ + hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; + hypre_MPI_GRequestProcessAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); + return hypre_MPI_SUCCESS; +} + +hypre_int +hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status) { return hypre_MPI_SUCCESS; } +hypre_int +hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 0eac92a305..4a3b2e50f8 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -387,7 +387,8 @@ HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, 
HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, @@ -407,6 +408,7 @@ HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, vo HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); +HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); @@ -421,7 +423,8 @@ HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); -hypre_int hypre_grequest_noop_query_fn(void *extra_state, MPI_Status *status); +hypre_int hypre_grequest_free_fn(void *extra_state); +hypre_int hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); #ifdef __cplusplus From e280057cddec526bf93583f748be0e82a9a5a988 Mon Sep 17 00:00:00 2001 From: Rui 
Peng Li Date: Sun, 30 Jun 2024 22:30:35 -0700 Subject: [PATCH 51/90] reworked persistent mpi --- src/parcsr_mv/_hypre_parcsr_mv.h | 9 +- src/parcsr_mv/par_csr_communication.c | 98 +++++++++++----- src/parcsr_mv/par_csr_communication.h | 9 +- src/parcsr_mv/par_csr_matop.c | 22 ++-- src/utilities/_hypre_utilities.h | 11 +- src/utilities/mpistubs.c | 154 +++++++++++++++----------- src/utilities/mpistubs.h | 9 +- 7 files changed, 197 insertions(+), 115 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index fe388238b1..dd7b737891 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -103,6 +103,7 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; + HYPRE_Int persistent; void *send_data; void *recv_data; /* send/recv buffers to copy to/from */ @@ -114,7 +115,8 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - hypre_MPI_Request extra_request; + HYPRE_Int num_extra_requests; + hypre_MPI_Request *extra_requests; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -123,6 +125,7 @@ typedef struct *--------------------------------------------------------------------------*/ #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) #define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) @@ -134,7 +137,9 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleExtraRequest(comm_handle) 
(comm_handle -> extra_request) +#define hypre_ParCSRCommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) +#define hypre_ParCSRCommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) +#define hypre_ParCSRCommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 6f4232af2d..57d9e4e018 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -80,18 +80,25 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, HYPRE_MemoryLocation send_memory_location, HYPRE_MemoryLocation recv_memory_location ) { - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_GpuProfilingPushRange("hypre_ParCSRPersistentCommHandleCreate"); + + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); MPI_Comm comm; - HYPRE_Int num_requests = num_sends + num_recvs; - HYPRE_Int data_size = hypre_ParCSRCommHandleGetDataTypeSize(job); - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); - hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, 
HYPRE_MEMORY_HOST); + hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, 2, HYPRE_MEMORY_HOST); HYPRE_Int num_send_elems = 0, num_recv_elems = 0; + HYPRE_Int data_size; hypre_MPI_Comm_dup(comm_orig, &comm); + hypre_MPI_Type_size(mpi_dtype, &data_size); + + hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); + hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); switch (hypre_ParCSRCommHandleGetJobType(job)) { @@ -117,19 +124,23 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, num_send_elems, num_recv_elems, data_size, comm_handle); + hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests); + 0, comm, requests, &extra_requests[0]); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs); + 0, comm, requests + num_recvs, &extra_requests[1]); break; } @@ -161,14 +172,14 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests); + 0, comm, requests, &extra_requests[0]); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, 
hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + num_sends); + 0, comm, requests + num_sends, &extra_requests[1]); break; } @@ -177,11 +188,15 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, break; } - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandlePersistent(comm_handle) = 1; + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = 2; + hypre_ParCSRCommHandleExtraRequests(comm_handle) = extra_requests; + hypre_ParCSRCommHandleComm(comm_handle) = comm; return ( comm_handle ); } @@ -220,11 +235,22 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) return hypre_error_flag; } + HYPRE_Int i; + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + for (i = 0; i < hypre_ParCSRCommHandleNumExtraRequests(comm_handle); i++) + { + if (hypre_ParCSRCommHandleExtraRequest(comm_handle, i) != hypre_MPI_REQUEST_NULL) + { + 
hypre_MPI_Request_free(&hypre_ParCSRCommHandleExtraRequest(comm_handle, i)); + } + } + hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); return hypre_error_flag; @@ -237,6 +263,9 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) HYPRE_Int hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) { + HYPRE_Int flag; + hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 1), &flag, MPI_STATUS_IGNORE); + if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) { HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle), @@ -289,7 +318,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_MPI_Request *extra_request = &hypre_ParCSRCommHandleExtraRequest(comm_handle); + hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); HYPRE_Int num_send_elems = 0, num_recv_elems = 0; HYPRE_Int data_size; @@ -316,7 +345,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests, extra_request); + 0, comm, requests, extra_requests); hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); @@ -344,7 +373,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests, extra_request); + 0, comm, 
requests, extra_requests); hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); @@ -365,14 +394,16 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, * set up comm_handle and return *--------------------------------------------------------------------*/ - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; - hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; - hypre_ParCSRCommHandleComm(comm_handle) = comm; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; + hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = 1; + hypre_ParCSRCommHandleExtraRequests(comm_handle) = extra_requests; + hypre_ParCSRCommHandleComm(comm_handle) = comm; hypre_GpuProfilingPopRange(); @@ -404,7 +435,15 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } - hypre_MPI_Wait(&hypre_ParCSRCommHandleExtraRequest(comm_handle), MPI_STATUS_IGNORE); + if (hypre_ParCSRCommHandlePersistent(comm_handle)) + { + HYPRE_Int flag; + hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), &flag, MPI_STATUS_IGNORE); + } + else + { + hypre_MPI_Wait(&hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), MPI_STATUS_IGNORE); + 
} return hypre_error_flag; } @@ -428,6 +467,7 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 13e42d38dd..ca4eaacd7d 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -87,6 +87,7 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; + HYPRE_Int persistent; void *send_data; void *recv_data; /* send/recv buffers to copy to/from */ @@ -98,7 +99,8 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - hypre_MPI_Request extra_request; + HYPRE_Int num_extra_requests; + hypre_MPI_Request *extra_requests; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -107,6 +109,7 @@ typedef struct *--------------------------------------------------------------------------*/ #define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) #define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) @@ -118,7 +121,9 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) 
(comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleExtraRequest(comm_handle) (comm_handle -> extra_request) +#define hypre_ParCSRCommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) +#define hypre_ParCSRCommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) +#define hypre_ParCSRCommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 7783b85c34..2049ca994c 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6707,9 +6707,10 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) - comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); - recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); - send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg, + memory_location, memory_location); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); send_data = hypre_Memset((void *) send_data, 0, (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), memory_location); @@ -6730,7 +6731,7 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, @@ 
-6754,7 +6755,7 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); + hypre_ParCSRCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -6916,9 +6917,10 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) - comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); - recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); - send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg, + memory_location, memory_location); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); send_data = hypre_Memset((void *) send_data, 0, (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), memory_location); @@ -6939,7 +6941,7 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, @@ -6959,7 +6961,7 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); + hypre_ParCSRCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 4ba6b75431..f3fab93b61 100644 --- 
a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1349,6 +1349,7 @@ HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status); HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, @@ -1374,9 +1375,11 @@ HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, hypre_MPI_Request *extra_request ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -1400,17 +1403,15 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation 
hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); -hypre_MPI_Request * hypre_MPICommGetGRequest(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetGRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); hypre_int hypre_grequest_free_fn(void *extra_state); -hypre_int hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status); +hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); #ifdef __cplusplus diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 561d6f89d5..7ea9aae56e 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -809,6 +809,15 @@ hypre_MPI_Irecv_Multiple( void *buf, #else +HYPRE_Int +hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status) +{ + hypre_int mpi_flag; + HYPRE_Int ierr = MPI_Request_get_status(request, &mpi_flag, status); + *flag = (HYPRE_Int) mpi_flag; + return ierr; +} + HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) { @@ -1236,48 +1245,6 @@ hypre_MPI_Irecv( void *buf, (hypre_int)source, (hypre_int)tag, comm, request); } -#define TYPE_MACRO_SEND 0 -#define TYPE_MACRO_RECV 1 -#define TYPE_MACRO_SEND_INIT 2 -#define TYPE_MACRO_RECV_INIT 3 - -#define TYPE_MACRO(MPI_CMD, SEND_RECV, HYPRE_DTYPE, HYPRE_MPI_DTYPE) \ 
-{ \ - if (datatype == HYPRE_MPI_DTYPE) \ - { \ - if (!num) \ - { \ - return hypre_error_flag; \ - } \ - HYPRE_Int i, ntot = displs[num]; \ - void *cbuf = NULL; \ - if (SEND_RECV == TYPE_MACRO_SEND || SEND_RECV == TYPE_MACRO_SEND_INIT) \ - { \ - cbuf = hypre_MPICommGetSendBuffer(comm); \ - } \ - else if (SEND_RECV == TYPE_MACRO_RECV || SEND_RECV == TYPE_MACRO_RECV_INIT) \ - { \ - cbuf = hypre_MPICommGetRecvBuffer(comm); \ - } \ - HYPRE_DTYPE *_buf = (HYPRE_DTYPE *) (cbuf ? cbuf : buf); \ - if (SEND_RECV == TYPE_MACRO_SEND && _buf != buf) \ - { \ - hypre_GpuProfilingPushRange("MPI-D2H"); \ - _hypre_TMemcpy(_buf, buf, HYPRE_DTYPE, ntot, \ - hypre_MPICommGetSendBufferLocation(comm), memory_location); \ - hypre_GpuProfilingPopRange(); \ - } \ - for (i = 0; i < num; i++) \ - { \ - HYPRE_Int ip = procs[i]; \ - HYPRE_Int start = displs[i]; \ - HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; \ - MPI_CMD(_buf + start, len, HYPRE_MPI_DTYPE, ip, tag, comm, &requests[i]); \ - } \ - return hypre_error_flag; \ - } \ -} - HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, @@ -1357,13 +1324,11 @@ hypre_MPI_Irecv_Multiple( void *buf, if (rbuf != buf) { hypre_MPI_GRequest_Action *action; - hypre_MemoryLocation recv_memory_location = hypre_MPICommGetRecvLocation(comm); - hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); - HYPRE_Int num_recv_elems = displs[num]; - hypre_MPI_GRequestGetCopyAction(buf, recv_memory_location, rbuf, recv_buffer_location, - num_recv_elems * data_size, &action); + hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, + hypre_MPICommGetRecvBufferLocation(comm), + displs[num] * data_size, &action); - hypre_MPI_Grequest_start(hypre_grequest_noop_query_fn, + hypre_MPI_Grequest_start(hypre_grequest_query_fn, hypre_grequest_free_fn, hypre_grequest_noop_cancel_fn, action, extra_request); @@ -1396,13 +1361,42 @@ hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int 
tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) { - hypre_MemoryLocation memory_location = hypre_MPICommGetSendLocation(comm); + *extra_request = hypre_MPI_REQUEST_NULL; + + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size; + hypre_MPI_Type_size(datatype, &data_size); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Send_init, TYPE_MACRO_SEND_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + void *cbuf = hypre_MPICommGetSendBuffer(comm); + void *sbuf = cbuf ? cbuf : buf; + if (sbuf != buf) + { + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(sbuf, hypre_MPICommGetSendBufferLocation(comm), buf, + hypre_MPICommGetSendLocation(comm), + displs[num] * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + } + + HYPRE_Int i; + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; + hypre_MPI_Send_init((char *) sbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + } return hypre_error_flag; } @@ -1414,7 +1408,7 @@ hypre_MPI_Recv_init( void *buf, HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *request ) + hypre_MPI_Request *request) { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, @@ -1430,13 +1424,42 @@ hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) { - hypre_MemoryLocation memory_location = hypre_MPICommGetRecvLocation(comm); + *extra_request = hypre_MPI_REQUEST_NULL; + + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size, i; + + hypre_MPI_Type_size(datatype, &data_size); + void *cbuf = hypre_MPICommGetRecvBuffer(comm); + void *rbuf = cbuf ? cbuf : buf; + + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; + hypre_MPI_Recv_init((char *) rbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + } - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Complex, HYPRE_MPI_COMPLEX); - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_Int, HYPRE_MPI_INT); - TYPE_MACRO(MPI_Recv_init, TYPE_MACRO_RECV_INIT, HYPRE_BigInt, HYPRE_MPI_BIG_INT); + if (rbuf != buf) + { + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, + hypre_MPICommGetRecvBufferLocation(comm), + displs[num] * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + } return hypre_error_flag; } @@ -1935,7 +1958,7 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); data += sizeof(hypre_MemoryLocation); // action! 
- hypre_GpuProfilingPushRange("MPI-H2D"); + hypre_GpuProfilingPushRange("MPI-H2D/D2H"); _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); hypre_GpuProfilingPopRange(); } @@ -1949,13 +1972,16 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) hypre_int hypre_grequest_free_fn(void *extra_state) { - hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; - hypre_MPI_GRequestProcessAction(action); - hypre_TFree(action, HYPRE_MEMORY_HOST); + hypre_TFree(extra_state, HYPRE_MEMORY_HOST); return hypre_MPI_SUCCESS; } hypre_int -hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status) { return hypre_MPI_SUCCESS; } +hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status) +{ + hypre_MPI_GRequestProcessAction((hypre_MPI_GRequest_Action *) extra_state); + return hypre_MPI_SUCCESS; +} + hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 4a3b2e50f8..497a128919 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -365,6 +365,7 @@ HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status); HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, @@ -390,9 +391,11 @@ HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, hypre_MPI_Request 
*extra_request ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, +hypre_MPI_Request *extra_request ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -424,7 +427,7 @@ HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); hypre_int hypre_grequest_free_fn(void *extra_state); -hypre_int hypre_grequest_noop_query_fn(void *extra_state, hypre_MPI_Status *status); +hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); #ifdef __cplusplus From 6ef031b4d62055702467811925060842e61b569e Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 1 Jul 2024 15:19:54 -0700 Subject: [PATCH 52/90] clean up code --- src/parcsr_mv/_hypre_parcsr_mv.h | 7 + src/parcsr_mv/par_csr_communication.c | 324 +++++++++++--------------- src/parcsr_mv/protos.h | 7 + 3 files changed, 152 insertions(+), 186 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index dd7b737891..9b9f2dcdfe 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -963,6 +963,13 @@ HYPRE_Int 
hypre_BooleanGenerateDiagAndOffd ( hypre_CSRBooleanMatrix *A, /* par_csr_communication.c */ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, void *send_data, void *recv_data ); +hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data_in, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data_in ); hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 57d9e4e018..98f8a98e6d 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -39,29 +39,51 @@ *-------------------------------------------------------------------------------------*/ HYPRE_Int -hypre_ParCSRCommHandleCreateBuffer( HYPRE_MemoryLocation send_memory_location, - HYPRE_MemoryLocation recv_memory_location, - HYPRE_Int num_send_elems, - HYPRE_Int num_recv_elems, - HYPRE_Int size_of_elem, - hypre_ParCSRCommHandle *comm_handle ) +hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location, + HYPRE_Int num_send_elems, + HYPRE_Int num_recv_elems, + HYPRE_Int size_of_elem, + hypre_ParCSRCommHandle *comm_handle ) { - if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && - hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(send_memory_location))) + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + + if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && hypre_MPINeedHostBuffer(send_memory_alocation)) { hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); 
hypre_ParCSRCommHandleSendBufferLocation(comm_handle) = location; hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, location); } - if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle) && - hypre_MPINeedHostBuffer(hypre_GetActualMemLocation(recv_memory_location))) + if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle) && hypre_MPINeedHostBuffer(recv_memory_alocation)) { hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) = location; hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, location); } + if (hypre_ParCSRCommHandlePersistent(comm_handle)) + { + if (!hypre_ParCSRCommHandleSendData(comm_handle)) + { + hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * size_of_elem, send_memory_location); + } + + if (!hypre_ParCSRCommHandleRecvData(comm_handle)) + { + hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_memory_location); + } + } + + hypre_MPICommSetSendLocation(hypre_ParCSRCommHandleComm(comm_handle), send_memory_alocation); + hypre_MPICommSetSendBuffer(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + + hypre_MPICommSetRecvLocation(hypre_ParCSRCommHandleComm(comm_handle), recv_memory_alocation); + hypre_MPICommSetRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + return hypre_error_flag; } @@ -80,125 +102,7 @@ hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, HYPRE_MemoryLocation send_memory_location, HYPRE_MemoryLocation recv_memory_location ) { - 
hypre_GpuProfilingPushRange("hypre_ParCSRPersistentCommHandleCreate"); - - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); - MPI_Comm comm; - HYPRE_Int num_requests = num_sends + num_recvs; - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); - hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, 2, HYPRE_MEMORY_HOST); - HYPRE_Int num_send_elems = 0, num_recv_elems = 0; - HYPRE_Int data_size; - - hypre_MPI_Comm_dup(comm_orig, &comm); - hypre_MPI_Type_size(mpi_dtype, &data_size); - - hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); - hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); - - switch (hypre_ParCSRCommHandleGetJobType(job)) - { - case HYPRE_COMM_PKG_JOB_COMPLEX: - case HYPRE_COMM_PKG_JOB_INT: - case HYPRE_COMM_PKG_JOB_BIGINT: - { - num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - - if (!hypre_ParCSRCommHandleSendData(comm_handle)) - { - hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * data_size, - send_memory_location); - } - - if (!hypre_ParCSRCommHandleRecvData(comm_handle)) - { - hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * data_size, - recv_memory_location); - } - - hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetRecvBufferLocation(comm, 
hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests, &extra_requests[0]); - - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs, &extra_requests[1]); - - break; - } - - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - { - num_send_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - - if (!hypre_ParCSRCommHandleSendData(comm_handle)) - { - hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * data_size, - send_memory_location); - } - - if (!hypre_ParCSRCommHandleRecvData(comm_handle)) - { - hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * data_size, - recv_memory_location); - } - - hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests, &extra_requests[0]); - - hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + 
num_sends, &extra_requests[1]); - - break; - } - - default: - break; - } - - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandlePersistent(comm_handle) = 1; - hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; - hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = 2; - hypre_ParCSRCommHandleExtraRequests(comm_handle) = extra_requests; - hypre_ParCSRCommHandleComm(comm_handle) = comm; - - return ( comm_handle ); + return hypre_ParCSRCommHandleCreate_core(1, job, comm_pkg, send_memory_location, NULL, recv_memory_location, NULL); } /*------------------------------------------------------------------ @@ -237,10 +141,10 @@ hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) HYPRE_Int i; - hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); for (i = 0; i < hypre_ParCSRCommHandleNumExtraRequests(comm_handle); i++) { @@ -273,7 +177,6 @@ hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) if (hypre_MPI_SUCCESS != ret) { hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n"); - /*hypre_printf("MPI error 
%d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ } } @@ -299,7 +202,6 @@ hypre_ParCSRCommHandleCreate ( HYPRE_Int job, /*------------------------------------------------------------------ * hypre_ParCSRCommHandleCreate_v2 *------------------------------------------------------------------*/ - hypre_ParCSRCommHandle* hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, @@ -308,25 +210,43 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_MemoryLocation recv_memory_location, void *recv_data ) { - hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_v2"); + return hypre_ParCSRCommHandleCreate_core(0, job, comm_pkg, send_memory_location, send_data, recv_memory_location, recv_data); +} + +/*------------------------------------------------------------------ + * hypre_ParCSRCommHandleCreate_core + *------------------------------------------------------------------*/ + +hypre_ParCSRCommHandle* +hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data ) +{ + hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_core"); - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); MPI_Comm comm; - HYPRE_Int num_requests = num_sends + num_recvs; - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); - hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - hypre_MPI_Request 
*extra_requests = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); - HYPRE_Int num_send_elems = 0, num_recv_elems = 0; + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_extra_requests = persistent ? 2 : 1; + hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); + HYPRE_Int num_send_elems = 0; + HYPRE_Int num_recv_elems = 0; HYPRE_Int data_size; hypre_MPI_Comm_dup(comm_orig, &comm); hypre_MPI_Type_size(mpi_dtype, &data_size); - hypre_MPICommSetSendLocation(comm, hypre_GetActualMemLocation(send_memory_location)); - hypre_MPICommSetRecvLocation(comm, hypre_GetActualMemLocation(recv_memory_location)); + hypre_ParCSRCommHandlePersistent(comm_handle) = persistent; + hypre_ParCSRCommHandleComm(comm_handle) = comm; switch (hypre_ParCSRCommHandleGetJobType(job)) { @@ -336,24 +256,39 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, { num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_MPI_Irecv_Multiple(recv_data, num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests, extra_requests); - - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, 
hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPI_Isend_Multiple(send_data, num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs); + hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, + num_send_elems, num_recv_elems, data_size, comm_handle); + + if (persistent) + { + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, comm, requests, &extra_requests[0]); + + hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, comm, requests + num_recvs, &extra_requests[1]); + } + else + { + hypre_MPI_Irecv_Multiple(recv_data, num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, comm, requests, extra_requests); + + hypre_MPI_Isend_Multiple(send_data, num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, comm, requests + num_recvs); + } break; } @@ -364,24 +299,39 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, { num_send_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - hypre_ParCSRCommHandleCreateBuffer(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_MPI_Irecv_Multiple(recv_data, num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - 
hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests, extra_requests); - - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPI_Isend_Multiple(send_data, num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + num_sends); + hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, + num_send_elems, num_recv_elems, data_size, comm_handle); + + if (persistent) + { + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, comm, requests, &extra_requests[0]); + + hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, comm, requests + num_sends, &extra_requests[1]); + } + else + { + hypre_MPI_Irecv_Multiple(recv_data, num_sends, + hypre_ParCSRCommPkgSendMapStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgSendProcs(comm_pkg), + 0, comm, requests, extra_requests); + + hypre_MPI_Isend_Multiple(send_data, num_recvs, + hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), + NULL, mpi_dtype, + hypre_ParCSRCommPkgRecvProcs(comm_pkg), + 0, comm, requests + num_sends); + } break; } @@ -395,15 +345,17 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, *--------------------------------------------------------------------*/ hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + if (!persistent) + { + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + } 
hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; hypre_ParCSRCommHandleRequests(comm_handle) = requests; - hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = 1; + hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = num_extra_requests; hypre_ParCSRCommHandleExtraRequests(comm_handle) = extra_requests; - hypre_ParCSRCommHandleComm(comm_handle) = comm; hypre_GpuProfilingPopRange(); @@ -438,7 +390,7 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) if (hypre_ParCSRCommHandlePersistent(comm_handle)) { HYPRE_Int flag; - hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), &flag, MPI_STATUS_IGNORE); + hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), &flag, hypre_MPI_STATUS_IGNORE); } else { diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 3a40ca2513..86710984bd 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -237,6 +237,13 @@ HYPRE_Int hypre_BooleanGenerateDiagAndOffd ( hypre_CSRBooleanMatrix *A, /* par_csr_communication.c */ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, void *send_data, void *recv_data ); +hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data_in, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data_in ); hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, From 4924b31ede29975f139da98b18df17641bbd5c32 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 1 Jul 2024 15:21:07 -0700 Subject: [PATCH 53/90] fix for nonmpi --- src/utilities/_hypre_utilities.h | 3 ++ src/utilities/mpistubs.c 
| 58 +++++++++++++++++++++++++++++--- src/utilities/mpistubs.h | 3 ++ 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index f3fab93b61..13299e5ab4 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1052,6 +1052,7 @@ extern "C" { #define MPI_BOR hypre_MPI_BOR #define MPI_SUCCESS hypre_MPI_SUCCESS #define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE +#define MPI_STATUS_IGNORE hypre_MPI_STATUS_IGNORE #define MPI_UNDEFINED hypre_MPI_UNDEFINED #define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL @@ -1177,6 +1178,7 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_BOR 5 #define hypre_MPI_SUCCESS 0 #define hypre_MPI_STATUSES_IGNORE 0 +#define hypre_MPI_STATUS_IGNORE 0 #define hypre_MPI_UNDEFINED -9999 #define hypre_MPI_REQUEST_NULL 0 @@ -1234,6 +1236,7 @@ typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; #define hypre_MPI_BOR MPI_BOR #define hypre_MPI_SUCCESS MPI_SUCCESS #define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +#define hypre_MPI_STATUS_IGNORE MPI_STATUS_IGNORE #define hypre_MPI_UNDEFINED MPI_UNDEFINED #define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 7ea9aae56e..3e2c3fe59c 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -630,6 +630,7 @@ hypre_MPI_Allreduce( void *sendbuf, case hypre_MPI_COMPLEX: { HYPRE_Complex *crecvbuf = (HYPRE_Complex *)recvbuf; + HYPRE_Complex *csendbuf = (HYPRE_Complex *)sendbuf; for (i = 0; i < count; i++) { @@ -736,23 +737,32 @@ hypre_MPI_Op_free( hypre_MPI_Op *op ) } #if defined(HYPRE_USING_GPU) -HYPRE_Int hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) +HYPRE_Int +hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) { return (0); } 
-HYPRE_Int hypre_MPI_Info_create( hypre_MPI_Info *info ) +HYPRE_Int +hypre_MPI_Info_create( hypre_MPI_Info *info ) { return (0); } -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) +HYPRE_Int +hypre_MPI_Info_free( hypre_MPI_Info *info ) { return (0); } #endif +HYPRE_Int +hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status) +{ + return (0); +} + HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) { @@ -775,6 +785,13 @@ hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr return (0); } +HYPRE_Int +hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) +{ + *size = 0; + return (0); +} + HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, @@ -798,7 +815,38 @@ hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPI_Comm comm, + hypre_MPI_Request *requests, + hypre_MPI_Request *extra_request) { return (0); } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 497a128919..5753578f9c 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -68,6 +68,7 @@ extern "C" { #define MPI_BOR hypre_MPI_BOR #define MPI_SUCCESS hypre_MPI_SUCCESS #define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE +#define MPI_STATUS_IGNORE hypre_MPI_STATUS_IGNORE #define MPI_UNDEFINED 
hypre_MPI_UNDEFINED #define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL @@ -193,6 +194,7 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_BOR 5 #define hypre_MPI_SUCCESS 0 #define hypre_MPI_STATUSES_IGNORE 0 +#define hypre_MPI_STATUS_IGNORE 0 #define hypre_MPI_UNDEFINED -9999 #define hypre_MPI_REQUEST_NULL 0 @@ -250,6 +252,7 @@ typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; #define hypre_MPI_BOR MPI_BOR #define hypre_MPI_SUCCESS MPI_SUCCESS #define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +#define hypre_MPI_STATUS_IGNORE MPI_STATUS_IGNORE #define hypre_MPI_UNDEFINED MPI_UNDEFINED #define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL From 19d830cc5d95e1a6d3ca71322b6368a6f98548b9 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Tue, 2 Jul 2024 07:19:07 -0700 Subject: [PATCH 54/90] fix memory leaks --- src/utilities/_hypre_utilities.h | 1 + src/utilities/mpistubs.c | 20 +++++++++++++++++++- src/utilities/mpistubs.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 13299e5ab4..915794011d 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1371,6 +1371,7 @@ HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_ void *src, hypre_MemoryLocation src_location, HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 3e2c3fe59c..033d9430e4 100644 --- a/src/utilities/mpistubs.c +++ 
b/src/utilities/mpistubs.c @@ -2007,6 +2007,7 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) data += sizeof(hypre_MemoryLocation); // action! hypre_GpuProfilingPushRange("MPI-H2D/D2H"); + //hypre_printf(" copying %p <-- %p\n", dest, src); _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); hypre_GpuProfilingPopRange(); } @@ -2017,10 +2018,27 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) return hypre_error_flag; } +HYPRE_Int +hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action) +{ + if (!action) + { + return hypre_error_flag; + } + + hypre_MPI_GRequest_ActionCount(action) = 0; + hypre_MPI_GRequest_ActionDataSize(action) = 0; + hypre_TFree(hypre_MPI_GRequest_ActionData(action), HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} + hypre_int hypre_grequest_free_fn(void *extra_state) { - hypre_TFree(extra_state, HYPRE_MEMORY_HOST); + hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; + hypre_MPI_GRequestDestroyAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); return hypre_MPI_SUCCESS; } diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 5753578f9c..45825d9392 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -387,6 +387,7 @@ HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_ void *src, hypre_MemoryLocation src_location, HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); From 12483cca1bfdf127085e4d1e96e484a0ffe463a0 Mon Sep 17 00:00:00 
2001 From: Rui Peng Li Date: Tue, 2 Jul 2024 15:51:11 -0700 Subject: [PATCH 55/90] free key val --- src/utilities/_hypre_utilities.h | 1 + src/utilities/general.c | 6 ++++++ src/utilities/mpistubs.c | 13 +++++++++++++ src/utilities/mpistubs.h | 1 + 4 files changed, 21 insertions(+) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 915794011d..213df63dc6 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1394,6 +1394,7 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, diff --git a/src/utilities/general.c b/src/utilities/general.c index 9c33936e8d..9f9208d8b8 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -78,6 +78,12 @@ hypre_HandleDestroy(hypre_Handle *hypre_handle_) hypre_HandleDeviceData(hypre_handle_) = NULL; #endif + HYPRE_Int i; + for (i = 0; i < HYPRE_MAX_NUM_COMM_KEYS; i++) + { + hypre_MPI_Comm_free_keyval(&hypre_HandleMPICommKeys(hypre_handle_)[i]); + } + hypre_TFree(hypre_handle_, HYPRE_MEMORY_HOST); return hypre_error_flag; diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 033d9430e4..6bb7425527 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -785,6 +785,12 @@ hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr return (0); } +HYPRE_Int +hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval) +{ + 
return (0); +} + HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) { @@ -895,6 +901,13 @@ hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr return ierr; } +HYPRE_Int +hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval) +{ + hypre_int mpi_comm_keyval = (hypre_int) (*comm_keyval); + return MPI_Comm_free_keyval(&mpi_comm_keyval); +} + HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 45825d9392..94eb7b22c1 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -410,6 +410,7 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, From 46309e836b81a4ca5b23024e2a7406b10ae44eff Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 2 Jul 2024 22:13:04 -0700 Subject: [PATCH 56/90] code clean up --- src/parcsr_mv/_hypre_parcsr_mv.h | 4 +- src/parcsr_mv/par_csr_communication.c | 72 +++++++++++++-------------- src/parcsr_mv/par_csr_matvec.c | 4 +- src/parcsr_mv/protos.h | 4 +- src/utilities/_hypre_utilities.h | 9 ++++ src/utilities/mpistubs.c | 60 ++++++++++++++++++++++ src/utilities/mpistubs.h | 9 ++++ 7 files changed, 121 insertions(+), 41 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 9b9f2dcdfe..343b8481dc 100644 --- 
a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -1111,7 +1111,9 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, HYPRE_MemoryLocation recv_memory_location); -HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); +#define hypre_ParCSRPersistentCommHandleWait hypre_ParCSRCommHandleWait +#define hypre_ParCSRPersistentCommHandleDestroy hypre_ParCSRCommHandleDestroy + HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); #endif diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 98f8a98e6d..d2cd71eb0c 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -127,38 +127,6 @@ hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, return hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type); } -/*------------------------------------------------------------------ - * hypre_ParCSRPersistentCommHandleDestroy - *------------------------------------------------------------------*/ - -HYPRE_Int -hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) -{ - if (!comm_handle) - { - return hypre_error_flag; - } - - HYPRE_Int i; - - _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); - for (i = 0; i < hypre_ParCSRCommHandleNumExtraRequests(comm_handle); i++) - { - if 
(hypre_ParCSRCommHandleExtraRequest(comm_handle, i) != hypre_MPI_REQUEST_NULL) - { - hypre_MPI_Request_free(&hypre_ParCSRCommHandleExtraRequest(comm_handle, i)); - } - } - hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); - hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); - hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); - - return hypre_error_flag; -} /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleStart @@ -407,24 +375,54 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) HYPRE_Int hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { - hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleDestroy"); - if (!comm_handle) { return hypre_error_flag; } - hypre_ParCSRCommHandleWait(comm_handle); + HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); + + if (!persistent) + { + hypre_ParCSRCommHandleWait(comm_handle); + } _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + + + if (persistent) + { + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); + } + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + + if (persistent) + { + HYPRE_Int i; + for (i = 0; i < hypre_ParCSRCommHandleNumExtraRequests(comm_handle); i++) + { + if (hypre_ParCSRCommHandleExtraRequest(comm_handle, i) != hypre_MPI_REQUEST_NULL) + { + hypre_MPI_Request_free(&hypre_ParCSRCommHandleExtraRequest(comm_handle, i)); + } + } + } + hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); + + hypre_MPICommDeleteSendLocation(hypre_ParCSRCommHandleComm(comm_handle)); + 
hypre_MPICommDeleteRecvLocation(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeleteSendBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeleteRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeleteSendBuffer(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeleteRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); - hypre_GpuProfilingPopRange(); - return hypre_error_flag; } diff --git a/src/parcsr_mv/par_csr_matvec.c b/src/parcsr_mv/par_csr_matvec.c index ae38f43c70..ff8905de16 100644 --- a/src/parcsr_mv/par_csr_matvec.c +++ b/src/parcsr_mv/par_csr_matvec.c @@ -197,7 +197,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRCommHandleWait(persistent_comm_handle); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -464,7 +464,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRCommHandleWait(persistent_comm_handle); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 86710984bd..e79409cba3 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -385,7 +385,9 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, HYPRE_MemoryLocation recv_memory_location); -HYPRE_Int hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommHandle *comm_handle); +#define hypre_ParCSRPersistentCommHandleWait hypre_ParCSRCommHandleWait +#define hypre_ParCSRPersistentCommHandleDestroy 
hypre_ParCSRCommHandleDestroy + HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); #endif diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 213df63dc6..550fb02d20 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1397,6 +1397,7 @@ hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval); HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); @@ -1408,6 +1409,7 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); + HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); @@ -1415,6 +1417,13 @@ HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); +HYPRE_Int 
hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); + hypre_int hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 6bb7425527..c92477ba74 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -775,6 +775,12 @@ hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attrib return (0); } +HYPRE_Int +hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval) +{ + return (0); +} + HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, @@ -888,6 +894,14 @@ hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attrib return ierr; } +HYPRE_Int +hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval) +{ + hypre_int mpi_comm_keyval = (hypre_int) comm_keyval; + + return (HYPRE_Int) MPI_Comm_delete_attr(comm, mpi_comm_keyval); +} + HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, @@ -1822,6 +1836,14 @@ hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) return (location); } +HYPRE_Int +hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendLocation(handle)); + return hypre_error_flag; +} + HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { @@ -1843,6 +1865,14 @@ hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) return (location); } 
+HYPRE_Int +hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle)); + return hypre_error_flag; +} + HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { @@ -1864,6 +1894,14 @@ hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) return (location); } +HYPRE_Int +hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle)); + return hypre_error_flag; +} + HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) { @@ -1885,6 +1923,14 @@ hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) return (location); } +HYPRE_Int +hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm) +{ + hypre_Handle *handle = hypre_handle(); + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle)); + return hypre_error_flag; +} + HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void *buffer) { @@ -1905,6 +1951,13 @@ hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm) return (buffer); } +HYPRE_Int +hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm) +{ + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle())); + return hypre_error_flag; +} + HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void *buffer) { @@ -1925,6 +1978,13 @@ hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) return (buffer); } +HYPRE_Int +hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm) +{ + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle())); + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 94eb7b22c1..743210199d 100644 --- 
a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -413,6 +413,7 @@ hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval); HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); @@ -424,6 +425,7 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); + HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); @@ -431,6 +433,13 @@ HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); + hypre_int 
hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); From 7e25b17b423bae7d2cc28ea4a7d82df109707484 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 2 Jul 2024 22:16:24 -0700 Subject: [PATCH 57/90] minor change --- src/parcsr_mv/par_csr_matop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 2049ca994c..67d5ec65ec 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6353,7 +6353,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRCommHandleWait(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -6755,7 +6755,7 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRCommHandleWait(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -6961,7 +6961,7 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRCommHandleWait(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif From 2d9235ae4b3907c33c5272637a4d32b6afe74fcb Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 3 Jul 2024 09:44:51 -0700 Subject: [PATCH 58/90] minor changes --- src/parcsr_mv/_hypre_parcsr_mv.h | 2 +- src/parcsr_mv/par_csr_communication.c | 3 +++ src/parcsr_mv/par_csr_communication.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h 
index 343b8481dc..e88c2cd692 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -160,7 +160,7 @@ typedef struct _hypre_ParCSRCommPkg hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; + hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) /* temporary memory for matvec. cudaMalloc is expensive. alloc once and reuse */ diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index d2cd71eb0c..f415c9e5f2 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -413,12 +413,15 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); + /* attributes should be deleted when the communicator is being freed */ + /* hypre_MPICommDeleteSendLocation(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteRecvLocation(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteSendBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteSendBuffer(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle)); + */ hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index ca4eaacd7d..19d97f8ae4 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -144,7 +144,7 @@ typedef struct _hypre_ParCSRCommPkg hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; #if 
defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; + hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) /* temporary memory for matvec. cudaMalloc is expensive. alloc once and reuse */ From 4e768198f508ed14a5dd8bca86632f37a1c75371 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 3 Jul 2024 12:21:32 -0700 Subject: [PATCH 59/90] minor change --- src/utilities/mpistubs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index c92477ba74..8d0620ab7b 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -2080,7 +2080,12 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) data += sizeof(hypre_MemoryLocation); // action! hypre_GpuProfilingPushRange("MPI-H2D/D2H"); - //hypre_printf(" copying %p <-- %p\n", dest, src); +#if 1 + char dname[32],sname[32]; + hypre_GetMemoryLocationName(dest_location, dname); + hypre_GetMemoryLocationName(src_location, sname); + hypre_printf(" copying %s %p <-- %s %p\n", dname, dest, sname, src); +#endif _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); hypre_GpuProfilingPopRange(); } From d3ec50a4f259e19b2056073e49bb3e96b10a5775 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 3 Jul 2024 12:30:52 -0700 Subject: [PATCH 60/90] minor changes --- src/utilities/mpistubs.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 8d0620ab7b..6e7da07eb4 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1343,14 +1343,13 @@ hypre_MPI_Isend_Multiple( void *buf, void *sbuf = cbuf ? 
cbuf : buf; if (sbuf != buf) { - hypre_GpuProfilingPushRange("MPI-D2H"); - _hypre_TMemcpy(sbuf, - buf, - char, - displs[num] * data_size, - hypre_MPICommGetSendBufferLocation(comm), - hypre_MPICommGetSendLocation(comm)); - hypre_GpuProfilingPopRange(); + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(sbuf, hypre_MPICommGetSendBufferLocation(comm), + buf, hypre_MPICommGetSendLocation(comm), + displs[num] * data_size, &action); + hypre_MPI_GRequestProcessAction(action); + hypre_MPI_GRequestDestroyAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); } HYPRE_Int i; From 11cb8aada5e2ff4a8465ff92fd18e29fb8305f85 Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 3 Jul 2024 14:02:56 -0700 Subject: [PATCH 61/90] minor change --- src/parcsr_mv/par_csr_communication.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index f415c9e5f2..5a9a213e4c 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -939,9 +939,9 @@ hypre_MatvecCommPkgDestroy( hypre_ParCSRCommPkg *comm_pkg ) HYPRE_Int i; for (i = HYPRE_COMM_PKG_JOB_COMPLEX; i < NUM_OF_COMM_PKG_JOB_TYPE; ++i) { - if (comm_pkg->persistent_comm_handles[i]) + if (hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, i)) { - hypre_ParCSRPersistentCommHandleDestroy(comm_pkg->persistent_comm_handles[i]); + hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, i)); } } #endif From e687ab469ad91bfb935c21ee4b12997b6f708d6e Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 3 Jul 2024 14:03:27 -0700 Subject: [PATCH 62/90] turn off debug code --- src/utilities/mpistubs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 6e7da07eb4..ba6180d644 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -2079,7 +2079,7 @@ 
hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) data += sizeof(hypre_MemoryLocation); // action! hypre_GpuProfilingPushRange("MPI-H2D/D2H"); -#if 1 +#if 0 char dname[32],sname[32]; hypre_GetMemoryLocationName(dest_location, dname); hypre_GetMemoryLocationName(src_location, sname); From 68c4b54f0f1439ccf71e08a6e318f43560a0e97b Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 3 Jul 2024 14:06:01 -0700 Subject: [PATCH 63/90] add persistent mpi to GPU matvec --- src/parcsr_mv/par_csr_matvec_device.c | 48 +++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/parcsr_mv/par_csr_matvec_device.c b/src/parcsr_mv/par_csr_matvec_device.c index 1667a5422d..ef06ea9bdf 100644 --- a/src/parcsr_mv/par_csr_matvec_device.c +++ b/src/parcsr_mv/par_csr_matvec_device.c @@ -143,31 +143,37 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] -= hypre_MPI_Wtime(); #endif +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + x_tmp_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); +#else /*--------------------------------------------------------------------- * Allocate or reuse receive data buffer for x_tmp *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgTmpData(comm_pkg)) { hypre_ParCSRCommPkgTmpData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, num_cols_offd * num_vectors, HYPRE_MEMORY_DEVICE); } - hypre_VectorData(x_tmp) = x_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); - hypre_SeqVectorSetDataOwner(x_tmp, 0); - hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_DEVICE); - /*--------------------------------------------------------------------- * Allocate or reuse send data buffer 
*--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgBufData(comm_pkg)) { hypre_ParCSRCommPkgBufData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, send_map_num_elmts, HYPRE_MEMORY_DEVICE); } + x_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); x_buf_data = hypre_ParCSRCommPkgBufData(comm_pkg); +#endif + + hypre_VectorData(x_tmp) = x_tmp_data; + hypre_SeqVectorSetDataOwner(x_tmp, 0); + hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_DEVICE); /* The assert is because this code has been tested for column-wise vector storage only. */ hypre_assert(idxstride == 1); @@ -223,12 +229,16 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, #endif /* Non-blocking communication starts */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, x_buf_data, HYPRE_MEMORY_DEVICE, x_tmp_data); - /* Non-blocking communication ends */ hypre_ParCSRCommHandleDestroy(comm_handle); +#else + hypre_ParCSRPersistentCommHandleStart(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); +#endif #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_HALO_EXCHANGE] += hypre_MPI_Wtime(); @@ -389,31 +399,37 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] -= hypre_MPI_Wtime(); #endif +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + y_tmp_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); + y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); +#else /*--------------------------------------------------------------------- * Allocate or reuse send data buffer for y_tmp *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgTmpData(comm_pkg)) { hypre_ParCSRCommPkgTmpData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, num_cols_offd 
* num_vectors, HYPRE_MEMORY_DEVICE); } - hypre_VectorData(y_tmp) = y_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); - hypre_SeqVectorSetDataOwner(y_tmp, 0); - hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_DEVICE); - /*--------------------------------------------------------------------- * Allocate receive data buffer *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgBufData(comm_pkg)) { hypre_ParCSRCommPkgBufData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, send_map_num_elmts, HYPRE_MEMORY_DEVICE); } + y_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); y_buf_data = hypre_ParCSRCommPkgBufData(comm_pkg); +#endif + + hypre_VectorData(y_tmp) = y_tmp_data; + hypre_SeqVectorSetDataOwner(y_tmp, 0); + hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_DEVICE); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime(); @@ -455,12 +471,16 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, #endif /* Non-blocking communication starts */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, y_tmp_data, HYPRE_MEMORY_DEVICE, y_buf_data ); - /* Non-blocking communication ends */ hypre_ParCSRCommHandleDestroy(comm_handle); +#else + hypre_ParCSRPersistentCommHandleStart(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); +#endif #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_HALO_EXCHANGE] += hypre_MPI_Wtime(); From a1f4899a1cff9f12d4d718194c190cb4df4a7b22 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 23 Jan 2025 17:10:56 -0800 Subject: [PATCH 64/90] fix conflict --- src/utilities/_hypre_utilities.h | 170 ++++--------------------------- 1 file changed, 18 insertions(+), 152 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 148ec8abf1..6800b65864 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -47,6 +47,8 @@ 
typedef struct hypre_DeviceData hypre_DeviceData; typedef void (*GPUMallocFunc)(void **, size_t); typedef void (*GPUMfreeFunc)(void *); +#define HYPRE_MAX_NUM_COMM_KEYS 8 + typedef struct { HYPRE_Int log_level; @@ -60,11 +62,15 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; - /* GPU MPI */ + /* MPI */ + HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif + hypre_MemoryLocation mpi_host_buffer_location; + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -108,7 +114,17 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + +#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) +#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) +#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) +#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) +#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) +#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) +#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) + #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) @@ -2031,157 +2047,6 @@ extern "C++" * SPDX-License-Identifier: 
(Apache-2.0 OR MIT) ******************************************************************************/ -<<<<<<< HEAD -/****************************************************************************** - * - * General structures and values - * - *****************************************************************************/ - -#ifndef HYPRE_HANDLE_H -#define HYPRE_HANDLE_H - -struct hypre_DeviceData; -typedef struct hypre_DeviceData hypre_DeviceData; - -#define HYPRE_MAX_NUM_COMM_KEYS 8 - -typedef struct -{ - HYPRE_Int hypre_error; - HYPRE_MemoryLocation memory_location; - HYPRE_ExecutionPolicy default_exec_policy; - - /* the device buffers needed to do MPI communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - - /* MPI */ - HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - HYPRE_Int use_gpu_aware_mpi; -#endif - - hypre_MemoryLocation mpi_host_buffer_location; - -#if defined(HYPRE_USING_GPU) - hypre_DeviceData *device_data; - HYPRE_Int device_gs_method; /* device G-S options */ -#endif - - /* user malloc/free function pointers */ - GPUMallocFunc user_device_malloc; - GPUMfreeFunc user_device_free; - -#if defined(HYPRE_USING_UMPIRE) - char umpire_device_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_um_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_host_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_pinned_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - size_t umpire_device_pool_size; - size_t umpire_um_pool_size; - size_t umpire_host_pool_size; - size_t umpire_pinned_pool_size; - size_t umpire_block_size; - HYPRE_Int own_umpire_device_pool; - HYPRE_Int own_umpire_um_pool; - HYPRE_Int own_umpire_host_pool; - HYPRE_Int own_umpire_pinned_pool; - umpire_resourcemanager umpire_rm; -#endif - -#if defined(HYPRE_USING_MAGMA) - magma_queue_t 
magma_queue; -#endif -} hypre_Handle; - -/* accessor macros to hypre_Handle */ -#define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) -#define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) - -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - -#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) -#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) - -#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) -#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) -#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) -#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) -#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) -#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) -#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) - -#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) -#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) - -#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCusparseHandle(hypre_handle) 
hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxBin(hypre_handle) hypre_DeviceDataCubMaxBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxCachedBytes(hypre_handle) hypre_DeviceDataCubMaxCachedBytes(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStreamNum(hypre_handle) hypre_DeviceDataComputeStreamNum(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleReduceBuffer(hypre_handle) hypre_DeviceDataReduceBuffer(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmUseVendor(hypre_handle) hypre_DeviceDataSpgemmUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpMVUseVendor(hypre_handle) 
hypre_DeviceDataSpMVUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpTransUseVendor(hypre_handle) hypre_DeviceDataSpTransUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmAlgorithm(hypre_handle) hypre_DeviceDataSpgemmAlgorithm(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBinned(hypre_handle) hypre_DeviceDataSpgemmBinned(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmNumBin(hypre_handle) hypre_DeviceDataSpgemmNumBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmHighestBin(hypre_handle) hypre_DeviceDataSpgemmHighestBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBlockNumDim(hypre_handle) hypre_DeviceDataSpgemmBlockNumDim(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMethod(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMethod(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateNsamples(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMultFactor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceAllocator(hypre_handle) hypre_DeviceDataDeviceAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleUseGpuRand(hypre_handle) hypre_DeviceDataUseGpuRand(hypre_HandleDeviceData(hypre_handle)) - -#define hypre_HandleUserDeviceMalloc(hypre_handle) ((hypre_handle) -> user_device_malloc) -#define hypre_HandleUserDeviceMfree(hypre_handle) ((hypre_handle) -> user_device_free) - -#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) -#define hypre_HandleUmpireDevicePoolSize(hypre_handle) ((hypre_handle) -> umpire_device_pool_size) -#define hypre_HandleUmpireUMPoolSize(hypre_handle) ((hypre_handle) -> umpire_um_pool_size) -#define hypre_HandleUmpireHostPoolSize(hypre_handle) ((hypre_handle) -> 
umpire_host_pool_size) -#define hypre_HandleUmpirePinnedPoolSize(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_size) -#define hypre_HandleUmpireBlockSize(hypre_handle) ((hypre_handle) -> umpire_block_size) -#define hypre_HandleUmpireDevicePoolName(hypre_handle) ((hypre_handle) -> umpire_device_pool_name) -#define hypre_HandleUmpireUMPoolName(hypre_handle) ((hypre_handle) -> umpire_um_pool_name) -#define hypre_HandleUmpireHostPoolName(hypre_handle) ((hypre_handle) -> umpire_host_pool_name) -#define hypre_HandleUmpirePinnedPoolName(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_name) -#define hypre_HandleOwnUmpireDevicePool(hypre_handle) ((hypre_handle) -> own_umpire_device_pool) -#define hypre_HandleOwnUmpireUMPool(hypre_handle) ((hypre_handle) -> own_umpire_um_pool) -#define hypre_HandleOwnUmpireHostPool(hypre_handle) ((hypre_handle) -> own_umpire_host_pool) -#define hypre_HandleOwnUmpirePinnedPool(hypre_handle) ((hypre_handle) -> own_umpire_pinned_pool) - -#define hypre_HandleMagmaQueue(hypre_handle) ((hypre_handle) -> magma_queue) - -#endif -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -======= ->>>>>>> b2726f25ee172d671deacf019108df7981b561dd #ifndef HYPRE_GSELIM_H #define HYPRE_GSELIM_H @@ -4207,3 +4072,4 @@ HYPRE_Int hypre_mm_read_mtx_crd_size(FILE *f, HYPRE_Int *M, HYPRE_Int *N, HYPRE_ #endif #endif + From 5809123ccc09c2fd08c039430518632bcde8420f Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Wed, 5 Feb 2025 14:36:29 -0800 Subject: [PATCH 65/90] temp update --- src/struct_mv/struct_communication.c | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index a6ea8a8d7c..0e3394de56 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -1032,6 +1032,34 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * post receives and initiate sends *--------------------------------------------------------------------*/ +#if 1 + HYPRE_Int persistent = 0; + HYPRE_Int num_extra_requests = persistent ? 
2 : 1; + hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); + { + HYPRE_Int displs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + HYPRE_Int procs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + for (i = 0; i < num_recvs; i++) + { + comm_type = hypre_CommPkgRecvType(comm_pkg, i); + displs[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); + procs[i] = hypre_CommTypeProc(comm_type); + } + hypre_MPI_Irecv_Multiple(recv_buffers_mpi[0], + num_recvs, + displs, + NULL, + hypre_MPI_BYTE, + procs, + tag, + comm, + requests, + extra_requests); + hypre_TFree(displs, HYPRE_MEMORY_HOST); + hypre_TFree(procs, HYPRE_MEMORY_HOST); + } + hypre_TFree(extra_requests, HYPRE_MEMORY_HOST); +#else j = 0; for (i = 0; i < num_recvs; i++) { @@ -1047,6 +1075,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_CommPkgRecvBufsize(comm_pkg) -= size; } } +#endif for (i = 0; i < num_sends; i++) { From b9764376b26aeac687d041a6c4ce985b7d2f4728 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 7 Feb 2025 14:08:31 -0800 Subject: [PATCH 66/90] various changes --- src/struct_mv/_hypre_struct_mv.h | 46 +-- src/struct_mv/struct_communication.c | 281 ++++++++++------- src/struct_mv/struct_communication.h | 46 +-- src/utilities/_hypre_utilities.h | 435 +++++++++++++-------------- src/utilities/headers | 2 +- src/utilities/memory.h | 59 ---- src/utilities/mpistubs.c | 14 +- src/utilities/protos.h | 54 ++++ 8 files changed, 508 insertions(+), 429 deletions(-) diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index fdb019e167..a346ddf40f 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -929,19 +929,21 @@ typedef struct hypre_CommPkg_struct typedef struct hypre_CommHandle_struct { - hypre_CommPkg *comm_pkg; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; + hypre_CommPkg *comm_pkg; + HYPRE_Complex *send_data; + 
HYPRE_Complex *recv_data; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; + void *send_buffers_mpi; + void *recv_buffers_mpi; + hypre_MemoryLocation send_buffers_mpi_location; + hypre_MemoryLocation recv_buffers_mpi_location; /* set = 0, add = 1 */ HYPRE_Int action; @@ -1043,17 +1045,19 @@ typedef struct hypre_CommHandle_struct * Accessor macros: hypre_CommHandle *--------------------------------------------------------------------------*/ -#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) -#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) -#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) -#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) +#define 
hypre_CommHandleStatus(comm_handle) (comm_handle -> status) +#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) +#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) +#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) +#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) +#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) +#define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) #endif /****************************************************************************** diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 0e3394de56..cd130d256b 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -805,6 +805,47 @@ hypre_StructCommunicationReleaseBuffer(HYPRE_Complex *buffer, return hypre_error_flag; } +HYPRE_Int +hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location, + HYPRE_Int num_send_elems, + HYPRE_Int num_recv_elems, + HYPRE_Int size_of_elem, + hypre_CommHandle *comm_handle ) +{ + hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + + if (!hypre_CommHandleSendBuffersMPI(comm_handle) && hypre_MPINeedHostBuffer(send_memory_alocation)) + { + hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); + hypre_CommHandleSendBuffersMPILocation(comm_handle) = location; + hypre_CommHandleSendBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, + location); + } + + if 
(!hypre_CommHandleRecvBuffersMPI(comm_handle) && hypre_MPINeedHostBuffer(recv_memory_alocation)) + { + hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); + hypre_CommHandleRecvBuffersMPILocation(comm_handle) = location; + hypre_CommHandleRecvBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, + location); + } + + hypre_MPICommSetSendLocation(hypre_CommPkgComm(comm_pkg), send_memory_alocation); + hypre_MPICommSetSendBuffer(hypre_CommPkgComm(comm_pkg), hypre_CommHandleSendBuffersMPI(comm_handle)); + hypre_MPICommSetSendBufferLocation(hypre_CommPkgComm(comm_pkg), + hypre_CommHandleSendBuffersMPILocation(comm_handle)); + + hypre_MPICommSetRecvLocation(hypre_CommPkgComm(comm_pkg), recv_memory_alocation); + hypre_MPICommSetRecvBuffer(hypre_CommPkgComm(comm_pkg), hypre_CommHandleRecvBuffersMPI(comm_handle)); + hypre_MPICommSetRecvBufferLocation(hypre_CommPkgComm(comm_pkg), + hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + + return hypre_error_flag; +} + /*-------------------------------------------------------------------------- * Initialize a non-blocking communication exchange. 
* @@ -824,7 +865,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int tag, hypre_CommHandle **comm_handle_ptr ) { - hypre_CommHandle *comm_handle; + hypre_CommHandle *comm_handle = hypre_TAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); @@ -838,8 +879,8 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Complex **send_buffers; HYPRE_Complex **recv_buffers; - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; + HYPRE_Complex *send_buffers_mpi = NULL; + HYPRE_Complex *recv_buffers_mpi = NULL; hypre_CommType *comm_type, *from_type, *to_type; hypre_CommEntryType *comm_entry; @@ -857,6 +898,9 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); HYPRE_MemoryLocation memory_location_mpi = memory_location; + HYPRE_Int persistent = 0; + + hypre_CommHandleCommPkg(comm_handle) = comm_pkg; /*-------------------------------------------------------------------- * allocate requests and status @@ -960,6 +1004,48 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } } + + for (i = 0; i < num_sends; i++) + { + comm_type = hypre_CommPkgSendType(comm_pkg, i); + num_entries = hypre_CommTypeNumEntries(comm_type); + + if ( hypre_CommPkgFirstComm(comm_pkg) ) + { + qptr = (HYPRE_Int *) (send_buffers[0] + (send_buffers[i] - send_buffers[0])); + hypre_TMemcpy(qptr, &num_entries, + HYPRE_Int, 1, memory_location_mpi, HYPRE_MEMORY_HOST); + qptr ++; + hypre_TMemcpy(qptr, hypre_CommTypeRemBoxnums(comm_type), + HYPRE_Int, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + qptr += num_entries; + hypre_TMemcpy(qptr, hypre_CommTypeRemBoxes(comm_type), + hypre_Box, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + hypre_CommTypeRemBoxnums(comm_type) = NULL; + hypre_CommTypeRemBoxes(comm_type) = NULL; + } + } + + { + memory_location_mpi = 
HYPRE_MEMORY_HOST; + + if (num_sends > 0) + { + size = hypre_CommPkgSendBufsize(comm_pkg); + send_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + hypre_TMemcpy(send_buffers_mpi, send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, + memory_location); + } + + if (num_recvs > 0) + { + size = hypre_CommPkgRecvBufsize(comm_pkg); + recv_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + } + } + + +#if 0 #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) { @@ -968,130 +1054,127 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, #if defined(HYPRE_USING_GPU) hypre_ForceSyncComputeStream(); #endif - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; + if (num_sends > 0) { send_buffers_mpi = send_buffers[0]; } + if (num_recvs > 0) { recv_buffers_mpi = recv_buffers[0]; } } else { memory_location_mpi = HYPRE_MEMORY_HOST; - send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); if (num_sends > 0) { size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_sends; i++) - { - send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); - } - hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, + send_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + hypre_TMemcpy(send_buffers_mpi, send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, memory_location); } - recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); if (num_recvs > 0) { size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_recvs; i++) - { - recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); - } + 
recv_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); } } } else #endif { - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; - } - - for (i = 0; i < num_sends; i++) - { - comm_type = hypre_CommPkgSendType(comm_pkg, i); - num_entries = hypre_CommTypeNumEntries(comm_type); - - if ( hypre_CommPkgFirstComm(comm_pkg) ) - { - qptr = (HYPRE_Int *) send_buffers_mpi[i]; - hypre_TMemcpy(qptr, &num_entries, - HYPRE_Int, 1, memory_location_mpi, HYPRE_MEMORY_HOST); - qptr ++; - hypre_TMemcpy(qptr, hypre_CommTypeRemBoxnums(comm_type), - HYPRE_Int, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); - qptr += num_entries; - hypre_TMemcpy(qptr, hypre_CommTypeRemBoxes(comm_type), - hypre_Box, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); - hypre_CommTypeRemBoxnums(comm_type) = NULL; - hypre_CommTypeRemBoxes(comm_type) = NULL; - } + if (num_sends > 0) { send_buffers_mpi = send_buffers[0]; } + if (num_recvs > 0) { recv_buffers_mpi = recv_buffers[0]; } } +#endif /*-------------------------------------------------------------------- * post receives and initiate sends *--------------------------------------------------------------------*/ -#if 1 - HYPRE_Int persistent = 0; - HYPRE_Int num_extra_requests = persistent ? 
2 : 1; - hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); + HYPRE_Int *displs_recv = NULL; + HYPRE_Int *procs_recv = NULL; + HYPRE_Int *counts_recv = NULL; + HYPRE_Int *displs_send = NULL; + HYPRE_Int *procs_send = NULL; + HYPRE_Int *counts_send = NULL; + HYPRE_Int num_extra_requests = 0; + hypre_MPI_Request *extra_requests = NULL; + + if (num_recvs) { - HYPRE_Int displs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - HYPRE_Int procs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + displs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + procs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + counts_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); - displs[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); - procs[i] = hypre_CommTypeProc(comm_type); + counts_recv[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); + displs_recv[i] = (recv_buffers[i] - recv_buffers[0]) * sizeof(HYPRE_Complex); + procs_recv[i] = hypre_CommTypeProc(comm_type); } - hypre_MPI_Irecv_Multiple(recv_buffers_mpi[0], - num_recvs, - displs, - NULL, - hypre_MPI_BYTE, - procs, - tag, - comm, - requests, - extra_requests); - hypre_TFree(displs, HYPRE_MEMORY_HOST); - hypre_TFree(procs, HYPRE_MEMORY_HOST); + num_extra_requests = persistent ? 
2 : 1; + extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); } - hypre_TFree(extra_requests, HYPRE_MEMORY_HOST); -#else - j = 0; - for (i = 0; i < num_recvs; i++) + + if (num_sends) { - comm_type = hypre_CommPkgRecvType(comm_pkg, i); - hypre_MPI_Irecv(recv_buffers_mpi[i], - hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), - hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); - if ( hypre_CommPkgFirstComm(comm_pkg) ) + displs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + procs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + counts_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + for (i = 0; i < num_sends; i++) + { + comm_type = hypre_CommPkgSendType(comm_pkg, i); + counts_send[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); + displs_send[i] = (send_buffers[i] - send_buffers[0]) * sizeof(HYPRE_Complex); + procs_send[i] = hypre_CommTypeProc(comm_type); + } + } + + hypre_MPI_Irecv_Multiple(recv_buffers_mpi, + num_recvs, + displs_recv, + counts_recv, + hypre_MPI_BYTE, + procs_recv, + tag, + comm, + requests, + extra_requests); + + hypre_MPI_Isend_Multiple(send_buffers_mpi, + num_sends, + displs_send, + counts_send, + hypre_MPI_BYTE, + procs_send, + tag, + comm, + requests + num_recvs); + + if ( hypre_CommPkgFirstComm(comm_pkg) ) + { + for (i = 0; i < num_recvs; i++) { + comm_type = hypre_CommPkgRecvType(comm_pkg, i); size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgRecvBufsize(comm_pkg) -= size; } - } -#endif - for (i = 0; i < num_sends; i++) - { - comm_type = hypre_CommPkgSendType(comm_pkg, i); - hypre_MPI_Isend(send_buffers_mpi[i], - hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), - hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); - if ( hypre_CommPkgFirstComm(comm_pkg) ) + for (i = 0; i < num_sends; i++) { + comm_type = 
hypre_CommPkgSendType(comm_pkg, i); size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgSendBufsize(comm_pkg) -= size; } } + hypre_TFree(displs_recv, HYPRE_MEMORY_HOST); + hypre_TFree(counts_recv, HYPRE_MEMORY_HOST); + hypre_TFree(procs_recv, HYPRE_MEMORY_HOST); + hypre_TFree(displs_send, HYPRE_MEMORY_HOST); + hypre_TFree(counts_send, HYPRE_MEMORY_HOST); + hypre_TFree(procs_send, HYPRE_MEMORY_HOST); + hypre_TFree(extra_requests, HYPRE_MEMORY_HOST); + /*-------------------------------------------------------------------- * set up CopyToType and exchange local data *--------------------------------------------------------------------*/ @@ -1123,9 +1206,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * set up comm_handle and return *--------------------------------------------------------------------*/ - comm_handle = hypre_TAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); - - hypre_CommHandleCommPkg(comm_handle) = comm_pkg; hypre_CommHandleSendData(comm_handle) = send_data; hypre_CommHandleRecvData(comm_handle) = recv_data; hypre_CommHandleNumRequests(comm_handle) = num_requests; @@ -1156,8 +1236,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Complex **send_buffers_mpi = hypre_CommHandleSendBuffersMPI(comm_handle); - HYPRE_Complex **recv_buffers_mpi = hypre_CommHandleRecvBuffersMPI(comm_handle); + HYPRE_Complex *send_buffers_mpi = hypre_CommHandleSendBuffersMPI(comm_handle); + HYPRE_Complex *recv_buffers_mpi = hypre_CommHandleRecvBuffersMPI(comm_handle); HYPRE_Int action = hypre_CommHandleAction(comm_handle); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); @@ -1216,7 +1296,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { comm_type = 
hypre_CommPkgRecvType(comm_pkg, i); - qptr = (HYPRE_Int *) recv_buffers_mpi[i]; + qptr = (HYPRE_Int *) (recv_buffers_mpi + (recv_buffers[i] - recv_buffers[0])); hypre_TMemcpy(&hypre_CommTypeNumEntries(comm_type), qptr, HYPRE_Int, 1, HYPRE_MEMORY_HOST, memory_location_mpi); @@ -1237,7 +1317,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) num_entries = hypre_CommTypeNumEntries(comm_type); ct_entries += num_entries; - qptr = (HYPRE_Int *) recv_buffers_mpi[i]; + qptr = (HYPRE_Int *) (recv_buffers_mpi + (recv_buffers[i] - recv_buffers[0])); qptr++; /* Set boxnums and boxes from MPI recv buffer */ @@ -1281,18 +1361,15 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) *--------------------------------------------------------------------*/ /* Note: hypre_CommPkgRecvBufsize is different in the first comm */ - if (recv_buffers != recv_buffers_mpi) + if (num_recvs > 0 && recv_buffers[0] != recv_buffers_mpi) { - if (num_recvs > 0) - { - HYPRE_Int recv_buf_size; + HYPRE_Int recv_buf_size; - recv_buf_size = hypre_CommPkgFirstComm(comm_pkg) ? hypre_CommPkgRecvBufsizeFirstComm(comm_pkg) : - hypre_CommPkgRecvBufsize(comm_pkg); + recv_buf_size = hypre_CommPkgFirstComm(comm_pkg) ? 
hypre_CommPkgRecvBufsizeFirstComm(comm_pkg) : + hypre_CommPkgRecvBufsize(comm_pkg); - hypre_TMemcpy(recv_buffers[0], recv_buffers_mpi[0], HYPRE_Complex, recv_buf_size, - memory_location, memory_location_mpi); - } + hypre_TMemcpy(recv_buffers[0], recv_buffers_mpi, HYPRE_Complex, recv_buf_size, + memory_location, memory_location_mpi); } for (i = 0; i < num_recvs; i++) @@ -1369,15 +1446,13 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); - if (send_buffers != send_buffers_mpi) + if (num_sends > 0 && send_buffers[0] != send_buffers_mpi) { - hypre_TFree(send_buffers_mpi[0], memory_location_mpi); - hypre_TFree(send_buffers_mpi, HYPRE_MEMORY_HOST); + hypre_TFree(send_buffers_mpi, memory_location_mpi); } - if (recv_buffers != recv_buffers_mpi) + if (num_recvs > 0 && recv_buffers[0] != recv_buffers_mpi) { - hypre_TFree(recv_buffers_mpi[0], memory_location_mpi); - hypre_TFree(recv_buffers_mpi, HYPRE_MEMORY_HOST); + hypre_TFree(recv_buffers_mpi, memory_location_mpi); } hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index fa5ddf1af7..9b32e7213e 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -131,19 +131,21 @@ typedef struct hypre_CommPkg_struct typedef struct hypre_CommHandle_struct { - hypre_CommPkg *comm_pkg; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; + hypre_CommPkg *comm_pkg; + HYPRE_Complex *send_data; + HYPRE_Complex *recv_data; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; + void *send_buffers_mpi; + void *recv_buffers_mpi; + hypre_MemoryLocation 
send_buffers_mpi_location; + hypre_MemoryLocation recv_buffers_mpi_location; /* set = 0, add = 1 */ HYPRE_Int action; @@ -245,16 +247,18 @@ typedef struct hypre_CommHandle_struct * Accessor macros: hypre_CommHandle *--------------------------------------------------------------------------*/ -#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) -#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) -#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) -#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) +#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) +#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) +#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) +#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) +#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define 
hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) +#define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) #endif diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 6800b65864..278fea5f3f 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -14,170 +14,6 @@ extern "C" { #endif -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -/****************************************************************************** - * - * General structures and values - * - *****************************************************************************/ - -#ifndef HYPRE_HANDLE_H -#define HYPRE_HANDLE_H - -#if defined(HYPRE_USING_UMPIRE) -#include "umpire/config.hpp" -#if UMPIRE_VERSION_MAJOR >= 2022 -#include "umpire/interface/c_fortran/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool -#else -#include "umpire/interface/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool -#endif /* UMPIRE_VERSION_MAJOR >= 2022 */ -#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 -#endif /* defined(HYPRE_USING_UMPIRE) */ - -struct hypre_DeviceData; -typedef struct hypre_DeviceData hypre_DeviceData; -typedef void (*GPUMallocFunc)(void **, size_t); -typedef void (*GPUMfreeFunc)(void *); - -#define HYPRE_MAX_NUM_COMM_KEYS 8 - -typedef struct -{ - HYPRE_Int log_level; - HYPRE_Int hypre_error; - HYPRE_MemoryLocation memory_location; - HYPRE_ExecutionPolicy default_exec_policy; - - /* the device buffers needed to do MPI 
communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - - /* MPI */ - HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - HYPRE_Int use_gpu_aware_mpi; -#endif - - hypre_MemoryLocation mpi_host_buffer_location; - -#if defined(HYPRE_USING_GPU) - hypre_DeviceData *device_data; - HYPRE_Int device_gs_method; /* device G-S options */ -#endif - - /* user malloc/free function pointers */ - GPUMallocFunc user_device_malloc; - GPUMfreeFunc user_device_free; - -#if defined(HYPRE_USING_UMPIRE) - char umpire_device_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_um_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_host_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_pinned_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - size_t umpire_device_pool_size; - size_t umpire_um_pool_size; - size_t umpire_host_pool_size; - size_t umpire_pinned_pool_size; - size_t umpire_block_size; - HYPRE_Int own_umpire_device_pool; - HYPRE_Int own_umpire_um_pool; - HYPRE_Int own_umpire_host_pool; - HYPRE_Int own_umpire_pinned_pool; - umpire_resourcemanager umpire_rm; -#endif - -#if defined(HYPRE_USING_MAGMA) - magma_queue_t magma_queue; -#endif -} hypre_Handle; - -/* accessor macros to hypre_Handle */ -#define hypre_HandleLogLevel(hypre_handle) ((hypre_handle) -> log_level) -#define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) -#define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) - -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define 
hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - -#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) -#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) - -#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) -#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) -#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) -#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) -#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) -#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) -#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) - -#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) -#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) - -#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxBin(hypre_handle) 
hypre_DeviceDataCubMaxBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxCachedBytes(hypre_handle) hypre_DeviceDataCubMaxCachedBytes(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStreamNum(hypre_handle) hypre_DeviceDataComputeStreamNum(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleReduceBuffer(hypre_handle) hypre_DeviceDataReduceBuffer(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmUseVendor(hypre_handle) hypre_DeviceDataSpgemmUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpMVUseVendor(hypre_handle) hypre_DeviceDataSpMVUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpTransUseVendor(hypre_handle) hypre_DeviceDataSpTransUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmAlgorithm(hypre_handle) hypre_DeviceDataSpgemmAlgorithm(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBinned(hypre_handle) hypre_DeviceDataSpgemmBinned(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmNumBin(hypre_handle) hypre_DeviceDataSpgemmNumBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmHighestBin(hypre_handle) 
hypre_DeviceDataSpgemmHighestBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBlockNumDim(hypre_handle) hypre_DeviceDataSpgemmBlockNumDim(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMethod(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMethod(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateNsamples(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMultFactor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceAllocator(hypre_handle) hypre_DeviceDataDeviceAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleUseGpuRand(hypre_handle) hypre_DeviceDataUseGpuRand(hypre_HandleDeviceData(hypre_handle)) - -#define hypre_HandleUserDeviceMalloc(hypre_handle) ((hypre_handle) -> user_device_malloc) -#define hypre_HandleUserDeviceMfree(hypre_handle) ((hypre_handle) -> user_device_free) - -#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) -#define hypre_HandleUmpireDevicePoolSize(hypre_handle) ((hypre_handle) -> umpire_device_pool_size) -#define hypre_HandleUmpireUMPoolSize(hypre_handle) ((hypre_handle) -> umpire_um_pool_size) -#define hypre_HandleUmpireHostPoolSize(hypre_handle) ((hypre_handle) -> umpire_host_pool_size) -#define hypre_HandleUmpirePinnedPoolSize(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_size) -#define hypre_HandleUmpireBlockSize(hypre_handle) ((hypre_handle) -> umpire_block_size) -#define hypre_HandleUmpireDevicePoolName(hypre_handle) ((hypre_handle) -> umpire_device_pool_name) -#define hypre_HandleUmpireUMPoolName(hypre_handle) ((hypre_handle) -> umpire_um_pool_name) -#define hypre_HandleUmpireHostPoolName(hypre_handle) ((hypre_handle) -> umpire_host_pool_name) -#define hypre_HandleUmpirePinnedPoolName(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_name) 
-#define hypre_HandleOwnUmpireDevicePool(hypre_handle) ((hypre_handle) -> own_umpire_device_pool) -#define hypre_HandleOwnUmpireUMPool(hypre_handle) ((hypre_handle) -> own_umpire_um_pool) -#define hypre_HandleOwnUmpireHostPool(hypre_handle) ((hypre_handle) -> own_umpire_host_pool) -#define hypre_HandleOwnUmpirePinnedPool(hypre_handle) ((hypre_handle) -> own_umpire_pinned_pool) - -#define hypre_HandleMagmaQueue(hypre_handle) ((hypre_handle) -> magma_queue) - -#endif /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -901,70 +737,175 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ +#ifdef __cplusplus +} +#endif -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ +#endif /* hypre_MEMORY_HEADER */ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ -/* memory.c */ -HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, - char *memory_location_name); -void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); -void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); -void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); -void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); -void hypre_Free(void *ptr, HYPRE_MemoryLocation location); -void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, - HYPRE_MemoryLocation loc_src); -void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); +/****************************************************************************** + * + * General structures and values + * + *****************************************************************************/ -void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); -void _hypre_Free(void *ptr, hypre_MemoryLocation location); -void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, - hypre_MemoryLocation loc_src); +#ifndef HYPRE_HANDLE_H +#define HYPRE_HANDLE_H -HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); -HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, - HYPRE_MemoryLocation location2); +#if defined(HYPRE_USING_UMPIRE) +#include "umpire/config.hpp" +#if UMPIRE_VERSION_MAJOR >= 2022 +#include "umpire/interface/c_fortran/umpire.h" +#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool +#else +#include "umpire/interface/umpire.h" 
+#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool +#endif /* UMPIRE_VERSION_MAJOR >= 2022 */ +#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 +#endif /* defined(HYPRE_USING_UMPIRE) */ -HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); -HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, - size_t max_cached_bytes ); -HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); -void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); -HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); -HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); -HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); -HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); -HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, - const char *function, HYPRE_Int line); -#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ - hypre_HandleLogLevel(hypre_handle()),\ - __func__,\ - __LINE__) -/* memory_dmalloc.c */ -HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); -HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); -char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); -char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); -char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); -void hypre_FreeDML( char *ptr, char *file, 
HYPRE_Int line ); +struct hypre_DeviceData; +typedef struct hypre_DeviceData hypre_DeviceData; +typedef void (*GPUMallocFunc)(void **, size_t); +typedef void (*GPUMfreeFunc)(void *); -#ifdef __cplusplus -} +#define HYPRE_MAX_NUM_COMM_KEYS 8 + +typedef struct +{ + HYPRE_Int log_level; + HYPRE_Int hypre_error; + HYPRE_MemoryLocation memory_location; + HYPRE_ExecutionPolicy default_exec_policy; + + /* the device buffers needed to do MPI communication for struct comm */ + HYPRE_Complex *struct_comm_recv_buffer; + HYPRE_Complex *struct_comm_send_buffer; + HYPRE_Int struct_comm_recv_buffer_size; + HYPRE_Int struct_comm_send_buffer_size; + + /* MPI */ + HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; #endif -#endif /* hypre_MEMORY_HEADER */ + hypre_MemoryLocation mpi_host_buffer_location; + +#if defined(HYPRE_USING_GPU) + hypre_DeviceData *device_data; + HYPRE_Int device_gs_method; /* device G-S options */ +#endif + + /* user malloc/free function pointers */ + GPUMallocFunc user_device_malloc; + GPUMfreeFunc user_device_free; + +#if defined(HYPRE_USING_UMPIRE) + char umpire_device_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_um_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_host_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_pinned_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + size_t umpire_device_pool_size; + size_t umpire_um_pool_size; + size_t umpire_host_pool_size; + size_t umpire_pinned_pool_size; + size_t umpire_block_size; + HYPRE_Int own_umpire_device_pool; + HYPRE_Int own_umpire_um_pool; + HYPRE_Int own_umpire_host_pool; + HYPRE_Int own_umpire_pinned_pool; + umpire_resourcemanager umpire_rm; +#endif + +#if defined(HYPRE_USING_MAGMA) + magma_queue_t magma_queue; +#endif +} hypre_Handle; + +/* accessor macros to hypre_Handle */ +#define hypre_HandleLogLevel(hypre_handle) ((hypre_handle) -> log_level) +#define 
hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) +#define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) + +#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) +#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) +#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) +#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) + +#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) +#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + +#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) +#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) +#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) +#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) +#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) +#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) +#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) + +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) + +#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleVendorSolverHandle(hypre_handle) 
hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMaxBin(hypre_handle) hypre_DeviceDataCubMaxBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMaxCachedBytes(hypre_handle) hypre_DeviceDataCubMaxCachedBytes(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleComputeStreamNum(hypre_handle) hypre_DeviceDataComputeStreamNum(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleReduceBuffer(hypre_handle) hypre_DeviceDataReduceBuffer(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmUseVendor(hypre_handle) hypre_DeviceDataSpgemmUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpMVUseVendor(hypre_handle) hypre_DeviceDataSpMVUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpTransUseVendor(hypre_handle) 
hypre_DeviceDataSpTransUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmAlgorithm(hypre_handle) hypre_DeviceDataSpgemmAlgorithm(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmBinned(hypre_handle) hypre_DeviceDataSpgemmBinned(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmNumBin(hypre_handle) hypre_DeviceDataSpgemmNumBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmHighestBin(hypre_handle) hypre_DeviceDataSpgemmHighestBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmBlockNumDim(hypre_handle) hypre_DeviceDataSpgemmBlockNumDim(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateMethod(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMethod(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateNsamples(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMultFactor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceAllocator(hypre_handle) hypre_DeviceDataDeviceAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleUseGpuRand(hypre_handle) hypre_DeviceDataUseGpuRand(hypre_HandleDeviceData(hypre_handle)) + +#define hypre_HandleUserDeviceMalloc(hypre_handle) ((hypre_handle) -> user_device_malloc) +#define hypre_HandleUserDeviceMfree(hypre_handle) ((hypre_handle) -> user_device_free) + +#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) +#define hypre_HandleUmpireDevicePoolSize(hypre_handle) ((hypre_handle) -> umpire_device_pool_size) +#define hypre_HandleUmpireUMPoolSize(hypre_handle) ((hypre_handle) -> umpire_um_pool_size) +#define hypre_HandleUmpireHostPoolSize(hypre_handle) ((hypre_handle) -> umpire_host_pool_size) +#define hypre_HandleUmpirePinnedPoolSize(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_size) 
+#define hypre_HandleUmpireBlockSize(hypre_handle) ((hypre_handle) -> umpire_block_size) +#define hypre_HandleUmpireDevicePoolName(hypre_handle) ((hypre_handle) -> umpire_device_pool_name) +#define hypre_HandleUmpireUMPoolName(hypre_handle) ((hypre_handle) -> umpire_um_pool_name) +#define hypre_HandleUmpireHostPoolName(hypre_handle) ((hypre_handle) -> umpire_host_pool_name) +#define hypre_HandleUmpirePinnedPoolName(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_name) +#define hypre_HandleOwnUmpireDevicePool(hypre_handle) ((hypre_handle) -> own_umpire_device_pool) +#define hypre_HandleOwnUmpireUMPool(hypre_handle) ((hypre_handle) -> own_umpire_um_pool) +#define hypre_HandleOwnUmpireHostPool(hypre_handle) ((hypre_handle) -> own_umpire_host_pool) +#define hypre_HandleOwnUmpirePinnedPool(hypre_handle) ((hypre_handle) -> own_umpire_pinned_pool) + +#define hypre_HandleMagmaQueue(hypre_handle) ((hypre_handle) -> magma_queue) + +#endif /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
@@ -2623,6 +2564,60 @@ HYPRE_Int hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, HYPRE_Int * hypre_IntArrayArray *w ); #endif +/* memory.c */ +HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, + char *memory_location_name); +void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); +void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); +void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); +void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); +void hypre_Free(void *ptr, HYPRE_MemoryLocation location); +void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, + HYPRE_MemoryLocation loc_src); +void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); + +void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); +void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); + +HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); +HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, + HYPRE_MemoryLocation location2); + +HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); +HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, + size_t max_cached_bytes ); +HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); +void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); +HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); 
+HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); +HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); +HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); +HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); +HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, + const char *function, HYPRE_Int line); +#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ + hypre_HandleLogLevel(hypre_handle()),\ + __func__,\ + __LINE__) +/* memory_dmalloc.c */ +HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); +HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); +char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); +char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); +char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); +void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); + /* memory_tracker.c */ #ifdef HYPRE_USING_MEMORY_TRACKER hypre_MemoryTracker* hypre_memory_tracker(void); diff --git a/src/utilities/headers b/src/utilities/headers index cc3ce4d831..fca45b66c9 100755 --- a/src/utilities/headers +++ b/src/utilities/headers @@ -33,7 +33,6 @@ extern "C" { # Structures and prototypes #=========================================================================== -cat handle.h >> $INTERNAL_HEADER cat state.h >> $INTERNAL_HEADER cat general.h >> $INTERNAL_HEADER cat base.h >> $INTERNAL_HEADER @@ -43,6 +42,7 @@ cat printf.h >> $INTERNAL_HEADER cat error.h >> $INTERNAL_HEADER cat smp.h >> $INTERNAL_HEADER cat memory.h >> $INTERNAL_HEADER +cat handle.h >> $INTERNAL_HEADER cat memory_tracker.h >> $INTERNAL_HEADER cat 
mpistubs.h >> $INTERNAL_HEADER cat omp_device.h >> $INTERNAL_HEADER diff --git a/src/utilities/memory.h b/src/utilities/memory.h index f7d46b74a9..c089467e77 100644 --- a/src/utilities/memory.h +++ b/src/utilities/memory.h @@ -157,65 +157,6 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ - -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ - -/* memory.c */ -HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, - char *memory_location_name); -void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); -void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); -void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); -void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); -void hypre_Free(void *ptr, HYPRE_MemoryLocation location); -void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, - HYPRE_MemoryLocation loc_src); -void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); - -void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); -void _hypre_Free(void *ptr, hypre_MemoryLocation location); -void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, - hypre_MemoryLocation loc_src); - -HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); -HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, - HYPRE_MemoryLocation location2); - -HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); -HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint 
bin_growth, hypre_uint min_bin, hypre_uint max_bin, - size_t max_cached_bytes ); -HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); -void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); -HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); -HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); -HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); -HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); -HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, - const char *function, HYPRE_Int line); -#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ - hypre_HandleLogLevel(hypre_handle()),\ - __func__,\ - __LINE__) -/* memory_dmalloc.c */ -HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); -HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); -char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); -char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); -char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); -void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); - #ifdef __cplusplus } #endif diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 4281d55270..78da07ac2b 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1534,13 +1534,16 @@ hypre_MPI_Irecv_Multiple( void *buf, hypre_MPI_Request *requests, hypre_MPI_Request *extra_request) { - *extra_request = 
hypre_MPI_REQUEST_NULL; - if (!num) { return hypre_error_flag; } + if (extra_request) + { + *extra_request = hypre_MPI_REQUEST_NULL; + } + HYPRE_Int data_size, i; hypre_MPI_Type_size(datatype, &data_size); @@ -1660,13 +1663,16 @@ hypre_MPI_Recv_init_Multiple( void *buf, hypre_MPI_Request *requests, hypre_MPI_Request *extra_request) { - *extra_request = hypre_MPI_REQUEST_NULL; - if (!num) { return hypre_error_flag; } + if (extra_request) + { + *extra_request = hypre_MPI_REQUEST_NULL; + } + HYPRE_Int data_size, i; hypre_MPI_Type_size(datatype, &data_size); diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 0fe65c07f7..a24f252fd5 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -446,6 +446,60 @@ HYPRE_Int hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, HYPRE_Int * hypre_IntArrayArray *w ); #endif +/* memory.c */ +HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, + char *memory_location_name); +void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); +void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); +void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); +void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); +void hypre_Free(void *ptr, HYPRE_MemoryLocation location); +void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, + HYPRE_MemoryLocation loc_src); +void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); + +void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); +void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); + 
+HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); +HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, + HYPRE_MemoryLocation location2); + +HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); +HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, + size_t max_cached_bytes ); +HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); +void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); +HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); +HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); +HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); +HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); +HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, + const char *function, HYPRE_Int line); +#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ + hypre_HandleLogLevel(hypre_handle()),\ + __func__,\ + __LINE__) +/* memory_dmalloc.c */ +HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); +HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); +char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); +char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); +char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); +void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); + /* memory_tracker.c */ 
#ifdef HYPRE_USING_MEMORY_TRACKER hypre_MemoryTracker* hypre_memory_tracker(void); From 010c84cc554998e823dbb4a139039db337516d09 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sat, 8 Feb 2025 08:12:13 -0800 Subject: [PATCH 67/90] more changes on struct comm --- src/struct_mv/_hypre_struct_mv.h | 10 +- src/struct_mv/struct_communication.c | 149 +++++++++------------------ src/struct_mv/struct_communication.h | 10 +- 3 files changed, 65 insertions(+), 104 deletions(-) diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index a346ddf40f..9570016ec4 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -944,10 +944,12 @@ typedef struct hypre_CommHandle_struct void *recv_buffers_mpi; hypre_MemoryLocation send_buffers_mpi_location; hypre_MemoryLocation recv_buffers_mpi_location; + HYPRE_Int num_extra_requests; + hypre_MPI_Request *extra_requests; /* set = 0, add = 1 */ - HYPRE_Int action; - + HYPRE_Int action; + MPI_Comm comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- @@ -1058,6 +1060,10 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) #define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) #define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) +#define hypre_CommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) +#define hypre_CommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) +#define hypre_CommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) +#define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif /****************************************************************************** diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index cd130d256b..ce6424b30b 100644 --- 
a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -813,7 +813,6 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, HYPRE_Int size_of_elem, hypre_CommHandle *comm_handle ) { - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); @@ -833,15 +832,14 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, location); } - hypre_MPICommSetSendLocation(hypre_CommPkgComm(comm_pkg), send_memory_alocation); - hypre_MPICommSetSendBuffer(hypre_CommPkgComm(comm_pkg), hypre_CommHandleSendBuffersMPI(comm_handle)); - hypre_MPICommSetSendBufferLocation(hypre_CommPkgComm(comm_pkg), - hypre_CommHandleSendBuffersMPILocation(comm_handle)); + MPI_Comm comm = hypre_CommHandleComm(comm_handle); + hypre_MPICommSetSendLocation(comm, send_memory_alocation); + hypre_MPICommSetSendBuffer(comm, hypre_CommHandleSendBuffersMPI(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_CommHandleSendBuffersMPILocation(comm_handle)); - hypre_MPICommSetRecvLocation(hypre_CommPkgComm(comm_pkg), recv_memory_alocation); - hypre_MPICommSetRecvBuffer(hypre_CommPkgComm(comm_pkg), hypre_CommHandleRecvBuffersMPI(comm_handle)); - hypre_MPICommSetRecvBufferLocation(hypre_CommPkgComm(comm_pkg), - hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); + hypre_MPICommSetRecvBuffer(comm, hypre_CommHandleRecvBuffersMPI(comm_handle)); + hypre_MPICommSetRecvBufferLocation(comm, hypre_CommHandleRecvBuffersMPILocation(comm_handle)); return hypre_error_flag; } @@ -871,7 +869,8 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = 
hypre_CommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_CommPkgComm(comm_pkg); + MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); + MPI_Comm comm; HYPRE_Int num_requests; hypre_MPI_Request *requests; @@ -879,8 +878,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Complex **send_buffers; HYPRE_Complex **recv_buffers; - HYPRE_Complex *send_buffers_mpi = NULL; - HYPRE_Complex *recv_buffers_mpi = NULL; hypre_CommType *comm_type, *from_type, *to_type; hypre_CommEntryType *comm_entry; @@ -897,10 +894,10 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int size; HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - HYPRE_MemoryLocation memory_location_mpi = memory_location; HYPRE_Int persistent = 0; - hypre_CommHandleCommPkg(comm_handle) = comm_pkg; + hypre_MPI_Comm_dup(comm_orig, &comm); + hypre_CommHandleComm(comm_handle) = comm; /*-------------------------------------------------------------------- * allocate requests and status @@ -1004,7 +1001,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } } - for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); @@ -1014,35 +1010,24 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, { qptr = (HYPRE_Int *) (send_buffers[0] + (send_buffers[i] - send_buffers[0])); hypre_TMemcpy(qptr, &num_entries, - HYPRE_Int, 1, memory_location_mpi, HYPRE_MEMORY_HOST); + HYPRE_Int, 1, memory_location, HYPRE_MEMORY_HOST); qptr ++; hypre_TMemcpy(qptr, hypre_CommTypeRemBoxnums(comm_type), - HYPRE_Int, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + HYPRE_Int, num_entries, memory_location, HYPRE_MEMORY_HOST); qptr += num_entries; hypre_TMemcpy(qptr, hypre_CommTypeRemBoxes(comm_type), - hypre_Box, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + hypre_Box, num_entries, memory_location, HYPRE_MEMORY_HOST); hypre_CommTypeRemBoxnums(comm_type) = NULL; hypre_CommTypeRemBoxes(comm_type) = NULL; } } - { - memory_location_mpi = 
HYPRE_MEMORY_HOST; - - if (num_sends > 0) - { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - hypre_TMemcpy(send_buffers_mpi, send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - } - } + hypre_CommHandleAllocateBuffers(memory_location, + memory_location, + hypre_CommPkgSendBufsize(comm_pkg), + hypre_CommPkgRecvBufsize(comm_pkg), + sizeof(HYPRE_Complex), + comm_handle); #if 0 @@ -1094,8 +1079,8 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int *displs_send = NULL; HYPRE_Int *procs_send = NULL; HYPRE_Int *counts_send = NULL; - HYPRE_Int num_extra_requests = 0; - hypre_MPI_Request *extra_requests = NULL; + HYPRE_Int num_extra_requests = persistent ? 2 : 1; + hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); if (num_recvs) { @@ -1109,8 +1094,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, displs_recv[i] = (recv_buffers[i] - recv_buffers[0]) * sizeof(HYPRE_Complex); procs_recv[i] = hypre_CommTypeProc(comm_type); } - num_extra_requests = persistent ? 2 : 1; - extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); } if (num_sends) @@ -1127,7 +1110,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } } - hypre_MPI_Irecv_Multiple(recv_buffers_mpi, + hypre_MPI_Irecv_Multiple(recv_buffers ? recv_buffers[0] : NULL, num_recvs, displs_recv, counts_recv, @@ -1138,7 +1121,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, requests, extra_requests); - hypre_MPI_Isend_Multiple(send_buffers_mpi, + hypre_MPI_Isend_Multiple(send_buffers ? 
send_buffers[0] : NULL, num_sends, displs_send, counts_send, @@ -1173,7 +1156,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_TFree(displs_send, HYPRE_MEMORY_HOST); hypre_TFree(counts_send, HYPRE_MEMORY_HOST); hypre_TFree(procs_send, HYPRE_MEMORY_HOST); - hypre_TFree(extra_requests, HYPRE_MEMORY_HOST); /*-------------------------------------------------------------------- * set up CopyToType and exchange local data @@ -1206,16 +1188,17 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * set up comm_handle and return *--------------------------------------------------------------------*/ - hypre_CommHandleSendData(comm_handle) = send_data; - hypre_CommHandleRecvData(comm_handle) = recv_data; - hypre_CommHandleNumRequests(comm_handle) = num_requests; - hypre_CommHandleRequests(comm_handle) = requests; - hypre_CommHandleStatus(comm_handle) = status; - hypre_CommHandleSendBuffers(comm_handle) = send_buffers; - hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; - hypre_CommHandleAction(comm_handle) = action; - hypre_CommHandleSendBuffersMPI(comm_handle) = send_buffers_mpi; - hypre_CommHandleRecvBuffersMPI(comm_handle) = recv_buffers_mpi; + hypre_CommHandleCommPkg(comm_handle) = comm_pkg; + hypre_CommHandleSendData(comm_handle) = send_data; + hypre_CommHandleRecvData(comm_handle) = recv_data; + hypre_CommHandleNumRequests(comm_handle) = num_requests; + hypre_CommHandleRequests(comm_handle) = requests; + hypre_CommHandleNumExtraRequests(comm_handle) = num_extra_requests; + hypre_CommHandleExtraRequests(comm_handle) = extra_requests; + hypre_CommHandleStatus(comm_handle) = status; + hypre_CommHandleSendBuffers(comm_handle) = send_buffers; + hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; + hypre_CommHandleAction(comm_handle) = action; *comm_handle_ptr = comm_handle; @@ -1236,8 +1219,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); HYPRE_Complex 
**send_buffers = hypre_CommHandleSendBuffers(comm_handle); HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Complex *send_buffers_mpi = hypre_CommHandleSendBuffersMPI(comm_handle); - HYPRE_Complex *recv_buffers_mpi = hypre_CommHandleRecvBuffersMPI(comm_handle); HYPRE_Int action = hypre_CommHandleAction(comm_handle); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); @@ -1261,15 +1242,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int i, j, d, ll; - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - HYPRE_MemoryLocation memory_location_mpi = memory_location; - -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (!hypre_GetGpuAwareMPI()) - { - memory_location_mpi = HYPRE_MEMORY_HOST; - } -#endif + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); /*-------------------------------------------------------------------- * finish communications @@ -1282,6 +1255,9 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommHandleStatus(comm_handle)); } + printf("%d %p\n", hypre_CommHandleNumExtraRequests(comm_handle), &hypre_CommHandleExtraRequest(comm_handle, 0)); + hypre_MPI_Wait(&hypre_CommHandleExtraRequest(comm_handle, 0), MPI_STATUS_IGNORE); + /*-------------------------------------------------------------------- * if FirstComm, unpack prefix information and set 'num_entries' and * 'entries' for RecvType @@ -1296,10 +1272,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); - qptr = (HYPRE_Int *) (recv_buffers_mpi + (recv_buffers[i] - recv_buffers[0])); - - hypre_TMemcpy(&hypre_CommTypeNumEntries(comm_type), qptr, - HYPRE_Int, 1, HYPRE_MEMORY_HOST, memory_location_mpi); + hypre_TMemcpy(&hypre_CommTypeNumEntries(comm_type), recv_buffers[i], + HYPRE_Int, 1, HYPRE_MEMORY_HOST, memory_location); num_entries += hypre_CommTypeNumEntries(comm_type); } @@ 
-1317,25 +1291,18 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) num_entries = hypre_CommTypeNumEntries(comm_type); ct_entries += num_entries; - qptr = (HYPRE_Int *) (recv_buffers_mpi + (recv_buffers[i] - recv_buffers[0])); + qptr = (HYPRE_Int *) recv_buffers[i]; qptr++; /* Set boxnums and boxes from MPI recv buffer */ - if (!hypre_GetGpuAwareMPI()) - { - boxnums = (HYPRE_Int*) qptr; - qptr += num_entries; - boxes = (hypre_Box*) qptr; - } - else { boxnums = hypre_TAlloc(HYPRE_Int, num_entries, HYPRE_MEMORY_HOST); hypre_TMemcpy(boxnums, qptr, HYPRE_Int, num_entries, - HYPRE_MEMORY_HOST, memory_location_mpi); + HYPRE_MEMORY_HOST, memory_location); qptr += num_entries; boxes = hypre_TAlloc(hypre_Box, num_entries, HYPRE_MEMORY_HOST); hypre_TMemcpy(boxes, qptr, hypre_Box, num_entries, - HYPRE_MEMORY_HOST, memory_location_mpi); + HYPRE_MEMORY_HOST, memory_location); } /* Set the entries for the comm_type */ @@ -1347,8 +1314,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommPkgRecvDataSpace(comm_pkg), hypre_CommPkgRecvDataOffsets(comm_pkg)); - /* Free allocated memory if using GPU-aware MPI */ - if (hypre_GetGpuAwareMPI()) { hypre_TFree(boxnums, HYPRE_MEMORY_HOST); hypre_TFree(boxes, HYPRE_MEMORY_HOST); @@ -1360,18 +1325,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) * unpack receive buffer data *--------------------------------------------------------------------*/ - /* Note: hypre_CommPkgRecvBufsize is different in the first comm */ - if (num_recvs > 0 && recv_buffers[0] != recv_buffers_mpi) - { - HYPRE_Int recv_buf_size; - - recv_buf_size = hypre_CommPkgFirstComm(comm_pkg) ? 
hypre_CommPkgRecvBufsizeFirstComm(comm_pkg) : - hypre_CommPkgRecvBufsize(comm_pkg); - - hypre_TMemcpy(recv_buffers[0], recv_buffers_mpi, HYPRE_Complex, recv_buf_size, - memory_location, memory_location_mpi); - } - for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); @@ -1435,6 +1388,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(hypre_CommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(hypre_CommHandleStatus(comm_handle), HYPRE_MEMORY_HOST); + if (num_sends > 0) { hypre_StructCommunicationReleaseBuffer(send_buffers[0], memory_location); @@ -1444,16 +1398,11 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_StructCommunicationReleaseBuffer(recv_buffers[0], memory_location); } - hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); + _hypre_TFree(hypre_CommHandleSendBuffersMPI(comm_handle), hypre_CommHandleSendBuffersMPILocation(comm_handle)); + _hypre_TFree(hypre_CommHandleRecvBuffersMPI(comm_handle), hypre_CommHandleRecvBuffersMPILocation(comm_handle)); - if (num_sends > 0 && send_buffers[0] != send_buffers_mpi) - { - hypre_TFree(send_buffers_mpi, memory_location_mpi); - } - if (num_recvs > 0 && recv_buffers[0] != recv_buffers_mpi) - { - hypre_TFree(recv_buffers_mpi, memory_location_mpi); - } + hypre_MPI_Comm_free(&hypre_CommHandleComm(comm_handle)); + hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); hypre_TFree(recv_buffers, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index 9b32e7213e..b56f547de7 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -146,10 +146,12 @@ typedef struct hypre_CommHandle_struct void *recv_buffers_mpi; hypre_MemoryLocation send_buffers_mpi_location; hypre_MemoryLocation recv_buffers_mpi_location; + HYPRE_Int num_extra_requests; + hypre_MPI_Request *extra_requests; /* set = 0, add = 1 */ - HYPRE_Int action; - 
+ HYPRE_Int action; + MPI_Comm comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- @@ -260,5 +262,9 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) #define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) #define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) +#define hypre_CommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) +#define hypre_CommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) +#define hypre_CommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) +#define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif From 55dcbe14389d49334a8a3238d259f10de6233242 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sat, 8 Feb 2025 20:56:53 -0800 Subject: [PATCH 68/90] fix issues --- src/struct_mv/struct_communication.c | 24 ++++++++++++++---------- src/utilities/mpistubs.c | 20 +++++++++++++------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index ce6424b30b..ee499d4c37 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -764,6 +764,7 @@ hypre_CommTypeSetEntry( hypre_Box *box, return hypre_error_flag; } +//TODO size is confusing HYPRE_Complex * hypre_StructCommunicationGetBuffer(HYPRE_MemoryLocation memory_location, HYPRE_Int size) @@ -863,7 +864,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int tag, hypre_CommHandle **comm_handle_ptr ) { - hypre_CommHandle *comm_handle = hypre_TAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); + hypre_CommHandle *comm_handle = hypre_CTAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); @@ 
-1084,30 +1085,32 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, if (num_recvs) { - displs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - procs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - counts_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + displs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs + 1, HYPRE_MEMORY_HOST); + procs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + counts_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); - counts_recv[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); displs_recv[i] = (recv_buffers[i] - recv_buffers[0]) * sizeof(HYPRE_Complex); procs_recv[i] = hypre_CommTypeProc(comm_type); + counts_recv[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); } + displs_recv[num_recvs] = hypre_CommPkgRecvBufsize(comm_pkg) * sizeof(HYPRE_Complex); } if (num_sends) { - displs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); - procs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); - counts_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + displs_send = hypre_CTAlloc(HYPRE_Int, num_sends + 1, HYPRE_MEMORY_HOST); + procs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + counts_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); - counts_send[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); displs_send[i] = (send_buffers[i] - send_buffers[0]) * sizeof(HYPRE_Complex); procs_send[i] = hypre_CommTypeProc(comm_type); + counts_send[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); } + displs_send[num_sends] = hypre_CommPkgSendBufsize(comm_pkg) * sizeof(HYPRE_Complex); } hypre_MPI_Irecv_Multiple(recv_buffers ? 
recv_buffers[0] : NULL, @@ -1255,7 +1258,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommHandleStatus(comm_handle)); } - printf("%d %p\n", hypre_CommHandleNumExtraRequests(comm_handle), &hypre_CommHandleExtraRequest(comm_handle, 0)); hypre_MPI_Wait(&hypre_CommHandleExtraRequest(comm_handle, 0), MPI_STATUS_IGNORE); /*-------------------------------------------------------------------- @@ -1401,6 +1403,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) _hypre_TFree(hypre_CommHandleSendBuffersMPI(comm_handle), hypre_CommHandleSendBuffersMPILocation(comm_handle)); _hypre_TFree(hypre_CommHandleRecvBuffersMPI(comm_handle), hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + hypre_TFree(hypre_CommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_MPI_Comm_free(&hypre_CommHandleComm(comm_handle)); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 78da07ac2b..e3ce8fbf5d 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1534,14 +1534,14 @@ hypre_MPI_Irecv_Multiple( void *buf, hypre_MPI_Request *requests, hypre_MPI_Request *extra_request) { - if (!num) + if (extra_request) { - return hypre_error_flag; + *extra_request = hypre_MPI_REQUEST_NULL; } - if (extra_request) + if (!num) { - *extra_request = hypre_MPI_REQUEST_NULL; + return hypre_error_flag; } HYPRE_Int data_size, i; @@ -1600,7 +1600,10 @@ hypre_MPI_Send_init_Multiple( void *buf, hypre_MPI_Request *requests, hypre_MPI_Request *extra_request) { - *extra_request = hypre_MPI_REQUEST_NULL; + if (extra_request) + { + *extra_request = hypre_MPI_REQUEST_NULL; + } if (!num) { @@ -1974,7 +1977,10 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) memory_location != hypre_MEMORY_HOST && memory_location != hypre_MEMORY_HOST_PINNED; #else - /* RL: return 1 for debugging purpose */ + /* RL: return 1 for debugging without GPUs, + so we always has a host buffer for MPI. 
+ O.w. make sure return Z E R O! + */ return 1; #endif } @@ -2248,7 +2254,7 @@ hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) char dname[32],sname[32]; hypre_GetMemoryLocationName(dest_location, dname); hypre_GetMemoryLocationName(src_location, sname); - hypre_printf(" copying %s %p <-- %s %p\n", dname, dest, sname, src); + hypre_printf(" copying %s %p <-- %s %p, %d bytes\n", dname, dest, sname, src, num_bytes); #endif _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); hypre_GpuProfilingPopRange(); From 4267d1328436db302379e7823eb7f494d66841a2 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 9 Feb 2025 09:40:43 -0800 Subject: [PATCH 69/90] cleanup code --- src/struct_mv/struct_communication.c | 52 ++-------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index ee499d4c37..c144029420 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -1030,56 +1030,14 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, sizeof(HYPRE_Complex), comm_handle); - -#if 0 -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) - { - if (hypre_GetGpuAwareMPI()) - { -#if defined(HYPRE_USING_GPU) - hypre_ForceSyncComputeStream(); -#endif - if (num_sends > 0) { send_buffers_mpi = send_buffers[0]; } - if (num_recvs > 0) { recv_buffers_mpi = recv_buffers[0]; } - } - else - { - memory_location_mpi = HYPRE_MEMORY_HOST; - - if (num_sends > 0) - { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - hypre_TMemcpy(send_buffers_mpi, send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi = hypre_CTAlloc(HYPRE_Complex, size, 
memory_location_mpi); - } - } - } - else -#endif - { - if (num_sends > 0) { send_buffers_mpi = send_buffers[0]; } - if (num_recvs > 0) { recv_buffers_mpi = recv_buffers[0]; } - } -#endif - /*-------------------------------------------------------------------- * post receives and initiate sends *--------------------------------------------------------------------*/ HYPRE_Int *displs_recv = NULL; HYPRE_Int *procs_recv = NULL; - HYPRE_Int *counts_recv = NULL; HYPRE_Int *displs_send = NULL; HYPRE_Int *procs_send = NULL; - HYPRE_Int *counts_send = NULL; HYPRE_Int num_extra_requests = persistent ? 2 : 1; hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); @@ -1087,13 +1045,11 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, { displs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs + 1, HYPRE_MEMORY_HOST); procs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - counts_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); displs_recv[i] = (recv_buffers[i] - recv_buffers[0]) * sizeof(HYPRE_Complex); procs_recv[i] = hypre_CommTypeProc(comm_type); - counts_recv[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); } displs_recv[num_recvs] = hypre_CommPkgRecvBufsize(comm_pkg) * sizeof(HYPRE_Complex); } @@ -1102,13 +1058,11 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, { displs_send = hypre_CTAlloc(HYPRE_Int, num_sends + 1, HYPRE_MEMORY_HOST); procs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); - counts_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); displs_send[i] = (send_buffers[i] - send_buffers[0]) * sizeof(HYPRE_Complex); procs_send[i] = hypre_CommTypeProc(comm_type); - counts_send[i] = hypre_CommTypeBufsize(comm_type) * sizeof(HYPRE_Complex); } displs_send[num_sends] = 
hypre_CommPkgSendBufsize(comm_pkg) * sizeof(HYPRE_Complex); } @@ -1116,7 +1070,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Irecv_Multiple(recv_buffers ? recv_buffers[0] : NULL, num_recvs, displs_recv, - counts_recv, + NULL, hypre_MPI_BYTE, procs_recv, tag, @@ -1127,7 +1081,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_MPI_Isend_Multiple(send_buffers ? send_buffers[0] : NULL, num_sends, displs_send, - counts_send, + NULL, hypre_MPI_BYTE, procs_send, tag, @@ -1154,10 +1108,8 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } hypre_TFree(displs_recv, HYPRE_MEMORY_HOST); - hypre_TFree(counts_recv, HYPRE_MEMORY_HOST); hypre_TFree(procs_recv, HYPRE_MEMORY_HOST); hypre_TFree(displs_send, HYPRE_MEMORY_HOST); - hypre_TFree(counts_send, HYPRE_MEMORY_HOST); hypre_TFree(procs_send, HYPRE_MEMORY_HOST); /*-------------------------------------------------------------------- From 2a38d4d46056b4d602a877048cbeb857a83cf53a Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 10 Feb 2025 17:26:38 -0800 Subject: [PATCH 70/90] simplify function interface --- src/parcsr_mv/_hypre_parcsr_mv.h | 5 -- src/parcsr_mv/par_csr_communication.c | 67 ++++++++-------- src/parcsr_mv/par_csr_communication.h | 5 -- src/struct_mv/_hypre_struct_mv.h | 5 -- src/struct_mv/struct_communication.c | 27 ++++--- src/struct_mv/struct_communication.h | 5 -- src/utilities/_hypre_utilities.h | 17 ++-- src/utilities/handle.h | 2 + src/utilities/mpistubs.c | 107 ++++++++++++++++++-------- src/utilities/mpistubs.h | 15 ++-- 10 files changed, 144 insertions(+), 111 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 9195875191..1ec1418153 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -115,8 +115,6 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - HYPRE_Int num_extra_requests; - hypre_MPI_Request 
*extra_requests; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -137,9 +135,6 @@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) -#define hypre_ParCSRCommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) -#define hypre_ParCSRCommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 3d8daeec70..949388e98f 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -205,8 +205,6 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - HYPRE_Int num_extra_requests = persistent ? 
2 : 1; - hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); HYPRE_Int num_send_elems = 0; HYPRE_Int num_recv_elems = 0; HYPRE_Int data_size; @@ -235,14 +233,14 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests, &extra_requests[0]); + 0, comm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs, &extra_requests[1]); + 0, comm, requests + num_recvs); } else { @@ -250,7 +248,7 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests, extra_requests); + 0, comm, requests); hypre_MPI_Isend_Multiple(send_data, num_sends, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), @@ -278,14 +276,14 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests, &extra_requests[0]); + 0, comm, requests); hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + num_sends, &extra_requests[1]); + 0, comm, requests + num_sends); } else { @@ -293,7 +291,7 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_ParCSRCommPkgSendMapStarts(comm_pkg), NULL, mpi_dtype, hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests, extra_requests); + 0, comm, requests); hypre_MPI_Isend_Multiple(send_data, num_recvs, hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), @@ -313,18 +311,16 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, * 
set up comm_handle and return *--------------------------------------------------------------------*/ - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; if (!persistent) { - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; } - hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; - hypre_ParCSRCommHandleNumExtraRequests(comm_handle) = num_extra_requests; - hypre_ParCSRCommHandleExtraRequests(comm_handle) = extra_requests; + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; hypre_GpuProfilingPopRange(); @@ -356,14 +352,20 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } - if (hypre_ParCSRCommHandlePersistent(comm_handle)) - { - HYPRE_Int flag; - hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), &flag, hypre_MPI_STATUS_IGNORE); - } - else + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + + if (post_recv_request) { - hypre_MPI_Wait(&hypre_ParCSRCommHandleExtraRequest(comm_handle, 0), MPI_STATUS_IGNORE); + if (hypre_ParCSRCommHandlePersistent(comm_handle)) + { + HYPRE_Int flag; + hypre_MPI_Request_get_status(*post_recv_request, &flag, hypre_MPI_STATUS_IGNORE); + } + else + { + hypre_MPI_Wait(post_recv_request, MPI_STATUS_IGNORE); + } } return 
hypre_error_flag; @@ -402,18 +404,13 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) if (persistent) { - HYPRE_Int i; - for (i = 0; i < hypre_ParCSRCommHandleNumExtraRequests(comm_handle); i++) - { - if (hypre_ParCSRCommHandleExtraRequest(comm_handle, i) != hypre_MPI_REQUEST_NULL) - { - hypre_MPI_Request_free(&hypre_ParCSRCommHandleExtraRequest(comm_handle, i)); - } - } + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + if (post_recv_request) { hypre_MPI_Request_free(post_recv_request); } + if (pre_send_request) { hypre_MPI_Request_free(pre_send_request); } } - hypre_TFree(hypre_ParCSRCommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); - /* attributes should be deleted when the communicator is being freed */ /* hypre_MPICommDeleteSendLocation(hypre_ParCSRCommHandleComm(comm_handle)); @@ -422,6 +419,8 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_MPICommDeleteRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteSendBuffer(hypre_ParCSRCommHandleComm(comm_handle)); hypre_MPICommDeleteRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeletePostRecvRequest(hypre_ParCSRCommHandleComm(comm_handle)); + hypre_MPICommDeletePreSendRequest(hypre_ParCSRCommHandleComm(comm_handle)); */ hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 86e251f1b9..549b892a28 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -99,8 +99,6 @@ typedef struct hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - HYPRE_Int num_extra_requests; - hypre_MPI_Request *extra_requests; MPI_Comm comm; } hypre_ParCSRCommHandle; @@ -121,9 +119,6 
@@ typedef struct #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) -#define hypre_ParCSRCommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) -#define hypre_ParCSRCommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) -#define hypre_ParCSRCommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index 9570016ec4..f0f971c43e 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -944,8 +944,6 @@ typedef struct hypre_CommHandle_struct void *recv_buffers_mpi; hypre_MemoryLocation send_buffers_mpi_location; hypre_MemoryLocation recv_buffers_mpi_location; - HYPRE_Int num_extra_requests; - hypre_MPI_Request *extra_requests; /* set = 0, add = 1 */ HYPRE_Int action; @@ -1060,9 +1058,6 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) #define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) #define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) -#define hypre_CommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) -#define hypre_CommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) -#define hypre_CommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index c144029420..5b0bcc446c 100644 --- 
a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -895,7 +895,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int size; HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - HYPRE_Int persistent = 0; hypre_MPI_Comm_dup(comm_orig, &comm); hypre_CommHandleComm(comm_handle) = comm; @@ -1038,8 +1037,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int *procs_recv = NULL; HYPRE_Int *displs_send = NULL; HYPRE_Int *procs_send = NULL; - HYPRE_Int num_extra_requests = persistent ? 2 : 1; - hypre_MPI_Request *extra_requests = hypre_CTAlloc(hypre_MPI_Request, num_extra_requests, HYPRE_MEMORY_HOST); if (num_recvs) { @@ -1075,8 +1072,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, procs_recv, tag, comm, - requests, - extra_requests); + requests); hypre_MPI_Isend_Multiple(send_buffers ? send_buffers[0] : NULL, num_sends, @@ -1148,8 +1144,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_CommHandleRecvData(comm_handle) = recv_data; hypre_CommHandleNumRequests(comm_handle) = num_requests; hypre_CommHandleRequests(comm_handle) = requests; - hypre_CommHandleNumExtraRequests(comm_handle) = num_extra_requests; - hypre_CommHandleExtraRequests(comm_handle) = extra_requests; hypre_CommHandleStatus(comm_handle) = status; hypre_CommHandleSendBuffers(comm_handle) = send_buffers; hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; @@ -1171,10 +1165,11 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); - HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); - HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Int action = hypre_CommHandleAction(comm_handle); + MPI_Comm comm = hypre_CommHandleComm(comm_handle); + hypre_CommPkg *comm_pkg = 
hypre_CommHandleCommPkg(comm_handle); + HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); + HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); + HYPRE_Int action = hypre_CommHandleAction(comm_handle); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); @@ -1198,6 +1193,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int i, j, d, ll; HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); /*-------------------------------------------------------------------- * finish communications @@ -1210,7 +1206,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommHandleStatus(comm_handle)); } - hypre_MPI_Wait(&hypre_CommHandleExtraRequest(comm_handle, 0), MPI_STATUS_IGNORE); + if (post_recv_request) + { + hypre_MPI_Wait(post_recv_request, MPI_STATUS_IGNORE); + } /*-------------------------------------------------------------------- * if FirstComm, unpack prefix information and set 'num_entries' and @@ -1355,9 +1354,9 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) _hypre_TFree(hypre_CommHandleSendBuffersMPI(comm_handle), hypre_CommHandleSendBuffersMPILocation(comm_handle)); _hypre_TFree(hypre_CommHandleRecvBuffersMPI(comm_handle), hypre_CommHandleRecvBuffersMPILocation(comm_handle)); - hypre_TFree(hypre_CommHandleExtraRequests(comm_handle), HYPRE_MEMORY_HOST); + hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); - hypre_MPI_Comm_free(&hypre_CommHandleComm(comm_handle)); + hypre_MPI_Comm_free(&comm); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index b56f547de7..f56c175e8d 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -146,8 +146,6 @@ 
typedef struct hypre_CommHandle_struct void *recv_buffers_mpi; hypre_MemoryLocation send_buffers_mpi_location; hypre_MemoryLocation recv_buffers_mpi_location; - HYPRE_Int num_extra_requests; - hypre_MPI_Request *extra_requests; /* set = 0, add = 1 */ HYPRE_Int action; @@ -262,9 +260,6 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) #define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) #define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) -#define hypre_CommHandleNumExtraRequests(comm_handle) (comm_handle -> num_extra_requests) -#define hypre_CommHandleExtraRequests(comm_handle) (comm_handle -> extra_requests) -#define hypre_CommHandleExtraRequest(comm_handle, i) (comm_handle -> extra_requests[i]) #define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 278fea5f3f..3e477389d4 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -850,6 +850,8 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) +#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) +#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) @@ -1487,14 +1489,11 @@ HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, 
HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, -hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, -hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, -hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -1520,6 +1519,8 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, 
hypre_MemoryLocation); @@ -1527,6 +1528,8 @@ HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); @@ -1534,6 +1537,8 @@ HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm); hypre_int hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 26375fcce4..e58026e2ea 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -106,6 +106,8 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) +#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) +#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define 
hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index e3ce8fbf5d..1519245020 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -986,8 +986,7 @@ hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests) { return (0); } @@ -1001,8 +1000,7 @@ hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests) { return (0); } @@ -1016,8 +1014,7 @@ hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests) { return (0); } @@ -1531,14 +1528,8 @@ hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests) { - if (extra_request) - { - *extra_request = hypre_MPI_REQUEST_NULL; - } - if (!num) { return hypre_error_flag; @@ -1559,6 +1550,7 @@ hypre_MPI_Irecv_Multiple( void *buf, if (rbuf != buf) { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); hypre_MPI_GRequest_Action *action; hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, hypre_MPICommGetRecvBufferLocation(comm), @@ -1569,6 +1561,7 @@ hypre_MPI_Irecv_Multiple( void *buf, hypre_grequest_noop_cancel_fn, action, extra_request); hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPostRecvRequest(comm, extra_request); } return hypre_error_flag; @@ -1597,14 +1590,8 @@ hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - 
hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests ) { - if (extra_request) - { - *extra_request = hypre_MPI_REQUEST_NULL; - } - if (!num) { return hypre_error_flag; @@ -1617,6 +1604,7 @@ hypre_MPI_Send_init_Multiple( void *buf, void *sbuf = cbuf ? cbuf : buf; if (sbuf != buf) { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); hypre_MPI_GRequest_Action *action; hypre_MPI_GRequestGetCopyAction(sbuf, hypre_MPICommGetSendBufferLocation(comm), buf, hypre_MPICommGetSendLocation(comm), @@ -1627,6 +1615,7 @@ hypre_MPI_Send_init_Multiple( void *buf, hypre_grequest_noop_cancel_fn, action, extra_request); hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPreSendRequest(comm, extra_request); } HYPRE_Int i; @@ -1663,19 +1652,13 @@ hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *requests, - hypre_MPI_Request *extra_request) + hypre_MPI_Request *requests ) { if (!num) { return hypre_error_flag; } - if (extra_request) - { - *extra_request = hypre_MPI_REQUEST_NULL; - } - HYPRE_Int data_size, i; hypre_MPI_Type_size(datatype, &data_size); @@ -1691,6 +1674,7 @@ hypre_MPI_Recv_init_Multiple( void *buf, if (rbuf != buf) { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); hypre_MPI_GRequest_Action *action; hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, hypre_MPICommGetRecvBufferLocation(comm), @@ -1701,6 +1685,7 @@ hypre_MPI_Recv_init_Multiple( void *buf, hypre_grequest_noop_cancel_fn, action, extra_request); hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPostRecvRequest(comm, extra_request); } return hypre_error_flag; @@ -1986,7 +1971,8 @@ hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) } HYPRE_Int -hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, + 
hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), (void *) location); @@ -2015,7 +2001,8 @@ hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm) } HYPRE_Int -hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, + hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), (void *) location); @@ -2044,7 +2031,8 @@ hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm) } HYPRE_Int -hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, + hypre_MemoryLocation location) { hypre_Handle *handle = hypre_handle(); hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), (void *) location); @@ -2102,7 +2090,8 @@ hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm) } HYPRE_Int -hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void *buffer) +hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, + void *buffer) { hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), buffer); return hypre_error_flag; @@ -2155,6 +2144,62 @@ hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm) return hypre_error_flag; } +HYPRE_Int +hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, + hypre_MPI_Request *request) +{ + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle()), request); + return hypre_error_flag; +} + +hypre_MPI_Request * +hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm) +{ + HYPRE_Int flag; + hypre_MPI_Request *request = NULL; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle()), &request, &flag); + if (!flag) + { + request = NULL; + } + return (request); +} + +HYPRE_Int +hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm) +{ + 
hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle())); + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, + hypre_MPI_Request *request) +{ + hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle()), request); + return hypre_error_flag; +} + +hypre_MPI_Request * +hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm) +{ + HYPRE_Int flag; + hypre_MPI_Request *request = NULL; + hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle()), &request, &flag); + if (!flag) + { + request = NULL; + } + return (request); +} + +HYPRE_Int +hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm) +{ + hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle())); + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 743210199d..7cf6518d6a 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -392,14 +392,11 @@ HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, -hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, 
-hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests, -hypre_MPI_Request *extra_request ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -425,6 +422,8 @@ hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); @@ -432,6 +431,8 @@ HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLo HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); @@ -439,6 +440,8 @@ HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm 
comm); HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm); hypre_int hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); From ea2d51a7d4b44b554ca5065a101859079dad4b16 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 10 Feb 2025 17:50:44 -0800 Subject: [PATCH 71/90] fix leak --- src/parcsr_mv/par_csr_communication.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 949388e98f..c173775790 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -383,7 +383,10 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) return hypre_error_flag; } - HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); if (!persistent) { @@ -404,9 +407,6 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) if (persistent) { - MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); - hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); - hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); if (post_recv_request) { hypre_MPI_Request_free(post_recv_request); } if (pre_send_request) { hypre_MPI_Request_free(pre_send_request); } } @@ -423,7 +423,10 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle 
*comm_handle ) hypre_MPICommDeletePreSendRequest(hypre_ParCSRCommHandleComm(comm_handle)); */ - hypre_MPI_Comm_free(&hypre_ParCSRCommHandleComm(comm_handle)); + hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); + hypre_TFree(pre_send_request, HYPRE_MEMORY_HOST); + + hypre_MPI_Comm_free(&comm); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); return hypre_error_flag; From 800302ebf107460fdaf74f114019cc76efe8f119 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Mon, 10 Feb 2025 18:01:46 -0800 Subject: [PATCH 72/90] fix error --- src/parcsr_mv/par_csr_communication.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index c173775790..7d042b4b87 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -135,8 +135,14 @@ hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, HYPRE_Int hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) { - HYPRE_Int flag; - hypre_MPI_Request_get_status(hypre_ParCSRCommHandleExtraRequest(comm_handle, 1), &flag, MPI_STATUS_IGNORE); + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + HYPRE_Int flag; + + if (pre_send_request) + { + hypre_MPI_Request_get_status(pre_send_request, &flag, MPI_STATUS_IGNORE); + } if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) { From 8cd394473e3ced31bcfbecf05060407997c86612 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 11 Feb 2025 09:43:57 -0800 Subject: [PATCH 73/90] name changes --- src/utilities/_hypre_utilities.h | 10 +++++----- src/utilities/handle.h | 8 ++++---- src/utilities/mpistubs.c | 2 +- src/utilities/mpistubs.h | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 3e477389d4..22547f31e2 100644 --- a/src/utilities/_hypre_utilities.h +++ 
b/src/utilities/_hypre_utilities.h @@ -797,7 +797,7 @@ typedef struct HYPRE_Int use_gpu_aware_mpi; #endif - hypre_MemoryLocation mpi_host_buffer_location; + hypre_MemoryLocation mpi_copy_buffer_location; #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; @@ -850,11 +850,11 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) -#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) +#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) +#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) -#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) +#define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) @@ -1485,7 +1485,7 @@ HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_ HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); -HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); +HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int 
num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, diff --git a/src/utilities/handle.h b/src/utilities/handle.h index e58026e2ea..0c4d1ca266 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -53,7 +53,7 @@ typedef struct HYPRE_Int use_gpu_aware_mpi; #endif - hypre_MemoryLocation mpi_host_buffer_location; + hypre_MemoryLocation mpi_copy_buffer_location; #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; @@ -106,11 +106,11 @@ typedef struct #define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) #define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) #define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) -#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) +#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) +#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) -#define hypre_HandleMPIHostBufferLocation(hypre_handle) ((hypre_handle) -> mpi_host_buffer_location) +#define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 1519245020..32db462fed 100644 --- a/src/utilities/mpistubs.c +++ 
b/src/utilities/mpistubs.c @@ -1955,7 +1955,7 @@ hypre_MPI_Info_free( hypre_MPI_Info *info ) #endif HYPRE_Int -hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location) +hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) { #if defined(HYPRE_USING_GPU) return !hypre_GetGpuAwareMPI() && diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 7cf6518d6a..acf436c3d0 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -388,7 +388,7 @@ HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_ HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); -HYPRE_Int hypre_MPINeedHostBuffer(hypre_MemoryLocation memory_location); +HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, From cc82c2076d1cf97a4ae9c07e7b02725208aa97d4 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 11 Feb 2025 10:07:57 -0800 Subject: [PATCH 74/90] add debug info --- src/parcsr_mv/par_csr_communication.c | 97 +++++++++++++++++++-------- src/struct_mv/struct_communication.c | 77 +++++++++++++++++---- src/utilities/mpistubs.c | 2 +- 3 files changed, 135 insertions(+), 41 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 7d042b4b87..be7c7970cc 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -49,18 +49,34 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat hypre_MemoryLocation send_memory_alocation = 
hypre_GetActualMemLocation(send_memory_location); hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); - if (!hypre_ParCSRCommHandleSendBuffer(comm_handle) && hypre_MPINeedHostBuffer(send_memory_alocation)) + if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); - hypre_ParCSRCommHandleSendBufferLocation(comm_handle) = location; - hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, location); + if (!hypre_ParCSRCommHandleSendBuffer(comm_handle)) + { + hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + hypre_ParCSRCommHandleSendBufferLocation(comm_handle) = location; + hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, + location); + } + else + { + hypre_printf("[%s, %d] ParCSRCommHandleSendBuffer existed!\n", __FILE__, __LINE__); + } } - if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle) && hypre_MPINeedHostBuffer(recv_memory_alocation)) + if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) { - hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); - hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) = location; - hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, location); + if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle)) + { + hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) = location; + hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, + location); + } + else + { + hypre_printf("[%s, %d] ParCSRCommHandleRecvBuffer existed!\n", __FILE__, __LINE__); + } } if (hypre_ParCSRCommHandlePersistent(comm_handle)) @@ -76,13 +92,39 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation 
send_memory_locat } } - hypre_MPICommSetSendLocation(hypre_ParCSRCommHandleComm(comm_handle), send_memory_alocation); - hypre_MPICommSetSendBuffer(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - - hypre_MPICommSetRecvLocation(hypre_ParCSRCommHandleComm(comm_handle), recv_memory_alocation); - hypre_MPICommSetRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); +#if defined(HYPRE_DEBUG) + if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm SendBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm RecvBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendBufferLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); + } +#endif + hypre_MPICommSetSendLocation(comm, send_memory_alocation); + hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); + hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); + hypre_MPICommSetRecvBuffer(comm, 
hypre_ParCSRCommHandleRecvBuffer(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); + hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); return hypre_error_flag; } @@ -141,7 +183,7 @@ hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) if (pre_send_request) { - hypre_MPI_Request_get_status(pre_send_request, &flag, MPI_STATUS_IGNORE); + hypre_MPI_Request_get_status(*pre_send_request, &flag, MPI_STATUS_IGNORE); } if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) @@ -417,17 +459,18 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) if (pre_send_request) { hypre_MPI_Request_free(pre_send_request); } } - /* attributes should be deleted when the communicator is being freed */ - /* - hypre_MPICommDeleteSendLocation(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeleteRecvLocation(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeleteSendBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeleteRecvBufferLocation(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeleteSendBuffer(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeleteRecvBuffer(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeletePostRecvRequest(hypre_ParCSRCommHandleComm(comm_handle)); - hypre_MPICommDeletePreSendRequest(hypre_ParCSRCommHandleComm(comm_handle)); - */ + /* attributes should be deleted when the communicator is being freed * + * but since we delete comm right after, so we don't .... 
*/ + #if 0 + hypre_MPICommDeleteSendLocation(comm); + hypre_MPICommDeleteRecvLocation(comm); + hypre_MPICommDeleteSendBufferLocation(comm); + hypre_MPICommDeleteRecvBufferLocation(comm); + hypre_MPICommDeleteSendBuffer(comm); + hypre_MPICommDeleteRecvBuffer(comm); + if (post_recv_request) { hypre_MPICommDeletePostRecvRequest(comm); } + if (pre_send_request) { hypre_MPICommDeletePreSendRequest(comm); } + #endif hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); hypre_TFree(pre_send_request, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 5b0bcc446c..d3f67bb9f8 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -817,29 +817,68 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); - if (!hypre_CommHandleSendBuffersMPI(comm_handle) && hypre_MPINeedHostBuffer(send_memory_alocation)) + if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); - hypre_CommHandleSendBuffersMPILocation(comm_handle) = location; - hypre_CommHandleSendBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, - location); + if (!hypre_CommHandleSendBuffersMPI(comm_handle)) + { + hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + hypre_CommHandleSendBuffersMPILocation(comm_handle) = location; + hypre_CommHandleSendBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_send_elems *size_of_elem, + location); + } + else + { + hypre_printf("[%s, %d] CommHandleSendBufferMPI existed!\n", __FILE__, __LINE__); + } } - if (!hypre_CommHandleRecvBuffersMPI(comm_handle) && hypre_MPINeedHostBuffer(recv_memory_alocation)) + if 
(hypre_NeedMPICopyBuffer(recv_memory_alocation)) { - hypre_MemoryLocation location = hypre_HandleMPIHostBufferLocation(hypre_handle()); - hypre_CommHandleRecvBuffersMPILocation(comm_handle) = location; - hypre_CommHandleRecvBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, - location); + if (!hypre_CommHandleRecvBuffersMPI(comm_handle)) + { + hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + hypre_CommHandleRecvBuffersMPILocation(comm_handle) = location; + hypre_CommHandleRecvBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_recv_elems *size_of_elem, + location); + } + else + { + hypre_printf("[%s, %d] CommHandleRecvBufferMPI existed!\n", __FILE__, __LINE__); + } } MPI_Comm comm = hypre_CommHandleComm(comm_handle); +#if defined(HYPRE_DEBUG) + if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm SendBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm RecvBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendBufferLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); + } +#endif hypre_MPICommSetSendLocation(comm, send_memory_alocation); - hypre_MPICommSetSendBuffer(comm, hypre_CommHandleSendBuffersMPI(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_CommHandleSendBuffersMPILocation(comm_handle)); - 
hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); + hypre_MPICommSetSendBuffer(comm, hypre_CommHandleSendBuffersMPI(comm_handle)); hypre_MPICommSetRecvBuffer(comm, hypre_CommHandleRecvBuffersMPI(comm_handle)); + hypre_MPICommSetSendBufferLocation(comm, hypre_CommHandleSendBuffersMPILocation(comm_handle)); hypre_MPICommSetRecvBufferLocation(comm, hypre_CommHandleRecvBuffersMPILocation(comm_handle)); return hypre_error_flag; @@ -1354,6 +1393,18 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) _hypre_TFree(hypre_CommHandleSendBuffersMPI(comm_handle), hypre_CommHandleSendBuffersMPILocation(comm_handle)); _hypre_TFree(hypre_CommHandleRecvBuffersMPI(comm_handle), hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + /* attributes should be deleted when the communicator is being freed * + * but since we delete comm right after, so we don't .... */ + #if 1 + hypre_MPICommDeleteSendLocation(comm); + hypre_MPICommDeleteRecvLocation(comm); + hypre_MPICommDeleteSendBufferLocation(comm); + hypre_MPICommDeleteRecvBufferLocation(comm); + hypre_MPICommDeleteSendBuffer(comm); + hypre_MPICommDeleteRecvBuffer(comm); + if (post_recv_request) { hypre_MPICommDeletePostRecvRequest(comm); } + #endif + hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); hypre_MPI_Comm_free(&comm); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 32db462fed..d16db28ba1 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1966,7 +1966,7 @@ hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) so we always has a host buffer for MPI. O.w. make sure return Z E R O! 
*/ - return 1; + return 0; #endif } From f2def9313fcb3e190bbde122125f073f37ecea5a Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 11 Feb 2025 13:48:07 -0800 Subject: [PATCH 75/90] fix mpi issue --- src/parcsr_ls/par_rap.c | 29 +++++++++++------------------ src/utilities/mpistubs.c | 2 +- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/parcsr_ls/par_rap.c b/src/parcsr_ls/par_rap.c index c432b36940..96760e985f 100644 --- a/src/parcsr_ls/par_rap.c +++ b/src/parcsr_ls/par_rap.c @@ -48,7 +48,6 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, HYPRE_Int num_cols_offd_RT = hypre_CSRMatrixNumCols(RT_offd); HYPRE_Int num_rows_offd_RT = hypre_CSRMatrixNumRows(RT_offd); hypre_ParCSRCommPkg *comm_pkg_RT = hypre_ParCSRMatrixCommPkg(RT); - HYPRE_Int num_recvs_RT = 0; HYPRE_Int num_sends_RT = 0; HYPRE_Int *send_map_starts_RT = NULL; HYPRE_Int *send_map_elmts_RT = NULL; @@ -192,7 +191,6 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, if (comm_pkg_RT) { - num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT); num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT); send_map_starts_RT = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT); send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT); @@ -201,7 +199,6 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, { hypre_MatvecCommPkgCreate(RT); comm_pkg_RT = hypre_ParCSRMatrixCommPkg(RT); - num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT); num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT); send_map_starts_RT = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT); send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT); @@ -1040,16 +1037,15 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, #endif RAP_ext_size = 0; - if (num_sends_RT || num_recvs_RT) - { - void *request; - hypre_ExchangeExternalRowsInit(RAP_int, comm_pkg_RT, &request); - RAP_ext = hypre_ExchangeExternalRowsWait(request); - RAP_ext_i = hypre_CSRMatrixI(RAP_ext); - 
RAP_ext_j = hypre_CSRMatrixBigJ(RAP_ext); - RAP_ext_data = hypre_CSRMatrixData(RAP_ext); - RAP_ext_size = RAP_ext_i[hypre_CSRMatrixNumRows(RAP_ext)]; - } + + void *request; + hypre_ExchangeExternalRowsInit(RAP_int, comm_pkg_RT, &request); + RAP_ext = hypre_ExchangeExternalRowsWait(request); + RAP_ext_i = hypre_CSRMatrixI(RAP_ext); + RAP_ext_j = hypre_CSRMatrixBigJ(RAP_ext); + RAP_ext_data = hypre_CSRMatrixData(RAP_ext); + RAP_ext_size = RAP_ext_i[hypre_CSRMatrixNumRows(RAP_ext)]; + if (num_cols_offd_RT) { hypre_CSRMatrixDestroy(RAP_int); @@ -2039,11 +2035,8 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, R_offd = NULL; } - if (num_sends_RT || num_recvs_RT) - { - hypre_CSRMatrixDestroy(RAP_ext); - RAP_ext = NULL; - } + hypre_CSRMatrixDestroy(RAP_ext); + RAP_ext = NULL; hypre_TFree(P_mark_array, HYPRE_MEMORY_HOST); hypre_TFree(A_mark_array, HYPRE_MEMORY_HOST); hypre_TFree(P_ext_diag_i, HYPRE_MEMORY_HOST); diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index d16db28ba1..32db462fed 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1966,7 +1966,7 @@ hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) so we always has a host buffer for MPI. O.w. make sure return Z E R O! 
*/ - return 0; + return 1; #endif } From 105aa04151b3efb929fe66a93e3ef7b2f84315e0 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Tue, 11 Feb 2025 14:17:40 -0800 Subject: [PATCH 76/90] minor changes --- src/parcsr_mv/par_csr_communication.c | 4 ++++ src/struct_mv/struct_communication.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index be7c7970cc..3d3d21af7f 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -58,10 +58,12 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, location); } + #if defined(HYPRE_DEBUG) else { hypre_printf("[%s, %d] ParCSRCommHandleSendBuffer existed!\n", __FILE__, __LINE__); } + #endif } if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) @@ -73,10 +75,12 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, location); } + #if defined(HYPRE_DEBUG) else { hypre_printf("[%s, %d] ParCSRCommHandleRecvBuffer existed!\n", __FILE__, __LINE__); } + #endif } if (hypre_ParCSRCommHandlePersistent(comm_handle)) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index d3f67bb9f8..19632e5f94 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -826,10 +826,12 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, hypre_CommHandleSendBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_send_elems *size_of_elem, location); } +#if defined(HYPRE_DEBUG) else { hypre_printf("[%s, %d] CommHandleSendBufferMPI existed!\n", __FILE__, __LINE__); } +#endif } if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) @@ -841,10 +843,12 @@ hypre_CommHandleAllocateBuffers( 
HYPRE_MemoryLocation send_memory_location, hypre_CommHandleRecvBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_recv_elems *size_of_elem, location); } +#if defined(HYPRE_DEBUG) else { hypre_printf("[%s, %d] CommHandleRecvBufferMPI existed!\n", __FILE__, __LINE__); } +#endif } MPI_Comm comm = hypre_CommHandleComm(comm_handle); From 32b4ade66e4cdd02f89d127b816030e6b332f4eb Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 12 Feb 2025 08:54:23 -0800 Subject: [PATCH 77/90] further simplify code (save this version) --- src/parcsr_mv/_hypre_parcsr_mv.h | 27 +++++ src/parcsr_mv/par_csr_communication.c | 145 +++++++++++--------------- src/parcsr_mv/par_csr_communication.h | 27 +++++ 3 files changed, 112 insertions(+), 87 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 1ec1418153..55da70ab2f 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -69,6 +69,33 @@ hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) return job_type; } +static inline HYPRE_Int +hypre_ParCSRCommHandleIsTransposeJob(HYPRE_Int job) +{ + HYPRE_Int trans = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_BIGINT: + { + trans = 0; + break; + } + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + { + trans = 1; + break; + } + default: + break; + } + return trans; +} + static inline hypre_MPI_Datatype hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) { diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 3d3d21af7f..afa17a4885 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -260,6 +260,12 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, HYPRE_Int num_send_elems = 0; HYPRE_Int num_recv_elems = 0; HYPRE_Int data_size; + HYPRE_Int 
mpi_num_recvs = 0; + HYPRE_Int mpi_num_sends = 0; + HYPRE_Int *mpi_send_displs = NULL; + HYPRE_Int *mpi_recv_displs = NULL; + HYPRE_Int *mpi_send_procs = NULL; + HYPRE_Int *mpi_recv_procs = NULL; hypre_MPI_Comm_dup(comm_orig, &comm); hypre_MPI_Type_size(mpi_dtype, &data_size); @@ -267,96 +273,61 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, hypre_ParCSRCommHandlePersistent(comm_handle) = persistent; hypre_ParCSRCommHandleComm(comm_handle) = comm; - switch (hypre_ParCSRCommHandleGetJobType(job)) + if (hypre_ParCSRCommHandleIsTransposeJob(job)) { - case HYPRE_COMM_PKG_JOB_COMPLEX: - case HYPRE_COMM_PKG_JOB_INT: - case HYPRE_COMM_PKG_JOB_BIGINT: - { - num_send_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_recv_elems = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - if (persistent) - { - hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests); - - hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs); - } - else - { - hypre_MPI_Irecv_Multiple(recv_data, num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests); - - hypre_MPI_Isend_Multiple(send_data, num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests + num_recvs); - } - - break; - } - - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - { - num_send_elems = 
hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - num_recv_elems = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, - num_send_elems, num_recv_elems, data_size, comm_handle); - - if (persistent) - { - hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), - num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests); - - hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), - num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + num_sends); - } - else - { - hypre_MPI_Irecv_Multiple(recv_data, num_sends, - hypre_ParCSRCommPkgSendMapStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgSendProcs(comm_pkg), - 0, comm, requests); - - hypre_MPI_Isend_Multiple(send_data, num_recvs, - hypre_ParCSRCommPkgRecvVecStarts(comm_pkg), - NULL, mpi_dtype, - hypre_ParCSRCommPkgRecvProcs(comm_pkg), - 0, comm, requests + num_sends); - } + mpi_num_recvs = num_sends; + mpi_num_sends = num_recvs; + mpi_recv_displs = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + mpi_send_displs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); + mpi_recv_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); + mpi_send_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); + } + else + { + mpi_num_recvs = num_recvs; + mpi_num_sends = num_sends; + mpi_recv_displs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); + mpi_send_displs = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + mpi_recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); + mpi_send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); + } + num_recv_elems = mpi_recv_displs[mpi_num_recvs]; + num_send_elems = mpi_send_displs[mpi_num_sends]; - break; - } + hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, + num_send_elems, num_recv_elems, 
data_size, comm_handle); - default: - break; + if (persistent) + { + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + mpi_num_recvs, + mpi_recv_displs, + NULL, mpi_dtype, + mpi_recv_procs, + 0, comm, requests); + + hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + mpi_num_sends, + mpi_send_displs, + NULL, mpi_dtype, + mpi_send_procs, + 0, comm, requests + mpi_num_recvs); + } + else + { + hypre_MPI_Irecv_Multiple(recv_data, + mpi_num_recvs, + mpi_recv_displs, + NULL, mpi_dtype, + mpi_recv_procs, + 0, comm, requests); + + hypre_MPI_Isend_Multiple(send_data, + mpi_num_sends, + mpi_send_displs, + NULL, mpi_dtype, + mpi_send_procs, + 0, comm, requests + mpi_num_recvs); } /*-------------------------------------------------------------------- diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 549b892a28..5e29f949a8 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -53,6 +53,33 @@ hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) return job_type; } +static inline HYPRE_Int +hypre_ParCSRCommHandleIsTransposeJob(HYPRE_Int job) +{ + HYPRE_Int trans = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_BIGINT: + { + trans = 0; + break; + } + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + { + trans = 1; + break; + } + default: + break; + } + return trans; +} + static inline hypre_MPI_Datatype hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) { From 0d93e48659ce96dfb3d179ca1f9ed82305b9768f Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 12 Feb 2025 12:48:12 -0800 Subject: [PATCH 78/90] further simplify code --- src/parcsr_mv/_hypre_parcsr_mv.h | 9 ----- src/parcsr_mv/par_csr_communication.c | 57 +++++++++++---------------- 
src/parcsr_mv/par_csr_communication.h | 9 ----- src/struct_mv/_hypre_struct_mv.h | 9 ----- src/struct_mv/struct_communication.c | 57 +++++++++++---------------- src/struct_mv/struct_communication.h | 9 ----- 6 files changed, 46 insertions(+), 104 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 55da70ab2f..326a4f9fa5 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -133,13 +133,8 @@ typedef struct HYPRE_Int persistent; void *send_data; void *recv_data; - /* send/recv buffers to copy to/from */ - void *send_buffer; - void *recv_buffer; HYPRE_MemoryLocation send_location; HYPRE_MemoryLocation recv_location; - hypre_MemoryLocation send_buffer_location; - hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; MPI_Comm comm; @@ -153,12 +148,8 @@ typedef struct #define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) -#define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) #define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) -#define hypre_ParCSRCommHandleSendBufferLocation(comm_handle) (comm_handle -> send_buffer_location) -#define hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) (comm_handle -> recv_buffer_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) diff --git a/src/parcsr_mv/par_csr_communication.c 
b/src/parcsr_mv/par_csr_communication.c index afa17a4885..512f9540f6 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -46,41 +46,23 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat HYPRE_Int size_of_elem, hypre_ParCSRCommHandle *comm_handle ) { - hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); - hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + hypre_MemoryLocation send_buffer_location = hypre_MEMORY_UNDEFINED; + hypre_MemoryLocation recv_buffer_location = hypre_MEMORY_UNDEFINED; + void *send_buffer = NULL; + void *recv_buffer = NULL; if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - if (!hypre_ParCSRCommHandleSendBuffer(comm_handle)) - { - hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - hypre_ParCSRCommHandleSendBufferLocation(comm_handle) = location; - hypre_ParCSRCommHandleSendBuffer(comm_handle) = _hypre_TAlloc(char, num_send_elems * size_of_elem, - location); - } - #if defined(HYPRE_DEBUG) - else - { - hypre_printf("[%s, %d] ParCSRCommHandleSendBuffer existed!\n", __FILE__, __LINE__); - } - #endif + send_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + send_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_buffer_location); } if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) { - if (!hypre_ParCSRCommHandleRecvBuffer(comm_handle)) - { - hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) = location; - hypre_ParCSRCommHandleRecvBuffer(comm_handle) = _hypre_TAlloc(char, num_recv_elems * size_of_elem, - location); - } - #if 
defined(HYPRE_DEBUG) - else - { - hypre_printf("[%s, %d] ParCSRCommHandleRecvBuffer existed!\n", __FILE__, __LINE__); - } - #endif + recv_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + recv_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_buffer_location); } if (hypre_ParCSRCommHandlePersistent(comm_handle)) @@ -97,6 +79,7 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat } MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + #if defined(HYPRE_DEBUG) if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) { @@ -123,12 +106,13 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); } #endif + hypre_MPICommSetSendLocation(comm, send_memory_alocation); hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); - hypre_MPICommSetSendBuffer(comm, hypre_ParCSRCommHandleSendBuffer(comm_handle)); - hypre_MPICommSetRecvBuffer(comm, hypre_ParCSRCommHandleRecvBuffer(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - hypre_MPICommSetRecvBufferLocation(comm, hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_MPICommSetSendBuffer(comm, send_buffer); + hypre_MPICommSetRecvBuffer(comm, recv_buffer); + hypre_MPICommSetSendBufferLocation(comm, send_buffer_location); + hypre_MPICommSetRecvBufferLocation(comm, recv_buffer_location); return hypre_error_flag; } @@ -416,8 +400,13 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_ParCSRCommHandleWait(comm_handle); } - _hypre_TFree(hypre_ParCSRCommHandleSendBuffer(comm_handle), hypre_ParCSRCommHandleSendBufferLocation(comm_handle)); - _hypre_TFree(hypre_ParCSRCommHandleRecvBuffer(comm_handle), hypre_ParCSRCommHandleRecvBufferLocation(comm_handle)); + hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); + 
hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); + void *send_buffer = hypre_MPICommGetSendBuffer(comm); + void *recv_buffer = hypre_MPICommGetRecvBuffer(comm); + + _hypre_TFree(send_buffer, send_buffer_location); + _hypre_TFree(recv_buffer, recv_buffer_location); if (persistent) diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 5e29f949a8..4b74bb8990 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -117,13 +117,8 @@ typedef struct HYPRE_Int persistent; void *send_data; void *recv_data; - /* send/recv buffers to copy to/from */ - void *send_buffer; - void *recv_buffer; HYPRE_MemoryLocation send_location; HYPRE_MemoryLocation recv_location; - hypre_MemoryLocation send_buffer_location; - hypre_MemoryLocation recv_buffer_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; MPI_Comm comm; @@ -137,12 +132,8 @@ typedef struct #define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) #define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) #define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendBuffer(comm_handle) (comm_handle -> send_buffer) -#define hypre_ParCSRCommHandleRecvBuffer(comm_handle) (comm_handle -> recv_buffer) #define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) #define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) -#define hypre_ParCSRCommHandleSendBufferLocation(comm_handle) (comm_handle -> send_buffer_location) -#define hypre_ParCSRCommHandleRecvBufferLocation(comm_handle) (comm_handle -> recv_buffer_location) #define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) #define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) #define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> 
requests[i]) diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index f0f971c43e..5f92c15975 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -940,11 +940,6 @@ typedef struct hypre_CommHandle_struct HYPRE_Complex **send_buffers; HYPRE_Complex **recv_buffers; - void *send_buffers_mpi; - void *recv_buffers_mpi; - hypre_MemoryLocation send_buffers_mpi_location; - hypre_MemoryLocation recv_buffers_mpi_location; - /* set = 0, add = 1 */ HYPRE_Int action; MPI_Comm comm; @@ -1054,10 +1049,6 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) #define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) #define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) -#define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) -#define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) #define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 19632e5f94..b50f400278 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -814,44 +814,27 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, HYPRE_Int size_of_elem, hypre_CommHandle *comm_handle ) { - hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); - hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); + hypre_MemoryLocation recv_memory_alocation = 
hypre_GetActualMemLocation(recv_memory_location); + hypre_MemoryLocation send_buffer_location = hypre_MEMORY_UNDEFINED; + hypre_MemoryLocation recv_buffer_location = hypre_MEMORY_UNDEFINED; + void *send_buffer = NULL; + void *recv_buffer = NULL; if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - if (!hypre_CommHandleSendBuffersMPI(comm_handle)) - { - hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - hypre_CommHandleSendBuffersMPILocation(comm_handle) = location; - hypre_CommHandleSendBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_send_elems *size_of_elem, - location); - } -#if defined(HYPRE_DEBUG) - else - { - hypre_printf("[%s, %d] CommHandleSendBufferMPI existed!\n", __FILE__, __LINE__); - } -#endif + send_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + send_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_buffer_location); } if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) { - if (!hypre_CommHandleRecvBuffersMPI(comm_handle)) - { - hypre_MemoryLocation location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - hypre_CommHandleRecvBuffersMPILocation(comm_handle) = location; - hypre_CommHandleRecvBuffersMPI(comm_handle) = _hypre_TAlloc(char, num_recv_elems *size_of_elem, - location); - } -#if defined(HYPRE_DEBUG) - else - { - hypre_printf("[%s, %d] CommHandleRecvBufferMPI existed!\n", __FILE__, __LINE__); - } -#endif + recv_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + recv_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_buffer_location); } MPI_Comm comm = hypre_CommHandleComm(comm_handle); + #if defined(HYPRE_DEBUG) if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) { @@ -878,12 +861,13 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); } #endif + hypre_MPICommSetSendLocation(comm, send_memory_alocation); 
hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); - hypre_MPICommSetSendBuffer(comm, hypre_CommHandleSendBuffersMPI(comm_handle)); - hypre_MPICommSetRecvBuffer(comm, hypre_CommHandleRecvBuffersMPI(comm_handle)); - hypre_MPICommSetSendBufferLocation(comm, hypre_CommHandleSendBuffersMPILocation(comm_handle)); - hypre_MPICommSetRecvBufferLocation(comm, hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + hypre_MPICommSetSendBuffer(comm, send_buffer); + hypre_MPICommSetRecvBuffer(comm, recv_buffer); + hypre_MPICommSetSendBufferLocation(comm, send_buffer_location); + hypre_MPICommSetRecvBufferLocation(comm, recv_buffer_location); return hypre_error_flag; } @@ -1394,8 +1378,13 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_StructCommunicationReleaseBuffer(recv_buffers[0], memory_location); } - _hypre_TFree(hypre_CommHandleSendBuffersMPI(comm_handle), hypre_CommHandleSendBuffersMPILocation(comm_handle)); - _hypre_TFree(hypre_CommHandleRecvBuffersMPI(comm_handle), hypre_CommHandleRecvBuffersMPILocation(comm_handle)); + hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); + hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); + void *send_buffer = hypre_MPICommGetSendBuffer(comm); + void *recv_buffer = hypre_MPICommGetRecvBuffer(comm); + + _hypre_TFree(send_buffer, send_buffer_location); + _hypre_TFree(recv_buffer, recv_buffer_location); /* attributes should be deleted when the communicator is being freed * * but since we delete comm right after, so we don't .... 
*/ diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index f56c175e8d..a4e87dfec8 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -142,11 +142,6 @@ typedef struct hypre_CommHandle_struct HYPRE_Complex **send_buffers; HYPRE_Complex **recv_buffers; - void *send_buffers_mpi; - void *recv_buffers_mpi; - hypre_MemoryLocation send_buffers_mpi_location; - hypre_MemoryLocation recv_buffers_mpi_location; - /* set = 0, add = 1 */ HYPRE_Int action; MPI_Comm comm; @@ -256,10 +251,6 @@ typedef struct hypre_CommHandle_struct #define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) #define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) #define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) -#define hypre_CommHandleSendBuffersMPILocation(comm_handle) (comm_handle -> send_buffers_mpi_location) -#define hypre_CommHandleRecvBuffersMPILocation(comm_handle) (comm_handle -> recv_buffers_mpi_location) #define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif From d05ec877e0b309400288943e10f2acfb3bd56fb4 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 12 Feb 2025 17:10:32 -0800 Subject: [PATCH 79/90] another way to impl. 
wrap MPI_COMM --- src/parcsr_mv/_hypre_parcsr_mv.h | 2 +- src/parcsr_mv/par_csr_communication.c | 17 +- src/parcsr_mv/par_csr_communication.h | 2 +- src/struct_mv/_hypre_struct_mv.h | 2 +- src/struct_mv/struct_communication.c | 37 ++-- src/struct_mv/struct_communication.h | 2 +- src/utilities/_hypre_utilities.h | 98 +++++----- src/utilities/general.c | 13 -- src/utilities/handle.h | 13 -- src/utilities/mpistubs.c | 267 +++++++++++--------------- src/utilities/mpistubs.h | 85 +++++--- 11 files changed, 249 insertions(+), 289 deletions(-) diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 326a4f9fa5..cf7d1ea9c6 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -137,7 +137,7 @@ typedef struct HYPRE_MemoryLocation recv_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - MPI_Comm comm; + hypre_MPICommWrapper *comm; } hypre_ParCSRCommHandle; /*-------------------------------------------------------------------------- diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 512f9540f6..2670db7051 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -78,7 +78,7 @@ hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_locat } } - MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); #if defined(HYPRE_DEBUG) if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) @@ -236,7 +236,7 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); - MPI_Comm comm; + hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); HYPRE_Int num_requests = num_sends + num_recvs; hypre_MPI_Request *requests = 
hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); @@ -251,7 +251,6 @@ hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, HYPRE_Int *mpi_send_procs = NULL; HYPRE_Int *mpi_recv_procs = NULL; - hypre_MPI_Comm_dup(comm_orig, &comm); hypre_MPI_Type_size(mpi_dtype, &data_size); hypre_ParCSRCommHandlePersistent(comm_handle) = persistent; @@ -359,7 +358,7 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } } - MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); if (post_recv_request) @@ -390,10 +389,10 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) return hypre_error_flag; } - MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); - hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); - hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); - HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); if (!persistent) { @@ -439,7 +438,7 @@ hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); hypre_TFree(pre_send_request, HYPRE_MEMORY_HOST); - hypre_MPI_Comm_free(&comm); + hypre_TFree(comm, HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); return hypre_error_flag; diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 4b74bb8990..636a1fda9d 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ 
b/src/parcsr_mv/par_csr_communication.h @@ -121,7 +121,7 @@ typedef struct HYPRE_MemoryLocation recv_location; HYPRE_Int num_requests; hypre_MPI_Request *requests; - MPI_Comm comm; + hypre_MPICommWrapper *comm; } hypre_ParCSRCommHandle; /*-------------------------------------------------------------------------- diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index 5f92c15975..bca2198a32 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -942,7 +942,7 @@ typedef struct hypre_CommHandle_struct /* set = 0, add = 1 */ HYPRE_Int action; - MPI_Comm comm; + hypre_MPICommWrapper *comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index b50f400278..408a41814e 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -833,7 +833,7 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, recv_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_buffer_location); } - MPI_Comm comm = hypre_CommHandleComm(comm_handle); + hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); #if defined(HYPRE_DEBUG) if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) @@ -893,12 +893,12 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, { hypre_CommHandle *comm_handle = hypre_CTAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); - HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); - HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); - MPI_Comm comm; + HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); + HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int 
num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); + hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); HYPRE_Int num_requests; hypre_MPI_Request *requests; @@ -921,9 +921,8 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int i, j, d, ll; HYPRE_Int size; - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - hypre_MPI_Comm_dup(comm_orig, &comm); hypre_CommHandleComm(comm_handle) = comm; /*-------------------------------------------------------------------- @@ -1192,16 +1191,16 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { - MPI_Comm comm = hypre_CommHandleComm(comm_handle); - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); - HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); - HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Int action = hypre_CommHandleAction(comm_handle); + hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); + hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); + HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); + HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); + HYPRE_Int action = hypre_CommHandleAction(comm_handle); - HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); - HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); + HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); hypre_CommType *comm_type; hypre_CommEntryType *comm_entry; @@ -1400,7 +1399,7 
@@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); - hypre_MPI_Comm_free(&comm); + hypre_TFree(comm, HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index a4e87dfec8..c575ced6a6 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -144,7 +144,7 @@ typedef struct hypre_CommHandle_struct /* set = 0, add = 1 */ HYPRE_Int action; - MPI_Comm comm; + hypre_MPICommWrapper *comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 22547f31e2..18c1d519a5 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -790,9 +790,6 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; - /* MPI */ - HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -843,16 +840,6 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) -#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) -#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) -#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) -#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) -#define hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) -#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) -#define 
hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) -#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) - #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) @@ -1369,6 +1356,29 @@ typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; * Everything below this applies to both ifdef cases above *****************************************************************************/ +typedef struct +{ + hypre_MPI_Comm comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; + void *send_buffer; + void *recv_buffer; + hypre_MPI_Request *pre_send_request; + hypre_MPI_Request *post_recv_request; +} hypre_MPICommWrapper; + +#define hypre_MPICommWrapperComm(wrapper) ((wrapper) -> comm) +#define hypre_MPICommWrapperSendLocation(wrapper) ((wrapper) -> send_location) +#define hypre_MPICommWrapperRecvLocation(wrapper) ((wrapper) -> recv_location) +#define hypre_MPICommWrapperSendBufferLocation(wrapper) ((wrapper) -> send_buffer_location) +#define hypre_MPICommWrapperRecvBufferLocation(wrapper) ((wrapper) -> recv_buffer_location) +#define hypre_MPICommWrapperSendBuffer(wrapper) ((wrapper) -> send_buffer) +#define hypre_MPICommWrapperRecvBuffer(wrapper) ((wrapper) -> recv_buffer) +#define hypre_MPICommWrapperPreSendRequest(wrapper) ((wrapper) -> pre_send_request) +#define hypre_MPICommWrapperPostRecvRequest(wrapper) ((wrapper) -> post_recv_request) + #define HYPRE_MPI_GREQUEST_FREE 1 #define HYPRE_MPI_GREQUEST_COPY 2 @@ -1487,13 +1497,13 @@ HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int 
hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -1513,37 +1523,39 @@ HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); -hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); 
-hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); -hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm); -hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm); - -HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); -HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); - -HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation 
hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); + +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm); hypre_int hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); +hypre_MPICommWrapper *hypre_MPICommWrapperCreate(hypre_MPI_Comm comm); + #ifdef __cplusplus } #endif diff --git 
a/src/utilities/general.c b/src/utilities/general.c index 6389403fb0..1347ddbbd4 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -47,13 +47,6 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif - HYPRE_Int i; - for (i = 0; i < HYPRE_MAX_NUM_COMM_KEYS; i++) - { - hypre_MPI_Comm_create_keyval( hypre_MPI_COMM_NULL_COPY_FN, hypre_MPI_COMM_NULL_DELETE_FN, - &hypre_HandleMPICommKeys(hypre_handle_)[i], (void *)0 ); - } - #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) #if defined(HYPRE_USING_GPU_AWARE_MPI) hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; @@ -81,12 +74,6 @@ hypre_HandleDestroy(hypre_Handle *hypre_handle_) hypre_HandleDeviceData(hypre_handle_) = NULL; #endif - HYPRE_Int i; - for (i = 0; i < HYPRE_MAX_NUM_COMM_KEYS; i++) - { - hypre_MPI_Comm_free_keyval(&hypre_HandleMPICommKeys(hypre_handle_)[i]); - } - /* Deallocate error messages in error handler */ hypre_error_handler_clear_messages(); diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 0c4d1ca266..1fa3a1d5eb 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -46,9 +46,6 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; - /* MPI */ - HYPRE_Int comm_keys[HYPRE_MAX_NUM_COMM_KEYS]; - #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -99,16 +96,6 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) -#define hypre_HandleMPICommKeys(hypre_handle) ((hypre_handle) -> comm_keys) -#define hypre_HandleMPICommKeySendLocation(hypre_handle) ((hypre_handle) -> comm_keys[0]) -#define hypre_HandleMPICommKeyRecvLocation(hypre_handle) ((hypre_handle) -> comm_keys[1]) -#define hypre_HandleMPICommKeySendBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[2]) -#define 
hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle) ((hypre_handle) -> comm_keys[3]) -#define hypre_HandleMPICommKeySendBuffer(hypre_handle) ((hypre_handle) -> comm_keys[4]) -#define hypre_HandleMPICommKeyRecvBuffer(hypre_handle) ((hypre_handle) -> comm_keys[5]) -#define hypre_HandleMPICommKeyPreSendRequest(hypre_handle) ((hypre_handle) -> comm_keys[6]) -#define hypre_HandleMPICommKeyPostRecvRequest(hypre_handle) ((hypre_handle) -> comm_keys[7]) - #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 32db462fed..243bc1fe0e 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -1477,15 +1477,15 @@ hypre_MPI_Irecv( void *buf, } HYPRE_Int -hypre_MPI_Isend_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) { if (!num) { @@ -1513,22 +1513,22 @@ hypre_MPI_Isend_Multiple( void *buf, { HYPRE_Int start = displs[i]; HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; - hypre_MPI_Isend((char *) sbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + hypre_MPI_Isend((char *) sbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); } return hypre_error_flag; } HYPRE_Int -hypre_MPI_Irecv_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests) +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) { if (!num) { @@ -1545,7 +1545,7 @@ hypre_MPI_Irecv_Multiple( void *buf, { HYPRE_Int start = displs[i]; HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; - hypre_MPI_Irecv((char *) rbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + hypre_MPI_Irecv((char *) rbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); } if (rbuf != buf) @@ -1582,15 +1582,15 @@ hypre_MPI_Send_init( void *buf, } HYPRE_Int -hypre_MPI_Send_init_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) { if (!num) { @@ -1623,7 +1623,7 @@ hypre_MPI_Send_init_Multiple( void *buf, { HYPRE_Int start = displs[i]; HYPRE_Int len = counts ? 
counts[i] : displs[i + 1] - start; - hypre_MPI_Send_init((char *) sbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + hypre_MPI_Send_init((char *) sbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); } return hypre_error_flag; @@ -1644,15 +1644,15 @@ hypre_MPI_Recv_init( void *buf, } HYPRE_Int -hypre_MPI_Recv_init_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) { if (!num) { @@ -1669,7 +1669,7 @@ hypre_MPI_Recv_init_Multiple( void *buf, { HYPRE_Int start = displs[i]; HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; - hypre_MPI_Recv_init((char *) rbuf + start * data_size, len, datatype, procs[i], tag, comm, &requests[i]); + hypre_MPI_Recv_init((char *) rbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); } if (rbuf != buf) @@ -1971,232 +1971,170 @@ hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) } HYPRE_Int -hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, - hypre_MemoryLocation location) +hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendLocation(handle), (void *) location); + hypre_MPICommWrapperSendLocation(comm) = location; return hypre_error_flag; } hypre_MemoryLocation -hypre_MPICommGetSendLocation(hypre_MPI_Comm comm) +hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm) { - HYPRE_Int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - 
hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (hypre_MPI_Aint) atrr_val; - } - return (location); + return hypre_MPICommWrapperSendLocation(comm); } HYPRE_Int -hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm) +hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendLocation(handle)); + hypre_MPICommWrapperSendLocation(comm) = hypre_MEMORY_UNDEFINED; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, - hypre_MemoryLocation location) +hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle), (void *) location); + hypre_MPICommWrapperRecvLocation(comm) = location; return hypre_error_flag; } hypre_MemoryLocation -hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm) +hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm) { - HYPRE_Int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (hypre_MPI_Aint) atrr_val; - } - return (location); + return hypre_MPICommWrapperRecvLocation(comm); } HYPRE_Int -hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm) +hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvLocation(handle)); + hypre_MPICommWrapperRecvLocation(comm) = hypre_MEMORY_UNDEFINED; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, - hypre_MemoryLocation location) +hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) { - hypre_Handle *handle = 
hypre_handle(); - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle), (void *) location); + hypre_MPICommWrapperSendBufferLocation(comm) = location; return hypre_error_flag; } hypre_MemoryLocation -hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm) +hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm) { - HYPRE_Int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBufferLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (hypre_MPI_Aint) atrr_val; - } - return (location); + return hypre_MPICommWrapperSendBufferLocation(comm); } HYPRE_Int -hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm) +hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendBufferLocation(handle)); + hypre_MPICommWrapperSendBufferLocation(comm) = hypre_MEMORY_UNDEFINED; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation location) +hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle), (void *) location); + hypre_MPICommWrapperRecvBufferLocation(comm) = location; return hypre_error_flag; } hypre_MemoryLocation -hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm) +hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm) { - HYPRE_Int flag, *atrr_val; - hypre_MemoryLocation location = hypre_MEMORY_UNDEFINED; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(hypre_handle()), &atrr_val, &flag); - if (flag) - { - location = (hypre_MPI_Aint) atrr_val; - } - return (location); + return hypre_MPICommWrapperRecvBufferLocation(comm); } HYPRE_Int 
-hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm) +hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm) { - hypre_Handle *handle = hypre_handle(); - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvBufferLocation(handle)); + hypre_MPICommWrapperRecvBufferLocation(comm) = hypre_MEMORY_UNDEFINED; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, - void *buffer) +hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, + void *buffer) { - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), buffer); + hypre_MPICommWrapperSendBuffer(comm) = buffer; return hypre_error_flag; } void * -hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm) +hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm) { - HYPRE_Int flag; - void *buffer = NULL; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle()), &buffer, &flag); - if (!flag) - { - buffer = NULL; - } - return (buffer); + return hypre_MPICommWrapperSendBuffer(comm); } HYPRE_Int -hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm) +hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm) { - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeySendBuffer(hypre_handle())); + hypre_MPICommWrapperSendBuffer(comm) = NULL; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void *buffer) +hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, + void *buffer) { - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), buffer); + hypre_MPICommWrapperRecvBuffer(comm) = buffer; return hypre_error_flag; } void * -hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm) +hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm) { - HYPRE_Int flag; - void *buffer = NULL; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle()), &buffer, &flag); - if (!flag) - { - buffer = NULL; - } - return (buffer); + return hypre_MPICommWrapperRecvBuffer(comm); } HYPRE_Int 
-hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm) +hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm) { - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyRecvBuffer(hypre_handle())); + hypre_MPICommWrapperRecvBuffer(comm) = NULL; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, - hypre_MPI_Request *request) +hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, + hypre_MPI_Request *request) { - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle()), request); + hypre_MPICommWrapperPreSendRequest(comm) = request; return hypre_error_flag; } hypre_MPI_Request * -hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm) +hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm) { - HYPRE_Int flag; - hypre_MPI_Request *request = NULL; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle()), &request, &flag); - if (!flag) - { - request = NULL; - } - return (request); + return hypre_MPICommWrapperPreSendRequest(comm); } HYPRE_Int -hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm) +hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm) { - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyPreSendRequest(hypre_handle())); + hypre_MPICommWrapperPreSendRequest(comm) = NULL; return hypre_error_flag; } HYPRE_Int -hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, - hypre_MPI_Request *request) +hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, + hypre_MPI_Request *request) { - hypre_MPI_Comm_set_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle()), request); + hypre_MPICommWrapperPostRecvRequest(comm) = request; return hypre_error_flag; } hypre_MPI_Request * -hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm) +hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm) { - HYPRE_Int flag; - hypre_MPI_Request *request = NULL; - hypre_MPI_Comm_get_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle()), &request, 
&flag); - if (!flag) - { - request = NULL; - } - return (request); + return hypre_MPICommWrapperPostRecvRequest(comm); } HYPRE_Int -hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm) +hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm) { - hypre_MPI_Comm_delete_attr(comm, hypre_HandleMPICommKeyPostRecvRequest(hypre_handle())); + hypre_MPICommWrapperPostRecvRequest(comm) = NULL; return hypre_error_flag; } @@ -2344,3 +2282,16 @@ hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status) hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } + +hypre_MPICommWrapper * +hypre_MPICommWrapperCreate(hypre_MPI_Comm comm) +{ + hypre_MPICommWrapper *wrapper = hypre_CTAlloc(hypre_MPICommWrapper, 1, HYPRE_MEMORY_HOST); + hypre_MPICommWrapperComm(wrapper) = comm; + hypre_MPICommWrapperSendLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperRecvLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperSendBufferLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperRecvBufferLocation(wrapper) = hypre_MEMORY_UNDEFINED; + + return wrapper; +} diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index acf436c3d0..343ff06aaf 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -272,6 +272,29 @@ typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; * Everything below this applies to both ifdef cases above *****************************************************************************/ +typedef struct +{ + hypre_MPI_Comm comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; + void *send_buffer; + void *recv_buffer; + hypre_MPI_Request *pre_send_request; + hypre_MPI_Request *post_recv_request; +} hypre_MPICommWrapper; + +#define hypre_MPICommWrapperComm(wrapper) ((wrapper) -> comm) +#define 
hypre_MPICommWrapperSendLocation(wrapper) ((wrapper) -> send_location) +#define hypre_MPICommWrapperRecvLocation(wrapper) ((wrapper) -> recv_location) +#define hypre_MPICommWrapperSendBufferLocation(wrapper) ((wrapper) -> send_buffer_location) +#define hypre_MPICommWrapperRecvBufferLocation(wrapper) ((wrapper) -> recv_buffer_location) +#define hypre_MPICommWrapperSendBuffer(wrapper) ((wrapper) -> send_buffer) +#define hypre_MPICommWrapperRecvBuffer(wrapper) ((wrapper) -> recv_buffer) +#define hypre_MPICommWrapperPreSendRequest(wrapper) ((wrapper) -> pre_send_request) +#define hypre_MPICommWrapperPostRecvRequest(wrapper) ((wrapper) -> post_recv_request) + #define HYPRE_MPI_GREQUEST_FREE 1 #define HYPRE_MPI_GREQUEST_COPY 2 @@ -390,13 +413,13 @@ HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); 
HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, -hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *requests ); +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, @@ -416,37 +439,39 @@ HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); -hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPI_Comm comm); -hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPI_Comm comm); -void* hypre_MPICommGetSendBuffer(hypre_MPI_Comm comm); -void* hypre_MPICommGetRecvBuffer(hypre_MPI_Comm comm); -hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPI_Comm comm); -hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPI_Comm comm); - -HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPI_Comm comm, hypre_MemoryLocation); -HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPI_Comm comm, void*); -HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); -HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPI_Comm comm, hypre_MPI_Request *request); - -HYPRE_Int 
hypre_MPICommDeleteSendLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPI_Comm comm); -HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPI_Comm comm); +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); + +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm); +HYPRE_Int 
hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm); hypre_int hypre_grequest_free_fn(void *extra_state); hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); +hypre_MPICommWrapper *hypre_MPICommWrapperCreate(hypre_MPI_Comm comm); + #ifdef __cplusplus } #endif From b90002db973978567bc61f3424bb85900ba03760 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Thu, 13 Feb 2025 13:16:42 -0800 Subject: [PATCH 80/90] redid struct buffer allocations --- src/struct_mv/struct_communication.c | 183 +++++++++++++-------------- src/utilities/_hypre_utilities.h | 13 +- src/utilities/general.c | 5 +- src/utilities/handle.h | 11 -- src/utilities/protos.h | 3 + 5 files changed, 98 insertions(+), 117 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 408a41814e..51faa429a9 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -764,73 +764,112 @@ hypre_CommTypeSetEntry( hypre_Box *box, return hypre_error_flag; } -//TODO size is confusing -HYPRE_Complex * -hypre_StructCommunicationGetBuffer(HYPRE_MemoryLocation memory_location, - HYPRE_Int size) +/** + * @brief Manages memory buffers for structured communication + * + * This function handles buffer allocation and deallocation based on the `action` parameter. + * - If `action == 0`, it returns a buffer that is enough for the specified `size` + * in the given `memory_location`. 
+ * - If `action == 1`, it releases the buffer. + * - There is only one pointer managed. The same ptr will be returned unless a realloc is needed. + * + * @param memory_location The memory location where the buffer should be allocated + * (e.g., host or device memory). + * @param size The size of the buffer in BYTES (used for allocation). + * @param action The action to perform: + * - `0`: Return buffer. + * - `1`: Free buffer. + * + * @return Pointer to the buffer if `action == 0`, or `NULL` if `action == 1`. + */ + +char * +hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, + size_t size, + HYPRE_Int action) { - HYPRE_Complex *ptr; + static char *buffer_ptr = NULL; + static size_t buffer_size = 0; -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) + if (0 == action) { - if (size > hypre_HandleStructCommSendBufferSize(hypre_handle())) + if (size > buffer_size) { - HYPRE_Int new_size = 5 * size; - hypre_HandleStructCommSendBufferSize(hypre_handle()) = new_size; - hypre_TFree(hypre_HandleStructCommSendBuffer(hypre_handle()), memory_location); - hypre_HandleStructCommSendBuffer(hypre_handle()) = hypre_CTAlloc(HYPRE_Complex, new_size, - memory_location); + buffer_size = 2 * size; + hypre_TFree(buffer_ptr, memory_location); + buffer_ptr = hypre_TAlloc(char, buffer_size, memory_location); } - - ptr = hypre_HandleStructCommSendBuffer(hypre_handle()); + return buffer_ptr; } - else -#endif + else if (1 == action) { - ptr = hypre_CTAlloc(HYPRE_Complex, size, memory_location); + buffer_size = 0; + hypre_TFree(buffer_ptr, memory_location); } - - return ptr; + return NULL; } HYPRE_Int -hypre_StructCommunicationReleaseBuffer(HYPRE_Complex *buffer, - HYPRE_MemoryLocation memory_location) +hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, + hypre_CommPkg *comm_pkg, + hypre_CommHandle *comm_handle ) { - if 
(hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_HOST) + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(memory_location); + hypre_MemoryLocation send_copy_location = hypre_MEMORY_UNDEFINED; + hypre_MemoryLocation recv_copy_location = hypre_MEMORY_UNDEFINED; + void *send_copy_buffer = NULL; + void *recv_copy_buffer = NULL; + HYPRE_Complex **send_buffers = NULL; + HYPRE_Complex **recv_buffers = NULL; + HYPRE_Complex *buffer_ptr = NULL; + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + HYPRE_Int num_send_elems = hypre_CommPkgSendBufsize(comm_pkg); + HYPRE_Int num_recv_elems = hypre_CommPkgRecvBufsize(comm_pkg); + HYPRE_Int size_of_elem = sizeof(HYPRE_Complex); + HYPRE_Int i; + + buffer_ptr = (HYPRE_Complex *) hypre_StructCommunicationManageBuffer(memory_location, + (num_send_elems + num_recv_elems) * sizeof(HYPRE_Complex), 0); + + /* allocate send buffers */ + send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); + if (num_sends > 0) { - hypre_TFree(buffer, memory_location); + send_buffers[0] = buffer_ptr; + for (i = 1; i < num_sends; i++) + { + hypre_CommType *comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); + send_buffers[i] = send_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); + } } - return hypre_error_flag; -} + /* allocate recv buffers */ + recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); + if (num_recvs > 0) + { + recv_buffers[0] = buffer_ptr + num_send_elems; + for (i = 1; i < num_recvs; i++) + { + hypre_CommType *comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); + recv_buffers[i] = recv_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); + } + } -HYPRE_Int -hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, - HYPRE_MemoryLocation recv_memory_location, - HYPRE_Int num_send_elems, - 
HYPRE_Int num_recv_elems, - HYPRE_Int size_of_elem, - hypre_CommHandle *comm_handle ) -{ - hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); - hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); - hypre_MemoryLocation send_buffer_location = hypre_MEMORY_UNDEFINED; - hypre_MemoryLocation recv_buffer_location = hypre_MEMORY_UNDEFINED; - void *send_buffer = NULL; - void *recv_buffer = NULL; + hypre_CommHandleSendBuffers(comm_handle) = send_buffers; + hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - send_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - send_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_buffer_location); + send_copy_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + send_copy_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_copy_location); } if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) { - recv_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); - recv_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_buffer_location); + recv_copy_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + recv_copy_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_copy_location); } hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); @@ -864,10 +903,10 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, hypre_MPICommSetSendLocation(comm, send_memory_alocation); hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); - hypre_MPICommSetSendBuffer(comm, send_buffer); - hypre_MPICommSetRecvBuffer(comm, recv_buffer); - hypre_MPICommSetSendBufferLocation(comm, send_buffer_location); - hypre_MPICommSetRecvBufferLocation(comm, recv_buffer_location); + hypre_MPICommSetSendBuffer(comm, send_copy_buffer); + hypre_MPICommSetRecvBuffer(comm, recv_copy_buffer); + 
hypre_MPICommSetSendBufferLocation(comm, send_copy_location); + hypre_MPICommSetRecvBufferLocation(comm, recv_copy_location); return hypre_error_flag; } @@ -937,33 +976,10 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * allocate buffers *--------------------------------------------------------------------*/ - /* allocate send buffers */ - send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) - { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers[0] = hypre_StructCommunicationGetBuffer(memory_location, size); - for (i = 1; i < num_sends; i++) - { - comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); - size = hypre_CommTypeBufsize(comm_type); - send_buffers[i] = send_buffers[i - 1] + size; - } - } + hypre_CommHandleAllocateBuffers(memory_location, comm_pkg, comm_handle); - /* allocate recv buffers */ - recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers[0] = hypre_StructCommunicationGetBuffer(memory_location, size); - for (i = 1; i < num_recvs; i++) - { - comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); - size = hypre_CommTypeBufsize(comm_type); - recv_buffers[i] = recv_buffers[i - 1] + size; - } - } + send_buffers = hypre_CommHandleSendBuffers(comm_handle); + recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); /*-------------------------------------------------------------------- * pack send buffers @@ -1048,13 +1064,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } } - hypre_CommHandleAllocateBuffers(memory_location, - memory_location, - hypre_CommPkgSendBufsize(comm_pkg), - hypre_CommPkgRecvBufsize(comm_pkg), - sizeof(HYPRE_Complex), - comm_handle); - /*-------------------------------------------------------------------- * post receives and initiate sends *--------------------------------------------------------------------*/ @@ -1171,8 +1180,6 @@ 
hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, hypre_CommHandleNumRequests(comm_handle) = num_requests; hypre_CommHandleRequests(comm_handle) = requests; hypre_CommHandleStatus(comm_handle) = status; - hypre_CommHandleSendBuffers(comm_handle) = send_buffers; - hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; hypre_CommHandleAction(comm_handle) = action; *comm_handle_ptr = comm_handle; @@ -1199,7 +1206,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); hypre_CommType *comm_type; @@ -1368,15 +1374,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(hypre_CommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(hypre_CommHandleStatus(comm_handle), HYPRE_MEMORY_HOST); - if (num_sends > 0) - { - hypre_StructCommunicationReleaseBuffer(send_buffers[0], memory_location); - } - if (num_recvs > 0) - { - hypre_StructCommunicationReleaseBuffer(recv_buffers[0], memory_location); - } - hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); void *send_buffer = hypre_MPICommGetSendBuffer(comm); diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 18c1d519a5..83ffd7b745 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -784,12 +784,6 @@ typedef struct HYPRE_MemoryLocation memory_location; HYPRE_ExecutionPolicy default_exec_policy; - /* the device buffers needed to do MPI communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - #if defined(HYPRE_USING_GPU) || 
defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -832,11 +826,6 @@ typedef struct #define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) #define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) @@ -2659,6 +2648,8 @@ HYPRE_Int hypre_MemoryTrackerSetFileName(const char *file_name); HYPRE_Int hypre_MagmaInitialize(void); HYPRE_Int hypre_MagmaFinalize(void); #endif + +char *hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, size_t size, HYPRE_Int action); /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
diff --git a/src/utilities/general.c b/src/utilities/general.c index 1347ddbbd4..06eef590d1 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -67,8 +67,9 @@ hypre_HandleDestroy(hypre_Handle *hypre_handle_) return hypre_error_flag; } - hypre_TFree(hypre_HandleStructCommRecvBuffer(hypre_handle_), HYPRE_MEMORY_DEVICE); - hypre_TFree(hypre_HandleStructCommSendBuffer(hypre_handle_), HYPRE_MEMORY_DEVICE); + /* free struct communication buffer memory */ + hypre_StructCommunicationManageBuffer(hypre_HandleMemoryLocation(hypre_handle_), 0, 1); + #if defined(HYPRE_USING_GPU) hypre_DeviceDataDestroy(hypre_HandleDeviceData(hypre_handle_)); hypre_HandleDeviceData(hypre_handle_) = NULL; diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 1fa3a1d5eb..0c4b68bfc5 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -40,12 +40,6 @@ typedef struct HYPRE_MemoryLocation memory_location; HYPRE_ExecutionPolicy default_exec_policy; - /* the device buffers needed to do MPI communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif @@ -88,11 +82,6 @@ typedef struct #define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) #define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - #define hypre_HandleDeviceData(hypre_handle) 
((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) diff --git a/src/utilities/protos.h b/src/utilities/protos.h index a24f252fd5..3b0f2e2ce4 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -524,3 +524,6 @@ HYPRE_Int hypre_MemoryTrackerSetFileName(const char *file_name); HYPRE_Int hypre_MagmaInitialize(void); HYPRE_Int hypre_MagmaFinalize(void); #endif + +/* Consider refactoring or relocating this prototype */ +char *hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, size_t size, HYPRE_Int action); From 8e2ce77154ac5819b4d205f9ee38b8b610c1cc3d Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 14 Feb 2025 08:56:58 -0800 Subject: [PATCH 81/90] bug fix --- src/utilities/general.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utilities/general.c b/src/utilities/general.c index 06eef590d1..383e40711c 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -53,9 +53,10 @@ hypre_HandleCreate(void) #else hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; #endif - hypre_HandleMPIHostBufferLocation(hypre_handle_) = hypre_MEMORY_HOST; #endif + hypre_HandleMPICopyBufferLocation(hypre_handle_) = hypre_MEMORY_HOST; + return hypre_handle_; } From 78a8e7af729f4e7f9573054d04b3223f9d202439 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 14 Feb 2025 22:47:58 -0800 Subject: [PATCH 82/90] bug fix --- src/struct_mv/struct_communication.c | 99 +++++++++++++--------------- 1 file changed, 45 insertions(+), 54 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 51faa429a9..b4e63477c9 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -834,11 +834,11 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, (num_send_elems + num_recv_elems) * sizeof(HYPRE_Complex), 0); /* allocate send buffers */ - send_buffers = 
hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); + send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends + 1, HYPRE_MEMORY_HOST); if (num_sends > 0) { send_buffers[0] = buffer_ptr; - for (i = 1; i < num_sends; i++) + for (i = 1; i <= num_sends; i++) { hypre_CommType *comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); send_buffers[i] = send_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); @@ -846,11 +846,11 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, } /* allocate recv buffers */ - recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); + recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs + 1, HYPRE_MEMORY_HOST); if (num_recvs > 0) { recv_buffers[0] = buffer_ptr + num_send_elems; - for (i = 1; i < num_recvs; i++) + for (i = 1; i <= num_recvs; i++) { hypre_CommType *comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); recv_buffers[i] = recv_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); @@ -930,37 +930,30 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int tag, hypre_CommHandle **comm_handle_ptr ) { - hypre_CommHandle *comm_handle = hypre_CTAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); - - HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); - HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); - MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); - hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); - - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; - - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; - - hypre_CommType *comm_type, *from_type, *to_type; - hypre_CommEntryType *comm_entry; - HYPRE_Int num_entries; - - HYPRE_Int *length_array; - HYPRE_Int *stride_array, unitst_array[HYPRE_MAXDIM + 1]; - HYPRE_Int *order; - - HYPRE_Complex *dptr, *kptr, *lptr; - HYPRE_Int *qptr; - - HYPRE_Int i, j, d, ll; - 
HYPRE_Int size; - - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + hypre_CommHandle *comm_handle = hypre_CTAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); + + HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); + HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); + hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; + hypre_CommType *comm_type, *from_type, *to_type; + hypre_CommEntryType *comm_entry; + HYPRE_Int num_entries; + HYPRE_Int *length_array; + HYPRE_Int *stride_array, unitst_array[HYPRE_MAXDIM + 1]; + HYPRE_Int *order; + HYPRE_Complex *dptr, *kptr, *lptr; + HYPRE_Int *qptr; + HYPRE_Int i, j, d, ll; + HYPRE_Int size; + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); hypre_CommHandleComm(comm_handle) = comm; @@ -1080,10 +1073,9 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); - displs_recv[i] = (recv_buffers[i] - recv_buffers[0]) * sizeof(HYPRE_Complex); procs_recv[i] = hypre_CommTypeProc(comm_type); + displs_recv[i+1] = (recv_buffers[i+1] - recv_buffers[0]) * sizeof(HYPRE_Complex); } - displs_recv[num_recvs] = hypre_CommPkgRecvBufsize(comm_pkg) * sizeof(HYPRE_Complex); } if (num_sends) @@ -1093,10 +1085,9 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); - displs_send[i] = (send_buffers[i] - send_buffers[0]) * sizeof(HYPRE_Complex); procs_send[i] = hypre_CommTypeProc(comm_type); + displs_send[i+1] = (send_buffers[i+1] - send_buffers[0]) * sizeof(HYPRE_Complex); } - 
displs_send[num_sends] = hypre_CommPkgSendBufsize(comm_pkg) * sizeof(HYPRE_Complex); } hypre_MPI_Irecv_Multiple(recv_buffers ? recv_buffers[0] : NULL, @@ -1174,13 +1165,13 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * set up comm_handle and return *--------------------------------------------------------------------*/ - hypre_CommHandleCommPkg(comm_handle) = comm_pkg; - hypre_CommHandleSendData(comm_handle) = send_data; - hypre_CommHandleRecvData(comm_handle) = recv_data; - hypre_CommHandleNumRequests(comm_handle) = num_requests; - hypre_CommHandleRequests(comm_handle) = requests; - hypre_CommHandleStatus(comm_handle) = status; - hypre_CommHandleAction(comm_handle) = action; + hypre_CommHandleCommPkg(comm_handle) = comm_pkg; + hypre_CommHandleSendData(comm_handle) = send_data; + hypre_CommHandleRecvData(comm_handle) = recv_data; + hypre_CommHandleNumRequests(comm_handle) = num_requests; + hypre_CommHandleRequests(comm_handle) = requests; + hypre_CommHandleStatus(comm_handle) = status; + hypre_CommHandleAction(comm_handle) = action; *comm_handle_ptr = comm_handle; @@ -1199,14 +1190,14 @@ HYPRE_Int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); - HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); - HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Int action = hypre_CommHandleAction(comm_handle); + hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); + HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); + HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); + HYPRE_Int action = hypre_CommHandleAction(comm_handle); - HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); - HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + HYPRE_Int ndim = 
hypre_CommPkgNDim(comm_pkg); + HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); hypre_CommType *comm_type; hypre_CommEntryType *comm_entry; @@ -1224,7 +1215,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int i, j, d, ll; - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); /*-------------------------------------------------------------------- From d00fd126ba1705119f55f3924bb0cc3dc864558f Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 14 Feb 2025 23:04:14 -0800 Subject: [PATCH 83/90] bug fix --- src/utilities/mpistubs.c | 74 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 243bc1fe0e..999eceebf9 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -964,57 +964,57 @@ hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) } HYPRE_Int -hypre_MPI_Isend_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests ) +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) { return (0); } HYPRE_Int -hypre_MPI_Irecv_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests) +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + 
HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) { return (0); } HYPRE_Int -hypre_MPI_Send_init_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests) +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) { return (0); } HYPRE_Int -hypre_MPI_Recv_init_Multiple( void *buf, - HYPRE_Int num, - HYPRE_Int *displs, - HYPRE_Int *counts, - hypre_MPI_Datatype datatype, - HYPRE_Int *procs, - HYPRE_Int tag, - hypre_MPI_Comm comm, - hypre_MPI_Request *requests) +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) { return (0); } @@ -1966,7 +1966,7 @@ hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) so we always has a host buffer for MPI. O.w. make sure return Z E R O! 
*/ - return 1; + return 0; #endif } From b0daa81cc806e5ef24a79201232ef51533ab9e9e Mon Sep 17 00:00:00 2001 From: Ruipeng Li Date: Fri, 14 Feb 2025 23:11:11 -0800 Subject: [PATCH 84/90] bug fix --- src/parcsr_block_mv/par_csr_block_matvec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcsr_block_mv/par_csr_block_matvec.c b/src/parcsr_block_mv/par_csr_block_matvec.c index bc26eb58af..f96aa43b7e 100644 --- a/src/parcsr_block_mv/par_csr_block_matvec.c +++ b/src/parcsr_block_mv/par_csr_block_matvec.c @@ -208,7 +208,7 @@ hypre_ParCSRBlockMatrixMatvecT( HYPRE_Complex alpha, hypre_CSRBlockMatrixMatvecT(alpha, diag, x_local, beta, y_local); - hypre_ParCSRCommHandleDestroy(comm_handle); + hypre_ParCSRBlockCommHandleDestroy(comm_handle); comm_handle = NULL; index = 0; From 8e2c2a85167e26cc9a0271de2463b351ba08da4a Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 14 Feb 2025 23:11:40 -0800 Subject: [PATCH 85/90] small fix --- src/parcsr_mv/par_csr_communication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 2670db7051..1f15064c91 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -353,7 +353,7 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) if (hypre_MPI_SUCCESS != ret) { char errmsg[256]; - hypre_sprintf(errmsg, "MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__); + hypre_sprintf(errmsg, "MPI error %d in %s (%s, line %u)\n", ret, __func__, __FILE__, __LINE__); hypre_error_w_msg(HYPRE_ERROR_GENERIC, errmsg); } } From f6d6ed6951f74b11bba7943785772ac070c49a44 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Fri, 14 Feb 2025 23:52:03 -0800 Subject: [PATCH 86/90] minor change --- src/parcsr_mv/par_csr_communication.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parcsr_mv/par_csr_communication.c 
b/src/parcsr_mv/par_csr_communication.c index 1f15064c91..8352369334 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -171,7 +171,7 @@ hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) if (pre_send_request) { - hypre_MPI_Request_get_status(*pre_send_request, &flag, MPI_STATUS_IGNORE); + hypre_MPI_Request_get_status(*pre_send_request, &flag, hypre_MPI_STATUS_IGNORE); } if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) @@ -370,7 +370,7 @@ hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) } else { - hypre_MPI_Wait(post_recv_request, MPI_STATUS_IGNORE); + hypre_MPI_Wait(post_recv_request, hypre_MPI_STATUS_IGNORE); } } From 6e73d57a52024602409be6570382c8a4b9738ab9 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 16 Feb 2025 11:47:20 -0800 Subject: [PATCH 87/90] fix bugs by removing buffer management; changed to simple malloc --- src/struct_mv/struct_communication.c | 76 ++++++++-------------------- src/utilities/_hypre_utilities.h | 2 - src/utilities/general.c | 3 -- src/utilities/protos.h | 3 -- 4 files changed, 20 insertions(+), 64 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index b4e63477c9..d237cbe278 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -764,51 +764,6 @@ hypre_CommTypeSetEntry( hypre_Box *box, return hypre_error_flag; } -/** - * @brief Manages memory buffers for structured communication - * - * This function handles buffer allocation and deallocation based on the `action` parameter. - * - If `action == 0`, it returns a buffer that is enough for the specified `size` - * in the given `memory_location`. - * - If `action == 1`, it releases the buffer. - * - There is only one pointer managed. The same ptr will be returned unless a realloc is needed. 
- * - * @param memory_location The memory location where the buffer should be allocated - * (e.g., host or device memory). - * @param size The size of the buffer in BYTES (used for allocation). - * @param action The action to perform: - * - `0`: Return buffer. - * - `1`: Free buffer. - * - * @return Pointer to the buffer if `action == 0`, or `NULL` if `action == 1`. - */ - -char * -hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, - size_t size, - HYPRE_Int action) -{ - static char *buffer_ptr = NULL; - static size_t buffer_size = 0; - - if (0 == action) - { - if (size > buffer_size) - { - buffer_size = 2 * size; - hypre_TFree(buffer_ptr, memory_location); - buffer_ptr = hypre_TAlloc(char, buffer_size, memory_location); - } - return buffer_ptr; - } - else if (1 == action) - { - buffer_size = 0; - hypre_TFree(buffer_ptr, memory_location); - } - return NULL; -} - HYPRE_Int hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, hypre_CommPkg *comm_pkg, @@ -830,8 +785,7 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, HYPRE_Int size_of_elem = sizeof(HYPRE_Complex); HYPRE_Int i; - buffer_ptr = (HYPRE_Complex *) hypre_StructCommunicationManageBuffer(memory_location, - (num_send_elems + num_recv_elems) * sizeof(HYPRE_Complex), 0); + buffer_ptr = hypre_TAlloc(HYPRE_Complex, num_send_elems + num_recv_elems, memory_location); /* allocate send buffers */ send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends + 1, HYPRE_MEMORY_HOST); @@ -1190,13 +1144,14 @@ HYPRE_Int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); - HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); - HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Int action = hypre_CommHandleAction(comm_handle); + hypre_CommPkg *comm_pkg = 
hypre_CommHandleCommPkg(comm_handle); + HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); + HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); + HYPRE_Int action = hypre_CommHandleAction(comm_handle); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); hypre_CommType *comm_type; @@ -1215,7 +1170,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int i, j, d, ll; - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); /*-------------------------------------------------------------------- @@ -1367,11 +1322,11 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); - void *send_buffer = hypre_MPICommGetSendBuffer(comm); - void *recv_buffer = hypre_MPICommGetRecvBuffer(comm); + void *send_copy_buffer = hypre_MPICommGetSendBuffer(comm); + void *recv_copy_buffer = hypre_MPICommGetRecvBuffer(comm); - _hypre_TFree(send_buffer, send_buffer_location); - _hypre_TFree(recv_buffer, recv_buffer_location); + _hypre_TFree(send_copy_buffer, send_buffer_location); + _hypre_TFree(recv_copy_buffer, recv_buffer_location); /* attributes should be deleted when the communicator is being freed * * but since we delete comm right after, so we don't .... 
*/ @@ -1390,6 +1345,15 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(comm, HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); + if (num_sends > 0) + { + hypre_TFree(send_buffers[0], memory_location); + } + else if (num_recvs > 0) + { + hypre_TFree(recv_buffers[0], memory_location); + } + hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); hypre_TFree(recv_buffers, HYPRE_MEMORY_HOST); diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 83ffd7b745..3f9a15423b 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -2648,8 +2648,6 @@ HYPRE_Int hypre_MemoryTrackerSetFileName(const char *file_name); HYPRE_Int hypre_MagmaInitialize(void); HYPRE_Int hypre_MagmaFinalize(void); #endif - -char *hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, size_t size, HYPRE_Int action); /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
diff --git a/src/utilities/general.c b/src/utilities/general.c index 383e40711c..33899c7d91 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -68,9 +68,6 @@ hypre_HandleDestroy(hypre_Handle *hypre_handle_) return hypre_error_flag; } - /* free struct communication buffer memory */ - hypre_StructCommunicationManageBuffer(hypre_HandleMemoryLocation(hypre_handle_), 0, 1); - #if defined(HYPRE_USING_GPU) hypre_DeviceDataDestroy(hypre_HandleDeviceData(hypre_handle_)); hypre_HandleDeviceData(hypre_handle_) = NULL; diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 3b0f2e2ce4..a24f252fd5 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -524,6 +524,3 @@ HYPRE_Int hypre_MemoryTrackerSetFileName(const char *file_name); HYPRE_Int hypre_MagmaInitialize(void); HYPRE_Int hypre_MagmaFinalize(void); #endif - -/* Consider refactoring or relocating this prototype */ -char *hypre_StructCommunicationManageBuffer(HYPRE_MemoryLocation memory_location, size_t size, HYPRE_Int action); From 99602edb6d1abf2421ba5b1c87d3fe9527ed687d Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 16 Feb 2025 12:08:44 -0800 Subject: [PATCH 88/90] minor fix --- src/struct_mv/struct_communication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index d237cbe278..8133c168bc 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -1186,7 +1186,7 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) if (post_recv_request) { - hypre_MPI_Wait(post_recv_request, MPI_STATUS_IGNORE); + hypre_MPI_Wait(post_recv_request, hypre_MPI_STATUS_IGNORE); } /*-------------------------------------------------------------------- From cefa018d0d6c7a4a3e829b5dd4766a95b71601c1 Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 16 Feb 2025 20:12:59 -0800 Subject: [PATCH 89/90] another attempt to fix this thing --- 
src/struct_mv/struct_communication.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 8133c168bc..787fae2998 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -777,7 +777,6 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, void *recv_copy_buffer = NULL; HYPRE_Complex **send_buffers = NULL; HYPRE_Complex **recv_buffers = NULL; - HYPRE_Complex *buffer_ptr = NULL; HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); HYPRE_Int num_send_elems = hypre_CommPkgSendBufsize(comm_pkg); @@ -785,13 +784,11 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, HYPRE_Int size_of_elem = sizeof(HYPRE_Complex); HYPRE_Int i; - buffer_ptr = hypre_TAlloc(HYPRE_Complex, num_send_elems + num_recv_elems, memory_location); - /* allocate send buffers */ send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends + 1, HYPRE_MEMORY_HOST); if (num_sends > 0) { - send_buffers[0] = buffer_ptr; + send_buffers[0] = hypre_TAlloc(HYPRE_Complex, num_send_elems, memory_location); for (i = 1; i <= num_sends; i++) { hypre_CommType *comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); @@ -803,7 +800,7 @@ hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs + 1, HYPRE_MEMORY_HOST); if (num_recvs > 0) { - recv_buffers[0] = buffer_ptr + num_send_elems; + recv_buffers[0] = hypre_TAlloc(HYPRE_Complex, num_recv_elems, memory_location); for (i = 1; i <= num_recvs; i++) { hypre_CommType *comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); @@ -1349,7 +1346,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { hypre_TFree(send_buffers[0], memory_location); } - else if (num_recvs > 0) + + if (num_recvs > 0) { hypre_TFree(recv_buffers[0], memory_location); } From 
5b9a8214c03406ac9983bdcea3f5107cedec2cbc Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Sun, 16 Feb 2025 21:57:03 -0800 Subject: [PATCH 90/90] change par_rap.c back --- src/parcsr_ls/par_rap.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/parcsr_ls/par_rap.c b/src/parcsr_ls/par_rap.c index 96760e985f..c432b36940 100644 --- a/src/parcsr_ls/par_rap.c +++ b/src/parcsr_ls/par_rap.c @@ -48,6 +48,7 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, HYPRE_Int num_cols_offd_RT = hypre_CSRMatrixNumCols(RT_offd); HYPRE_Int num_rows_offd_RT = hypre_CSRMatrixNumRows(RT_offd); hypre_ParCSRCommPkg *comm_pkg_RT = hypre_ParCSRMatrixCommPkg(RT); + HYPRE_Int num_recvs_RT = 0; HYPRE_Int num_sends_RT = 0; HYPRE_Int *send_map_starts_RT = NULL; HYPRE_Int *send_map_elmts_RT = NULL; @@ -191,6 +192,7 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, if (comm_pkg_RT) { + num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT); num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT); send_map_starts_RT = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT); send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT); @@ -199,6 +201,7 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, { hypre_MatvecCommPkgCreate(RT); comm_pkg_RT = hypre_ParCSRMatrixCommPkg(RT); + num_recvs_RT = hypre_ParCSRCommPkgNumRecvs(comm_pkg_RT); num_sends_RT = hypre_ParCSRCommPkgNumSends(comm_pkg_RT); send_map_starts_RT = hypre_ParCSRCommPkgSendMapStarts(comm_pkg_RT); send_map_elmts_RT = hypre_ParCSRCommPkgSendMapElmts(comm_pkg_RT); @@ -1037,15 +1040,16 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, #endif RAP_ext_size = 0; - - void *request; - hypre_ExchangeExternalRowsInit(RAP_int, comm_pkg_RT, &request); - RAP_ext = hypre_ExchangeExternalRowsWait(request); - RAP_ext_i = hypre_CSRMatrixI(RAP_ext); - RAP_ext_j = hypre_CSRMatrixBigJ(RAP_ext); - RAP_ext_data = hypre_CSRMatrixData(RAP_ext); - RAP_ext_size 
= RAP_ext_i[hypre_CSRMatrixNumRows(RAP_ext)]; - + if (num_sends_RT || num_recvs_RT) + { + void *request; + hypre_ExchangeExternalRowsInit(RAP_int, comm_pkg_RT, &request); + RAP_ext = hypre_ExchangeExternalRowsWait(request); + RAP_ext_i = hypre_CSRMatrixI(RAP_ext); + RAP_ext_j = hypre_CSRMatrixBigJ(RAP_ext); + RAP_ext_data = hypre_CSRMatrixData(RAP_ext); + RAP_ext_size = RAP_ext_i[hypre_CSRMatrixNumRows(RAP_ext)]; + } if (num_cols_offd_RT) { hypre_CSRMatrixDestroy(RAP_int); @@ -2035,8 +2039,11 @@ hypre_BoomerAMGBuildCoarseOperatorKT( hypre_ParCSRMatrix *RT, R_offd = NULL; } - hypre_CSRMatrixDestroy(RAP_ext); - RAP_ext = NULL; + if (num_sends_RT || num_recvs_RT) + { + hypre_CSRMatrixDestroy(RAP_ext); + RAP_ext = NULL; + } hypre_TFree(P_mark_array, HYPRE_MEMORY_HOST); hypre_TFree(A_mark_array, HYPRE_MEMORY_HOST); hypre_TFree(P_ext_diag_i, HYPRE_MEMORY_HOST);