diff --git a/src/parcsr_block_mv/par_csr_block_matvec.c b/src/parcsr_block_mv/par_csr_block_matvec.c index bc26eb58af..f96aa43b7e 100644 --- a/src/parcsr_block_mv/par_csr_block_matvec.c +++ b/src/parcsr_block_mv/par_csr_block_matvec.c @@ -208,7 +208,7 @@ hypre_ParCSRBlockMatrixMatvecT( HYPRE_Complex alpha, hypre_CSRBlockMatrixMatvecT(alpha, diag, x_local, beta, y_local); - hypre_ParCSRCommHandleDestroy(comm_handle); + hypre_ParCSRBlockCommHandleDestroy(comm_handle); comm_handle = NULL; index = 0; diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index 4b94bb544a..1a3fcf0108 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -787,9 +787,9 @@ hypre_BoomerAMGRelaxHybridGaussSeidel_core( hypre_ParCSRMatrix *A, num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); #if defined(HYPRE_USING_PERSISTENT_COMM) - persistent_comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); - v_buf_data = (HYPRE_Real *) hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); - v_ext_data = (HYPRE_Real *) hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + persistent_comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + v_buf_data = (HYPRE_Real *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); + v_ext_data = (HYPRE_Real *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); #else v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), @@ -813,13 +813,13 @@ hypre_BoomerAMGRelaxHybridGaussSeidel_core( hypre_ParCSRMatrix *A, #endif #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, HYPRE_MEMORY_HOST, v_buf_data); + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, v_buf_data, v_ext_data); #endif #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, HYPRE_MEMORY_HOST, v_ext_data); + hypre_ParCSRCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index b920dc4f08..cf7d1ea9c6 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -29,7 +29,6 @@ extern "C" { * Structure containing information for doing communications *--------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM typedef enum CommPkgJobType { HYPRE_COMM_PKG_JOB_COMPLEX = 0, @@ -38,9 +37,90 @@ typedef enum CommPkgJobType HYPRE_COMM_PKG_JOB_INT_TRANSPOSE, HYPRE_COMM_PKG_JOB_BIGINT, HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE, - NUM_OF_COMM_PKG_JOB_TYPE, + NUM_OF_COMM_PKG_JOB_TYPE } CommPkgJobType; -#endif + +static inline CommPkgJobType +hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) +{ + CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + switch (job) + { + case 1: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + break; + case 2: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; + break; + case 11: + job_type = HYPRE_COMM_PKG_JOB_INT; + break; + case 12: + job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; + break; + case 21: + job_type = HYPRE_COMM_PKG_JOB_BIGINT; + break; + case 22: + job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; + break; + } + + return job_type; +} + +static inline HYPRE_Int +hypre_ParCSRCommHandleIsTransposeJob(HYPRE_Int job) +{ + HYPRE_Int trans = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_BIGINT: + { + trans = 0; + break; + } + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + { + trans = 1; + break; + } + default: + break; + } + return trans; +} + +static inline hypre_MPI_Datatype +hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) +{ + hypre_MPI_Datatype dtype = HYPRE_MPI_COMPLEX; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + dtype = HYPRE_MPI_COMPLEX; + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + dtype = HYPRE_MPI_INT; + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + dtype = HYPRE_MPI_BIG_INT; + break; + default: + break; + } + + return dtype; +} /*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle @@ -50,18 +130,31 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; - HYPRE_MemoryLocation send_memory_location; - HYPRE_MemoryLocation recv_memory_location; - HYPRE_Int num_send_bytes; - HYPRE_Int num_recv_bytes; - void *send_data; - void *recv_data; - void *send_data_buffer; - void *recv_data_buffer; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; + HYPRE_Int persistent; + void *send_data; + void *recv_data; + HYPRE_MemoryLocation send_location; + HYPRE_MemoryLocation recv_location; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPICommWrapper *comm; } hypre_ParCSRCommHandle; +/*-------------------------------------------------------------------------- + * Accessor macros: hypre_ParCSRCommHandle + *--------------------------------------------------------------------------*/ + +#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) +#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) +#define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) +#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) + typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; typedef struct _hypre_ParCSRCommPkg @@ -79,7 +172,7 @@ typedef struct _hypre_ParCSRCommPkg /* remote communication information */ hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; -#ifdef HYPRE_USING_PERSISTENT_COMM +#if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) @@ -94,31 +187,33 @@ typedef struct _hypre_ParCSRCommPkg * Accessor macros: hypre_ParCSRCommPkg *--------------------------------------------------------------------------*/ -#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) -#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) -#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) -#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) -#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) -#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) -#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) -#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) -#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) -#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) -#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) -#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) -#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) -#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) -#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) -#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) -#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) -#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) -#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) -#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) +#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) +#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) +#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) +#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) +#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) +#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) +#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) +#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) +#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) +#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) +#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) +#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) +#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) +#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) +#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) +#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) +#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) +#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) +#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgPersistentCommHandles(comm_pkg) (comm_pkg -> persistent_comm_handles) +#define hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg,i) (comm_pkg -> persistent_comm_handles[i]) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) -#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) -#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) +#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) +#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) +#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) #endif static inline HYPRE_MAYBE_UNUSED_FUNC void @@ -146,23 +241,6 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) #endif } -/*-------------------------------------------------------------------------- - * Accessor macros: hypre_ParCSRCommHandle - *--------------------------------------------------------------------------*/ - -#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) (comm_handle -> send_memory_location) -#define hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) (comm_handle -> recv_memory_location) -#define hypre_ParCSRCommHandleNumSendBytes(comm_handle) (comm_handle -> num_send_bytes) -#define hypre_ParCSRCommHandleNumRecvBytes(comm_handle) (comm_handle -> num_recv_bytes) -#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendDataBuffer(comm_handle) (comm_handle -> send_data_buffer) -#define hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) (comm_handle -> recv_data_buffer) -#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) - #endif /* HYPRE_PAR_CSR_COMMUNICATION_HEADER */ /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other @@ -898,12 +976,20 @@ HYPRE_Int hypre_BooleanGenerateDiagAndOffd ( hypre_CSRBooleanMatrix *A, /* par_csr_communication.c */ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, void *send_data, void *recv_data ); +hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data_in, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data_in ); hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, void *send_data_in, HYPRE_MemoryLocation recv_memory_location, void *recv_data_in ); +HYPRE_Int hypre_ParCSRCommHandleWait(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRCommHandleDestroy ( hypre_ParCSRCommHandle *comm_handle ); void hypre_ParCSRCommPkgCreate_core ( MPI_Comm comm, HYPRE_BigInt *col_map_offd, HYPRE_BigInt first_col_diag, HYPRE_BigInt *col_starts, HYPRE_Int num_cols_diag, @@ -1034,15 +1120,18 @@ HYPRE_Int hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, HYPRE HYPRE_Int hypre_ParCSRCommPkgCreateMatrixE( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); #ifdef HYPRE_USING_PERSISTENT_COMM -hypre_ParCSRPersistentCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -hypre_ParCSRPersistentCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -void hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRPersistentCommHandle *comm_handle); -void hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, void *send_data); -void hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation recv_memory_location, void *recv_data); +hypre_ParCSRCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +#define hypre_ParCSRPersistentCommHandleWait hypre_ParCSRCommHandleWait +#define hypre_ParCSRPersistentCommHandleDestroy hypre_ParCSRCommHandleDestroy + +HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/parcsr_mv/par_csr_communication.c b/src/parcsr_mv/par_csr_communication.c index 02305cbfe4..8352369334 100644 --- a/src/parcsr_mv/par_csr_communication.c +++ b/src/parcsr_mv/par_csr_communication.c @@ -7,333 +7,186 @@ #include "_hypre_parcsr_mv.h" -/*==========================================================================*/ +/*--------------------------------------------------------------------------------------- + * hypre_ParCSR(Persistent)CommHandleCreate sets up a communication handle, + * posts receives and initiates sends. It always requires num_sends, + * num_recvs, recv_procs and send_procs to be set in comm_pkg. + * There are different options for job: + * job = 1 : is used to initialize communication exchange for the parts + * of vector needed to perform a Matvec, it requires send_data + * and recv_data to be doubles, recv_vec_starts and + * send_map_starts need to be set in comm_pkg. + * job = 2 : is used to initialize communication exchange for the parts + * of vector needed to perform a MatvecT, it requires send_data + * and recv_data to be doubles, recv_vec_starts and + * send_map_starts need to be set in comm_pkg. + * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), + * requires send_data and recv_data to be ints + * recv_vec_starts and send_map_starts need to be set in comm_pkg. + * default: ignores send_data and recv_data, requires send_mpi_types + * and recv_mpi_types to be set in comm_pkg. + * datatypes need to point to absolute + * addresses, e.g. generated using hypre_MPI_Address . + *-------------------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM -static CommPkgJobType getJobTypeOf(HYPRE_Int job) +HYPRE_Int +hypre_ParCSRCommHandleAllocateBuffers( HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location, + HYPRE_Int num_send_elems, + HYPRE_Int num_recv_elems, + HYPRE_Int size_of_elem, + hypre_ParCSRCommHandle *comm_handle ) { - CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; - switch (job) + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(send_memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(recv_memory_location); + hypre_MemoryLocation send_buffer_location = hypre_MEMORY_UNDEFINED; + hypre_MemoryLocation recv_buffer_location = hypre_MEMORY_UNDEFINED; + void *send_buffer = NULL; + void *recv_buffer = NULL; + + if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - case 1: - job_type = HYPRE_COMM_PKG_JOB_COMPLEX; - break; - case 2: - job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; - break; - case 11: - job_type = HYPRE_COMM_PKG_JOB_INT; - break; - case 12: - job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; - break; - case 21: - job_type = HYPRE_COMM_PKG_JOB_BIGINT; - break; - case 22: - job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; - break; - } // switch (job) - - return job_type; -} - -/*------------------------------------------------------------------ - * hypre_ParCSRPersistentCommHandleCreate - * - * When send_data and recv_data are NULL, buffers are internally - * allocated and CommHandle owns the buffer - *------------------------------------------------------------------*/ - -hypre_ParCSRPersistentCommHandle* -hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg ) -{ - HYPRE_Int i; - size_t num_bytes_send, num_bytes_recv; - - hypre_ParCSRPersistentCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRPersistentCommHandle, 1, - HYPRE_MEMORY_HOST); - - CommPkgJobType job_type = getJobTypeOf(job); + send_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + send_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_buffer_location); + } - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); + if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) + { + recv_buffer_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + recv_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_buffer_location); + } - HYPRE_Int num_requests = num_sends + num_recvs; - hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + if (hypre_ParCSRCommHandlePersistent(comm_handle)) + { + if (!hypre_ParCSRCommHandleSendData(comm_handle)) + { + hypre_ParCSRCommHandleSendData(comm_handle) = hypre_TAlloc(char, num_send_elems * size_of_elem, send_memory_location); + } - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; + if (!hypre_ParCSRCommHandleRecvData(comm_handle)) + { + hypre_ParCSRCommHandleRecvData(comm_handle) = hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_memory_location); + } + } - void *send_buff = NULL, *recv_buff = NULL; + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); - switch (job_type) +#if defined(HYPRE_DEBUG) + if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) { - case HYPRE_COMM_PKG_JOB_COMPLEX: - num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + num_recvs + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_Complex) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Complex *)recv_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Complex *)send_buff + vec_start, vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, requests + num_sends + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_INT: - num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + num_recvs + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_Int) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_Int, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_Int *)recv_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_Int *)send_buff + vec_start, vec_len, HYPRE_MPI_INT, - ip, 0, comm, requests + num_sends + i ); - } - break; - - case HYPRE_COMM_PKG_JOB_BIGINT: - num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + num_recvs + i); - } - break; - - case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: - num_bytes_recv = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - num_bytes_send = sizeof(HYPRE_BigInt) * hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs); - recv_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), - HYPRE_MEMORY_HOST); - send_buff = hypre_TAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs), - HYPRE_MEMORY_HOST); - for (i = 0; i < num_sends; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Recv_init( (HYPRE_BigInt *)recv_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + i ); - } - for (i = 0; i < num_recvs; ++i) - { - HYPRE_Int ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - HYPRE_Int vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - HYPRE_Int vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - - hypre_MPI_Send_init( (HYPRE_BigInt *)send_buff + (HYPRE_BigInt)vec_start, vec_len, - HYPRE_MPI_BIG_INT, - ip, 0, comm, requests + num_sends + i); - } - break; - default: - hypre_assert(1 == 0); - break; - } // switch (job_type) + hypre_printf("[%s, %d] MPI_Comm SendLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm SendBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm RecvBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendBufferLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); + } +#endif - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_buff; - hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_buff; - hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_bytes_send; - hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_bytes_recv; + hypre_MPICommSetSendLocation(comm, send_memory_alocation); + hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); + hypre_MPICommSetSendBuffer(comm, send_buffer); + hypre_MPICommSetRecvBuffer(comm, recv_buffer); + hypre_MPICommSetSendBufferLocation(comm, send_buffer_location); + hypre_MPICommSetRecvBufferLocation(comm, recv_buffer_location); - return ( comm_handle ); + return hypre_error_flag; } +#if defined(HYPRE_USING_PERSISTENT_COMM) + /*------------------------------------------------------------------ - * hypre_ParCSRCommPkgGetPersistentCommHandle + * hypre_ParCSRPersistentCommHandleCreate + * + * When send_data and recv_data are NULL, buffers are internally + * allocated and CommHandle owns the buffer *------------------------------------------------------------------*/ -hypre_ParCSRPersistentCommHandle* -hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg ) +hypre_ParCSRCommHandle* +hypre_ParCSRPersistentCommHandleCreate( HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location ) { - CommPkgJobType type = getJobTypeOf(job); - if (!comm_pkg->persistent_comm_handles[type]) - { - /* data is owned by persistent comm handle */ - comm_pkg->persistent_comm_handles[type] = - hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg); - } - - return comm_pkg->persistent_comm_handles[type]; + return hypre_ParCSRCommHandleCreate_core(1, job, comm_pkg, send_memory_location, NULL, recv_memory_location, NULL); } /*------------------------------------------------------------------ - * hypre_ParCSRPersistentCommHandleDestroy + * hypre_ParCSRCommPkgGetPersistentCommHandle *------------------------------------------------------------------*/ -void -hypre_ParCSRPersistentCommHandleDestroy( hypre_ParCSRPersistentCommHandle *comm_handle ) +hypre_ParCSRCommHandle* +hypre_ParCSRCommPkgGetPersistentCommHandle( HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location ) { - if (comm_handle) - { - hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST); - hypre_TFree(comm_handle->requests, HYPRE_MEMORY_HOST); + CommPkgJobType type = hypre_ParCSRCommHandleGetJobType(job); - hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); + if (!hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type)) + { + /* data is owned by persistent comm handle */ + hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type) = + hypre_ParCSRPersistentCommHandleCreate(job, comm_pkg, send_memory_location, recv_memory_location); } + + return hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, type); } + /*------------------------------------------------------------------ * hypre_ParCSRPersistentCommHandleStart *------------------------------------------------------------------*/ -void -hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, - void *send_data ) +HYPRE_Int +hypre_ParCSRPersistentCommHandleStart( hypre_ParCSRCommHandle *comm_handle ) { - hypre_ParCSRCommHandleSendData(comm_handle) = send_data; - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location; + MPI_Comm comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + HYPRE_Int flag; - if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) + if (pre_send_request) { - hypre_TMemcpy( hypre_ParCSRCommHandleSendDataBuffer(comm_handle), - send_data, - char, - hypre_ParCSRCommHandleNumSendBytes(comm_handle), - HYPRE_MEMORY_HOST, - send_memory_location ); - - HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle), - hypre_ParCSRCommHandleRequests(comm_handle)); - if (hypre_MPI_SUCCESS != ret) - { - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n"); - /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ - } + hypre_MPI_Request_get_status(*pre_send_request, &flag, hypre_MPI_STATUS_IGNORE); } -} - -/*------------------------------------------------------------------ - * hypre_ParCSRPersistentCommHandleWait - *------------------------------------------------------------------*/ - -void -hypre_ParCSRPersistentCommHandleWait( hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation recv_memory_location, - void *recv_data ) -{ - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location; if (hypre_ParCSRCommHandleNumRequests(comm_handle) > 0) { - HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), - hypre_ParCSRCommHandleRequests(comm_handle), - hypre_MPI_STATUSES_IGNORE); + HYPRE_Int ret = hypre_MPI_Startall(hypre_ParCSRCommHandleNumRequests(comm_handle), + hypre_ParCSRCommHandleRequests(comm_handle)); if (hypre_MPI_SUCCESS != ret) { hypre_error_w_msg(HYPRE_ERROR_GENERIC, "MPI error\n"); - /*hypre_printf("MPI error %d in %s (%s, line %u)\n", ret, __FUNCTION__, __FILE__, __LINE__);*/ } - - hypre_TMemcpy(recv_data, - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), - char, - hypre_ParCSRCommHandleNumRecvBytes(comm_handle), - recv_memory_location, - HYPRE_MEMORY_HOST); } + + return hypre_error_flag; } + #endif // HYPRE_USING_PERSISTENT_COMM /*------------------------------------------------------------------ @@ -358,282 +211,170 @@ hypre_ParCSRCommHandle* hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, - void *send_data_in, + void *send_data, HYPRE_MemoryLocation recv_memory_location, - void *recv_data_in ) + void *recv_data ) { - hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_v2"); - - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); - HYPRE_Int num_send_bytes = 0; - HYPRE_Int num_recv_bytes = 0; - hypre_ParCSRCommHandle *comm_handle; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - HYPRE_Int i, j; - HYPRE_Int my_id, num_procs; - HYPRE_Int ip, vec_start, vec_len; - void *send_data; - void *recv_data; - - /*-------------------------------------------------------------------- - * hypre_Initialize sets up a communication handle, - * posts receives and initiates sends. It always requires num_sends, - * num_recvs, recv_procs and send_procs to be set in comm_pkg. - * There are different options for job: - * job = 1 : is used to initialize communication exchange for the parts - * of vector needed to perform a Matvec, it requires send_data - * and recv_data to be doubles, recv_vec_starts and - * send_map_starts need to be set in comm_pkg. - * job = 2 : is used to initialize communication exchange for the parts - * of vector needed to perform a MatvecT, it requires send_data - * and recv_data to be doubles, recv_vec_starts and - * send_map_starts need to be set in comm_pkg. - * job = 11: similar to job = 1, but exchanges data of type HYPRE_Int (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * job = 12: similar to job = 2, but exchanges data of type HYPRE_Int (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * job = 21: similar to job = 1, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * job = 22: similar to job = 2, but exchanges data of type HYPRE_BigInt (not HYPRE_Complex), - * requires send_data and recv_data to be ints - * recv_vec_starts and send_map_starts need to be set in comm_pkg. - * default: ignores send_data and recv_data, requires send_mpi_types - * and recv_mpi_types to be set in comm_pkg. - * datatypes need to point to absolute - * addresses, e.g. generated using hypre_MPI_Address . - *--------------------------------------------------------------------*/ -#if !defined(HYPRE_USING_GPU_AWARE_MPI) - switch (job) - { - case 1: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex); - break; - case 2: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Complex); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Complex); - break; - case 11: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int); - break; - case 12: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_Int); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_Int); - break; - case 21: - num_send_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt); - num_recv_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt); - break; - case 22: - num_send_bytes = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, num_recvs) * sizeof(HYPRE_BigInt); - num_recv_bytes = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends) * sizeof(HYPRE_BigInt); - break; - } + return hypre_ParCSRCommHandleCreate_core(0, job, comm_pkg, send_memory_location, send_data, recv_memory_location, recv_data); +} - hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation(send_memory_location); +/*------------------------------------------------------------------ + * hypre_ParCSRCommHandleCreate_core + *------------------------------------------------------------------*/ - if ( act_send_memory_location == hypre_MEMORY_DEVICE || - act_send_memory_location == hypre_MEMORY_UNIFIED ) +hypre_ParCSRCommHandle* +hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data ) +{ + hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleCreate_core"); + + HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_ParCSRCommPkgComm(comm_pkg); + hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); + HYPRE_Int num_requests = num_sends + num_recvs; + hypre_MPI_Request *requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + hypre_MPI_Datatype mpi_dtype = hypre_ParCSRCommHandleGetMPIDataType(job); + hypre_ParCSRCommHandle *comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); + HYPRE_Int num_send_elems = 0; + HYPRE_Int num_recv_elems = 0; + HYPRE_Int data_size; + HYPRE_Int mpi_num_recvs = 0; + HYPRE_Int mpi_num_sends = 0; + HYPRE_Int *mpi_send_displs = NULL; + HYPRE_Int *mpi_recv_displs = NULL; + HYPRE_Int *mpi_send_procs = NULL; + HYPRE_Int *mpi_recv_procs = NULL; + + hypre_MPI_Type_size(mpi_dtype, &data_size); + + hypre_ParCSRCommHandlePersistent(comm_handle) = persistent; + hypre_ParCSRCommHandleComm(comm_handle) = comm; + + if (hypre_ParCSRCommHandleIsTransposeJob(job)) { - //send_data = _hypre_TAlloc(char, num_send_bytes, hypre_MEMORY_HOST_PINNED); - send_data = hypre_TAlloc(char, num_send_bytes, HYPRE_MEMORY_HOST); - hypre_GpuProfilingPushRange("MPI-D2H"); - hypre_TMemcpy(send_data, send_data_in, char, num_send_bytes, HYPRE_MEMORY_HOST, - HYPRE_MEMORY_DEVICE); - hypre_GpuProfilingPopRange(); + mpi_num_recvs = num_sends; + mpi_num_sends = num_recvs; + mpi_recv_displs = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + mpi_send_displs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); + mpi_recv_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); + mpi_send_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); } else { - send_data = send_data_in; + mpi_num_recvs = num_recvs; + mpi_num_sends = num_sends; + mpi_recv_displs = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); + mpi_send_displs = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); + mpi_recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); + mpi_send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); } + num_recv_elems = mpi_recv_displs[mpi_num_recvs]; + num_send_elems = mpi_send_displs[mpi_num_sends]; - hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation(recv_memory_location); + hypre_ParCSRCommHandleAllocateBuffers(send_memory_location, recv_memory_location, + num_send_elems, num_recv_elems, data_size, comm_handle); - if ( act_recv_memory_location == hypre_MEMORY_DEVICE || - act_recv_memory_location == hypre_MEMORY_UNIFIED ) + if (persistent) { - //recv_data = hypre_TAlloc(char, num_recv_bytes, hypre_MEMORY_HOST_PINNED); - recv_data = hypre_TAlloc(char, num_recv_bytes, HYPRE_MEMORY_HOST); + hypre_MPI_Recv_init_Multiple(hypre_ParCSRCommHandleRecvData(comm_handle), + mpi_num_recvs, + mpi_recv_displs, + NULL, mpi_dtype, + mpi_recv_procs, + 0, comm, requests); + + hypre_MPI_Send_init_Multiple(hypre_ParCSRCommHandleSendData(comm_handle), + mpi_num_sends, + mpi_send_displs, + NULL, mpi_dtype, + mpi_send_procs, + 0, comm, requests + mpi_num_recvs); } else { - recv_data = recv_data_in; + hypre_MPI_Irecv_Multiple(recv_data, + mpi_num_recvs, + mpi_recv_displs, + NULL, mpi_dtype, + mpi_recv_procs, + 0, comm, requests); + + hypre_MPI_Isend_Multiple(send_data, + mpi_num_sends, + mpi_send_displs, + NULL, mpi_dtype, + mpi_send_procs, + 0, comm, requests + mpi_num_recvs); } -#else /* #ifndef HYPRE_USING_GPU_AWARE_MPI */ - send_data = send_data_in; - recv_data = recv_data_in; -#endif - num_requests = num_sends + num_recvs; - requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); + /*-------------------------------------------------------------------- + * set up comm_handle and return + *--------------------------------------------------------------------*/ - hypre_MPI_Comm_size(comm, &num_procs); - hypre_MPI_Comm_rank(comm, &my_id); + hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; + if (!persistent) + { + hypre_ParCSRCommHandleSendData(comm_handle) = send_data; + hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; + } + hypre_ParCSRCommHandleSendLocation(comm_handle) = send_memory_location; + hypre_ParCSRCommHandleRecvLocation(comm_handle) = recv_memory_location; + hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; + hypre_ParCSRCommHandleRequests(comm_handle) = requests; - j = 0; - switch (job) + hypre_GpuProfilingPopRange(); + + return ( comm_handle ); +} + +/*------------------------------------------------------------------ + * hypre_ParCSRCommHandleWait + *------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRCommHandleWait( hypre_ParCSRCommHandle *comm_handle ) +{ + if ( comm_handle == NULL ) { - case 1: - { - HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data; - HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - break; - } - case 2: - { - HYPRE_Complex *d_send_data = (HYPRE_Complex *) send_data; - HYPRE_Complex *d_recv_data = (HYPRE_Complex *) recv_data; - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&d_recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&d_send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - break; - } - case 11: - { - HYPRE_Int *i_send_data = (HYPRE_Int *) send_data; - HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); - } - break; - } - case 12: + return hypre_error_flag; + } + + if (hypre_ParCSRCommHandleNumRequests(comm_handle)) + { + HYPRE_Int ret = hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), + hypre_ParCSRCommHandleRequests(comm_handle), + hypre_MPI_STATUSES_IGNORE); + if (hypre_MPI_SUCCESS != ret) { - HYPRE_Int *i_send_data = (HYPRE_Int *) send_data; - HYPRE_Int *i_recv_data = (HYPRE_Int *) recv_data; - for (i = 0; i < num_sends; i++) - { - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_INT, - ip, 0, comm, &requests[j++]); - } - break; + char errmsg[256]; + hypre_sprintf(errmsg, "MPI error %d in %s (%s, line %u)\n", ret, __func__, __FILE__, __LINE__); + hypre_error_w_msg(HYPRE_ERROR_GENERIC, errmsg); } - case 21: + } + + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + + if (post_recv_request) + { + if (hypre_ParCSRCommHandlePersistent(comm_handle)) { - HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; - HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_sends; i++) - { - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - break; + HYPRE_Int flag; + hypre_MPI_Request_get_status(*post_recv_request, &flag, hypre_MPI_STATUS_IGNORE); } - case 22: + else { - HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; - HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; - for (i = 0; i < num_sends; i++) - { - vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1) - vec_start; - ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); - hypre_MPI_Irecv(&i_recv_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - for (i = 0; i < num_recvs; i++) - { - ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); - vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); - vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Isend(&i_send_data[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - break; + hypre_MPI_Wait(post_recv_request, hypre_MPI_STATUS_IGNORE); } } - /*-------------------------------------------------------------------- - * set up comm_handle and return - *--------------------------------------------------------------------*/ - - comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1, HYPRE_MEMORY_HOST); - - hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) = send_memory_location; - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) = recv_memory_location; - hypre_ParCSRCommHandleNumSendBytes(comm_handle) = num_send_bytes; - hypre_ParCSRCommHandleNumRecvBytes(comm_handle) = num_recv_bytes; - hypre_ParCSRCommHandleSendData(comm_handle) = send_data_in; - hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data_in; - hypre_ParCSRCommHandleSendDataBuffer(comm_handle) = send_data; - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) = recv_data; - hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; - hypre_ParCSRCommHandleRequests(comm_handle) = requests; - hypre_GpuProfilingPopRange(); - - return ( comm_handle ); + return hypre_error_flag; } /*------------------------------------------------------------------ @@ -643,58 +384,63 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, HYPRE_Int hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { - if ( comm_handle == NULL ) + if (!comm_handle) { return hypre_error_flag; } - hypre_GpuProfilingPushRange("hypre_ParCSRCommHandleDestroy"); + hypre_MPICommWrapper *comm = hypre_ParCSRCommHandleComm(comm_handle); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); + hypre_MPI_Request *pre_send_request = hypre_MPICommGetPreSendRequest(comm); + HYPRE_Int persistent = hypre_ParCSRCommHandlePersistent(comm_handle); - if (hypre_ParCSRCommHandleNumRequests(comm_handle)) + if (!persistent) { - hypre_MPI_Status *status0; - status0 = hypre_CTAlloc(hypre_MPI_Status, - hypre_ParCSRCommHandleNumRequests(comm_handle), HYPRE_MEMORY_HOST); - hypre_GpuProfilingPushRange("hypre_MPI_Waitall"); - hypre_MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), - hypre_ParCSRCommHandleRequests(comm_handle), status0); - hypre_GpuProfilingPopRange(); - hypre_TFree(status0, HYPRE_MEMORY_HOST); + hypre_ParCSRCommHandleWait(comm_handle); } -#if !defined(HYPRE_USING_GPU_AWARE_MPI) - hypre_MemoryLocation act_send_memory_location = hypre_GetActualMemLocation( - hypre_ParCSRCommHandleSendMemoryLocation(comm_handle)); - if ( act_send_memory_location == hypre_MEMORY_DEVICE || - act_send_memory_location == hypre_MEMORY_UNIFIED ) + hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); + hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); + void *send_buffer = hypre_MPICommGetSendBuffer(comm); + void *recv_buffer = hypre_MPICommGetRecvBuffer(comm); + + _hypre_TFree(send_buffer, send_buffer_location); + _hypre_TFree(recv_buffer, recv_buffer_location); + + + if (persistent) { - //hypre_HostPinnedFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleSendDataBuffer(comm_handle), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_ParCSRCommHandleSendData(comm_handle), hypre_ParCSRCommHandleSendLocation(comm_handle)); + hypre_TFree(hypre_ParCSRCommHandleRecvData(comm_handle), hypre_ParCSRCommHandleRecvLocation(comm_handle)); } - hypre_MemoryLocation act_recv_memory_location = hypre_GetActualMemLocation( - hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle)); - if ( act_recv_memory_location == hypre_MEMORY_DEVICE || - act_recv_memory_location == hypre_MEMORY_UNIFIED ) + hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + + if (persistent) { - hypre_GpuProfilingPushRange("MPI-H2D"); - hypre_TMemcpy( hypre_ParCSRCommHandleRecvData(comm_handle), - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), - char, - hypre_ParCSRCommHandleNumRecvBytes(comm_handle), - HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST ); - hypre_GpuProfilingPopRange(); - //hypre_HostPinnedFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle)); - hypre_TFree(hypre_ParCSRCommHandleRecvDataBuffer(comm_handle), HYPRE_MEMORY_HOST); + if (post_recv_request) { hypre_MPI_Request_free(post_recv_request); } + if (pre_send_request) { hypre_MPI_Request_free(pre_send_request); } } -#endif - hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); + /* attributes should be deleted when the communicator is being freed * + * but since we delete comm right after, so we don't .... */ + #if 0 + hypre_MPICommDeleteSendLocation(comm); + hypre_MPICommDeleteRecvLocation(comm); + hypre_MPICommDeleteSendBufferLocation(comm); + hypre_MPICommDeleteRecvBufferLocation(comm); + hypre_MPICommDeleteSendBuffer(comm); + hypre_MPICommDeleteRecvBuffer(comm); + if (post_recv_request) { hypre_MPICommDeletePostRecvRequest(comm); } + if (pre_send_request) { hypre_MPICommDeletePreSendRequest(comm); } + #endif + + hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); + hypre_TFree(pre_send_request, HYPRE_MEMORY_HOST); + + hypre_TFree(comm, HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); - hypre_GpuProfilingPopRange(); - return hypre_error_flag; } @@ -1208,9 +954,9 @@ hypre_MatvecCommPkgDestroy( hypre_ParCSRCommPkg *comm_pkg ) HYPRE_Int i; for (i = HYPRE_COMM_PKG_JOB_COMPLEX; i < NUM_OF_COMM_PKG_JOB_TYPE; ++i) { - if (comm_pkg->persistent_comm_handles[i]) + if (hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, i)) { - hypre_ParCSRPersistentCommHandleDestroy(comm_pkg->persistent_comm_handles[i]); + hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg, i)); } } #endif diff --git a/src/parcsr_mv/par_csr_communication.h b/src/parcsr_mv/par_csr_communication.h index 50fa4cb434..636a1fda9d 100644 --- a/src/parcsr_mv/par_csr_communication.h +++ b/src/parcsr_mv/par_csr_communication.h @@ -13,7 +13,6 @@ * Structure containing information for doing communications *--------------------------------------------------------------------------*/ -#ifdef HYPRE_USING_PERSISTENT_COMM typedef enum CommPkgJobType { HYPRE_COMM_PKG_JOB_COMPLEX = 0, @@ -22,9 +21,90 @@ typedef enum CommPkgJobType HYPRE_COMM_PKG_JOB_INT_TRANSPOSE, HYPRE_COMM_PKG_JOB_BIGINT, HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE, - NUM_OF_COMM_PKG_JOB_TYPE, + NUM_OF_COMM_PKG_JOB_TYPE } CommPkgJobType; -#endif + +static inline CommPkgJobType +hypre_ParCSRCommHandleGetJobType(HYPRE_Int job) +{ + CommPkgJobType job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + switch (job) + { + case 1: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX; + break; + case 2: + job_type = HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE; + break; + case 11: + job_type = HYPRE_COMM_PKG_JOB_INT; + break; + case 12: + job_type = HYPRE_COMM_PKG_JOB_INT_TRANSPOSE; + break; + case 21: + job_type = HYPRE_COMM_PKG_JOB_BIGINT; + break; + case 22: + job_type = HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE; + break; + } + + return job_type; +} + +static inline HYPRE_Int +hypre_ParCSRCommHandleIsTransposeJob(HYPRE_Int job) +{ + HYPRE_Int trans = 0; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_BIGINT: + { + trans = 0; + break; + } + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + { + trans = 1; + break; + } + default: + break; + } + return trans; +} + +static inline hypre_MPI_Datatype +hypre_ParCSRCommHandleGetMPIDataType(HYPRE_Int job) +{ + hypre_MPI_Datatype dtype = HYPRE_MPI_COMPLEX; + + switch (hypre_ParCSRCommHandleGetJobType(job)) + { + case HYPRE_COMM_PKG_JOB_COMPLEX: + case HYPRE_COMM_PKG_JOB_COMPLEX_TRANSPOSE: + dtype = HYPRE_MPI_COMPLEX; + break; + case HYPRE_COMM_PKG_JOB_INT: + case HYPRE_COMM_PKG_JOB_INT_TRANSPOSE: + dtype = HYPRE_MPI_INT; + break; + case HYPRE_COMM_PKG_JOB_BIGINT: + case HYPRE_COMM_PKG_JOB_BIGINT_TRANSPOSE: + dtype = HYPRE_MPI_BIG_INT; + break; + default: + break; + } + + return dtype; +} /*-------------------------------------------------------------------------- * hypre_ParCSRCommHandle, hypre_ParCSRPersistentCommHandle @@ -34,18 +114,31 @@ struct _hypre_ParCSRCommPkg; typedef struct { struct _hypre_ParCSRCommPkg *comm_pkg; - HYPRE_MemoryLocation send_memory_location; - HYPRE_MemoryLocation recv_memory_location; - HYPRE_Int num_send_bytes; - HYPRE_Int num_recv_bytes; - void *send_data; - void *recv_data; - void *send_data_buffer; - void *recv_data_buffer; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; + HYPRE_Int persistent; + void *send_data; + void *recv_data; + HYPRE_MemoryLocation send_location; + HYPRE_MemoryLocation recv_location; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPICommWrapper *comm; } hypre_ParCSRCommHandle; +/*-------------------------------------------------------------------------- + * Accessor macros: hypre_ParCSRCommHandle + *--------------------------------------------------------------------------*/ + +#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_ParCSRCommHandlePersistent(comm_handle) (comm_handle -> persistent) +#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_ParCSRCommHandleSendLocation(comm_handle) (comm_handle -> send_location) +#define hypre_ParCSRCommHandleRecvLocation(comm_handle) (comm_handle -> recv_location) +#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) +#define hypre_ParCSRCommHandleComm(comm_handle) (comm_handle -> comm) + typedef hypre_ParCSRCommHandle hypre_ParCSRPersistentCommHandle; typedef struct _hypre_ParCSRCommPkg @@ -63,7 +156,7 @@ typedef struct _hypre_ParCSRCommPkg /* remote communication information */ hypre_MPI_Datatype *send_mpi_types; hypre_MPI_Datatype *recv_mpi_types; -#ifdef HYPRE_USING_PERSISTENT_COMM +#if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handles[NUM_OF_COMM_PKG_JOB_TYPE]; #endif #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) @@ -78,31 +171,33 @@ typedef struct _hypre_ParCSRCommPkg * Accessor macros: hypre_ParCSRCommPkg *--------------------------------------------------------------------------*/ -#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) -#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) -#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) -#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) -#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) -#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) -#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) -#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) -#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) -#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) -#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) -#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) -#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) -#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) -#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) -#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) -#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) -#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) -#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) -#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgComm(comm_pkg) (comm_pkg -> comm) +#define hypre_ParCSRCommPkgNumComponents(comm_pkg) (comm_pkg -> num_components) +#define hypre_ParCSRCommPkgNumSends(comm_pkg) (comm_pkg -> num_sends) +#define hypre_ParCSRCommPkgSendProcs(comm_pkg) (comm_pkg -> send_procs) +#define hypre_ParCSRCommPkgSendProc(comm_pkg, i) (comm_pkg -> send_procs[i]) +#define hypre_ParCSRCommPkgSendMapStarts(comm_pkg) (comm_pkg -> send_map_starts) +#define hypre_ParCSRCommPkgSendMapStart(comm_pkg,i) (comm_pkg -> send_map_starts[i]) +#define hypre_ParCSRCommPkgSendMapElmts(comm_pkg) (comm_pkg -> send_map_elmts) +#define hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg) (comm_pkg -> device_send_map_elmts) +#define hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i) (comm_pkg -> send_map_elmts[i]) +#define hypre_ParCSRCommPkgDeviceSendMapElmt(comm_pkg,i) (comm_pkg -> device_send_map_elmts[i]) +#define hypre_ParCSRCommPkgNumRecvs(comm_pkg) (comm_pkg -> num_recvs) +#define hypre_ParCSRCommPkgRecvProcs(comm_pkg) (comm_pkg -> recv_procs) +#define hypre_ParCSRCommPkgRecvProc(comm_pkg, i) (comm_pkg -> recv_procs[i]) +#define hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) (comm_pkg -> recv_vec_starts) +#define hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i) (comm_pkg -> recv_vec_starts[i]) +#define hypre_ParCSRCommPkgSendMPITypes(comm_pkg) (comm_pkg -> send_mpi_types) +#define hypre_ParCSRCommPkgSendMPIType(comm_pkg,i) (comm_pkg -> send_mpi_types[i]) +#define hypre_ParCSRCommPkgRecvMPITypes(comm_pkg) (comm_pkg -> recv_mpi_types) +#define hypre_ParCSRCommPkgRecvMPIType(comm_pkg,i) (comm_pkg -> recv_mpi_types[i]) +#define hypre_ParCSRCommPkgPersistentCommHandles(comm_pkg) (comm_pkg -> persistent_comm_handles) +#define hypre_ParCSRCommPkgPersistentCommHandle(comm_pkg,i) (comm_pkg -> persistent_comm_handles[i]) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) -#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) -#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) +#define hypre_ParCSRCommPkgTmpData(comm_pkg) ((comm_pkg) -> tmp_data) +#define hypre_ParCSRCommPkgBufData(comm_pkg) ((comm_pkg) -> buf_data) +#define hypre_ParCSRCommPkgMatrixE(comm_pkg) ((comm_pkg) -> matrix_E) #endif static inline HYPRE_MAYBE_UNUSED_FUNC void @@ -130,21 +225,4 @@ hypre_ParCSRCommPkgCopySendMapElmtsToDevice(hypre_ParCSRCommPkg *comm_pkg) #endif } -/*-------------------------------------------------------------------------- - * Accessor macros: hypre_ParCSRCommHandle - *--------------------------------------------------------------------------*/ - -#define hypre_ParCSRCommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_ParCSRCommHandleSendMemoryLocation(comm_handle) (comm_handle -> send_memory_location) -#define hypre_ParCSRCommHandleRecvMemoryLocation(comm_handle) (comm_handle -> recv_memory_location) -#define hypre_ParCSRCommHandleNumSendBytes(comm_handle) (comm_handle -> num_send_bytes) -#define hypre_ParCSRCommHandleNumRecvBytes(comm_handle) (comm_handle -> num_recv_bytes) -#define hypre_ParCSRCommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_ParCSRCommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_ParCSRCommHandleSendDataBuffer(comm_handle) (comm_handle -> send_data_buffer) -#define hypre_ParCSRCommHandleRecvDataBuffer(comm_handle) (comm_handle -> recv_data_buffer) -#define hypre_ParCSRCommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_ParCSRCommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_ParCSRCommHandleRequest(comm_handle, i) (comm_handle -> requests[i]) - #endif /* HYPRE_PAR_CSR_COMMUNICATION_HEADER */ diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index c28d57a8c5..dc6e4d30f7 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -6285,7 +6285,6 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Local variables */ HYPRE_Int i; hypre_Vector *rdbuf; - HYPRE_Complex *recv_rdbuf_data; HYPRE_Complex *send_rdbuf_data; /*--------------------------------------------------------------------- @@ -6307,23 +6306,21 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); hypre_VectorData(rdbuf) = (HYPRE_Complex *) - hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); + hypre_ParCSRCommHandleRecvData(comm_handle); hypre_SeqVectorSetDataOwner(rdbuf, 0); #else hypre_ParCSRCommHandle *comm_handle; -#endif - - /* Initialize rdbuf */ hypre_SeqVectorInitialize_v2(rdbuf, HYPRE_MEMORY_HOST); - recv_rdbuf_data = hypre_VectorData(rdbuf); + HYPRE_Complex *recv_rdbuf_data = hypre_VectorData(rdbuf); +#endif /* Allocate send buffer for rdbuf */ #if defined(HYPRE_USING_PERSISTENT_COMM) - send_rdbuf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + send_rdbuf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); #else send_rdbuf_data = hypre_TAlloc(HYPRE_Complex, send_map_starts[num_sends], HYPRE_MEMORY_HOST); #endif @@ -6339,7 +6336,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication starts */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleStart(comm_handle, HYPRE_MEMORY_HOST, send_rdbuf_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -6356,7 +6353,7 @@ hypre_ParCSRMatrixDiagScaleHost( hypre_ParCSRMatrix *par_A, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(comm_handle, HYPRE_MEMORY_HOST, recv_rdbuf_data); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -6710,9 +6707,10 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) - comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); - recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); - send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg, + memory_location, memory_location); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); send_data = hypre_Memset((void *) send_data, 0, (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), memory_location); @@ -6733,7 +6731,7 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, @@ -6757,7 +6755,7 @@ hypre_ParCSRMatrixBlockColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -6919,9 +6917,10 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Allocate the recv and send buffers */ #if defined(HYPRE_USING_PERSISTENT_COMM) - comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg); - recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(comm_handle); - send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(comm_handle); + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_COMM_PKG_JOB_COMPLEX, comm_pkg, + memory_location, memory_location); + recv_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + send_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); send_data = hypre_Memset((void *) send_data, 0, (size_t) (num_cols_offd_A) * sizeof(HYPRE_Complex), memory_location); @@ -6942,7 +6941,7 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(comm_handle, memory_location, send_data); + hypre_ParCSRPersistentCommHandleStart(comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, @@ -6962,7 +6961,7 @@ hypre_ParCSRMatrixColSumHost( hypre_ParCSRMatrix *A, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(comm_handle, memory_location, recv_data); + hypre_ParCSRPersistentCommHandleWait(comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/par_csr_matvec.c b/src/parcsr_mv/par_csr_matvec.c index d9ff56f7c2..ff8905de16 100644 --- a/src/parcsr_mv/par_csr_matvec.c +++ b/src/parcsr_mv/par_csr_matvec.c @@ -49,7 +49,6 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, HYPRE_Int idxstride = hypre_VectorIndexStride(x_local); HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local); HYPRE_Complex *x_local_data = hypre_VectorData(x_local); - HYPRE_Complex *x_tmp_data; HYPRE_Complex *x_buf_data; HYPRE_ANNOTATE_FUNC_BEGIN; @@ -125,7 +124,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); #else hypre_ParCSRCommHandle *comm_handle; #endif @@ -136,21 +135,18 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_VectorData(x_tmp) = (HYPRE_Complex *) - hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + hypre_VectorData(x_tmp) = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); hypre_SeqVectorSetDataOwner(x_tmp, 0); #endif hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_HOST); - x_tmp_data = hypre_VectorData(x_tmp); /*--------------------------------------------------------------------- * Allocate data send buffer *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); - + x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); #else x_buf_data = hypre_TAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), @@ -181,12 +177,11 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication starts */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, - HYPRE_MEMORY_HOST, x_buf_data); + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_HOST, x_buf_data, - HYPRE_MEMORY_HOST, x_tmp_data); + HYPRE_MEMORY_HOST, hypre_VectorData(x_tmp)); #endif #ifdef HYPRE_PROFILE @@ -202,7 +197,7 @@ hypre_ParCSRMatrixMatvecOutOfPlaceHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #ifdef HYPRE_USING_PERSISTENT_COMM - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, HYPRE_MEMORY_HOST, x_tmp_data); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif @@ -308,7 +303,6 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, HYPRE_BigInt x_size = hypre_ParVectorGlobalSize(x); HYPRE_BigInt y_size = hypre_ParVectorGlobalSize(y); - HYPRE_Complex *y_tmp_data; HYPRE_Complex *y_buf_data; HYPRE_Complex *y_local_data = hypre_VectorData(y_local); HYPRE_Int idxstride = hypre_VectorIndexStride(y_local); @@ -388,7 +382,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, #if defined(HYPRE_USING_PERSISTENT_COMM) hypre_ParCSRPersistentCommHandle *persistent_comm_handle = - hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg); + hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); #else hypre_ParCSRCommHandle *comm_handle; #endif @@ -399,20 +393,18 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_VectorData(y_tmp) = (HYPRE_Complex *) - hypre_ParCSRCommHandleSendDataBuffer(persistent_comm_handle); + hypre_VectorData(y_tmp) = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(persistent_comm_handle); hypre_SeqVectorSetDataOwner(y_tmp, 0); #endif hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_HOST); - y_tmp_data = hypre_VectorData(y_tmp); /*--------------------------------------------------------------------- * Allocate receive data buffer *--------------------------------------------------------------------*/ #if defined(HYPRE_USING_PERSISTENT_COMM) - y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvDataBuffer(persistent_comm_handle); + y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(persistent_comm_handle); #else y_buf_data = hypre_TAlloc(HYPRE_Complex, @@ -444,11 +436,10 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication starts */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle, HYPRE_MEMORY_HOST, y_tmp_data); - + hypre_ParCSRPersistentCommHandleStart(persistent_comm_handle); #else comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, - HYPRE_MEMORY_HOST, y_tmp_data, + HYPRE_MEMORY_HOST, hypre_VectorData(y_tmp), HYPRE_MEMORY_HOST, y_buf_data ); #endif @@ -473,8 +464,7 @@ hypre_ParCSRMatrixMatvecTHost( HYPRE_Complex alpha, /* Non-blocking communication ends */ #if defined(HYPRE_USING_PERSISTENT_COMM) - hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle, - HYPRE_MEMORY_HOST, y_buf_data); + hypre_ParCSRPersistentCommHandleWait(persistent_comm_handle); #else hypre_ParCSRCommHandleDestroy(comm_handle); #endif diff --git a/src/parcsr_mv/par_csr_matvec_device.c b/src/parcsr_mv/par_csr_matvec_device.c index e25c7bf78d..507e889ad1 100644 --- a/src/parcsr_mv/par_csr_matvec_device.c +++ b/src/parcsr_mv/par_csr_matvec_device.c @@ -143,31 +143,37 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] -= hypre_MPI_Wtime(); #endif +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(1, comm_pkg, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + x_tmp_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); + x_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); +#else /*--------------------------------------------------------------------- * Allocate or reuse receive data buffer for x_tmp *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgTmpData(comm_pkg)) { hypre_ParCSRCommPkgTmpData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, num_cols_offd * num_vectors, HYPRE_MEMORY_DEVICE); } - hypre_VectorData(x_tmp) = x_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); - hypre_SeqVectorSetDataOwner(x_tmp, 0); - hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_DEVICE); - /*--------------------------------------------------------------------- * Allocate or reuse send data buffer *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgBufData(comm_pkg)) { hypre_ParCSRCommPkgBufData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, send_map_num_elmts, HYPRE_MEMORY_DEVICE); } + x_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); x_buf_data = hypre_ParCSRCommPkgBufData(comm_pkg); +#endif + + hypre_VectorData(x_tmp) = x_tmp_data; + hypre_SeqVectorSetDataOwner(x_tmp, 0); + hypre_SeqVectorInitialize_v2(x_tmp, HYPRE_MEMORY_DEVICE); /* The assert is because this code has been tested for column-wise vector storage only. */ hypre_assert(idxstride == 1); @@ -223,12 +229,16 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, #endif /* Non-blocking communication starts */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, x_buf_data, HYPRE_MEMORY_DEVICE, x_tmp_data); - /* Non-blocking communication ends */ hypre_ParCSRCommHandleDestroy(comm_handle); +#else + hypre_ParCSRPersistentCommHandleStart(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); +#endif #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_HALO_EXCHANGE] += hypre_MPI_Wtime(); @@ -389,31 +399,37 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] -= hypre_MPI_Wtime(); #endif +#if defined(HYPRE_USING_PERSISTENT_COMM) + comm_handle = hypre_ParCSRCommPkgGetPersistentCommHandle(2, comm_pkg, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + y_tmp_data = (HYPRE_Complex *) hypre_ParCSRCommHandleSendData(comm_handle); + y_buf_data = (HYPRE_Complex *) hypre_ParCSRCommHandleRecvData(comm_handle); +#else /*--------------------------------------------------------------------- * Allocate or reuse send data buffer for y_tmp *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgTmpData(comm_pkg)) { hypre_ParCSRCommPkgTmpData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, num_cols_offd * num_vectors, HYPRE_MEMORY_DEVICE); } - hypre_VectorData(y_tmp) = y_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); - hypre_SeqVectorSetDataOwner(y_tmp, 0); - hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_DEVICE); - /*--------------------------------------------------------------------- * Allocate receive data buffer *--------------------------------------------------------------------*/ - if (!hypre_ParCSRCommPkgBufData(comm_pkg)) { hypre_ParCSRCommPkgBufData(comm_pkg) = hypre_TAlloc(HYPRE_Complex, send_map_num_elmts, HYPRE_MEMORY_DEVICE); } + y_tmp_data = hypre_ParCSRCommPkgTmpData(comm_pkg); y_buf_data = hypre_ParCSRCommPkgBufData(comm_pkg); +#endif + + hypre_VectorData(y_tmp) = y_tmp_data; + hypre_SeqVectorSetDataOwner(y_tmp, 0); + hypre_SeqVectorInitialize_v2(y_tmp, HYPRE_MEMORY_DEVICE); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime(); @@ -455,12 +471,16 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, #endif /* Non-blocking communication starts */ +#if !defined(HYPRE_USING_PERSISTENT_COMM) comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, y_tmp_data, HYPRE_MEMORY_DEVICE, y_buf_data ); - /* Non-blocking communication ends */ hypre_ParCSRCommHandleDestroy(comm_handle); +#else + hypre_ParCSRPersistentCommHandleStart(comm_handle); + hypre_ParCSRPersistentCommHandleWait(comm_handle); +#endif #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_HALO_EXCHANGE] += hypre_MPI_Wtime(); diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 127497564f..442ca9c524 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -237,12 +237,20 @@ HYPRE_Int hypre_BooleanGenerateDiagAndOffd ( hypre_CSRBooleanMatrix *A, /* par_csr_communication.c */ hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, void *send_data, void *recv_data ); +hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_core ( HYPRE_Int persistent, + HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + void *send_data_in, + HYPRE_MemoryLocation recv_memory_location, + void *recv_data_in ); hypre_ParCSRCommHandle *hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job, hypre_ParCSRCommPkg *comm_pkg, HYPRE_MemoryLocation send_memory_location, void *send_data_in, HYPRE_MemoryLocation recv_memory_location, void *recv_data_in ); +HYPRE_Int hypre_ParCSRCommHandleWait(hypre_ParCSRCommHandle *comm_handle); HYPRE_Int hypre_ParCSRCommHandleDestroy ( hypre_ParCSRCommHandle *comm_handle ); void hypre_ParCSRCommPkgCreate_core ( MPI_Comm comm, HYPRE_BigInt *col_map_offd, HYPRE_BigInt first_col_diag, HYPRE_BigInt *col_starts, HYPRE_Int num_cols_diag, @@ -373,15 +381,18 @@ HYPRE_Int hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, HYPRE HYPRE_Int hypre_ParCSRCommPkgCreateMatrixE( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); #ifdef HYPRE_USING_PERSISTENT_COMM -hypre_ParCSRPersistentCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -hypre_ParCSRPersistentCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, - hypre_ParCSRCommPkg *comm_pkg); -void hypre_ParCSRPersistentCommHandleDestroy(hypre_ParCSRPersistentCommHandle *comm_handle); -void hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation send_memory_location, void *send_data); -void hypre_ParCSRPersistentCommHandleWait(hypre_ParCSRPersistentCommHandle *comm_handle, - HYPRE_MemoryLocation recv_memory_location, void *recv_data); +hypre_ParCSRCommHandle* hypre_ParCSRPersistentCommHandleCreate(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +hypre_ParCSRCommHandle* hypre_ParCSRCommPkgGetPersistentCommHandle(HYPRE_Int job, + hypre_ParCSRCommPkg *comm_pkg, + HYPRE_MemoryLocation send_memory_location, + HYPRE_MemoryLocation recv_memory_location); +#define hypre_ParCSRPersistentCommHandleWait hypre_ParCSRCommHandleWait +#define hypre_ParCSRPersistentCommHandleDestroy hypre_ParCSRCommHandleDestroy + +HYPRE_Int hypre_ParCSRPersistentCommHandleStart(hypre_ParCSRCommHandle *comm_handle); #endif HYPRE_Int hypre_ParcsrGetExternalRowsInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, diff --git a/src/struct_mv/_hypre_struct_mv.h b/src/struct_mv/_hypre_struct_mv.h index fdb019e167..bca2198a32 100644 --- a/src/struct_mv/_hypre_struct_mv.h +++ b/src/struct_mv/_hypre_struct_mv.h @@ -929,23 +929,20 @@ typedef struct hypre_CommPkg_struct typedef struct hypre_CommHandle_struct { - hypre_CommPkg *comm_pkg; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; + hypre_CommPkg *comm_pkg; + HYPRE_Complex *send_data; + HYPRE_Complex *recv_data; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; - - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; /* set = 0, add = 1 */ - HYPRE_Int action; - + HYPRE_Int action; + hypre_MPICommWrapper *comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- @@ -1043,17 +1040,16 @@ typedef struct hypre_CommHandle_struct * Accessor macros: hypre_CommHandle *--------------------------------------------------------------------------*/ -#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) -#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) -#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) -#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) +#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) +#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) +#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) +#define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif /****************************************************************************** diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index a6ea8a8d7c..787fae2998 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -764,43 +764,100 @@ hypre_CommTypeSetEntry( hypre_Box *box, return hypre_error_flag; } -HYPRE_Complex * -hypre_StructCommunicationGetBuffer(HYPRE_MemoryLocation memory_location, - HYPRE_Int size) +HYPRE_Int +hypre_CommHandleAllocateBuffers( HYPRE_MemoryLocation memory_location, + hypre_CommPkg *comm_pkg, + hypre_CommHandle *comm_handle ) { - HYPRE_Complex *ptr; + hypre_MemoryLocation send_memory_alocation = hypre_GetActualMemLocation(memory_location); + hypre_MemoryLocation recv_memory_alocation = hypre_GetActualMemLocation(memory_location); + hypre_MemoryLocation send_copy_location = hypre_MEMORY_UNDEFINED; + hypre_MemoryLocation recv_copy_location = hypre_MEMORY_UNDEFINED; + void *send_copy_buffer = NULL; + void *recv_copy_buffer = NULL; + HYPRE_Complex **send_buffers = NULL; + HYPRE_Complex **recv_buffers = NULL; + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + HYPRE_Int num_send_elems = hypre_CommPkgSendBufsize(comm_pkg); + HYPRE_Int num_recv_elems = hypre_CommPkgRecvBufsize(comm_pkg); + HYPRE_Int size_of_elem = sizeof(HYPRE_Complex); + HYPRE_Int i; -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) + /* allocate send buffers */ + send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends + 1, HYPRE_MEMORY_HOST); + if (num_sends > 0) { - if (size > hypre_HandleStructCommSendBufferSize(hypre_handle())) + send_buffers[0] = hypre_TAlloc(HYPRE_Complex, num_send_elems, memory_location); + for (i = 1; i <= num_sends; i++) { - HYPRE_Int new_size = 5 * size; - hypre_HandleStructCommSendBufferSize(hypre_handle()) = new_size; - hypre_TFree(hypre_HandleStructCommSendBuffer(hypre_handle()), memory_location); - hypre_HandleStructCommSendBuffer(hypre_handle()) = hypre_CTAlloc(HYPRE_Complex, new_size, - memory_location); + hypre_CommType *comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); + send_buffers[i] = send_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); } + } - ptr = hypre_HandleStructCommSendBuffer(hypre_handle()); + /* allocate recv buffers */ + recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs + 1, HYPRE_MEMORY_HOST); + if (num_recvs > 0) + { + recv_buffers[0] = hypre_TAlloc(HYPRE_Complex, num_recv_elems, memory_location); + for (i = 1; i <= num_recvs; i++) + { + hypre_CommType *comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); + recv_buffers[i] = recv_buffers[i - 1] + hypre_CommTypeBufsize(comm_type); + } } - else -#endif + + hypre_CommHandleSendBuffers(comm_handle) = send_buffers; + hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; + + if (hypre_NeedMPICopyBuffer(send_memory_alocation)) { - ptr = hypre_CTAlloc(HYPRE_Complex, size, memory_location); + send_copy_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + send_copy_buffer = _hypre_TAlloc(char, num_send_elems * size_of_elem, send_copy_location); } - return ptr; -} + if (hypre_NeedMPICopyBuffer(recv_memory_alocation)) + { + recv_copy_location = hypre_HandleMPICopyBufferLocation(hypre_handle()); + recv_copy_buffer = _hypre_TAlloc(char, num_recv_elems * size_of_elem, recv_copy_location); + } -HYPRE_Int -hypre_StructCommunicationReleaseBuffer(HYPRE_Complex *buffer, - HYPRE_MemoryLocation memory_location) -{ - if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_HOST) + hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); + +#if defined(HYPRE_DEBUG) + if (hypre_MPICommGetSendLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm SendLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvLocation existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm SendBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetRecvBuffer(comm)) + { + hypre_printf("[%s, %d] MPI_Comm RecvBuffer existed!\n", __FILE__, __LINE__); + } + if (hypre_MPICommGetSendBufferLocation(comm) != hypre_MEMORY_UNDEFINED) { - hypre_TFree(buffer, memory_location); + hypre_printf("[%s, %d] MPI_Comm SendBufferLocation existed!\n", __FILE__, __LINE__); } + if (hypre_MPICommGetRecvBufferLocation(comm) != hypre_MEMORY_UNDEFINED) + { + hypre_printf("[%s, %d] MPI_Comm RecvBufferLocation existed!\n", __FILE__, __LINE__); + } +#endif + + hypre_MPICommSetSendLocation(comm, send_memory_alocation); + hypre_MPICommSetRecvLocation(comm, recv_memory_alocation); + hypre_MPICommSetSendBuffer(comm, send_copy_buffer); + hypre_MPICommSetRecvBuffer(comm, recv_copy_buffer); + hypre_MPICommSetSendBufferLocation(comm, send_copy_location); + hypre_MPICommSetRecvBufferLocation(comm, recv_copy_location); return hypre_error_flag; } @@ -824,39 +881,32 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int tag, hypre_CommHandle **comm_handle_ptr ) { - hypre_CommHandle *comm_handle; - - HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); - HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); - HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); - HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); - MPI_Comm comm = hypre_CommPkgComm(comm_pkg); - - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; - - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; - - hypre_CommType *comm_type, *from_type, *to_type; - hypre_CommEntryType *comm_entry; - HYPRE_Int num_entries; - - HYPRE_Int *length_array; - HYPRE_Int *stride_array, unitst_array[HYPRE_MAXDIM + 1]; - HYPRE_Int *order; - - HYPRE_Complex *dptr, *kptr, *lptr; - HYPRE_Int *qptr; - - HYPRE_Int i, j, d, ll; - HYPRE_Int size; - - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - HYPRE_MemoryLocation memory_location_mpi = memory_location; + hypre_CommHandle *comm_handle = hypre_CTAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); + + HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); + HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); + HYPRE_Int num_sends = hypre_CommPkgNumSends(comm_pkg); + HYPRE_Int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); + MPI_Comm comm_orig = hypre_CommPkgComm(comm_pkg); + hypre_MPICommWrapper *comm = hypre_MPICommWrapperCreate(comm_orig); + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; + hypre_CommType *comm_type, *from_type, *to_type; + hypre_CommEntryType *comm_entry; + HYPRE_Int num_entries; + HYPRE_Int *length_array; + HYPRE_Int *stride_array, unitst_array[HYPRE_MAXDIM + 1]; + HYPRE_Int *order; + HYPRE_Complex *dptr, *kptr, *lptr; + HYPRE_Int *qptr; + HYPRE_Int i, j, d, ll; + HYPRE_Int size; + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + + hypre_CommHandleComm(comm_handle) = comm; /*-------------------------------------------------------------------- * allocate requests and status @@ -870,33 +920,10 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * allocate buffers *--------------------------------------------------------------------*/ - /* allocate send buffers */ - send_buffers = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) - { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers[0] = hypre_StructCommunicationGetBuffer(memory_location, size); - for (i = 1; i < num_sends; i++) - { - comm_type = hypre_CommPkgSendType(comm_pkg, i - 1); - size = hypre_CommTypeBufsize(comm_type); - send_buffers[i] = send_buffers[i - 1] + size; - } - } + hypre_CommHandleAllocateBuffers(memory_location, comm_pkg, comm_handle); - /* allocate recv buffers */ - recv_buffers = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers[0] = hypre_StructCommunicationGetBuffer(memory_location, size); - for (i = 1; i < num_recvs; i++) - { - comm_type = hypre_CommPkgRecvType(comm_pkg, i - 1); - size = hypre_CommTypeBufsize(comm_type); - recv_buffers[i] = recv_buffers[i - 1] + size; - } - } + send_buffers = hypre_CommHandleSendBuffers(comm_handle); + recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); /*-------------------------------------------------------------------- * pack send buffers @@ -960,53 +987,6 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, } } -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) - { - if (hypre_GetGpuAwareMPI()) - { -#if defined(HYPRE_USING_GPU) - hypre_ForceSyncComputeStream(); -#endif - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; - } - else - { - memory_location_mpi = HYPRE_MEMORY_HOST; - - send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) - { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_sends; i++) - { - send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); - } - hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - - recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_recvs; i++) - { - recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); - } - } - } - } - else -#endif - { - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; - } - for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); @@ -1014,15 +994,15 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, if ( hypre_CommPkgFirstComm(comm_pkg) ) { - qptr = (HYPRE_Int *) send_buffers_mpi[i]; + qptr = (HYPRE_Int *) (send_buffers[0] + (send_buffers[i] - send_buffers[0])); hypre_TMemcpy(qptr, &num_entries, - HYPRE_Int, 1, memory_location_mpi, HYPRE_MEMORY_HOST); + HYPRE_Int, 1, memory_location, HYPRE_MEMORY_HOST); qptr ++; hypre_TMemcpy(qptr, hypre_CommTypeRemBoxnums(comm_type), - HYPRE_Int, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + HYPRE_Int, num_entries, memory_location, HYPRE_MEMORY_HOST); qptr += num_entries; hypre_TMemcpy(qptr, hypre_CommTypeRemBoxes(comm_type), - hypre_Box, num_entries, memory_location_mpi, HYPRE_MEMORY_HOST); + hypre_Box, num_entries, memory_location, HYPRE_MEMORY_HOST); hypre_CommTypeRemBoxnums(comm_type) = NULL; hypre_CommTypeRemBoxes(comm_type) = NULL; } @@ -1032,37 +1012,79 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * post receives and initiate sends *--------------------------------------------------------------------*/ - j = 0; - for (i = 0; i < num_recvs; i++) + HYPRE_Int *displs_recv = NULL; + HYPRE_Int *procs_recv = NULL; + HYPRE_Int *displs_send = NULL; + HYPRE_Int *procs_send = NULL; + + if (num_recvs) { - comm_type = hypre_CommPkgRecvType(comm_pkg, i); - hypre_MPI_Irecv(recv_buffers_mpi[i], - hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), - hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); - if ( hypre_CommPkgFirstComm(comm_pkg) ) + displs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs + 1, HYPRE_MEMORY_HOST); + procs_recv = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); + for (i = 0; i < num_recvs; i++) { + comm_type = hypre_CommPkgRecvType(comm_pkg, i); + procs_recv[i] = hypre_CommTypeProc(comm_type); + displs_recv[i+1] = (recv_buffers[i+1] - recv_buffers[0]) * sizeof(HYPRE_Complex); + } + } + + if (num_sends) + { + displs_send = hypre_CTAlloc(HYPRE_Int, num_sends + 1, HYPRE_MEMORY_HOST); + procs_send = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); + for (i = 0; i < num_sends; i++) + { + comm_type = hypre_CommPkgSendType(comm_pkg, i); + procs_send[i] = hypre_CommTypeProc(comm_type); + displs_send[i+1] = (send_buffers[i+1] - send_buffers[0]) * sizeof(HYPRE_Complex); + } + } + + hypre_MPI_Irecv_Multiple(recv_buffers ? recv_buffers[0] : NULL, + num_recvs, + displs_recv, + NULL, + hypre_MPI_BYTE, + procs_recv, + tag, + comm, + requests); + + hypre_MPI_Isend_Multiple(send_buffers ? send_buffers[0] : NULL, + num_sends, + displs_send, + NULL, + hypre_MPI_BYTE, + procs_send, + tag, + comm, + requests + num_recvs); + + if ( hypre_CommPkgFirstComm(comm_pkg) ) + { + for (i = 0; i < num_recvs; i++) + { + comm_type = hypre_CommPkgRecvType(comm_pkg, i); size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgRecvBufsize(comm_pkg) -= size; } - } - for (i = 0; i < num_sends; i++) - { - comm_type = hypre_CommPkgSendType(comm_pkg, i); - hypre_MPI_Isend(send_buffers_mpi[i], - hypre_CommTypeBufsize(comm_type)*sizeof(HYPRE_Complex), - hypre_MPI_BYTE, hypre_CommTypeProc(comm_type), - tag, comm, &requests[j++]); - if ( hypre_CommPkgFirstComm(comm_pkg) ) + for (i = 0; i < num_sends; i++) { + comm_type = hypre_CommPkgSendType(comm_pkg, i); size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgSendBufsize(comm_pkg) -= size; } } + hypre_TFree(displs_recv, HYPRE_MEMORY_HOST); + hypre_TFree(procs_recv, HYPRE_MEMORY_HOST); + hypre_TFree(displs_send, HYPRE_MEMORY_HOST); + hypre_TFree(procs_send, HYPRE_MEMORY_HOST); + /*-------------------------------------------------------------------- * set up CopyToType and exchange local data *--------------------------------------------------------------------*/ @@ -1094,19 +1116,13 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, * set up comm_handle and return *--------------------------------------------------------------------*/ - comm_handle = hypre_TAlloc(hypre_CommHandle, 1, HYPRE_MEMORY_HOST); - hypre_CommHandleCommPkg(comm_handle) = comm_pkg; hypre_CommHandleSendData(comm_handle) = send_data; hypre_CommHandleRecvData(comm_handle) = recv_data; hypre_CommHandleNumRequests(comm_handle) = num_requests; hypre_CommHandleRequests(comm_handle) = requests; hypre_CommHandleStatus(comm_handle) = status; - hypre_CommHandleSendBuffers(comm_handle) = send_buffers; - hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; hypre_CommHandleAction(comm_handle) = action; - hypre_CommHandleSendBuffersMPI(comm_handle) = send_buffers_mpi; - hypre_CommHandleRecvBuffersMPI(comm_handle) = recv_buffers_mpi; *comm_handle_ptr = comm_handle; @@ -1124,12 +1140,11 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, HYPRE_Int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { - hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); - HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); - HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); - HYPRE_Complex **send_buffers_mpi = hypre_CommHandleSendBuffersMPI(comm_handle); - HYPRE_Complex **recv_buffers_mpi = hypre_CommHandleRecvBuffersMPI(comm_handle); - HYPRE_Int action = hypre_CommHandleAction(comm_handle); + hypre_MPICommWrapper *comm = hypre_CommHandleComm(comm_handle); + hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); + HYPRE_Complex **send_buffers = hypre_CommHandleSendBuffers(comm_handle); + HYPRE_Complex **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); + HYPRE_Int action = hypre_CommHandleAction(comm_handle); HYPRE_Int ndim = hypre_CommPkgNDim(comm_pkg); HYPRE_Int num_values = hypre_CommPkgNumValues(comm_pkg); @@ -1152,15 +1167,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_Int i, j, d, ll; - HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); - HYPRE_MemoryLocation memory_location_mpi = memory_location; - -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - if (!hypre_GetGpuAwareMPI()) - { - memory_location_mpi = HYPRE_MEMORY_HOST; - } -#endif + HYPRE_MemoryLocation memory_location = hypre_HandleMemoryLocation(hypre_handle()); + hypre_MPI_Request *post_recv_request = hypre_MPICommGetPostRecvRequest(comm); /*-------------------------------------------------------------------- * finish communications @@ -1173,6 +1181,11 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommHandleStatus(comm_handle)); } + if (post_recv_request) + { + hypre_MPI_Wait(post_recv_request, hypre_MPI_STATUS_IGNORE); + } + /*-------------------------------------------------------------------- * if FirstComm, unpack prefix information and set 'num_entries' and * 'entries' for RecvType @@ -1187,10 +1200,8 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); - qptr = (HYPRE_Int *) recv_buffers_mpi[i]; - - hypre_TMemcpy(&hypre_CommTypeNumEntries(comm_type), qptr, - HYPRE_Int, 1, HYPRE_MEMORY_HOST, memory_location_mpi); + hypre_TMemcpy(&hypre_CommTypeNumEntries(comm_type), recv_buffers[i], + HYPRE_Int, 1, HYPRE_MEMORY_HOST, memory_location); num_entries += hypre_CommTypeNumEntries(comm_type); } @@ -1208,25 +1219,18 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) num_entries = hypre_CommTypeNumEntries(comm_type); ct_entries += num_entries; - qptr = (HYPRE_Int *) recv_buffers_mpi[i]; + qptr = (HYPRE_Int *) recv_buffers[i]; qptr++; /* Set boxnums and boxes from MPI recv buffer */ - if (!hypre_GetGpuAwareMPI()) - { - boxnums = (HYPRE_Int*) qptr; - qptr += num_entries; - boxes = (hypre_Box*) qptr; - } - else { boxnums = hypre_TAlloc(HYPRE_Int, num_entries, HYPRE_MEMORY_HOST); hypre_TMemcpy(boxnums, qptr, HYPRE_Int, num_entries, - HYPRE_MEMORY_HOST, memory_location_mpi); + HYPRE_MEMORY_HOST, memory_location); qptr += num_entries; boxes = hypre_TAlloc(hypre_Box, num_entries, HYPRE_MEMORY_HOST); hypre_TMemcpy(boxes, qptr, hypre_Box, num_entries, - HYPRE_MEMORY_HOST, memory_location_mpi); + HYPRE_MEMORY_HOST, memory_location); } /* Set the entries for the comm_type */ @@ -1238,8 +1242,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_CommPkgRecvDataSpace(comm_pkg), hypre_CommPkgRecvDataOffsets(comm_pkg)); - /* Free allocated memory if using GPU-aware MPI */ - if (hypre_GetGpuAwareMPI()) { hypre_TFree(boxnums, HYPRE_MEMORY_HOST); hypre_TFree(boxes, HYPRE_MEMORY_HOST); @@ -1251,21 +1253,6 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) * unpack receive buffer data *--------------------------------------------------------------------*/ - /* Note: hypre_CommPkgRecvBufsize is different in the first comm */ - if (recv_buffers != recv_buffers_mpi) - { - if (num_recvs > 0) - { - HYPRE_Int recv_buf_size; - - recv_buf_size = hypre_CommPkgFirstComm(comm_pkg) ? hypre_CommPkgRecvBufsizeFirstComm(comm_pkg) : - hypre_CommPkgRecvBufsize(comm_pkg); - - hypre_TMemcpy(recv_buffers[0], recv_buffers_mpi[0], HYPRE_Complex, recv_buf_size, - memory_location, memory_location_mpi); - } - } - for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); @@ -1329,26 +1316,40 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) hypre_TFree(hypre_CommHandleRequests(comm_handle), HYPRE_MEMORY_HOST); hypre_TFree(hypre_CommHandleStatus(comm_handle), HYPRE_MEMORY_HOST); - if (num_sends > 0) - { - hypre_StructCommunicationReleaseBuffer(send_buffers[0], memory_location); - } - if (num_recvs > 0) - { - hypre_StructCommunicationReleaseBuffer(recv_buffers[0], memory_location); - } + hypre_MemoryLocation send_buffer_location = hypre_MPICommGetSendBufferLocation(comm); + hypre_MemoryLocation recv_buffer_location = hypre_MPICommGetRecvBufferLocation(comm); + void *send_copy_buffer = hypre_MPICommGetSendBuffer(comm); + void *recv_copy_buffer = hypre_MPICommGetRecvBuffer(comm); + + _hypre_TFree(send_copy_buffer, send_buffer_location); + _hypre_TFree(recv_copy_buffer, recv_buffer_location); + + /* attributes should be deleted when the communicator is being freed * + * but since we delete comm right after, so we don't .... */ + #if 1 + hypre_MPICommDeleteSendLocation(comm); + hypre_MPICommDeleteRecvLocation(comm); + hypre_MPICommDeleteSendBufferLocation(comm); + hypre_MPICommDeleteRecvBufferLocation(comm); + hypre_MPICommDeleteSendBuffer(comm); + hypre_MPICommDeleteRecvBuffer(comm); + if (post_recv_request) { hypre_MPICommDeletePostRecvRequest(comm); } + #endif + + hypre_TFree(post_recv_request, HYPRE_MEMORY_HOST); + + hypre_TFree(comm, HYPRE_MEMORY_HOST); hypre_TFree(comm_handle, HYPRE_MEMORY_HOST); - if (send_buffers != send_buffers_mpi) + if (num_sends > 0) { - hypre_TFree(send_buffers_mpi[0], memory_location_mpi); - hypre_TFree(send_buffers_mpi, HYPRE_MEMORY_HOST); + hypre_TFree(send_buffers[0], memory_location); } - if (recv_buffers != recv_buffers_mpi) + + if (num_recvs > 0) { - hypre_TFree(recv_buffers_mpi[0], memory_location_mpi); - hypre_TFree(recv_buffers_mpi, HYPRE_MEMORY_HOST); + hypre_TFree(recv_buffers[0], memory_location); } hypre_TFree(send_buffers, HYPRE_MEMORY_HOST); diff --git a/src/struct_mv/struct_communication.h b/src/struct_mv/struct_communication.h index fa5ddf1af7..c575ced6a6 100644 --- a/src/struct_mv/struct_communication.h +++ b/src/struct_mv/struct_communication.h @@ -131,23 +131,20 @@ typedef struct hypre_CommPkg_struct typedef struct hypre_CommHandle_struct { - hypre_CommPkg *comm_pkg; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; + hypre_CommPkg *comm_pkg; + HYPRE_Complex *send_data; + HYPRE_Complex *recv_data; - HYPRE_Int num_requests; - hypre_MPI_Request *requests; - hypre_MPI_Status *status; + HYPRE_Int num_requests; + hypre_MPI_Request *requests; + hypre_MPI_Status *status; - HYPRE_Complex **send_buffers; - HYPRE_Complex **recv_buffers; - - HYPRE_Complex **send_buffers_mpi; - HYPRE_Complex **recv_buffers_mpi; + HYPRE_Complex **send_buffers; + HYPRE_Complex **recv_buffers; /* set = 0, add = 1 */ - HYPRE_Int action; - + HYPRE_Int action; + hypre_MPICommWrapper *comm; } hypre_CommHandle; /*-------------------------------------------------------------------------- @@ -245,16 +242,15 @@ typedef struct hypre_CommHandle_struct * Accessor macros: hypre_CommHandle *--------------------------------------------------------------------------*/ -#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) -#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) -#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) -#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) -#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) -#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) -#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) -#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) -#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) -#define hypre_CommHandleSendBuffersMPI(comm_handle) (comm_handle -> send_buffers_mpi) -#define hypre_CommHandleRecvBuffersMPI(comm_handle) (comm_handle -> recv_buffers_mpi) +#define hypre_CommHandleCommPkg(comm_handle) (comm_handle -> comm_pkg) +#define hypre_CommHandleSendData(comm_handle) (comm_handle -> send_data) +#define hypre_CommHandleRecvData(comm_handle) (comm_handle -> recv_data) +#define hypre_CommHandleNumRequests(comm_handle) (comm_handle -> num_requests) +#define hypre_CommHandleRequests(comm_handle) (comm_handle -> requests) +#define hypre_CommHandleStatus(comm_handle) (comm_handle -> status) +#define hypre_CommHandleSendBuffers(comm_handle) (comm_handle -> send_buffers) +#define hypre_CommHandleRecvBuffers(comm_handle) (comm_handle -> recv_buffers) +#define hypre_CommHandleAction(comm_handle) (comm_handle -> action) +#define hypre_CommHandleComm(comm_handle) (comm_handle -> comm) #endif diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index ccdce50c7f..3f9a15423b 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -14,154 +14,6 @@ extern "C" { #endif -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -/****************************************************************************** - * - * General structures and values - * - *****************************************************************************/ - -#ifndef HYPRE_HANDLE_H -#define HYPRE_HANDLE_H - -#if defined(HYPRE_USING_UMPIRE) -#include "umpire/config.hpp" -#if UMPIRE_VERSION_MAJOR >= 2022 -#include "umpire/interface/c_fortran/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool -#else -#include "umpire/interface/umpire.h" -#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool -#endif /* UMPIRE_VERSION_MAJOR >= 2022 */ -#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 -#endif /* defined(HYPRE_USING_UMPIRE) */ - -struct hypre_DeviceData; -typedef struct hypre_DeviceData hypre_DeviceData; -typedef void (*GPUMallocFunc)(void **, size_t); -typedef void (*GPUMfreeFunc)(void *); - -typedef struct -{ - HYPRE_Int log_level; - HYPRE_Int hypre_error; - HYPRE_MemoryLocation memory_location; - HYPRE_ExecutionPolicy default_exec_policy; - - /* the device buffers needed to do MPI communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - - /* GPU MPI */ -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) - HYPRE_Int use_gpu_aware_mpi; -#endif - -#if defined(HYPRE_USING_GPU) - hypre_DeviceData *device_data; - HYPRE_Int device_gs_method; /* device G-S options */ -#endif - - /* user malloc/free function pointers */ - GPUMallocFunc user_device_malloc; - GPUMfreeFunc user_device_free; - -#if defined(HYPRE_USING_UMPIRE) - char umpire_device_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_um_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_host_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - char umpire_pinned_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; - size_t umpire_device_pool_size; - size_t umpire_um_pool_size; - size_t umpire_host_pool_size; - size_t umpire_pinned_pool_size; - size_t umpire_block_size; - HYPRE_Int own_umpire_device_pool; - HYPRE_Int own_umpire_um_pool; - HYPRE_Int own_umpire_host_pool; - HYPRE_Int own_umpire_pinned_pool; - umpire_resourcemanager umpire_rm; -#endif - -#if defined(HYPRE_USING_MAGMA) - magma_queue_t magma_queue; -#endif -} hypre_Handle; - -/* accessor macros to hypre_Handle */ -#define hypre_HandleLogLevel(hypre_handle) ((hypre_handle) -> log_level) -#define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) -#define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) - -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - -#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) -#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) -#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) - -#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxBin(hypre_handle) hypre_DeviceDataCubMaxBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubMaxCachedBytes(hypre_handle) hypre_DeviceDataCubMaxCachedBytes(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleComputeStreamNum(hypre_handle) hypre_DeviceDataComputeStreamNum(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleReduceBuffer(hypre_handle) hypre_DeviceDataReduceBuffer(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmUseVendor(hypre_handle) hypre_DeviceDataSpgemmUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpMVUseVendor(hypre_handle) hypre_DeviceDataSpMVUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpTransUseVendor(hypre_handle) hypre_DeviceDataSpTransUseVendor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmAlgorithm(hypre_handle) hypre_DeviceDataSpgemmAlgorithm(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBinned(hypre_handle) hypre_DeviceDataSpgemmBinned(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmNumBin(hypre_handle) hypre_DeviceDataSpgemmNumBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmHighestBin(hypre_handle) hypre_DeviceDataSpgemmHighestBin(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmBlockNumDim(hypre_handle) hypre_DeviceDataSpgemmBlockNumDim(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMethod(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMethod(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateNsamples(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMultFactor(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleDeviceAllocator(hypre_handle) hypre_DeviceDataDeviceAllocator(hypre_HandleDeviceData(hypre_handle)) -#define hypre_HandleUseGpuRand(hypre_handle) hypre_DeviceDataUseGpuRand(hypre_HandleDeviceData(hypre_handle)) - -#define hypre_HandleUserDeviceMalloc(hypre_handle) ((hypre_handle) -> user_device_malloc) -#define hypre_HandleUserDeviceMfree(hypre_handle) ((hypre_handle) -> user_device_free) - -#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) -#define hypre_HandleUmpireDevicePoolSize(hypre_handle) ((hypre_handle) -> umpire_device_pool_size) -#define hypre_HandleUmpireUMPoolSize(hypre_handle) ((hypre_handle) -> umpire_um_pool_size) -#define hypre_HandleUmpireHostPoolSize(hypre_handle) ((hypre_handle) -> umpire_host_pool_size) -#define hypre_HandleUmpirePinnedPoolSize(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_size) -#define hypre_HandleUmpireBlockSize(hypre_handle) ((hypre_handle) -> umpire_block_size) -#define hypre_HandleUmpireDevicePoolName(hypre_handle) ((hypre_handle) -> umpire_device_pool_name) -#define hypre_HandleUmpireUMPoolName(hypre_handle) ((hypre_handle) -> umpire_um_pool_name) -#define hypre_HandleUmpireHostPoolName(hypre_handle) ((hypre_handle) -> umpire_host_pool_name) -#define hypre_HandleUmpirePinnedPoolName(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_name) -#define hypre_HandleOwnUmpireDevicePool(hypre_handle) ((hypre_handle) -> own_umpire_device_pool) -#define hypre_HandleOwnUmpireUMPool(hypre_handle) ((hypre_handle) -> own_umpire_um_pool) -#define hypre_HandleOwnUmpireHostPool(hypre_handle) ((hypre_handle) -> own_umpire_host_pool) -#define hypre_HandleOwnUmpirePinnedPool(hypre_handle) ((hypre_handle) -> own_umpire_pinned_pool) - -#define hypre_HandleMagmaQueue(hypre_handle) ((hypre_handle) -> magma_queue) - -#endif /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -713,6 +565,19 @@ void hypre_error_code_restore(void); #endif #endif /* hypre_ERROR_HEADER */ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef HYPRE_SMP_HEADER +#define HYPRE_SMP_HEADER +#endif + +#define HYPRE_SMP_SCHEDULE schedule(static) + /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -722,366 +587,489 @@ void hypre_error_code_restore(void); /****************************************************************************** * - * Fake mpi stubs to generate serial codes without mpi + * Header file for memory management utilities + * + * The abstract memory model has a Host (think CPU) and a Device (think GPU) and + * three basic types of memory management utilities: + * + * 1. Malloc(..., location) + * location=LOCATION_DEVICE - malloc memory on the device + * location=LOCATION_HOST - malloc memory on the host + * 2. MemCopy(..., method) + * method=HOST_TO_DEVICE - copy from host to device + * method=DEVICE_TO_HOST - copy from device to host + * method=DEVICE_TO_DEVICE - copy from device to device + * 3. SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host + * + * Although the abstract model does not explicitly reflect a managed memory + * model (i.e., unified memory), it can support it. Here is a summary of how + * the abstract model would be mapped to specific hardware scenarios: + * + * Not using a device, not using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - host malloc e.g., malloc + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to host e.g., memcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the host + * location=LOCATION_HOST - execute on the host + * + * Using a device, not using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - device malloc e.g., cudaMalloc + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMemcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMemcpy + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMemcpy + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host + * + * Using a device, using managed memory + * Malloc(..., location) + * location=LOCATION_DEVICE - managed malloc e.g., cudaMallocManaged + * location=LOCATION_HOST - host malloc e.g., malloc + * MemoryCopy(..., locTo,locFrom) + * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMallocManaged + * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMallocManaged + * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMallocManaged + * SetExecutionMode + * location=LOCATION_DEVICE - execute on the device + * location=LOCATION_HOST - execute on the host * *****************************************************************************/ -#ifndef hypre_MPISTUBS -#define hypre_MPISTUBS +#ifndef hypre_MEMORY_HEADER +#define hypre_MEMORY_HEADER + +#include +#include + +#if defined(HYPRE_USING_UNIFIED_MEMORY) && defined(HYPRE_USING_DEVICE_OPENMP) +//#pragma omp requires unified_shared_memory +#endif + +/* stringification: + * _Pragma(string-literal), so we need to cast argument to a string + * The three dots as last argument of the macro tells compiler that this is a variadic macro. + * I.e. this is a macro that receives variable number of arguments. + */ +#define HYPRE_STR(...) #__VA_ARGS__ +#define HYPRE_XSTR(...) HYPRE_STR(__VA_ARGS__) #ifdef __cplusplus extern "C" { #endif -#ifdef HYPRE_SEQUENTIAL - -/****************************************************************************** - * MPI stubs to generate serial codes without mpi - *****************************************************************************/ +typedef enum _hypre_MemoryLocation +{ + hypre_MEMORY_UNDEFINED = -1, + hypre_MEMORY_HOST, + hypre_MEMORY_HOST_PINNED, + hypre_MEMORY_DEVICE, + hypre_MEMORY_UNIFIED, + hypre_NUM_MEMORY_LOCATION +} hypre_MemoryLocation; -/*-------------------------------------------------------------------------- - * Change all MPI names to hypre_MPI names to avoid link conflicts. - * - * NOTE: MPI_Comm is the only MPI symbol in the HYPRE user interface, - * and is defined in `HYPRE_utilities.h'. - *--------------------------------------------------------------------------*/ +/*------------------------------------------------------- + * hypre_GetActualMemLocation + * return actual location based on the selected memory model + *-------------------------------------------------------*/ +static inline HYPRE_MAYBE_UNUSED_FUNC hypre_MemoryLocation +hypre_GetActualMemLocation(HYPRE_MemoryLocation location) +{ + if (location == HYPRE_MEMORY_HOST) + { + return hypre_MEMORY_HOST; + } -#define MPI_Comm hypre_MPI_Comm -#define MPI_Group hypre_MPI_Group -#define MPI_Request hypre_MPI_Request -#define MPI_Datatype hypre_MPI_Datatype -#define MPI_Status hypre_MPI_Status -#define MPI_Op hypre_MPI_Op -#define MPI_Aint hypre_MPI_Aint -#define MPI_Info hypre_MPI_Info - -#define MPI_COMM_WORLD hypre_MPI_COMM_WORLD -#define MPI_COMM_NULL hypre_MPI_COMM_NULL -#define MPI_COMM_SELF hypre_MPI_COMM_SELF -#define MPI_COMM_TYPE_SHARED hypre_MPI_COMM_TYPE_SHARED - -#define MPI_BOTTOM hypre_MPI_BOTTOM - -#define MPI_FLOAT hypre_MPI_FLOAT -#define MPI_DOUBLE hypre_MPI_DOUBLE -#define MPI_LONG_DOUBLE hypre_MPI_LONG_DOUBLE -#define MPI_INT hypre_MPI_INT -#define MPI_LONG_LONG_INT hypre_MPI_LONG_LONG_INT -#define MPI_CHAR hypre_MPI_CHAR -#define MPI_LONG hypre_MPI_LONG -#define MPI_BYTE hypre_MPI_BYTE + if (location == HYPRE_MEMORY_DEVICE) + { +#if defined(HYPRE_USING_HOST_MEMORY) + return hypre_MEMORY_HOST; +#elif defined(HYPRE_USING_DEVICE_MEMORY) + return hypre_MEMORY_DEVICE; +#elif defined(HYPRE_USING_UNIFIED_MEMORY) + return hypre_MEMORY_UNIFIED; +#else +#error Wrong HYPRE memory setting. +#endif + } -#define MPI_C_FLOAT_COMPLEX hypre_MPI_COMPLEX -#define MPI_C_LONG_DOUBLE_COMPLEX hypre_MPI_COMPLEX -#define MPI_C_DOUBLE_COMPLEX hypre_MPI_COMPLEX + return hypre_MEMORY_UNDEFINED; +} -#define MPI_SUM hypre_MPI_SUM -#define MPI_MIN hypre_MPI_MIN -#define MPI_MAX hypre_MPI_MAX -#define MPI_LOR hypre_MPI_LOR -#define MPI_LAND hypre_MPI_LAND -#define MPI_BOR hypre_MPI_BOR -#define MPI_SUCCESS hypre_MPI_SUCCESS -#define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE -#define MPI_UNDEFINED hypre_MPI_UNDEFINED -#define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL -#define MPI_INFO_NULL hypre_MPI_INFO_NULL -#define MPI_ANY_SOURCE hypre_MPI_ANY_SOURCE -#define MPI_ANY_TAG hypre_MPI_ANY_TAG -#define MPI_SOURCE hypre_MPI_SOURCE -#define MPI_TAG hypre_MPI_TAG +#if !defined(HYPRE_USING_MEMORY_TRACKER) -#define MPI_Init hypre_MPI_Init -#define MPI_Finalize hypre_MPI_Finalize -#define MPI_Abort hypre_MPI_Abort -#define MPI_Wtime hypre_MPI_Wtime -#define MPI_Wtick hypre_MPI_Wtick -#define MPI_Barrier hypre_MPI_Barrier -#define MPI_Comm_create hypre_MPI_Comm_create -#define MPI_Comm_dup hypre_MPI_Comm_dup -#define MPI_Comm_f2c hypre_MPI_Comm_f2c -#define MPI_Comm_group hypre_MPI_Comm_group -#define MPI_Comm_size hypre_MPI_Comm_size -#define MPI_Comm_rank hypre_MPI_Comm_rank -#define MPI_Comm_free hypre_MPI_Comm_free -#define MPI_Comm_split hypre_MPI_Comm_split -#define MPI_Comm_split_type hypre_MPI_Comm_split_type -#define MPI_Group_incl hypre_MPI_Group_incl -#define MPI_Group_free hypre_MPI_Group_free -#define MPI_Address hypre_MPI_Address -#define MPI_Get_count hypre_MPI_Get_count -#define MPI_Alltoall hypre_MPI_Alltoall -#define MPI_Allgather hypre_MPI_Allgather -#define MPI_Allgatherv hypre_MPI_Allgatherv -#define MPI_Gather hypre_MPI_Gather -#define MPI_Gatherv hypre_MPI_Gatherv -#define MPI_Scatter hypre_MPI_Scatter -#define MPI_Scatterv hypre_MPI_Scatterv -#define MPI_Bcast hypre_MPI_Bcast -#define MPI_Send hypre_MPI_Send -#define MPI_Recv hypre_MPI_Recv -#define MPI_Isend hypre_MPI_Isend -#define MPI_Irecv hypre_MPI_Irecv -#define MPI_Send_init hypre_MPI_Send_init -#define MPI_Recv_init hypre_MPI_Recv_init -#define MPI_Irsend hypre_MPI_Irsend -#define MPI_Startall hypre_MPI_Startall -#define MPI_Probe hypre_MPI_Probe -#define MPI_Iprobe hypre_MPI_Iprobe -#define MPI_Test hypre_MPI_Test -#define MPI_Testall hypre_MPI_Testall -#define MPI_Wait hypre_MPI_Wait -#define MPI_Waitall hypre_MPI_Waitall -#define MPI_Waitany hypre_MPI_Waitany -#define MPI_Allreduce hypre_MPI_Allreduce -#define MPI_Reduce hypre_MPI_Reduce -#define MPI_Scan hypre_MPI_Scan -#define MPI_Request_free hypre_MPI_Request_free -#define MPI_Type_contiguous hypre_MPI_Type_contiguous -#define MPI_Type_vector hypre_MPI_Type_vector -#define MPI_Type_hvector hypre_MPI_Type_hvector -#define MPI_Type_struct hypre_MPI_Type_struct -#define MPI_Type_commit hypre_MPI_Type_commit -#define MPI_Type_free hypre_MPI_Type_free -#define MPI_Op_free hypre_MPI_Op_free -#define MPI_Op_create hypre_MPI_Op_create -#define MPI_User_function hypre_MPI_User_function -#define MPI_Info_create hypre_MPI_Info_create +#define hypre_TAlloc(type, count, location) \ +( (type *) hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) -/*-------------------------------------------------------------------------- - * Types, etc. - *--------------------------------------------------------------------------*/ +#define _hypre_TAlloc(type, count, location) \ +( (type *) _hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) -/* These types have associated creation and destruction routines */ -typedef HYPRE_Int hypre_MPI_Comm; -typedef HYPRE_Int hypre_MPI_Group; -typedef HYPRE_Int hypre_MPI_Request; -typedef HYPRE_Int hypre_MPI_Datatype; -typedef void (hypre_MPI_User_function) (void); +#define hypre_CTAlloc(type, count, location) \ +( (type *) hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location) ) -typedef struct -{ - HYPRE_Int hypre_MPI_SOURCE; - HYPRE_Int hypre_MPI_TAG; -} hypre_MPI_Status; +#define hypre_TReAlloc(ptr, type, count, location) \ +( (type *) hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location) ) -typedef HYPRE_Int hypre_MPI_Op; -typedef HYPRE_Int hypre_MPI_Aint; -typedef HYPRE_Int hypre_MPI_Info; +#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ +( (new_type *) hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), (size_t)(sizeof(new_type)*(new_count)), location) ) -#define hypre_MPI_COMM_SELF 1 -#define hypre_MPI_COMM_WORLD 0 -#define hypre_MPI_COMM_NULL -1 +#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) -#define hypre_MPI_COMM_TYPE_SHARED 0 +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(_hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) -#define hypre_MPI_BOTTOM 0x0 +#define hypre_TFree(ptr, location) \ +( hypre_Free((void *)ptr, location), ptr = NULL ) -#define hypre_MPI_FLOAT 0 -#define hypre_MPI_DOUBLE 1 -#define hypre_MPI_LONG_DOUBLE 2 -#define hypre_MPI_INT 3 -#define hypre_MPI_CHAR 4 -#define hypre_MPI_LONG 5 -#define hypre_MPI_BYTE 6 -#define hypre_MPI_REAL 7 -#define hypre_MPI_COMPLEX 8 -#define hypre_MPI_LONG_LONG_INT 9 +#define _hypre_TFree(ptr, location) \ +( _hypre_Free((void *)ptr, location), ptr = NULL ) -#define hypre_MPI_SUM 0 -#define hypre_MPI_MIN 1 -#define hypre_MPI_MAX 2 -#define hypre_MPI_LOR 3 -#define hypre_MPI_LAND 4 -#define hypre_MPI_BOR 5 -#define hypre_MPI_SUCCESS 0 -#define hypre_MPI_STATUSES_IGNORE 0 +#endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ -#define hypre_MPI_UNDEFINED -9999 -#define hypre_MPI_REQUEST_NULL 0 -#define hypre_MPI_INFO_NULL 0 -#define hypre_MPI_ANY_SOURCE 1 -#define hypre_MPI_ANY_TAG 1 +#ifdef __cplusplus +} +#endif -#else +#endif /* hypre_MEMORY_HEADER */ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ /****************************************************************************** - * MPI stubs to do casting of HYPRE_Int and hypre_int correctly + * + * General structures and values + * *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; -typedef MPI_Group hypre_MPI_Group; -typedef MPI_Request hypre_MPI_Request; -typedef MPI_Datatype hypre_MPI_Datatype; -typedef MPI_Status hypre_MPI_Status; -typedef MPI_Op hypre_MPI_Op; -typedef MPI_Aint hypre_MPI_Aint; -typedef MPI_Info hypre_MPI_Info; -typedef MPI_User_function hypre_MPI_User_function; +#ifndef HYPRE_HANDLE_H +#define HYPRE_HANDLE_H -#define hypre_MPI_COMM_WORLD MPI_COMM_WORLD -#define hypre_MPI_COMM_NULL MPI_COMM_NULL -#define hypre_MPI_BOTTOM MPI_BOTTOM -#define hypre_MPI_COMM_SELF MPI_COMM_SELF -#define hypre_MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_SHARED +#if defined(HYPRE_USING_UMPIRE) +#include "umpire/config.hpp" +#if UMPIRE_VERSION_MAJOR >= 2022 +#include "umpire/interface/c_fortran/umpire.h" +#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_quick_pool +#else +#include "umpire/interface/umpire.h" +#define hypre_umpire_resourcemanager_make_allocator_pool umpire_resourcemanager_make_allocator_pool +#endif /* UMPIRE_VERSION_MAJOR >= 2022 */ +#define HYPRE_UMPIRE_POOL_NAME_MAX_LEN 1024 +#endif /* defined(HYPRE_USING_UMPIRE) */ -#define hypre_MPI_FLOAT MPI_FLOAT -#define hypre_MPI_DOUBLE MPI_DOUBLE -#define hypre_MPI_LONG_DOUBLE MPI_LONG_DOUBLE -/* HYPRE_MPI_INT is defined in HYPRE_utilities.h */ -#define hypre_MPI_INT HYPRE_MPI_INT -#define hypre_MPI_CHAR MPI_CHAR -#define hypre_MPI_LONG MPI_LONG -#define hypre_MPI_BYTE MPI_BYTE -/* HYPRE_MPI_REAL is defined in HYPRE_utilities.h */ -#define hypre_MPI_REAL HYPRE_MPI_REAL -/* HYPRE_MPI_COMPLEX is defined in HYPRE_utilities.h */ -#define hypre_MPI_COMPLEX HYPRE_MPI_COMPLEX +struct hypre_DeviceData; +typedef struct hypre_DeviceData hypre_DeviceData; +typedef void (*GPUMallocFunc)(void **, size_t); +typedef void (*GPUMfreeFunc)(void *); -#define hypre_MPI_SUM MPI_SUM -#define hypre_MPI_MIN MPI_MIN -#define hypre_MPI_MAX MPI_MAX -#define hypre_MPI_LOR MPI_LOR -#define hypre_MPI_BOR MPI_BOR -#define hypre_MPI_SUCCESS MPI_SUCCESS -#define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +#define HYPRE_MAX_NUM_COMM_KEYS 8 -#define hypre_MPI_UNDEFINED MPI_UNDEFINED -#define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL -#define hypre_MPI_INFO_NULL MPI_INFO_NULL -#define hypre_MPI_ANY_SOURCE MPI_ANY_SOURCE -#define hypre_MPI_ANY_TAG MPI_ANY_TAG -#define hypre_MPI_SOURCE MPI_SOURCE -#define hypre_MPI_TAG MPI_TAG -#define hypre_MPI_LAND MPI_LAND +typedef struct +{ + HYPRE_Int log_level; + HYPRE_Int hypre_error; + HYPRE_MemoryLocation memory_location; + HYPRE_ExecutionPolicy default_exec_policy; +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; #endif -/****************************************************************************** - * Everything below this applies to both ifdef cases above - *****************************************************************************/ + hypre_MemoryLocation mpi_copy_buffer_location; -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ +#if defined(HYPRE_USING_GPU) + hypre_DeviceData *device_data; + HYPRE_Int device_gs_method; /* device G-S options */ +#endif -/* mpistubs.c */ -HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); -HYPRE_Int hypre_MPI_Finalize( void ); -HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); -HYPRE_Real hypre_MPI_Wtime( void ); -HYPRE_Real hypre_MPI_Wtick( void ); -HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, - hypre_MPI_Comm *newcomm ); -HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); -hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); -HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); -HYPRE_Int hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); -HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); -HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, - hypre_MPI_Comm * comms ); -HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, - hypre_MPI_Group *newgroup ); -HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); -HYPRE_Int hypre_MPI_Address( void *location, hypre_MPI_Aint *address ); -HYPRE_Int hypre_MPI_Get_count( hypre_MPI_Status *status, hypre_MPI_Datatype datatype, - HYPRE_Int *count ); -HYPRE_Int hypre_MPI_Alltoall( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Allgather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Gather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Gatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scatter( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, - void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, - hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scatterv( void *sendbuf, HYPRE_Int *sendcounts, HYPRE_Int *displs, - hypre_MPI_Datatype sendtype, void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Bcast( void *buffer, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Send( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Recv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int source, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Isend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Irecv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, - HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, - HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ); -HYPRE_Int hypre_MPI_Probe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Iprobe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, HYPRE_Int *flag, - hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Test( hypre_MPI_Request *request, HYPRE_Int *flag, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, HYPRE_Int *flag, - hypre_MPI_Status *array_of_statuses ); -HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, - hypre_MPI_Status *array_of_statuses ); -HYPRE_Int hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Request *array_of_requests, - HYPRE_Int *index, hypre_MPI_Status *status ); -HYPRE_Int hypre_MPI_Allreduce( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, HYPRE_Int root, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, - hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); -HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); -HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, - hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, - hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_hvector( HYPRE_Int count, HYPRE_Int blocklength, hypre_MPI_Aint stride, - hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_struct( HYPRE_Int count, HYPRE_Int *array_of_blocklengths, - hypre_MPI_Aint *array_of_displacements, hypre_MPI_Datatype *array_of_types, - hypre_MPI_Datatype *newtype ); -HYPRE_Int hypre_MPI_Type_commit( hypre_MPI_Datatype *datatype ); -HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); -HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); -HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, - hypre_MPI_Op *op ); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm); -HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); + /* user malloc/free function pointers */ + GPUMallocFunc user_device_malloc; + GPUMfreeFunc user_device_free; + +#if defined(HYPRE_USING_UMPIRE) + char umpire_device_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_um_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_host_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + char umpire_pinned_pool_name[HYPRE_UMPIRE_POOL_NAME_MAX_LEN]; + size_t umpire_device_pool_size; + size_t umpire_um_pool_size; + size_t umpire_host_pool_size; + size_t umpire_pinned_pool_size; + size_t umpire_block_size; + HYPRE_Int own_umpire_device_pool; + HYPRE_Int own_umpire_um_pool; + HYPRE_Int own_umpire_host_pool; + HYPRE_Int own_umpire_pinned_pool; + umpire_resourcemanager umpire_rm; +#endif + +#if defined(HYPRE_USING_MAGMA) + magma_queue_t magma_queue; #endif +} hypre_Handle; + +/* accessor macros to hypre_Handle */ +#define hypre_HandleLogLevel(hypre_handle) ((hypre_handle) -> log_level) +#define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) +#define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) + +#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) +#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) + +#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMaxBin(hypre_handle) hypre_DeviceDataCubMaxBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubMaxCachedBytes(hypre_handle) hypre_DeviceDataCubMaxCachedBytes(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubDevAllocator(hypre_handle) hypre_DeviceDataCubDevAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleCubUvmAllocator(hypre_handle) hypre_DeviceDataCubUvmAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDevice(hypre_handle) hypre_DeviceDataDevice(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxWorkGroupSize(hypre_handle) hypre_DeviceDataDeviceMaxWorkGroupSize(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxShmemPerBlock(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlock(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceMaxShmemPerBlockInited(hypre_handle) hypre_DeviceDataDeviceMaxShmemPerBlockInited(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleComputeStreamNum(hypre_handle) hypre_DeviceDataComputeStreamNum(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleReduceBuffer(hypre_handle) hypre_DeviceDataReduceBuffer(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmUseVendor(hypre_handle) hypre_DeviceDataSpgemmUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpMVUseVendor(hypre_handle) hypre_DeviceDataSpMVUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpTransUseVendor(hypre_handle) hypre_DeviceDataSpTransUseVendor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmAlgorithm(hypre_handle) hypre_DeviceDataSpgemmAlgorithm(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmBinned(hypre_handle) hypre_DeviceDataSpgemmBinned(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmNumBin(hypre_handle) hypre_DeviceDataSpgemmNumBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmHighestBin(hypre_handle) hypre_DeviceDataSpgemmHighestBin(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmBlockNumDim(hypre_handle) hypre_DeviceDataSpgemmBlockNumDim(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateMethod(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMethod(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateNsamples(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_DeviceDataSpgemmRownnzEstimateMultFactor(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleDeviceAllocator(hypre_handle) hypre_DeviceDataDeviceAllocator(hypre_HandleDeviceData(hypre_handle)) +#define hypre_HandleUseGpuRand(hypre_handle) hypre_DeviceDataUseGpuRand(hypre_HandleDeviceData(hypre_handle)) + +#define hypre_HandleUserDeviceMalloc(hypre_handle) ((hypre_handle) -> user_device_malloc) +#define hypre_HandleUserDeviceMfree(hypre_handle) ((hypre_handle) -> user_device_free) + +#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) +#define hypre_HandleUmpireDevicePoolSize(hypre_handle) ((hypre_handle) -> umpire_device_pool_size) +#define hypre_HandleUmpireUMPoolSize(hypre_handle) ((hypre_handle) -> umpire_um_pool_size) +#define hypre_HandleUmpireHostPoolSize(hypre_handle) ((hypre_handle) -> umpire_host_pool_size) +#define hypre_HandleUmpirePinnedPoolSize(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_size) +#define hypre_HandleUmpireBlockSize(hypre_handle) ((hypre_handle) -> umpire_block_size) +#define hypre_HandleUmpireDevicePoolName(hypre_handle) ((hypre_handle) -> umpire_device_pool_name) +#define hypre_HandleUmpireUMPoolName(hypre_handle) ((hypre_handle) -> umpire_um_pool_name) +#define hypre_HandleUmpireHostPoolName(hypre_handle) ((hypre_handle) -> umpire_host_pool_name) +#define hypre_HandleUmpirePinnedPoolName(hypre_handle) ((hypre_handle) -> umpire_pinned_pool_name) +#define hypre_HandleOwnUmpireDevicePool(hypre_handle) ((hypre_handle) -> own_umpire_device_pool) +#define hypre_HandleOwnUmpireUMPool(hypre_handle) ((hypre_handle) -> own_umpire_um_pool) +#define hypre_HandleOwnUmpireHostPool(hypre_handle) ((hypre_handle) -> own_umpire_host_pool) +#define hypre_HandleOwnUmpirePinnedPool(hypre_handle) ((hypre_handle) -> own_umpire_pinned_pool) + +#define hypre_HandleMagmaQueue(hypre_handle) ((hypre_handle) -> magma_queue) + +#endif +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_MEMORY_TRACKER_HEADER +#define hypre_MEMORY_TRACKER_HEADER + +#if defined(HYPRE_USING_MEMORY_TRACKER) + +extern size_t hypre_total_bytes[hypre_MEMORY_UNIFIED + 1]; +extern size_t hypre_peak_bytes[hypre_MEMORY_UNIFIED + 1]; +extern size_t hypre_current_bytes[hypre_MEMORY_UNIFIED + 1]; +extern HYPRE_Int hypre_memory_tracker_print; +extern char hypre_memory_tracker_filename[HYPRE_MAX_FILE_NAME_LEN]; + +typedef enum _hypre_MemoryTrackerEvent +{ + HYPRE_MEMORY_EVENT_ALLOC = 0, + HYPRE_MEMORY_EVENT_FREE, + HYPRE_MEMORY_EVENT_COPY, + HYPRE_MEMORY_NUM_EVENTS, +} hypre_MemoryTrackerEvent; + +typedef enum _hypre_MemcpyType +{ + hypre_MEMCPY_H2H = 0, + hypre_MEMCPY_D2H, + hypre_MEMCPY_H2D, + hypre_MEMCPY_D2D, + hypre_MEMCPY_NUM_TYPES, +} hypre_MemcpyType; + +typedef struct +{ + size_t index; + size_t time_step; + char action[16]; + void *ptr; + void *ptr2; + size_t nbytes; + hypre_MemoryLocation memory_location; + hypre_MemoryLocation memory_location2; + char filename[HYPRE_MAX_FILE_NAME_LEN]; + char function[256]; + HYPRE_Int line; + size_t pair; +} hypre_MemoryTrackerEntry; + +typedef struct +{ + size_t head; + size_t actual_size; + size_t alloced_size; + hypre_MemoryTrackerEntry *data; + /* Free Queue is sorted based on (ptr, time_step) ascendingly */ + hypre_MemoryTrackerEntry *sorted_data; + /* compressed sorted_data with the same ptr */ + size_t sorted_data_compressed_len; + size_t *sorted_data_compressed_offset; + hypre_MemoryTrackerEntry **sorted_data_compressed; +} hypre_MemoryTrackerQueue; + +typedef struct +{ + size_t curr_time_step; + hypre_MemoryTrackerQueue queue[HYPRE_MEMORY_NUM_EVENTS]; +} hypre_MemoryTracker; + +extern hypre_MemoryTracker *_hypre_memory_tracker; + +#define hypre_TAlloc(type, count, location) \ +( \ +{ \ + void *ptr = hypre_MAlloc((size_t)(sizeof(type) * (count)), location); \ + \ + hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ + hypre_MemoryTrackerInsert1("malloc", ptr, sizeof(type)*(count), alocation, \ + __FILE__, __func__, __LINE__); \ + (type *) ptr; \ +} \ +) + +#define hypre_CTAlloc(type, count, location) \ +( \ +{ \ + void *ptr = hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location); \ + \ + hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ + hypre_MemoryTrackerInsert1("calloc", ptr, sizeof(type)*(count), alocation, \ + __FILE__, __func__, __LINE__); \ + (type *) ptr; \ +} \ +) + +#define hypre_TReAlloc(ptr, type, count, location) \ +( \ +{ \ + void *new_ptr = hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location); \ + \ + hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ + hypre_MemoryTrackerInsert1("rfree", ptr, (size_t) -1, alocation, \ + __FILE__, __func__, __LINE__); \ + hypre_MemoryTrackerInsert1("rmalloc", new_ptr, sizeof(type)*(count), alocation, \ + __FILE__, __func__, __LINE__); \ + (type *) new_ptr; \ +} \ +) + +#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ +( \ +{ \ + void *new_ptr = hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), \ + (size_t)(sizeof(new_type)*(new_count)), location); \ + \ + hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ + hypre_MemoryTrackerInsert1("rfree", ptr, sizeof(old_type)*(old_count), alocation, \ + __FILE__, __func__, __LINE__); \ + hypre_MemoryTrackerInsert1("rmalloc", new_ptr, sizeof(new_type)*(new_count), alocation, \ + __FILE__, __func__, __LINE__); \ + (new_type *) new_ptr; \ +} \ +) -#ifdef __cplusplus -} -#endif +#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +( \ +{ \ + hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ + \ + hypre_MemoryLocation alocation_dst = hypre_GetActualMemLocation(locdst); \ + hypre_MemoryLocation alocation_src = hypre_GetActualMemLocation(locsrc); \ + hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ + alocation_dst, alocation_src, \ + __FILE__, __func__, __LINE__); \ +} \ +) -#endif -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +#define hypre_TFree(ptr, location) \ +( \ +{ \ + hypre_Free((void *)ptr, location); \ + \ + hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ + hypre_MemoryTrackerInsert1("free", ptr, (size_t) -1, alocation, \ + __FILE__, __func__, __LINE__); \ + ptr = NULL; \ +} \ +) -#ifndef HYPRE_SMP_HEADER -#define HYPRE_SMP_HEADER -#endif +#define _hypre_TAlloc(type, count, location) \ +( \ +{ \ + void *ptr = _hypre_MAlloc((size_t)(sizeof(type) * (count)), location); \ + \ + hypre_MemoryTrackerInsert1("malloc", ptr, sizeof(type)*(count), location, \ + __FILE__, __func__, __LINE__); \ + (type *) ptr; \ +} \ +) -#define HYPRE_SMP_SCHEDULE schedule(static) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +( \ +{ \ + _hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ + \ + hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ + location_dst, location_src, \ + __FILE__, __func__, __LINE__); \ +} \ +) + +#define _hypre_TFree(ptr, location) \ +( \ +{ \ + _hypre_Free((void *)ptr, location); \ + \ + hypre_MemoryTrackerInsert1("free", ptr, (size_t) -1, location, \ + __FILE__, __func__, __LINE__); \ + ptr = NULL; \ +} \ +) + +#endif /* #if defined(HYPRE_USING_MEMORY_TRACKER) */ +#endif /* #ifndef hypre_MEMORY_TRACKER_HEADER */ /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other @@ -1092,391 +1080,476 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); /****************************************************************************** * - * Header file for memory management utilities - * - * The abstract memory model has a Host (think CPU) and a Device (think GPU) and - * three basic types of memory management utilities: - * - * 1. Malloc(..., location) - * location=LOCATION_DEVICE - malloc memory on the device - * location=LOCATION_HOST - malloc memory on the host - * 2. MemCopy(..., method) - * method=HOST_TO_DEVICE - copy from host to device - * method=DEVICE_TO_HOST - copy from device to host - * method=DEVICE_TO_DEVICE - copy from device to device - * 3. SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host - * - * Although the abstract model does not explicitly reflect a managed memory - * model (i.e., unified memory), it can support it. Here is a summary of how - * the abstract model would be mapped to specific hardware scenarios: - * - * Not using a device, not using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - host malloc e.g., malloc - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to host e.g., memcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from host to host e.g., memcpy - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the host - * location=LOCATION_HOST - execute on the host - * - * Using a device, not using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - device malloc e.g., cudaMalloc - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMemcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMemcpy - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMemcpy - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host - * - * Using a device, using managed memory - * Malloc(..., location) - * location=LOCATION_DEVICE - managed malloc e.g., cudaMallocManaged - * location=LOCATION_HOST - host malloc e.g., malloc - * MemoryCopy(..., locTo,locFrom) - * locTo=LOCATION_HOST, locFrom=LOCATION_DEVICE - copy from device to host e.g., cudaMallocManaged - * locTo=LOCATION_DEVICE, locFrom=LOCATION_HOST - copy from host to device e.g., cudaMallocManaged - * locTo=LOCATION_DEVICE, locFrom=LOCATION_DEVICE - copy from device to device e.g., cudaMallocManaged - * SetExecutionMode - * location=LOCATION_DEVICE - execute on the device - * location=LOCATION_HOST - execute on the host + * Fake mpi stubs to generate serial codes without mpi * *****************************************************************************/ -#ifndef hypre_MEMORY_HEADER -#define hypre_MEMORY_HEADER - -#include -#include - -#if defined(HYPRE_USING_UNIFIED_MEMORY) && defined(HYPRE_USING_DEVICE_OPENMP) -//#pragma omp requires unified_shared_memory -#endif - -/* stringification: - * _Pragma(string-literal), so we need to cast argument to a string - * The three dots as last argument of the macro tells compiler that this is a variadic macro. - * I.e. this is a macro that receives variable number of arguments. - */ -#define HYPRE_STR(...) #__VA_ARGS__ -#define HYPRE_XSTR(...) HYPRE_STR(__VA_ARGS__) +#ifndef hypre_MPISTUBS +#define hypre_MPISTUBS #ifdef __cplusplus extern "C" { #endif -typedef enum _hypre_MemoryLocation -{ - hypre_MEMORY_UNDEFINED = -1, - hypre_MEMORY_HOST, - hypre_MEMORY_HOST_PINNED, - hypre_MEMORY_DEVICE, - hypre_MEMORY_UNIFIED, - hypre_NUM_MEMORY_LOCATION -} hypre_MemoryLocation; - -/*------------------------------------------------------- - * hypre_GetActualMemLocation - * return actual location based on the selected memory model - *-------------------------------------------------------*/ -static inline HYPRE_MAYBE_UNUSED_FUNC hypre_MemoryLocation -hypre_GetActualMemLocation(HYPRE_MemoryLocation location) -{ - if (location == HYPRE_MEMORY_HOST) - { - return hypre_MEMORY_HOST; - } - - if (location == HYPRE_MEMORY_DEVICE) - { -#if defined(HYPRE_USING_HOST_MEMORY) - return hypre_MEMORY_HOST; -#elif defined(HYPRE_USING_DEVICE_MEMORY) - return hypre_MEMORY_DEVICE; -#elif defined(HYPRE_USING_UNIFIED_MEMORY) - return hypre_MEMORY_UNIFIED; -#else -#error Wrong HYPRE memory setting. -#endif - } - - return hypre_MEMORY_UNDEFINED; -} - - -#if !defined(HYPRE_USING_MEMORY_TRACKER) +#if defined(HYPRE_SEQUENTIAL) -#define hypre_TAlloc(type, count, location) \ -( (type *) hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) +/****************************************************************************** + * MPI stubs to generate serial codes without mpi + *****************************************************************************/ -#define _hypre_TAlloc(type, count, location) \ -( (type *) _hypre_MAlloc((size_t)(sizeof(type) * (count)), location) ) +/*-------------------------------------------------------------------------- + * Change all MPI names to hypre_MPI names to avoid link conflicts. + * + * NOTE: MPI_Comm is the only MPI symbol in the HYPRE user interface, + * and is defined in `HYPRE_utilities.h'. + *--------------------------------------------------------------------------*/ -#define hypre_CTAlloc(type, count, location) \ -( (type *) hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location) ) +#define MPI_Comm hypre_MPI_Comm +#define MPI_Group hypre_MPI_Group +#define MPI_Request hypre_MPI_Request +#define MPI_Datatype hypre_MPI_Datatype +#define MPI_Status hypre_MPI_Status +#define MPI_Op hypre_MPI_Op +#define MPI_Aint hypre_MPI_Aint +#define MPI_Info hypre_MPI_Info -#define hypre_TReAlloc(ptr, type, count, location) \ -( (type *) hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location) ) +#define MPI_COMM_WORLD hypre_MPI_COMM_WORLD +#define MPI_COMM_NULL hypre_MPI_COMM_NULL +#define MPI_COMM_SELF hypre_MPI_COMM_SELF +#define MPI_COMM_TYPE_SHARED hypre_MPI_COMM_TYPE_SHARED -#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ -( (new_type *) hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), (size_t)(sizeof(new_type)*(new_count)), location) ) +#define MPI_BOTTOM hypre_MPI_BOTTOM -#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ -(hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) +#define MPI_FLOAT hypre_MPI_FLOAT +#define MPI_DOUBLE hypre_MPI_DOUBLE +#define MPI_LONG_DOUBLE hypre_MPI_LONG_DOUBLE +#define MPI_INT hypre_MPI_INT +#define MPI_LONG_LONG_INT hypre_MPI_LONG_LONG_INT +#define MPI_CHAR hypre_MPI_CHAR +#define MPI_LONG hypre_MPI_LONG +#define MPI_BYTE hypre_MPI_BYTE -#define hypre_TFree(ptr, location) \ -( hypre_Free((void *)ptr, location), ptr = NULL ) +#define MPI_C_FLOAT_COMPLEX hypre_MPI_COMPLEX +#define MPI_C_LONG_DOUBLE_COMPLEX hypre_MPI_COMPLEX +#define MPI_C_DOUBLE_COMPLEX hypre_MPI_COMPLEX -#define _hypre_TFree(ptr, location) \ -( _hypre_Free((void *)ptr, location), ptr = NULL ) +#define MPI_SUM hypre_MPI_SUM +#define MPI_MIN hypre_MPI_MIN +#define MPI_MAX hypre_MPI_MAX +#define MPI_LOR hypre_MPI_LOR +#define MPI_LAND hypre_MPI_LAND +#define MPI_BOR hypre_MPI_BOR +#define MPI_SUCCESS hypre_MPI_SUCCESS +#define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE +#define MPI_STATUS_IGNORE hypre_MPI_STATUS_IGNORE -#endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ +#define MPI_UNDEFINED hypre_MPI_UNDEFINED +#define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL +#define MPI_INFO_NULL hypre_MPI_INFO_NULL +#define MPI_ANY_SOURCE hypre_MPI_ANY_SOURCE +#define MPI_ANY_TAG hypre_MPI_ANY_TAG +#define MPI_SOURCE hypre_MPI_SOURCE +#define MPI_TAG hypre_MPI_TAG +#define MPI_Init hypre_MPI_Init +#define MPI_Finalize hypre_MPI_Finalize +#define MPI_Abort hypre_MPI_Abort +#define MPI_Wtime hypre_MPI_Wtime +#define MPI_Wtick hypre_MPI_Wtick +#define MPI_Barrier hypre_MPI_Barrier +#define MPI_Comm_create hypre_MPI_Comm_create +#define MPI_Comm_dup hypre_MPI_Comm_dup +#define MPI_Comm_f2c hypre_MPI_Comm_f2c +#define MPI_Comm_group hypre_MPI_Comm_group +#define MPI_Comm_size hypre_MPI_Comm_size +#define MPI_Comm_rank hypre_MPI_Comm_rank +#define MPI_Comm_free hypre_MPI_Comm_free +#define MPI_Comm_split hypre_MPI_Comm_split +#define MPI_Comm_split_type hypre_MPI_Comm_split_type +#define MPI_Group_incl hypre_MPI_Group_incl +#define MPI_Group_free hypre_MPI_Group_free +#define MPI_Address hypre_MPI_Address +#define MPI_Get_count hypre_MPI_Get_count +#define MPI_Alltoall hypre_MPI_Alltoall +#define MPI_Allgather hypre_MPI_Allgather +#define MPI_Allgatherv hypre_MPI_Allgatherv +#define MPI_Gather hypre_MPI_Gather +#define MPI_Gatherv hypre_MPI_Gatherv +#define MPI_Scatter hypre_MPI_Scatter +#define MPI_Scatterv hypre_MPI_Scatterv +#define MPI_Bcast hypre_MPI_Bcast +#define MPI_Send hypre_MPI_Send +#define MPI_Recv hypre_MPI_Recv +#define MPI_Isend hypre_MPI_Isend +#define MPI_Irecv hypre_MPI_Irecv +#define MPI_Send_init hypre_MPI_Send_init +#define MPI_Recv_init hypre_MPI_Recv_init +#define MPI_Irsend hypre_MPI_Irsend +#define MPI_Startall hypre_MPI_Startall +#define MPI_Probe hypre_MPI_Probe +#define MPI_Iprobe hypre_MPI_Iprobe +#define MPI_Test hypre_MPI_Test +#define MPI_Testall hypre_MPI_Testall +#define MPI_Wait hypre_MPI_Wait +#define MPI_Waitall hypre_MPI_Waitall +#define MPI_Waitany hypre_MPI_Waitany +#define MPI_Allreduce hypre_MPI_Allreduce +#define MPI_Reduce hypre_MPI_Reduce +#define MPI_Scan hypre_MPI_Scan +#define MPI_Request_free hypre_MPI_Request_free +#define MPI_Type_contiguous hypre_MPI_Type_contiguous +#define MPI_Type_vector hypre_MPI_Type_vector +#define MPI_Type_hvector hypre_MPI_Type_hvector +#define MPI_Type_struct hypre_MPI_Type_struct +#define MPI_Type_commit hypre_MPI_Type_commit +#define MPI_Type_free hypre_MPI_Type_free +#define MPI_Op_free hypre_MPI_Op_free +#define MPI_Op_create hypre_MPI_Op_create +#define MPI_User_function hypre_MPI_User_function +#define MPI_Info_create hypre_MPI_Info_create +#define MPI_Comm_set_attr hypre_MPI_Comm_set_attr +#define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function +#define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function +#define MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +#define MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +#define MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; /*-------------------------------------------------------------------------- - * Prototypes + * Types, etc. *--------------------------------------------------------------------------*/ -/* memory.c */ -HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, - char *memory_location_name); -void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); -void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); -void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); -void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); -void hypre_Free(void *ptr, HYPRE_MemoryLocation location); -void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, - HYPRE_MemoryLocation loc_src); -void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); +/* These types have associated creation and destruction routines */ +typedef HYPRE_Int hypre_MPI_Comm; +typedef HYPRE_Int hypre_MPI_Group; +typedef HYPRE_Int hypre_MPI_Request; +typedef HYPRE_Int hypre_MPI_Datatype; +typedef void (hypre_MPI_User_function) (void); +typedef void (hypre_MPI_Comm_copy_attr_function) (void); +typedef void (hypre_MPI_Comm_delete_attr_function) (void); +typedef void (hypre_MPI_Grequest_query_function) (void); +typedef void (hypre_MPI_Grequest_free_function) (void); +typedef void (hypre_MPI_Grequest_cancel_function) (void); -void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); -void _hypre_Free(void *ptr, hypre_MemoryLocation location); +typedef struct +{ + HYPRE_Int hypre_MPI_SOURCE; + HYPRE_Int hypre_MPI_TAG; +} hypre_MPI_Status; -HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); -HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, - HYPRE_MemoryLocation location2); +typedef HYPRE_Int hypre_MPI_Op; +typedef intptr_t hypre_MPI_Aint; +typedef HYPRE_Int hypre_MPI_Info; -HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); -HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, - size_t max_cached_bytes ); -HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); -void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); -HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); -HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); -HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); -HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); -HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, - const char *function, HYPRE_Int line); -#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ - hypre_HandleLogLevel(hypre_handle()),\ - __func__,\ - __LINE__) -/* memory_dmalloc.c */ -HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); -HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); -char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); -char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); -char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); -void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); +#define hypre_MPI_COMM_SELF 1 +#define hypre_MPI_COMM_WORLD 0 +#define hypre_MPI_COMM_NULL -1 -#ifdef __cplusplus -} -#endif +#define hypre_MPI_COMM_TYPE_SHARED 0 -#endif /* hypre_MEMORY_HEADER */ -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +#define hypre_MPI_BOTTOM 0x0 -#ifndef hypre_MEMORY_TRACKER_HEADER -#define hypre_MEMORY_TRACKER_HEADER +#define hypre_MPI_FLOAT 0 +#define hypre_MPI_DOUBLE 1 +#define hypre_MPI_LONG_DOUBLE 2 +#define hypre_MPI_INT 3 +#define hypre_MPI_CHAR 4 +#define hypre_MPI_LONG 5 +#define hypre_MPI_BYTE 6 +#define hypre_MPI_REAL 7 +#define hypre_MPI_COMPLEX 8 +#define hypre_MPI_LONG_LONG_INT 9 -#if defined(HYPRE_USING_MEMORY_TRACKER) +#define hypre_MPI_SUM 0 +#define hypre_MPI_MIN 1 +#define hypre_MPI_MAX 2 +#define hypre_MPI_LOR 3 +#define hypre_MPI_LAND 4 +#define hypre_MPI_BOR 5 +#define hypre_MPI_SUCCESS 0 +#define hypre_MPI_STATUSES_IGNORE 0 +#define hypre_MPI_STATUS_IGNORE 0 -extern size_t hypre_total_bytes[hypre_MEMORY_UNIFIED + 1]; -extern size_t hypre_peak_bytes[hypre_MEMORY_UNIFIED + 1]; -extern size_t hypre_current_bytes[hypre_MEMORY_UNIFIED + 1]; -extern HYPRE_Int hypre_memory_tracker_print; -extern char hypre_memory_tracker_filename[HYPRE_MAX_FILE_NAME_LEN]; +#define hypre_MPI_UNDEFINED -9999 +#define hypre_MPI_REQUEST_NULL 0 +#define hypre_MPI_INFO_NULL 0 +#define hypre_MPI_ANY_SOURCE 1 +#define hypre_MPI_ANY_TAG 1 -typedef enum _hypre_MemoryTrackerEvent -{ - HYPRE_MEMORY_EVENT_ALLOC = 0, - HYPRE_MEMORY_EVENT_FREE, - HYPRE_MEMORY_EVENT_COPY, - HYPRE_MEMORY_NUM_EVENTS, -} hypre_MemoryTrackerEvent; +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL -typedef enum _hypre_MemcpyType -{ - hypre_MEMCPY_H2H = 0, - hypre_MEMCPY_D2H, - hypre_MEMCPY_H2D, - hypre_MEMCPY_D2D, - hypre_MEMCPY_NUM_TYPES, -} hypre_MemcpyType; +#else -typedef struct -{ - size_t index; - size_t time_step; - char action[16]; - void *ptr; - void *ptr2; - size_t nbytes; - hypre_MemoryLocation memory_location; - hypre_MemoryLocation memory_location2; - char filename[HYPRE_MAX_FILE_NAME_LEN]; - char function[256]; - HYPRE_Int line; - size_t pair; -} hypre_MemoryTrackerEntry; +/****************************************************************************** + * MPI stubs to do casting of HYPRE_Int and hypre_int correctly + *****************************************************************************/ -typedef struct -{ - size_t head; - size_t actual_size; - size_t alloced_size; - hypre_MemoryTrackerEntry *data; - /* Free Queue is sorted based on (ptr, time_step) ascendingly */ - hypre_MemoryTrackerEntry *sorted_data; - /* compressed sorted_data with the same ptr */ - size_t sorted_data_compressed_len; - size_t *sorted_data_compressed_offset; - hypre_MemoryTrackerEntry **sorted_data_compressed; -} hypre_MemoryTrackerQueue; +typedef MPI_Comm hypre_MPI_Comm; +typedef MPI_Group hypre_MPI_Group; +typedef MPI_Request hypre_MPI_Request; +typedef MPI_Datatype hypre_MPI_Datatype; +typedef MPI_Status hypre_MPI_Status; +typedef MPI_Op hypre_MPI_Op; +typedef MPI_Aint hypre_MPI_Aint; +typedef MPI_Info hypre_MPI_Info; +typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; +typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; +typedef MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +typedef MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; -typedef struct -{ - size_t curr_time_step; - hypre_MemoryTrackerQueue queue[HYPRE_MEMORY_NUM_EVENTS]; -} hypre_MemoryTracker; +#define hypre_MPI_COMM_WORLD MPI_COMM_WORLD +#define hypre_MPI_COMM_NULL MPI_COMM_NULL +#define hypre_MPI_BOTTOM MPI_BOTTOM +#define hypre_MPI_COMM_SELF MPI_COMM_SELF +#define hypre_MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_SHARED -extern hypre_MemoryTracker *_hypre_memory_tracker; +#define hypre_MPI_FLOAT MPI_FLOAT +#define hypre_MPI_DOUBLE MPI_DOUBLE +#define hypre_MPI_LONG_DOUBLE MPI_LONG_DOUBLE +/* HYPRE_MPI_INT is defined in HYPRE_utilities.h */ +#define hypre_MPI_INT HYPRE_MPI_INT +#define hypre_MPI_CHAR MPI_CHAR +#define hypre_MPI_LONG MPI_LONG +#define hypre_MPI_BYTE MPI_BYTE +/* HYPRE_MPI_REAL is defined in HYPRE_utilities.h */ +#define hypre_MPI_REAL HYPRE_MPI_REAL +/* HYPRE_MPI_COMPLEX is defined in HYPRE_utilities.h */ +#define hypre_MPI_COMPLEX HYPRE_MPI_COMPLEX -#define hypre_TAlloc(type, count, location) \ -( \ -{ \ - void *ptr = hypre_MAlloc((size_t)(sizeof(type) * (count)), location); \ - \ - hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ - hypre_MemoryTrackerInsert1("malloc", ptr, sizeof(type)*(count), alocation, \ - __FILE__, __func__, __LINE__); \ - (type *) ptr; \ -} \ -) +#define hypre_MPI_SUM MPI_SUM +#define hypre_MPI_MIN MPI_MIN +#define hypre_MPI_MAX MPI_MAX +#define hypre_MPI_LOR MPI_LOR +#define hypre_MPI_BOR MPI_BOR +#define hypre_MPI_SUCCESS MPI_SUCCESS +#define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +#define hypre_MPI_STATUS_IGNORE MPI_STATUS_IGNORE + +#define hypre_MPI_UNDEFINED MPI_UNDEFINED +#define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL +#define hypre_MPI_INFO_NULL MPI_INFO_NULL +#define hypre_MPI_ANY_SOURCE MPI_ANY_SOURCE +#define hypre_MPI_ANY_TAG MPI_ANY_TAG +#define hypre_MPI_SOURCE MPI_SOURCE +#define hypre_MPI_TAG MPI_TAG +#define hypre_MPI_LAND MPI_LAND -#define hypre_CTAlloc(type, count, location) \ -( \ -{ \ - void *ptr = hypre_CAlloc((size_t)(count), (size_t)sizeof(type), location); \ - \ - hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ - hypre_MemoryTrackerInsert1("calloc", ptr, sizeof(type)*(count), alocation, \ - __FILE__, __func__, __LINE__); \ - (type *) ptr; \ -} \ -) +#define hypre_MPI_COMM_NULL_COPY_FN MPI_COMM_NULL_COPY_FN +#define hypre_MPI_COMM_NULL_DELETE_FN MPI_COMM_NULL_DELETE_FN -#define hypre_TReAlloc(ptr, type, count, location) \ -( \ -{ \ - void *new_ptr = hypre_ReAlloc((char *)ptr, (size_t)(sizeof(type) * (count)), location); \ - \ - hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ - hypre_MemoryTrackerInsert1("rfree", ptr, (size_t) -1, alocation, \ - __FILE__, __func__, __LINE__); \ - hypre_MemoryTrackerInsert1("rmalloc", new_ptr, sizeof(type)*(count), alocation, \ - __FILE__, __func__, __LINE__); \ - (type *) new_ptr; \ -} \ -) +#endif -#define hypre_TReAlloc_v2(ptr, old_type, old_count, new_type, new_count, location) \ -( \ -{ \ - void *new_ptr = hypre_ReAlloc_v2((char *)ptr, (size_t)(sizeof(old_type)*(old_count)), \ - (size_t)(sizeof(new_type)*(new_count)), location); \ - \ - hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ - hypre_MemoryTrackerInsert1("rfree", ptr, sizeof(old_type)*(old_count), alocation, \ - __FILE__, __func__, __LINE__); \ - hypre_MemoryTrackerInsert1("rmalloc", new_ptr, sizeof(new_type)*(new_count), alocation, \ - __FILE__, __func__, __LINE__); \ - (new_type *) new_ptr; \ -} \ -) +/****************************************************************************** + * Everything below this applies to both ifdef cases above + *****************************************************************************/ -#define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ -( \ -{ \ - hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ - \ - hypre_MemoryLocation alocation_dst = hypre_GetActualMemLocation(locdst); \ - hypre_MemoryLocation alocation_src = hypre_GetActualMemLocation(locsrc); \ - hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ - alocation_dst, alocation_src, \ - __FILE__, __func__, __LINE__); \ -} \ -) +typedef struct +{ + hypre_MPI_Comm comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; + void *send_buffer; + void *recv_buffer; + hypre_MPI_Request *pre_send_request; + hypre_MPI_Request *post_recv_request; +} hypre_MPICommWrapper; + +#define hypre_MPICommWrapperComm(wrapper) ((wrapper) -> comm) +#define hypre_MPICommWrapperSendLocation(wrapper) ((wrapper) -> send_location) +#define hypre_MPICommWrapperRecvLocation(wrapper) ((wrapper) -> recv_location) +#define hypre_MPICommWrapperSendBufferLocation(wrapper) ((wrapper) -> send_buffer_location) +#define hypre_MPICommWrapperRecvBufferLocation(wrapper) ((wrapper) -> recv_buffer_location) +#define hypre_MPICommWrapperSendBuffer(wrapper) ((wrapper) -> send_buffer) +#define hypre_MPICommWrapperRecvBuffer(wrapper) ((wrapper) -> recv_buffer) +#define hypre_MPICommWrapperPreSendRequest(wrapper) ((wrapper) -> pre_send_request) +#define hypre_MPICommWrapperPostRecvRequest(wrapper) ((wrapper) -> post_recv_request) + +#define HYPRE_MPI_GREQUEST_FREE 1 +#define HYPRE_MPI_GREQUEST_COPY 2 -#define hypre_TFree(ptr, location) \ -( \ -{ \ - hypre_Free((void *)ptr, location); \ - \ - hypre_MemoryLocation alocation = hypre_GetActualMemLocation(location); \ - hypre_MemoryTrackerInsert1("free", ptr, (size_t) -1, alocation, \ - __FILE__, __func__, __LINE__); \ - ptr = NULL; \ -} \ -) +typedef struct +{ + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_GRequest_Action; -#define _hypre_TAlloc(type, count, location) \ -( \ -{ \ - void *ptr = _hypre_MAlloc((size_t)(sizeof(type) * (count)), location); \ - \ - hypre_MemoryTrackerInsert1("malloc", ptr, sizeof(type)*(count), location, \ - __FILE__, __func__, __LINE__); \ - (type *) ptr; \ -} \ -) +#define hypre_MPI_GRequest_ActionCount(action) ((action) -> count) +#define hypre_MPI_GRequest_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_GRequest_ActionData(action) ((action) -> data) -#define _hypre_TFree(ptr, location) \ -( \ -{ \ - _hypre_Free((void *)ptr, location); \ - \ - hypre_MemoryTrackerInsert1("free", ptr, (size_t) -1, location, \ - __FILE__, __func__, __LINE__); \ - ptr = NULL; \ -} \ -) +/*-------------------------------------------------------------------------- + * Prototypes + *--------------------------------------------------------------------------*/ -#endif /* #if defined(HYPRE_USING_MEMORY_TRACKER) */ -#endif /* #ifndef hypre_MEMORY_TRACKER_HEADER */ +/* mpistubs.c */ +HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ); +HYPRE_Int hypre_MPI_Finalize( void ); +HYPRE_Int hypre_MPI_Abort( hypre_MPI_Comm comm, HYPRE_Int errorcode ); +HYPRE_Real hypre_MPI_Wtime( void ); +HYPRE_Real hypre_MPI_Wtick( void ); +HYPRE_Int hypre_MPI_Barrier( hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Comm_create( hypre_MPI_Comm comm, hypre_MPI_Group group, + hypre_MPI_Comm *newcomm ); +HYPRE_Int hypre_MPI_Comm_dup( hypre_MPI_Comm comm, hypre_MPI_Comm *newcomm ); +hypre_MPI_Comm hypre_MPI_Comm_f2c( hypre_int comm ); +HYPRE_Int hypre_MPI_Comm_size( hypre_MPI_Comm comm, HYPRE_Int *size ); +HYPRE_Int hypre_MPI_Comm_rank( hypre_MPI_Comm comm, HYPRE_Int *rank ); +HYPRE_Int hypre_MPI_Comm_free( hypre_MPI_Comm *comm ); +HYPRE_Int hypre_MPI_Comm_group( hypre_MPI_Comm comm, hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, + hypre_MPI_Comm * comms ); +HYPRE_Int hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, + hypre_MPI_Group *newgroup ); +HYPRE_Int hypre_MPI_Group_free( hypre_MPI_Group *group ); +HYPRE_Int hypre_MPI_Address( void *location, hypre_MPI_Aint *address ); +HYPRE_Int hypre_MPI_Get_count( hypre_MPI_Status *status, hypre_MPI_Datatype datatype, + HYPRE_Int *count ); +HYPRE_Int hypre_MPI_Alltoall( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Allgather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Allgatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Gather( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Gatherv( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int *recvcounts, HYPRE_Int *displs, hypre_MPI_Datatype recvtype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scatter( void *sendbuf, HYPRE_Int sendcount, hypre_MPI_Datatype sendtype, + void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, HYPRE_Int root, + hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scatterv( void *sendbuf, HYPRE_Int *sendcounts, HYPRE_Int *displs, + hypre_MPI_Datatype sendtype, void *recvbuf, HYPRE_Int recvcount, hypre_MPI_Datatype recvtype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Bcast( void *buffer, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Send( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Recv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int source, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Isend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Irecv( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, + HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, hypre_MPI_Datatype datatype, HYPRE_Int dest, + HYPRE_Int tag, hypre_MPI_Comm comm, hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Startall( HYPRE_Int count, hypre_MPI_Request *array_of_requests ); +HYPRE_Int hypre_MPI_Probe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, + hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Iprobe( HYPRE_Int source, HYPRE_Int tag, hypre_MPI_Comm comm, HYPRE_Int *flag, + hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Test( hypre_MPI_Request *request, HYPRE_Int *flag, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Testall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, HYPRE_Int *flag, + hypre_MPI_Status *array_of_statuses ); +HYPRE_Int hypre_MPI_Wait( hypre_MPI_Request *request, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Waitall( HYPRE_Int count, hypre_MPI_Request *array_of_requests, + hypre_MPI_Status *array_of_statuses ); +HYPRE_Int hypre_MPI_Waitany( HYPRE_Int count, hypre_MPI_Request *array_of_requests, + HYPRE_Int *index, hypre_MPI_Status *status ); +HYPRE_Int hypre_MPI_Allreduce( void *sendbuf, void *recvbuf, HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, HYPRE_Int root, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, + hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); +HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status); +HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, + hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, + hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_hvector( HYPRE_Int count, HYPRE_Int blocklength, hypre_MPI_Aint stride, + hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_struct( HYPRE_Int count, HYPRE_Int *array_of_blocklengths, + hypre_MPI_Aint *array_of_displacements, hypre_MPI_Datatype *array_of_types, + hypre_MPI_Datatype *newtype ); +HYPRE_Int hypre_MPI_Type_commit( hypre_MPI_Datatype *datatype ); +HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); +HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); +HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, + hypre_MPI_Op *op ); +HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); +HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); +HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm); +HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); +HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); +#endif + +HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, +hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); +HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); +HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval); +HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); +HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); + +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm); + +hypre_int hypre_grequest_free_fn(void *extra_state); +hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); + +hypre_MPICommWrapper *hypre_MPICommWrapperCreate(hypre_MPI_Comm comm); + +#ifdef __cplusplus +} +#endif +#endif /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -2497,6 +2570,60 @@ HYPRE_Int hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, HYPRE_Int * hypre_IntArrayArray *w ); #endif +/* memory.c */ +HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, + char *memory_location_name); +void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); +void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); +void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); +void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); +void hypre_Free(void *ptr, HYPRE_MemoryLocation location); +void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, + HYPRE_MemoryLocation loc_src); +void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); + +void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); +void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); + +HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); +HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, + HYPRE_MemoryLocation location2); + +HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); +HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, + size_t max_cached_bytes ); +HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); +void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); +HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); +HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); +HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); +HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); +HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, + const char *function, HYPRE_Int line); +#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ + hypre_HandleLogLevel(hypre_handle()),\ + __func__,\ + __LINE__) +/* memory_dmalloc.c */ +HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); +HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); +char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); +char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); +char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); +void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); + /* memory_tracker.c */ #ifdef HYPRE_USING_MEMORY_TRACKER hypre_MemoryTracker* hypre_memory_tracker(void); @@ -3946,3 +4073,4 @@ HYPRE_Int hypre_mm_read_mtx_crd_size(FILE *f, HYPRE_Int *M, HYPRE_Int *N, HYPRE_ #endif #endif + diff --git a/src/utilities/general.c b/src/utilities/general.c index e6de9e480a..33899c7d91 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -55,6 +55,8 @@ hypre_HandleCreate(void) #endif #endif + hypre_HandleMPICopyBufferLocation(hypre_handle_) = hypre_MEMORY_HOST; + return hypre_handle_; } @@ -66,8 +68,6 @@ hypre_HandleDestroy(hypre_Handle *hypre_handle_) return hypre_error_flag; } - hypre_TFree(hypre_HandleStructCommRecvBuffer(hypre_handle_), HYPRE_MEMORY_DEVICE); - hypre_TFree(hypre_HandleStructCommSendBuffer(hypre_handle_), HYPRE_MEMORY_DEVICE); #if defined(HYPRE_USING_GPU) hypre_DeviceDataDestroy(hypre_HandleDeviceData(hypre_handle_)); hypre_HandleDeviceData(hypre_handle_) = NULL; diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 7ca8c12442..0c4b68bfc5 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -31,6 +31,8 @@ typedef struct hypre_DeviceData hypre_DeviceData; typedef void (*GPUMallocFunc)(void **, size_t); typedef void (*GPUMfreeFunc)(void *); +#define HYPRE_MAX_NUM_COMM_KEYS 8 + typedef struct { HYPRE_Int log_level; @@ -38,17 +40,12 @@ typedef struct HYPRE_MemoryLocation memory_location; HYPRE_ExecutionPolicy default_exec_policy; - /* the device buffers needed to do MPI communication for struct comm */ - HYPRE_Complex *struct_comm_recv_buffer; - HYPRE_Complex *struct_comm_send_buffer; - HYPRE_Int struct_comm_recv_buffer_size; - HYPRE_Int struct_comm_send_buffer_size; - - /* GPU MPI */ #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int use_gpu_aware_mpi; #endif + hypre_MemoryLocation mpi_copy_buffer_location; + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -85,14 +82,11 @@ typedef struct #define hypre_HandleMemoryLocation(hypre_handle) ((hypre_handle) -> memory_location) #define hypre_HandleDefaultExecPolicy(hypre_handle) ((hypre_handle) -> default_exec_policy) -#define hypre_HandleStructCommRecvBuffer(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer) -#define hypre_HandleStructCommSendBuffer(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer) -#define hypre_HandleStructCommRecvBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_recv_buffer_size) -#define hypre_HandleStructCommSendBufferSize(hypre_handle) ((hypre_handle) -> struct_comm_send_buffer_size) - #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) + #define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) +#define hypre_HandleMPICopyBufferLocation(hypre_handle) ((hypre_handle) -> mpi_copy_buffer_location) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/headers b/src/utilities/headers index 7de3d3a8b2..fca45b66c9 100755 --- a/src/utilities/headers +++ b/src/utilities/headers @@ -33,7 +33,6 @@ extern "C" { # Structures and prototypes #=========================================================================== -cat handle.h >> $INTERNAL_HEADER cat state.h >> $INTERNAL_HEADER cat general.h >> $INTERNAL_HEADER cat base.h >> $INTERNAL_HEADER @@ -41,10 +40,11 @@ cat magma.h >> $INTERNAL_HEADER cat matrix_stats.h >> $INTERNAL_HEADER cat printf.h >> $INTERNAL_HEADER cat error.h >> $INTERNAL_HEADER -cat mpistubs.h >> $INTERNAL_HEADER cat smp.h >> $INTERNAL_HEADER cat memory.h >> $INTERNAL_HEADER +cat handle.h >> $INTERNAL_HEADER cat memory_tracker.h >> $INTERNAL_HEADER +cat mpistubs.h >> $INTERNAL_HEADER cat omp_device.h >> $INTERNAL_HEADER cat threading.h >> $INTERNAL_HEADER cat timing.h >> $INTERNAL_HEADER diff --git a/src/utilities/memory.c b/src/utilities/memory.c index 6a53357b1d..6ae0a5e600 100644 --- a/src/utilities/memory.c +++ b/src/utilities/memory.c @@ -443,7 +443,6 @@ hypre_MAlloc_core(size_t size, HYPRE_Int zeroinit, hypre_MemoryLocation location if (!ptr) { hypre_OutOfMemory(size); - hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1); } return ptr; @@ -809,6 +808,14 @@ hypre_Memcpy_core(void *dst, void *src, size_t size, hypre_MemoryLocation loc_ds hypre_WrongMemoryLocation(); } + +void +_hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src) +{ + hypre_Memcpy_core(dst, src, size, loc_dst, loc_src); +} + /*--------------------------------------------------------------------------* * ExecPolicy *--------------------------------------------------------------------------*/ @@ -1009,9 +1016,8 @@ hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location) if (hypre_GetActualMemLocation(location) != hypre_MEMORY_HOST) { - hypre_printf("hypre_TReAlloc only works with HYPRE_MEMORY_HOST; Use hypre_TReAlloc_v2 instead!\n"); + hypre_error_w_msg(HYPRE_ERROR_MEMORY, "hypre_TReAlloc only works with HYPRE_MEMORY_HOST; Use hypre_TReAlloc_v2 instead!\n"); hypre_assert(0); - hypre_MPI_Abort(hypre_MPI_COMM_WORLD, -1); return NULL; } diff --git a/src/utilities/memory.h b/src/utilities/memory.h index 64c44f4407..c089467e77 100644 --- a/src/utilities/memory.h +++ b/src/utilities/memory.h @@ -146,6 +146,9 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #define hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ (hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +(_hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc)) + #define hypre_TFree(ptr, location) \ ( hypre_Free((void *)ptr, location), ptr = NULL ) @@ -154,63 +157,6 @@ hypre_GetActualMemLocation(HYPRE_MemoryLocation location) #endif /* #if !defined(HYPRE_USING_MEMORY_TRACKER) */ - -/*-------------------------------------------------------------------------- - * Prototypes - *--------------------------------------------------------------------------*/ - -/* memory.c */ -HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, - char *memory_location_name); -void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); -void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); -void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); -void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); -void hypre_Free(void *ptr, HYPRE_MemoryLocation location); -void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, - HYPRE_MemoryLocation loc_src); -void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); -void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); - -void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); -void _hypre_Free(void *ptr, hypre_MemoryLocation location); - -HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); -HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, - HYPRE_MemoryLocation location2); - -HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); -HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, - size_t max_cached_bytes ); -HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); -void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); -HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); -HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); -HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); -HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); -HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); -HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); -HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); -HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, - const char *function, HYPRE_Int line); -#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ - hypre_HandleLogLevel(hypre_handle()),\ - __func__,\ - __LINE__) -/* memory_dmalloc.c */ -HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); -HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); -char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); -char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); -char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); -void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); - #ifdef __cplusplus } #endif diff --git a/src/utilities/memory_tracker.h b/src/utilities/memory_tracker.h index 72eb01469d..ee4abf5dee 100644 --- a/src/utilities/memory_tracker.h +++ b/src/utilities/memory_tracker.h @@ -160,6 +160,17 @@ extern hypre_MemoryTracker *_hypre_memory_tracker; } \ ) +#define _hypre_TMemcpy(dst, src, type, count, locdst, locsrc) \ +( \ +{ \ + _hypre_Memcpy((void *)(dst), (void *)(src), (size_t)(sizeof(type) * (count)), locdst, locsrc); \ + \ + hypre_MemoryTrackerInsert2("memcpy", (void *) (dst), (void *) (src), sizeof(type)*(count), \ + location_dst, location_src, \ + __FILE__, __func__, __LINE__); \ +} \ +) + #define _hypre_TFree(ptr, location) \ ( \ { \ diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index a33455c152..999eceebf9 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -756,6 +756,7 @@ hypre_MPI_Allreduce( void *sendbuf, case hypre_MPI_COMPLEX: { HYPRE_Complex *crecvbuf = (HYPRE_Complex *)recvbuf; + HYPRE_Complex *csendbuf = (HYPRE_Complex *)sendbuf; for (i = 0; i < count; i++) { @@ -888,8 +889,9 @@ hypre_MPI_Op_free( hypre_MPI_Op *op ) } #if defined(HYPRE_USING_GPU) -HYPRE_Int hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, - hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) +HYPRE_Int +hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, + hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) { HYPRE_UNUSED_VAR(comm); HYPRE_UNUSED_VAR(split_type); @@ -899,25 +901,209 @@ HYPRE_Int hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, return (0); } -HYPRE_Int hypre_MPI_Info_create( hypre_MPI_Info *info ) +HYPRE_Int +hypre_MPI_Info_create( hypre_MPI_Info *info ) { HYPRE_UNUSED_VAR(info); return (0); } -HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) +HYPRE_Int +hypre_MPI_Info_free( hypre_MPI_Info *info ) { HYPRE_UNUSED_VAR(info); return (0); } #endif +HYPRE_Int +hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, + hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, + HYPRE_Int *comm_keyval, + void *extra_state) +{ + *comm_keyval = 0; + return (0); +} + +HYPRE_Int +hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) +{ + *size = 0; + return (0); +} + +HYPRE_Int +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) +{ + return (0); +} + +HYPRE_Int +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) +{ + return (0); +} + /****************************************************************************** * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ #else +HYPRE_Int +hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status) +{ + hypre_int mpi_flag; + HYPRE_Int ierr = MPI_Request_get_status(request, &mpi_flag, status); + *flag = (HYPRE_Int) mpi_flag; + return ierr; +} + +HYPRE_Int +hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val) +{ + return (HYPRE_Int) MPI_Comm_set_attr(comm, (hypre_int) comm_keyval, attribute_val); +} + +HYPRE_Int +hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag) +{ + hypre_int mpi_flag; + HYPRE_Int ierr; + ierr = (HYPRE_Int) MPI_Comm_get_attr(comm, (hypre_int) comm_keyval, attribute_val, &mpi_flag); + *flag = (HYPRE_Int) mpi_flag; + return ierr; +} + +HYPRE_Int +hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval) +{ + hypre_int mpi_comm_keyval = (hypre_int) comm_keyval; + + return (HYPRE_Int) MPI_Comm_delete_attr(comm, mpi_comm_keyval); +} + +HYPRE_Int +hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, + hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, + HYPRE_Int *comm_keyval, + void *extra_state) +{ + hypre_int mpi_comm_keyval; + HYPRE_Int ierr; + ierr = MPI_Comm_create_keyval(comm_copy_attr_fn, comm_delete_attr_fn, &mpi_comm_keyval, extra_state); + *comm_keyval = mpi_comm_keyval; + return ierr; +} + +HYPRE_Int +hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval) +{ + hypre_int mpi_comm_keyval = (hypre_int) (*comm_keyval); + return MPI_Comm_free_keyval(&mpi_comm_keyval); +} + +HYPRE_Int +hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, + hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, + void *extra_state, + hypre_MPI_Request *request) +{ + return (HYPRE_Int) MPI_Grequest_start(query_fn, free_fn, cancel_fn, extra_state, request); +} + +HYPRE_Int +hypre_MPI_Grequest_complete( hypre_MPI_Request request ) +{ + return (HYPRE_Int) MPI_Grequest_complete(request); +} + +HYPRE_Int +hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size) +{ + hypre_int mpi_size; + HYPRE_Int ierr; + ierr = MPI_Type_size(datatype, &mpi_size); + *size = (HYPRE_Int) mpi_size; + return ierr; +} + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) @@ -932,8 +1118,8 @@ hypre_MPI_Finalize( void ) } HYPRE_Int -hypre_MPI_Abort( hypre_MPI_Comm comm, - HYPRE_Int errorcode ) +hypre_MPI_Abort( hypre_MPI_Comm comm, + HYPRE_Int errorcode ) { return (HYPRE_Int) MPI_Abort(comm, (hypre_int)errorcode); } @@ -957,8 +1143,8 @@ hypre_MPI_Barrier( hypre_MPI_Comm comm ) } HYPRE_Int -hypre_MPI_Comm_create( hypre_MPI_Comm comm, - hypre_MPI_Group group, +hypre_MPI_Comm_create( hypre_MPI_Comm comm, + hypre_MPI_Group group, hypre_MPI_Comm *newcomm ) { return (HYPRE_Int) MPI_Comm_create(comm, group, newcomm); @@ -972,8 +1158,8 @@ hypre_MPI_Comm_dup( hypre_MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_size( hypre_MPI_Comm comm, - HYPRE_Int *size ) +hypre_MPI_Comm_size( hypre_MPI_Comm comm, + HYPRE_Int *size ) { hypre_int mpi_size; HYPRE_Int ierr; @@ -983,8 +1169,8 @@ hypre_MPI_Comm_size( hypre_MPI_Comm comm, } HYPRE_Int -hypre_MPI_Comm_rank( hypre_MPI_Comm comm, - HYPRE_Int *rank ) +hypre_MPI_Comm_rank( hypre_MPI_Comm comm, + HYPRE_Int *rank ) { hypre_int mpi_rank; HYPRE_Int ierr; @@ -1008,11 +1194,11 @@ hypre_MPI_Comm_group( hypre_MPI_Comm comm, HYPRE_Int hypre_MPI_Comm_split( hypre_MPI_Comm comm, - HYPRE_Int n, - HYPRE_Int m, - hypre_MPI_Comm *comms ) + HYPRE_Int color, + HYPRE_Int key, + hypre_MPI_Comm *newcomm ) { - return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int)n, (hypre_int)m, comms); + return (HYPRE_Int) MPI_Comm_split(comm, (hypre_int) color, (hypre_int) key, newcomm); } HYPRE_Int @@ -1290,6 +1476,97 @@ hypre_MPI_Irecv( void *buf, (hypre_int)source, (hypre_int)tag, comm, request); } +HYPRE_Int +hypre_MPI_Isend_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) +{ + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size; + hypre_MPI_Type_size(datatype, &data_size); + + void *cbuf = hypre_MPICommGetSendBuffer(comm); + void *sbuf = cbuf ? cbuf : buf; + if (sbuf != buf) + { + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(sbuf, hypre_MPICommGetSendBufferLocation(comm), + buf, hypre_MPICommGetSendLocation(comm), + displs[num] * data_size, &action); + hypre_MPI_GRequestProcessAction(action); + hypre_MPI_GRequestDestroyAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); + } + + HYPRE_Int i; + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; + hypre_MPI_Isend((char *) sbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); + } + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_Irecv_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests) +{ + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size, i; + + hypre_MPI_Type_size(datatype, &data_size); + void *cbuf = hypre_MPICommGetRecvBuffer(comm); + void *rbuf = cbuf ? cbuf : buf; + + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; + hypre_MPI_Irecv((char *) rbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); + } + + if (rbuf != buf) + { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, + hypre_MPICommGetRecvBufferLocation(comm), + displs[num] * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPostRecvRequest(comm, extra_request); + } + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Send_init( void *buf, HYPRE_Int count, @@ -1304,6 +1581,54 @@ hypre_MPI_Send_init( void *buf, comm, request); } +HYPRE_Int +hypre_MPI_Send_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) +{ + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size; + hypre_MPI_Type_size(datatype, &data_size); + + void *cbuf = hypre_MPICommGetSendBuffer(comm); + void *sbuf = cbuf ? cbuf : buf; + if (sbuf != buf) + { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(sbuf, hypre_MPICommGetSendBufferLocation(comm), buf, + hypre_MPICommGetSendLocation(comm), + displs[num] * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPreSendRequest(comm, extra_request); + } + + HYPRE_Int i; + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; + hypre_MPI_Send_init((char *) sbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); + } + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Recv_init( void *buf, HYPRE_Int count, @@ -1311,13 +1636,61 @@ hypre_MPI_Recv_init( void *buf, HYPRE_Int dest, HYPRE_Int tag, hypre_MPI_Comm comm, - hypre_MPI_Request *request ) + hypre_MPI_Request *request) { return (HYPRE_Int) MPI_Recv_init(buf, (hypre_int)count, datatype, (hypre_int)dest, (hypre_int)tag, comm, request); } +HYPRE_Int +hypre_MPI_Recv_init_Multiple( void *buf, + HYPRE_Int num, + HYPRE_Int *displs, + HYPRE_Int *counts, + hypre_MPI_Datatype datatype, + HYPRE_Int *procs, + HYPRE_Int tag, + hypre_MPICommWrapper *comm, + hypre_MPI_Request *requests ) +{ + if (!num) + { + return hypre_error_flag; + } + + HYPRE_Int data_size, i; + + hypre_MPI_Type_size(datatype, &data_size); + void *cbuf = hypre_MPICommGetRecvBuffer(comm); + void *rbuf = cbuf ? cbuf : buf; + + for (i = 0; i < num; i++) + { + HYPRE_Int start = displs[i]; + HYPRE_Int len = counts ? counts[i] : displs[i + 1] - start; + hypre_MPI_Recv_init((char *) rbuf + start * data_size, len, datatype, procs[i], tag, hypre_MPICommWrapperComm(comm), &requests[i]); + } + + if (rbuf != buf) + { + hypre_MPI_Request *extra_request = hypre_CTAlloc(hypre_MPI_Request, 1, HYPRE_MEMORY_HOST); + hypre_MPI_GRequest_Action *action; + hypre_MPI_GRequestGetCopyAction(buf, hypre_MPICommGetRecvLocation(comm), rbuf, + hypre_MPICommGetRecvBufferLocation(comm), + displs[num] * data_size, &action); + + hypre_MPI_Grequest_start(hypre_grequest_query_fn, + hypre_grequest_free_fn, + hypre_grequest_noop_cancel_fn, + action, extra_request); + hypre_MPI_Grequest_complete(*extra_request); + hypre_MPICommSetPostRecvRequest(comm, extra_request); + } + + return hypre_error_flag; +} + HYPRE_Int hypre_MPI_Irsend( void *buf, HYPRE_Int count, @@ -1580,3 +1953,345 @@ hypre_MPI_Info_free( hypre_MPI_Info *info ) #endif #endif + +HYPRE_Int +hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location) +{ +#if defined(HYPRE_USING_GPU) + return !hypre_GetGpuAwareMPI() && + memory_location != hypre_MEMORY_HOST && + memory_location != hypre_MEMORY_HOST_PINNED; +#else + /* RL: return 1 for debugging without GPUs, + so we always has a host buffer for MPI. + O.w. make sure return Z E R O! + */ + return 0; +#endif +} + +HYPRE_Int +hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) +{ + hypre_MPICommWrapperSendLocation(comm) = location; + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperSendLocation(comm); +} + +HYPRE_Int +hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperSendLocation(comm) = hypre_MEMORY_UNDEFINED; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) +{ + hypre_MPICommWrapperRecvLocation(comm) = location; + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperRecvLocation(comm); +} + +HYPRE_Int +hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperRecvLocation(comm) = hypre_MEMORY_UNDEFINED; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) +{ + hypre_MPICommWrapperSendBufferLocation(comm) = location; + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperSendBufferLocation(comm); +} + +HYPRE_Int +hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperSendBufferLocation(comm) = hypre_MEMORY_UNDEFINED; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, + hypre_MemoryLocation location) +{ + hypre_MPICommWrapperRecvBufferLocation(comm) = location; + return hypre_error_flag; +} + +hypre_MemoryLocation +hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperRecvBufferLocation(comm); +} + +HYPRE_Int +hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperRecvBufferLocation(comm) = hypre_MEMORY_UNDEFINED; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, + void *buffer) +{ + hypre_MPICommWrapperSendBuffer(comm) = buffer; + return hypre_error_flag; +} + +void * +hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperSendBuffer(comm); +} + +HYPRE_Int +hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperSendBuffer(comm) = NULL; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, + void *buffer) +{ + hypre_MPICommWrapperRecvBuffer(comm) = buffer; + return hypre_error_flag; +} + +void * +hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperRecvBuffer(comm); +} + +HYPRE_Int +hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperRecvBuffer(comm) = NULL; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, + hypre_MPI_Request *request) +{ + hypre_MPICommWrapperPreSendRequest(comm) = request; + return hypre_error_flag; +} + +hypre_MPI_Request * +hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperPreSendRequest(comm); +} + +HYPRE_Int +hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperPreSendRequest(comm) = NULL; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, + hypre_MPI_Request *request) +{ + hypre_MPICommWrapperPostRecvRequest(comm) = request; + return hypre_error_flag; +} + +hypre_MPI_Request * +hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm) +{ + return hypre_MPICommWrapperPostRecvRequest(comm); +} + +HYPRE_Int +hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm) +{ + hypre_MPICommWrapperPostRecvRequest(comm) = NULL; + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_GRequestGetCopyAction(void *dest, + hypre_MemoryLocation dest_location, + void *src, + hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, + hypre_MPI_GRequest_Action **action_ptr) +{ + if (dest == src || num_bytes == 0) + { + *action_ptr = NULL; + return hypre_error_flag; + } + + HYPRE_Int action_id = HYPRE_MPI_GREQUEST_COPY; + hypre_MPI_GRequest_Action *action = hypre_CTAlloc(hypre_MPI_GRequest_Action, 1, HYPRE_MEMORY_HOST); + + HYPRE_Int nb = 2 * (sizeof(HYPRE_Int) + sizeof(void *) + sizeof(hypre_MemoryLocation)); + HYPRE_Int data_size = hypre_MPI_GRequest_ActionDataSize(action); + + hypre_MPI_GRequest_ActionCount(action) ++; + hypre_MPI_GRequest_ActionDataSize(action) = data_size + nb; + hypre_MPI_GRequest_ActionData(action) = hypre_TReAlloc(hypre_MPI_GRequest_ActionData(action), + char, + hypre_MPI_GRequest_ActionDataSize(action), + HYPRE_MEMORY_HOST); + + char *data = hypre_MPI_GRequest_ActionData(action) + data_size; + hypre_TMemcpy(data, &action_id, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(data, &num_bytes, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(data, &dest, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(data, &src, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(data, &dest_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + hypre_TMemcpy(data, &src_location, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + + hypre_assert(data == hypre_MPI_GRequest_ActionData(action) + hypre_MPI_GRequest_ActionDataSize(action)); + + *action_ptr = action; + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action) +{ + if (!action) + { + return hypre_error_flag; + } + + HYPRE_Int count = hypre_MPI_GRequest_ActionCount(action); + char *data = hypre_MPI_GRequest_ActionData(action); + HYPRE_Int k; + + for (k = 0; k < count; k ++) + { + HYPRE_Int action_id; + + hypre_TMemcpy(&action_id, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + + if (action_id == HYPRE_MPI_GREQUEST_FREE) + { + void *ptr; + hypre_MemoryLocation ptr_location; + hypre_TMemcpy(&ptr, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&ptr_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! + _hypre_TFree(ptr, ptr_location); + } + else if (action_id == HYPRE_MPI_GREQUEST_COPY) + { + void *dest, *src; + HYPRE_Int num_bytes; + hypre_MemoryLocation dest_location, src_location; + hypre_TMemcpy(&num_bytes, data, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(HYPRE_Int); + hypre_TMemcpy(&dest, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&src, data, void *, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(void *); + hypre_TMemcpy(&dest_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + hypre_TMemcpy(&src_location, data, hypre_MemoryLocation, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + data += sizeof(hypre_MemoryLocation); + // action! + hypre_GpuProfilingPushRange("MPI-H2D/D2H"); +#if 0 + char dname[32],sname[32]; + hypre_GetMemoryLocationName(dest_location, dname); + hypre_GetMemoryLocationName(src_location, sname); + hypre_printf(" copying %s %p <-- %s %p, %d bytes\n", dname, dest, sname, src, num_bytes); +#endif + _hypre_TMemcpy(dest, src, char, num_bytes, dest_location, src_location); + hypre_GpuProfilingPopRange(); + } + } + + hypre_assert(data == hypre_MPI_GRequest_ActionData(action) + hypre_MPI_GRequest_ActionDataSize(action)); + + return hypre_error_flag; +} + +HYPRE_Int +hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action) +{ + if (!action) + { + return hypre_error_flag; + } + + hypre_MPI_GRequest_ActionCount(action) = 0; + hypre_MPI_GRequest_ActionDataSize(action) = 0; + hypre_TFree(hypre_MPI_GRequest_ActionData(action), HYPRE_MEMORY_HOST); + + return hypre_error_flag; +} + +hypre_int +hypre_grequest_free_fn(void *extra_state) +{ + hypre_MPI_GRequest_Action *action = (hypre_MPI_GRequest_Action *) extra_state; + hypre_MPI_GRequestDestroyAction(action); + hypre_TFree(action, HYPRE_MEMORY_HOST); + return hypre_MPI_SUCCESS; +} + +hypre_int +hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status) +{ + hypre_MPI_GRequestProcessAction((hypre_MPI_GRequest_Action *) extra_state); + return hypre_MPI_SUCCESS; +} + +hypre_int +hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete) { return hypre_MPI_SUCCESS; } + +hypre_MPICommWrapper * +hypre_MPICommWrapperCreate(hypre_MPI_Comm comm) +{ + hypre_MPICommWrapper *wrapper = hypre_CTAlloc(hypre_MPICommWrapper, 1, HYPRE_MEMORY_HOST); + hypre_MPICommWrapperComm(wrapper) = comm; + hypre_MPICommWrapperSendLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperRecvLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperSendBufferLocation(wrapper) = hypre_MEMORY_UNDEFINED; + hypre_MPICommWrapperRecvBufferLocation(wrapper) = hypre_MEMORY_UNDEFINED; + + return wrapper; +} diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index 50cf780ef4..343ff06aaf 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -18,7 +18,7 @@ extern "C" { #endif -#ifdef HYPRE_SEQUENTIAL +#if defined(HYPRE_SEQUENTIAL) /****************************************************************************** * MPI stubs to generate serial codes without mpi @@ -68,6 +68,7 @@ extern "C" { #define MPI_BOR hypre_MPI_BOR #define MPI_SUCCESS hypre_MPI_SUCCESS #define MPI_STATUSES_IGNORE hypre_MPI_STATUSES_IGNORE +#define MPI_STATUS_IGNORE hypre_MPI_STATUS_IGNORE #define MPI_UNDEFINED hypre_MPI_UNDEFINED #define MPI_REQUEST_NULL hypre_MPI_REQUEST_NULL @@ -133,6 +134,12 @@ extern "C" { #define MPI_Op_create hypre_MPI_Op_create #define MPI_User_function hypre_MPI_User_function #define MPI_Info_create hypre_MPI_Info_create +#define MPI_Comm_set_attr hypre_MPI_Comm_set_attr +#define MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function +#define MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function +#define MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +#define MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +#define MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; /*-------------------------------------------------------------------------- * Types, etc. @@ -144,6 +151,11 @@ typedef HYPRE_Int hypre_MPI_Group; typedef HYPRE_Int hypre_MPI_Request; typedef HYPRE_Int hypre_MPI_Datatype; typedef void (hypre_MPI_User_function) (void); +typedef void (hypre_MPI_Comm_copy_attr_function) (void); +typedef void (hypre_MPI_Comm_delete_attr_function) (void); +typedef void (hypre_MPI_Grequest_query_function) (void); +typedef void (hypre_MPI_Grequest_free_function) (void); +typedef void (hypre_MPI_Grequest_cancel_function) (void); typedef struct { @@ -152,7 +164,7 @@ typedef struct } hypre_MPI_Status; typedef HYPRE_Int hypre_MPI_Op; -typedef HYPRE_Int hypre_MPI_Aint; +typedef intptr_t hypre_MPI_Aint; typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_COMM_SELF 1 @@ -182,6 +194,7 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_BOR 5 #define hypre_MPI_SUCCESS 0 #define hypre_MPI_STATUSES_IGNORE 0 +#define hypre_MPI_STATUS_IGNORE 0 #define hypre_MPI_UNDEFINED -9999 #define hypre_MPI_REQUEST_NULL 0 @@ -189,21 +202,29 @@ typedef HYPRE_Int hypre_MPI_Info; #define hypre_MPI_ANY_SOURCE 1 #define hypre_MPI_ANY_TAG 1 +#define hypre_MPI_COMM_NULL_COPY_FN NULL +#define hypre_MPI_COMM_NULL_DELETE_FN NULL + #else /****************************************************************************** * MPI stubs to do casting of HYPRE_Int and hypre_int correctly *****************************************************************************/ -typedef MPI_Comm hypre_MPI_Comm; -typedef MPI_Group hypre_MPI_Group; -typedef MPI_Request hypre_MPI_Request; -typedef MPI_Datatype hypre_MPI_Datatype; -typedef MPI_Status hypre_MPI_Status; -typedef MPI_Op hypre_MPI_Op; -typedef MPI_Aint hypre_MPI_Aint; -typedef MPI_Info hypre_MPI_Info; -typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm hypre_MPI_Comm; +typedef MPI_Group hypre_MPI_Group; +typedef MPI_Request hypre_MPI_Request; +typedef MPI_Datatype hypre_MPI_Datatype; +typedef MPI_Status hypre_MPI_Status; +typedef MPI_Op hypre_MPI_Op; +typedef MPI_Aint hypre_MPI_Aint; +typedef MPI_Info hypre_MPI_Info; +typedef MPI_User_function hypre_MPI_User_function; +typedef MPI_Comm_copy_attr_function hypre_MPI_Comm_copy_attr_function; +typedef MPI_Comm_delete_attr_function hypre_MPI_Comm_delete_attr_function; +typedef MPI_Grequest_query_function hypre_MPI_Grequest_query_function; +typedef MPI_Grequest_free_function hypre_MPI_Grequest_free_function; +typedef MPI_Grequest_cancel_function hypre_MPI_Grequest_cancel_function; #define hypre_MPI_COMM_WORLD MPI_COMM_WORLD #define hypre_MPI_COMM_NULL MPI_COMM_NULL @@ -231,6 +252,7 @@ typedef MPI_User_function hypre_MPI_User_function; #define hypre_MPI_BOR MPI_BOR #define hypre_MPI_SUCCESS MPI_SUCCESS #define hypre_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE +#define hypre_MPI_STATUS_IGNORE MPI_STATUS_IGNORE #define hypre_MPI_UNDEFINED MPI_UNDEFINED #define hypre_MPI_REQUEST_NULL MPI_REQUEST_NULL @@ -241,12 +263,52 @@ typedef MPI_User_function hypre_MPI_User_function; #define hypre_MPI_TAG MPI_TAG #define hypre_MPI_LAND MPI_LAND +#define hypre_MPI_COMM_NULL_COPY_FN MPI_COMM_NULL_COPY_FN +#define hypre_MPI_COMM_NULL_DELETE_FN MPI_COMM_NULL_DELETE_FN + #endif /****************************************************************************** * Everything below this applies to both ifdef cases above *****************************************************************************/ +typedef struct +{ + hypre_MPI_Comm comm; + hypre_MemoryLocation send_location; + hypre_MemoryLocation recv_location; + hypre_MemoryLocation send_buffer_location; + hypre_MemoryLocation recv_buffer_location; + void *send_buffer; + void *recv_buffer; + hypre_MPI_Request *pre_send_request; + hypre_MPI_Request *post_recv_request; +} hypre_MPICommWrapper; + +#define hypre_MPICommWrapperComm(wrapper) ((wrapper) -> comm) +#define hypre_MPICommWrapperSendLocation(wrapper) ((wrapper) -> send_location) +#define hypre_MPICommWrapperRecvLocation(wrapper) ((wrapper) -> recv_location) +#define hypre_MPICommWrapperSendBufferLocation(wrapper) ((wrapper) -> send_buffer_location) +#define hypre_MPICommWrapperRecvBufferLocation(wrapper) ((wrapper) -> recv_buffer_location) +#define hypre_MPICommWrapperSendBuffer(wrapper) ((wrapper) -> send_buffer) +#define hypre_MPICommWrapperRecvBuffer(wrapper) ((wrapper) -> recv_buffer) +#define hypre_MPICommWrapperPreSendRequest(wrapper) ((wrapper) -> pre_send_request) +#define hypre_MPICommWrapperPostRecvRequest(wrapper) ((wrapper) -> post_recv_request) + +#define HYPRE_MPI_GREQUEST_FREE 1 +#define HYPRE_MPI_GREQUEST_COPY 2 + +typedef struct +{ + HYPRE_Int count; + HYPRE_Int data_size; + char *data; +} hypre_MPI_GRequest_Action; + +#define hypre_MPI_GRequest_ActionCount(action) ((action) -> count) +#define hypre_MPI_GRequest_ActionDataSize(action) ((action) -> data_size) +#define hypre_MPI_GRequest_ActionData(action) ((action) -> data) + /*-------------------------------------------------------------------------- * Prototypes *--------------------------------------------------------------------------*/ @@ -329,6 +391,7 @@ HYPRE_Int hypre_MPI_Reduce( void *sendbuf, void *recvbuf, HYPRE_Int count, HYPRE_Int hypre_MPI_Scan( void *sendbuf, void *recvbuf, HYPRE_Int count, hypre_MPI_Datatype datatype, hypre_MPI_Op op, hypre_MPI_Comm comm ); HYPRE_Int hypre_MPI_Request_free( hypre_MPI_Request *request ); +HYPRE_Int hypre_MPI_Request_get_status(hypre_MPI_Request request, HYPRE_Int *flag, hypre_MPI_Status *status); HYPRE_Int hypre_MPI_Type_contiguous( HYPRE_Int count, hypre_MPI_Datatype oldtype, hypre_MPI_Datatype *newtype ); HYPRE_Int hypre_MPI_Type_vector( HYPRE_Int count, HYPRE_Int blocklength, HYPRE_Int stride, @@ -343,6 +406,21 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); +HYPRE_Int hypre_MPI_GRequestGetCopyAction(void *dest, hypre_MemoryLocation dest_location, + void *src, hypre_MemoryLocation src_location, + HYPRE_Int num_bytes, hypre_MPI_GRequest_Action **action_ptr); +HYPRE_Int hypre_MPI_GRequestProcessAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_MPI_GRequestDestroyAction(hypre_MPI_GRequest_Action *action); +HYPRE_Int hypre_NeedMPICopyBuffer(hypre_MemoryLocation memory_location); +HYPRE_Int hypre_MPI_Isend_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Irecv_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Send_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); +HYPRE_Int hypre_MPI_Recv_init_Multiple( void *buf, HYPRE_Int num, HYPRE_Int *displs, HYPRE_Int *counts, +hypre_MPI_Datatype datatype, HYPRE_Int *procs, HYPRE_Int tag, hypre_MPICommWrapper *comm, hypre_MPI_Request *requests ); + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm); @@ -350,6 +428,50 @@ HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); #endif +HYPRE_Int hypre_MPI_Comm_create_keyval(hypre_MPI_Comm_copy_attr_function *comm_copy_attr_fn, +hypre_MPI_Comm_delete_attr_function *comm_delete_attr_fn, HYPRE_Int *comm_keyval, void *extra_state); +HYPRE_Int hypre_MPI_Comm_free_keyval(HYPRE_Int *comm_keyval); +HYPRE_Int hypre_MPI_Comm_set_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val); +HYPRE_Int hypre_MPI_Comm_get_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval, void *attribute_val, HYPRE_Int *flag); +HYPRE_Int hypre_MPI_Comm_delete_attr(hypre_MPI_Comm comm, HYPRE_Int comm_keyval); +HYPRE_Int hypre_MPI_Grequest_start(hypre_MPI_Grequest_query_function *query_fn, hypre_MPI_Grequest_free_function *free_fn, + hypre_MPI_Grequest_cancel_function *cancel_fn, void *extra_state, hypre_MPI_Request *request); +HYPRE_Int hypre_MPI_Grequest_complete( hypre_MPI_Request request ); +HYPRE_Int hypre_MPI_Type_size(hypre_MPI_Datatype datatype, HYPRE_Int *size); + +hypre_MemoryLocation hypre_MPICommGetSendLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetSendBufferLocation(hypre_MPICommWrapper *comm); +hypre_MemoryLocation hypre_MPICommGetRecvBufferLocation(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetSendBuffer(hypre_MPICommWrapper *comm); +void* hypre_MPICommGetRecvBuffer(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPreSendRequest(hypre_MPICommWrapper *comm); +hypre_MPI_Request* hypre_MPICommGetPostRecvRequest(hypre_MPICommWrapper *comm); + +HYPRE_Int hypre_MPICommSetSendLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetRecvBufferLocation(hypre_MPICommWrapper *comm, hypre_MemoryLocation); +HYPRE_Int hypre_MPICommSetSendBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetRecvBuffer(hypre_MPICommWrapper *comm, void*); +HYPRE_Int hypre_MPICommSetPreSendRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); +HYPRE_Int hypre_MPICommSetPostRecvRequest(hypre_MPICommWrapper *comm, hypre_MPI_Request *request); + +HYPRE_Int hypre_MPICommDeleteSendLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBufferLocation(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteSendBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeleteRecvBuffer(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePreSendRequest(hypre_MPICommWrapper *comm); +HYPRE_Int hypre_MPICommDeletePostRecvRequest(hypre_MPICommWrapper *comm); + +hypre_int hypre_grequest_free_fn(void *extra_state); +hypre_int hypre_grequest_query_fn(void *extra_state, hypre_MPI_Status *status); +hypre_int hypre_grequest_noop_cancel_fn(void *extra_state, hypre_int complete); + +hypre_MPICommWrapper *hypre_MPICommWrapperCreate(hypre_MPI_Comm comm); + #ifdef __cplusplus } #endif diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 0fe65c07f7..a24f252fd5 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -446,6 +446,60 @@ HYPRE_Int hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, HYPRE_Int * hypre_IntArrayArray *w ); #endif +/* memory.c */ +HYPRE_Int hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, + char *memory_location_name); +void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location); +void * hypre_Memset(void *ptr, HYPRE_Int value, size_t num, HYPRE_MemoryLocation location); +void hypre_MemPrefetch(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_MAlloc(size_t size, HYPRE_MemoryLocation location); +void * hypre_CAlloc( size_t count, size_t elt_size, HYPRE_MemoryLocation location); +void hypre_Free(void *ptr, HYPRE_MemoryLocation location); +void hypre_Memcpy(void *dst, void *src, size_t size, HYPRE_MemoryLocation loc_dst, + HYPRE_MemoryLocation loc_src); +void * hypre_ReAlloc(void *ptr, size_t size, HYPRE_MemoryLocation location); +void * hypre_ReAlloc_v2(void *ptr, size_t old_size, size_t new_size, HYPRE_MemoryLocation location); + +void * _hypre_MAlloc(size_t size, hypre_MemoryLocation location); +void _hypre_Free(void *ptr, hypre_MemoryLocation location); +void _hypre_Memcpy(void *dst, void *src, size_t size, hypre_MemoryLocation loc_dst, + hypre_MemoryLocation loc_src); + +HYPRE_ExecutionPolicy hypre_GetExecPolicy1(HYPRE_MemoryLocation location); +HYPRE_ExecutionPolicy hypre_GetExecPolicy2(HYPRE_MemoryLocation location1, + HYPRE_MemoryLocation location2); + +HYPRE_Int hypre_GetPointerLocation(const void *ptr, hypre_MemoryLocation *memory_location); +HYPRE_Int hypre_SetCubMemPoolSize( hypre_uint bin_growth, hypre_uint min_bin, hypre_uint max_bin, + size_t max_cached_bytes ); +HYPRE_Int hypre_umpire_host_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_host_pooled_free(void *ptr); +void *hypre_umpire_host_pooled_realloc(void *ptr, size_t size); +HYPRE_Int hypre_umpire_device_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_device_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_um_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_um_pooled_free(void *ptr); +HYPRE_Int hypre_umpire_pinned_pooled_allocate(void **ptr, size_t nbytes); +HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr); +HYPRE_Int hypre_UmpireInit(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireFinalize(hypre_Handle *hypre_handle_); +HYPRE_Int hypre_UmpireGetCurrentMemoryUsage(MPI_Comm comm, HYPRE_Real *current); +HYPRE_Int hypre_UmpireMemoryGetUsage(HYPRE_Real *memory); +HYPRE_Int hypre_HostMemoryGetUsage(HYPRE_Real *mem); +HYPRE_Int hypre_MemoryPrintUsage(MPI_Comm comm, HYPRE_Int level, + const char *function, HYPRE_Int line); +#define HYPRE_PRINT_MEMORY_USAGE(comm) hypre_MemoryPrintUsage(comm,\ + hypre_HandleLogLevel(hypre_handle()),\ + __func__,\ + __LINE__) +/* memory_dmalloc.c */ +HYPRE_Int hypre_InitMemoryDebugDML( HYPRE_Int id ); +HYPRE_Int hypre_FinalizeMemoryDebugDML( void ); +char *hypre_MAllocDML( HYPRE_Int size, char *file, HYPRE_Int line ); +char *hypre_CAllocDML( HYPRE_Int count, HYPRE_Int elt_size, char *file, HYPRE_Int line ); +char *hypre_ReAllocDML( char *ptr, HYPRE_Int size, char *file, HYPRE_Int line ); +void hypre_FreeDML( char *ptr, char *file, HYPRE_Int line ); + /* memory_tracker.c */ #ifdef HYPRE_USING_MEMORY_TRACKER hypre_MemoryTracker* hypre_memory_tracker(void);