diff --git a/src/exchangeDonors.C b/src/exchangeDonors.C index 4e50f66..309b3cd 100644 --- a/src/exchangeDonors.C +++ b/src/exchangeDonors.C @@ -35,7 +35,6 @@ void tioga::exchangeDonors(void) // and receiving // pc->getMap(&nsend,&nrecv,&sndMap,&rcvMap); - if (nsend == 0) return; // // create packets to send and receive // and initialize them to zero @@ -74,7 +73,8 @@ void tioga::exchangeDonors(void) // // communicate donors (comm1) // - pc->sendRecvPackets(sndPack,rcvPack); + pc->sendRecvPackets2(sndPack,rcvPack); + if (nsend == 0) return; // Initialize linked lists and populate donor data from rcvPack for (int ib=0;ib 0) { rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); } @@ -212,42 +209,166 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) // TIOGA_FREE(status); // } -void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) +void parallelComm::sendRecvPackets2(PACKET *sndPack,PACKET *rcvPack) { - int i; - int *scount,*rcount; - int tag,irnum; - MPI_Request *request; - MPI_Status *status; + int *sint=(int *)malloc(sizeof(int)*numprocs); + int *sreal=(int *) malloc(sizeof(int)*numprocs); + int *rint=(int *)malloc(sizeof(int)*numprocs); + int *rreal=(int *) malloc(sizeof(int)*numprocs); + // remove when using stl vectors and just init the vectors to 0 + for(int i=0;i snd_int_displs(numprocs+1, 0); + std::vector rcv_int_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_int_displs[i] = snd_int_displs[i-1] + sint[i-1]; + rcv_int_displs[i] = rcv_int_displs[i-1] + rint[i-1]; + } + for (int i=0; i < nsend; i++) { + int displ = snd_int_displs[sndMap[i]]; + for(int j=0; j < sint[sndMap[i]]; j++){ + all_snd_intData[displ+j] = sndPack[i].intData[j]; + } + } + MPI_Request int_request; + MPI_Ialltoallv(all_snd_intData, + sint, + snd_int_displs.data(), + MPI_INT, + all_rcv_intData, + rint, + rcv_int_displs.data(), + MPI_INT, + scomm, + &int_request); + + int all_snd_nreals = std::accumulate(sreal, sreal + numprocs, 0); + int all_rcv_nreals = std::accumulate(rreal, rreal + numprocs, 0); + REAL *all_snd_realData, *all_rcv_realData; + all_snd_realData=(REAL *) malloc(sizeof(REAL)*all_snd_nreals); + all_rcv_realData=(REAL *) malloc(sizeof(REAL)*all_rcv_nreals); + for (int i = 0; i < all_snd_nreals; i++) { + all_snd_realData[i] = 0; + } + for (int i = 0; i < all_rcv_nreals; i++) { + all_rcv_realData[i] = 0; + } + std::vector snd_real_displs(numprocs+1, 0); + std::vector rcv_real_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_real_displs[i] = snd_real_displs[i-1] + sreal[i-1]; + rcv_real_displs[i] = rcv_real_displs[i-1] + rreal[i-1]; + } + for (int i=0; i < nsend; i++) { + int displ = snd_real_displs[sndMap[i]]; + for(int j=0; j < sreal[sndMap[i]]; j++){ + all_snd_realData[displ+j] = sndPack[i].realData[j]; + } + } + MPI_Request real_request; + MPI_Ialltoallv(all_snd_realData, + sreal, + snd_real_displs.data(), + MPI_DOUBLE, + all_rcv_realData, + rreal, + rcv_real_displs.data(), + MPI_DOUBLE, + scomm, + &real_request); + + // FIXME: here and above I think I should move this a bit lower + MPI_Wait(&int_request, MPI_STATUS_IGNORE); + for(int i=0;i 0) { + rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); + } + if (rcvPack[i].nreals > 0) { + rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); + } + } + + // FIXME: here and above I think I should move this a bit lower + MPI_Wait(&real_request, MPI_STATUS_IGNORE); + for (int i=0; i < nrecv; i++) { + int displ = rcv_int_displs[rcvMap[i]]; + for(int j=0; j < rint[rcvMap[i]]; j++){ + rcvPack[i].intData[j] = all_rcv_intData[displ+j]; + } + } + for (int i=0; i < nrecv; i++) { + int displ = rcv_real_displs[rcvMap[i]]; + for(int j=0; j < rreal[rcvMap[i]]; j++){ + rcvPack[i].realData[j] = all_rcv_realData[displ+j]; + } + } + + TIOGA_FREE(all_snd_intData); + TIOGA_FREE(all_rcv_intData); + TIOGA_FREE(all_snd_realData); + TIOGA_FREE(all_rcv_realData); + TIOGA_FREE(sint); + TIOGA_FREE(sreal); + TIOGA_FREE(rint); + TIOGA_FREE(rreal); +} + +void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) +{ + int *scount=(int *)malloc(2*sizeof(int)*nsend); + int *rcount=(int *) malloc(2*sizeof(int)*nrecv); + MPI_Request *request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); + MPI_Status *status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); + // + for(int i=0;i 0) { tag=1; @@ -265,7 +386,7 @@ void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) } } // - for(i=0;i 0){ tag=1; @@ -290,34 +411,28 @@ void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) { - int i; - int *scount,*rcount; - int tag,irnum; - MPI_Request *request; - MPI_Status *status; + int *scount=(int *)malloc(2*sizeof(int)*nsend); + int *rcount=(int *) malloc(2*sizeof(int)*nrecv); + MPI_Request *request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); + MPI_Status *status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // - scount=(int *)malloc(2*sizeof(int)*nsend); - rcount=(int *) malloc(2*sizeof(int)*nrecv); - request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); - status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); - // - for(i=0;i 0) { tag=1; @@ -351,7 +466,7 @@ void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) } } // - for(i=0;i 0){ tag=1; @@ -376,8 +491,6 @@ void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) void parallelComm::setMap(int ns,int nr, int *snd,int *rcv) { - int i; - // if (sndMap) TIOGA_FREE(sndMap); sndMap=NULL; if (rcvMap) TIOGA_FREE(rcvMap); rcvMap=NULL; // @@ -386,8 +499,8 @@ void parallelComm::setMap(int ns,int nr, int *snd,int *rcv) sndMap=(int *) malloc(sizeof(int)*nsend); rcvMap=(int *) malloc(sizeof(int)*nrecv); // - for(i=0;imyTimer("tioga::exchangeDonors",1); //this->reduce_fringes(); //outputStatistics(); - MPI_Allreduce(&ihigh,&ihighGlobal,1,MPI_INT,MPI_MAX,scomm); + //MPI_Allreduce(&ihigh,&ihighGlobal,1,MPI_INT,MPI_MAX,scomm); + assert(ihigh == 0); + ihighGlobal = 0; //if (ihighGlobal) { this->myTimer("tioga::getCellIblanks",0); for (int ib=0;ib