diff --git a/src/Makefile b/src/Makefile index 2814646..7c0d1f8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -49,8 +49,8 @@ ifeq ($(MPI), 1) HIPCUFLAGS += -DMPI_SUPPORT -I${MPI_HOME}/include -I${MPI_HOME}/include/mpi HIPLDFLAGS += -L${MPI_HOME}/lib -lmpi else ifeq ($(MPICH), 1) -HIPCUFLAGS += -DMPI_SUPPORT -I/usr/include/mpich -I/usr/include/x86_64-linux-gnu/mpich -HIPLDFLAGS += -L/usr/lib -lmpich +HIPCUFLAGS += -DMPI_SUPPORT -I/usr/include/mpich -I/usr/include/x86_64-linux-gnu/mpich -I/usr/include/mpich-x86_64/ +HIPLDFLAGS += -L/usr/lib -L/usr/lib64/mpich/lib/ -lmpich endif LIBRARIES += rccl diff --git a/src/alltoallv.cu b/src/alltoallv.cu index 5bab307..b9f4d32 100644 --- a/src/alltoallv.cu +++ b/src/alltoallv.cu @@ -8,7 +8,7 @@ #include "cuda_runtime.h" #include "common.h" -#define USE_RCCL_GATHER_SCATTER +//#define USE_RCCL_GATHER_SCATTER void AlltoAllvGetCollByteCount(size_t *sendcount, size_t *recvcount, size_t *paramcount, size_t *sendInplaceOffset, size_t *recvInplaceOffset, size_t count, int nranks) { if (count < nranks*nranks/2) { @@ -54,12 +54,12 @@ testResult_t AlltoAllvInitData(struct threadArgs* args, ncclDataType_t type, ncc size_t chunksize = data_count/nranks; for (int j=0; j