Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ All tests support the same set of arguments :
* `-u,--cumask <d0,d1,d2,d3>` Default: None
* Performance
* `-n,--iters <iteration count>` number of iterations. Default : 20.
* `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 5.
* `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 1.
* `-m,--agg_iters <aggregation count>` number of operations to aggregate together in each iteration. Default : 1.
* `-N,--run_cycles <cycle count>` run & print each cycle. Default : 1; 0=infinite.
* `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1.
Expand All @@ -141,6 +141,7 @@ All tests support the same set of arguments :
* `-G,--hipgraph <num graph launches>` Capture iterations as a HIP graph and then replay specified number of times. Default : 0.
* `-C,--report_cputime <0/1>` Report CPU time instead of latency. Default : 0.
* `-R,--local_register <0/1/2>` enable local (1) or symmetric (2) buffer registration on send/recv buffers. Default : 0.
* `-S,--report_timestamps <0/1>` Add timestamp ("%Y-%m-%d %H:%M:%S") to each performance report line. Default : 0.
* `-T,--timeout <time in seconds>` timeout each test after specified number of seconds. Default : disabled.
* `-F,--cache_flush <cache flush after every -F iteration>` Flush the cache after every -F iterations. Default : 0 (No cache flush).
* `-O,--out_of_place <0=in-place only, 1=out-of-place only>`. Default: both.
Expand Down
8 changes: 4 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,13 @@ ${HIPIFY_DIR}/%.h: %.h

.PRECIOUS: ${DST_DIR}/%.o

${DST_DIR}/%.o: ${HIPIFY_DIR}/%.cu.cpp ${HIPIFY_DIR}/common.h $(TEST_VERIFIABLE_HDRS) $(GIT_VERSION_FILE)
${DST_DIR}/%.o: ${HIPIFY_DIR}/%.cu.cpp ${HIPIFY_DIR}/common.h ${HIPIFY_DIR}/util.h $(TEST_VERIFIABLE_HDRS) $(GIT_VERSION_FILE)
@printf "Compiling %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) $(HIPCUFLAGS) -I. -c -o $@ $<"
$(HIPCC) $(HIPCUFLAGS) -I. -c -o $@ $<

${DST_DIR}/%$(NAME_SUFFIX).o: %.cu.cpp ${HIPIFY_DIR}/common.h $(TEST_VERIFIABLE_HDRS) $(GIT_VERSION_FILE)
${DST_DIR}/%$(NAME_SUFFIX).o: %.cu.cpp ${HIPIFY_DIR}/common.h ${HIPIFY_DIR}/util.h $(TEST_VERIFIABLE_HDRS) $(GIT_VERSION_FILE)
@printf "Compiling %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) $(HIPCUFLAGS) -I. -c -o $@ $<"
Expand All @@ -187,13 +187,13 @@ ${DST_DIR}/timer.o: timer.cc timer.h
$(CXX) $(CXXFLAGS) -o $@ -c $<

ifeq ($(DSO), 1)
${DST_DIR}/%_perf$(NAME_SUFFIX): ${DST_DIR}/%.o ${DST_DIR}/common$(NAME_SUFFIX).o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_LIBS) $(DST_DIR)/src/git_version.cpp
${DST_DIR}/%_perf$(NAME_SUFFIX): ${DST_DIR}/%.o ${DST_DIR}/common$(NAME_SUFFIX).o ${DST_DIR}/util$(NAME_SUFFIX).o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_LIBS) $(DST_DIR)/src/git_version.cpp
@printf "Linking %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) -o $@ $^ $(HIPLDFLAGS)"
$(HIPCC) -o $@ $^ $(HIPLDFLAGS) -L$(TEST_VERIFIABLE_BUILDDIR) -lverifiable -Xlinker "--enable-new-dtags" -Xlinker "-rpath,\$$ORIGIN:\$$ORIGIN/verifiable"
else
${DST_DIR}/%_perf$(NAME_SUFFIX):${DST_DIR}/%.o ${DST_DIR}/common$(NAME_SUFFIX).o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS) $(DST_DIR)/src/git_version.cpp
${DST_DIR}/%_perf$(NAME_SUFFIX):${DST_DIR}/%.o ${DST_DIR}/common$(NAME_SUFFIX).o ${DST_DIR}/util$(NAME_SUFFIX).o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS) $(DST_DIR)/src/git_version.cpp
@printf "Linking %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) -o $@ $^ $(HIPLDFLAGS)"
Expand Down
14 changes: 12 additions & 2 deletions src/all_gather.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,14 @@ void AllGatherGetBw(size_t count, int typesize, double sec, double* algBw, doubl
*busBw = baseBw * factor;
}

testResult_t AllGatherRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream, void* bias = nullptr) {
NCCLCHECK(ncclAllGather(sendbuff, recvbuff, count, type, comm, stream));
// Issues one all-gather operation on `stream` for the given communicator.
//
// sendbuff / recvbuff     : base pointers of the send and receive buffers.
// sendoffset / recvoffset : offsets added to the base pointers; they are applied
//                           through char* arithmetic, i.e. in bytes.
// count, type, comm, stream are forwarded unchanged to ncclAllGather.
// deviceImpl              : only the value 0 is implemented; any other value
//                           yields testNotImplemented without touching the buffers.
// op, root and bias are accepted but not used by this function.
//
// Returns testSuccess once the ncclAllGather call has been issued.
// NOTE(review): NCCLCHECK is defined elsewhere; presumably it returns early with
// an error result when the NCCL call fails - confirm against its definition.
testResult_t AllGatherRunColl(void* sendbuff, size_t sendoffset,void* recvbuff, size_t recvoffset, size_t count, ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream, int deviceImpl, void* bias = nullptr) {
  // Guard clause: reject every implementation selector other than 0 up front.
  if (deviceImpl != 0) {
    return testNotImplemented;
  }

  // Shift both buffers by their offsets before handing them to NCCL.
  char* const sendPtr = static_cast<char*>(sendbuff) + sendoffset;
  char* const recvPtr = static_cast<char*>(recvbuff) + recvoffset;
  NCCLCHECK(ncclAllGather(sendPtr, recvPtr, count, type, comm, stream));

  return testSuccess;
}

Expand Down Expand Up @@ -97,3 +103,7 @@ struct testEngine ncclTestEngine = {
AllGatherGetBuffSize,
AllGatherRunTest
};
// struct testEngine allGatherEngine = {
// .getBuffSize = AllGatherGetBuffSize,
// .runTest = AllGatherRunTest
// };
Loading