Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
7130fa6
Add MPI_IBM build option
AddyLaddy Oct 25, 2021
de3ddbe
Add option to statically link cudart
AddyLaddy Nov 10, 2021
8274cb4
Merge pull request #96 from NVIDIA/nersc-linkage-fix
AddyLaddy May 26, 2022
51af557
Resync with NCCL 2.13
jbachan Aug 19, 2022
bc5f7cf
Changed top-level Makefile behavior so that BUILDDIR is interpreted
jbachan Jul 7, 2022
a0a1491
Display N/A for error count in AlltoAll in-place test
AddyLaddy Sep 6, 2022
afa4c56
Fix an issue with the last commit when data checking is disabled
AddyLaddy Sep 7, 2022
749573f
Fix preprocessor version check for ncclGetLastError()
AddyLaddy Sep 7, 2022
d313d20
Update NCCL tests
sjeaugey Sep 20, 2022
d22281c
Allow more precise measurements of single operation (#20)
wenkaidu Oct 13, 2022
3ae371c
Merge remote-tracking branch 'nccl-tests/master' into topic/v2.13.4-sync
edgargabriel Oct 14, 2022
641e93e
make rccl-test compile again.
edgargabriel Oct 17, 2022
9a89c30
Allow more precise measurements of single operation (#20)
wenkaidu Oct 13, 2022
4d7cd87
Merge branch 'develop' into topic/v2.13.4-sync
edgargabriel Oct 21, 2022
84e8be8
Merge pull request #21 from ROCmSoftwarePlatform/topic/v2.13.4-sync
edgargabriel Oct 21, 2022
8a754f1
fix a messing endif statement
edgargabriel Oct 25, 2022
fb0d339
Merge pull request #22 from edgargabriel/pr/compile-fix
edgargabriel Oct 25, 2022
9c97467
add the rccl/lib directory to the link path
edgargabriel Oct 31, 2022
a80fbba
Merge pull request #23 from edgargabriel/pr/link-fix
edgargabriel Oct 31, 2022
377b28e
make cmake stage also pass in CI
edgargabriel Oct 31, 2022
a8c920c
Merge pull request #24 from edgargabriel/pr/cmake-fix
edgargabriel Nov 1, 2022
9d3a53d
added std::max to avoid buffer overflow in printing (#25)
akolliasAMD Nov 1, 2022
08f8dc6
Adding the script to build and run the rccl-tests for PTS
PedramAlizadeh Nov 24, 2022
e9f5be1
fix algorithm assigning values in testsuite
edgargabriel Nov 30, 2022
b3f0716
Merge pull request #27 from edgargabriel/topic/half_prod_fix
edgargabriel Dec 1, 2022
2b2f23f
auto-detect and enable MPI
edgargabriel Feb 14, 2023
453e729
Merge pull request #28 from edgargabriel/topic/mpi-auto-compile
edgargabriel Feb 23, 2023
5275aa5
Adding -pthread flag for linking issues into src/Makefile (#30)
PedramAlizadeh Feb 25, 2023
bdf58b1
revamp cmake MPI detection
edgargabriel Mar 2, 2023
0fc25d5
Merge pull request #32 from edgargabriel/topic/mpi-auto-compile
edgargabriel Mar 3, 2023
83a89cb
Merge remote-tracking branch 'origin/develop' into create_scripts_PTS
wenkaidu May 23, 2023
948a664
Fixing hipcc location for CI (#47) (#48)
gilbertlee-amd Sep 28, 2023
491c33c
Fixing another hipcc location
gilbertlee-amd Oct 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .jenkins/common.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def runCompileCommand(platform, project, jobName)
${auxiliary.exitIfNotSuccess()}
cd ${project.paths.project_build_prefix}
cmake \
-DCMAKE_CXX_COMPILER=/opt/rocm/hip/bin/hipcc \
-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-S . -B build
make -C build -j\$(nproc)
${auxiliary.exitIfNotSuccess()}
Expand All @@ -30,9 +30,9 @@ def runTestCommand (platform, project)
def command = """#!/usr/bin/env bash
set -x
cd ${project.paths.project_build_prefix}
python3 -m pip install --upgrade pytest
python3 -m pytest --version
python3 -m pytest -k "not MPI and not host and not fine" --verbose --junitxml=./testreport.xml
python3 -m pip install --upgrade pytest
python3 -m pytest --version
python3 -m pytest -k "not MPI and not host and not fine" --verbose --junitxml=./testreport.xml
"""

platform.runCommand(this, command)
Expand Down
71 changes: 62 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
# ########################################################################
# Copyright 2022 Advanced Micro Devices, Inc.
# ########################################################################
#Adding pthread flag for linking
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# check_mpi(<mpi_compiler> <mpi_lib_a> <mpi_lib_so>
#           <mpi_bin_dir> <mpi_base_lib_dir> <mpi_inc_dir>)
#
# Probes a single candidate MPI installation and, if it is complete, enables
# MPI support for everything defined afterwards in this directory.
#
#   mpi_compiler      name of the MPI C++ compiler wrapper to look for
#   mpi_lib_a         file name of the static MPI library
#   mpi_lib_so        file name of the shared MPI library (listed before the
#                     static one in the search)
#   mpi_bin_dir       the only directory searched for the compiler wrapper
#   mpi_base_lib_dir  directory searched for the library, together with its
#                     lib, lib64 and lib/x86_64-linux-gnu subdirectories
#   mpi_inc_dir       the only directory searched for mpi.h
#
# Results are reported through MPI_MPICXX, MPI_H and MPI_LIB; callers test
# MPI_MPICXX to decide whether another candidate has to be probed. When the
# wrapper is found but mpi.h or the library is missing, all three variables
# are reset to their *-NOTFOUND value. On success MPI_SUPPORT is defined and
# the include directory and the library are added at directory scope.
macro(check_mpi mpi_compiler mpi_lib_a mpi_lib_so mpi_bin_dir mpi_base_lib_dir mpi_inc_dir)
  # find_program()/find_file() do not search again once their result variable
  # holds a found value. Without clearing it, a result cached by an earlier
  # configure run (for example before MPI_PATH was changed) would be reused
  # even though the directories passed to this call are different.
  unset(MPI_MPICXX CACHE)
  unset(MPI_MPICXX)
  find_program(MPI_MPICXX ${mpi_compiler} PATHS ${mpi_bin_dir} NO_DEFAULT_PATH)
  if (MPI_MPICXX)
    message ("-- ${mpi_compiler} found @ ${MPI_MPICXX}")

    # Same reasoning as above: the header and the library must come from the
    # candidate being probed right now, not from a previously cached one.
    unset(MPI_H CACHE)
    unset(MPI_H)
    find_file(MPI_H mpi.h PATHS ${mpi_inc_dir} NO_DEFAULT_PATH)
    message ("-- mpi.h is in ${MPI_H}")

    unset(MPI_LIB CACHE)
    unset(MPI_LIB)
    find_file(MPI_LIB NAMES ${mpi_lib_so} ${mpi_lib_a} PATHS ${mpi_base_lib_dir} PATH_SUFFIXES lib lib64 lib/x86_64-linux-gnu NO_DEFAULT_PATH)
    message ("-- libmpi is ${MPI_LIB}")

    if (NOT MPI_H OR NOT MPI_LIB)
      # Incomplete installation: report "not found" so the caller moves on to
      # the next candidate.
      set (MPI_MPICXX "MPI_MPICXX-NOTFOUND")
      set (MPI_H "MPI_H-NOTFOUND")
      set (MPI_LIB "MPI_LIB-NOTFOUND")
    else()
      add_definitions(-DMPI_SUPPORT)
      include_directories(${mpi_inc_dir})
      link_libraries(${MPI_LIB})
    endif()
  else()
    message ("-- ${mpi_compiler} not found")
  endif()
endmacro()

cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)

Expand Down Expand Up @@ -30,22 +53,52 @@ include(ROCMCheckTargetIds)
include(ROCMClients)

# Build variables
option(USE_MPI "Build RCCL-tests with MPI support. Requires the MPI path to be set.")
set(MPI_PATH "" CACHE PATH "Path to MPI installation")
# NO_MPI: boolean switch; when ON the MPI detection is skipped.
option(NO_MPI "Build RCCL-tests without MPI support.")
# MPI_PATH carries a directory, not a boolean, so it is declared as a PATH
# cache entry (option() is only meant for ON/OFF values). It defaults to the
# empty string, which still evaluates to false in if(MPI_PATH).
set(MPI_PATH "" CACHE PATH "Use MPI in the specified directory.")
## Get default GPU targets using rocm_check_target_ids
rocm_check_target_ids(
DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030"
)
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for these tests to target.")

# Find the MPI package if we're using MPI
if (USE_MPI)
if(NOT MPI_PATH STREQUAL "")
set(MPI_HOME "${MPI_PATH}")
if (NOT NO_MPI)
# Check MPI_PATH first: the user explicitly requested this directory.
if (MPI_PATH)
set(mpi_spec_bin_dir "${MPI_PATH}/bin")
set(mpi_spec_inc_dir "${MPI_PATH}/include")
check_mpi(mpicxx libmpi.a libmpi.so ${mpi_spec_bin_dir} ${MPI_PATH} ${mpi_spec_inc_dir})
if (NOT MPI_MPICXX)
# Since the user explicitly requested this directory, abort if MPI was not found there.
MESSAGE(FATAL_ERROR "Could not find MPI in ${MPI_PATH}")
endif()
endif()

# Check for MPICH Ubuntu installation
if (NOT MPI_MPICXX)
check_mpi(mpicxx.mpich libmpich.a libmpich.so /usr/bin /usr /usr/include/x86_64-linux-gnu/mpich)
endif()

# Check for Open MPI Ubuntu installation
if (NOT MPI_MPICXX)
check_mpi(mpicxx.openmpi libmpi.a libmpi.so /usr/bin /usr/lib/x86_64-linux-gnu/openmpi /usr/lib/x86_64-linux-gnu/openmpi/include)
endif()

# Check for MPICH RHEL installation
if (NOT MPI_MPICXX)
check_mpi(mpicxx libmpich.a libmpich.so /usr/lib64/mpich/bin /usr/lib64/mpich /usr/include/mpich-x86_64)
endif()

# Check for Open MPI RHEL installation
# (the header directory is spelled "openmpi-x86_64", mirroring the
# "mpich-x86_64" directory used for the MPICH RHEL probe)
if (NOT MPI_MPICXX)
  check_mpi(mpicxx libmpi.a libmpi.so /usr/lib64/openmpi/bin /usr/lib64/openmpi /usr/include/openmpi-x86_64)
endif()
find_package(MPI REQUIRED MODULE)
add_definitions(-DOMPI_SKIP_MPICXX -DMPI_SUPPORT)

if (NOT MPI_MPICXX)
message ("-- no MPI library found")
endif()
else()
message ("-- MPI support explicitely disabled")
endif()

set(ROCM_USE_DEV_COMPONENT OFF) # This repo doesn't have a dev component
Expand All @@ -55,7 +108,7 @@ add_subdirectory(src)

# Create ROCm standard packages
rocm_create_package(
NAME rccl-separate-tests
NAME rccl-tests
DESCRIPTION "Tests for the ROCm Communication Collectives Library"
MAINTAINER "RCCL Maintainer <[email protected]>"
)
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# See LICENCE.txt for license information
#

BUILDDIR ?= build
override BUILDDIR := $(abspath $(BUILDDIR))

.PHONY : all clean

default : src.build
Expand All @@ -14,7 +17,7 @@ all: ${TARGETS:%=%.build}
clean: ${TARGETS:%=%.clean}

%.build:
${MAKE} -C $* build
${MAKE} -C $* build BUILDDIR=${BUILDDIR}

%.clean:
${MAKE} -C $* clean
${MAKE} -C $* clean BUILDDIR=${BUILDDIR}
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ RCCL tests rely on MPI to work on multiple processes, hence multiple nodes. If y
$ make MPI=1 MPI_HOME=/path/to/mpi HIP_HOME=/path/to/hip RCCL_HOME=/path/to/rccl
```

RCCL tests can also be built using cmake. A typical sequence will be:

```shell
$ mkdir build
$ cd build
$ CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_PREFIX_PATH=/path/to/rccl ..
$ make
```

When using the cmake build procedure, please make sure that RCCL has also been built using cmake (i.e. not using the install.sh script), since cmake will check
for cmake target and config files that are created during the RCCL build.

Using the cmake method also has the advantage that the build automatically checks for MPI installations, i.e. it is not necessary to explicitly request
an MPI build. A user can request a particular MPI library by setting the MPI_PATH variable. MPI support can be explicitly disabled by adding the -DNO_MPI=1
flag to the cmake command line.


## Usage

RCCL tests can run on multiple processes, multiple threads, and multiple HIP devices per thread. The number of process is managed by MPI and is therefore not passed to the tests as argument. The total number of ranks (=HIP devices) will be equal to (number of processes)\*(number of threads)\*(number of GPUs per thread).
Expand Down
73 changes: 73 additions & 0 deletions scripts/rccl_tests_build_run_PTS.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash
# Builds rccl-tests from a fresh clone (optionally with MPI support) and then
# runs every *_perf benchmark on $ngpus GPUs.
#
# The script is interactive: it asks whether the tests are meant for multiple
# nodes and only builds with MPI=1 when the answer is "y".
#
# WARNING: the clone is created in the parent of the current working directory
# and any existing "rccl-tests" directory there is deleted first.
#
# Exit status is non-zero when MPI was requested but mpicc is not on PATH, or
# when cloning, building or entering the build directory fails.
echo "This script is for building and running the rccl-tests as well as Unit tests"
echo "Please ensure that the following environment variables are pointing to correct directions!"

########## Set the appropriate directories ##########
export _HIP_HOME=/opt/rocm/hip
export _MPI_HOME=/path/to/mpi/build
export _RCCL_HOME=/opt/rocm/rccl/build

export LD_LIBRARY_PATH=$_MPI_HOME/lib:$LD_LIBRARY_PATH
export PATH=$_MPI_HOME/bin/:$PATH
echo "HIP_HOME=$_HIP_HOME"
echo "MPI_HOME=$_MPI_HOME"
echo "RCCL_HOME=$_RCCL_HOME"

echo "########## Print the system information ##########"
sudo dmidecode | grep "Product Name"
rocm-smi --showtopo

########## Set the number of GPUs ##########
ngpus=8
set -x
########## Build the RCCL-tests benchmark ##########
echo "Do you want to run tests on multiple nodes?"
read -r -p '(y/n) ' RESPONSE

# Arguments shared by both build flavours; the MPI ones are prepended below.
make_args=(HIP_HOME="$_HIP_HOME" NCCL_HOME="$_RCCL_HOME")

if [ "$RESPONSE" = "y" ]; then
    ########## MPI Installation check ##########
    MPI_Installed=$(command -v mpicc)

    if [ -z "$MPI_Installed" ]; then
        echo "MPI is not installed! Install MPI and set the PATH environment variable to include PATH=/path/to/MPI-install/bin/:$PATH";
        # Signal the failure to the caller instead of exiting with status 0.
        exit 1
    fi
    make_args=(MPI=1 MPI_HOME="$_MPI_HOME" "${make_args[@]}")
fi

# Stop at the first failing step: continuing would run the benchmarks from the
# wrong directory or against binaries that were never built.
cd .. || exit 1
rm -rf rccl-tests
git clone https://github.com/ROCmSoftwarePlatform/rccl-tests.git || exit 1
cd rccl-tests || exit 1
make "${make_args[@]}" || exit 1

########## Run the RCCL-tests benchmark ##########
cd build || exit 1

# run_perf <label> <binary>
# Prints the label, then runs ./<binary> over message sizes 8 B .. 1 GB
# (factor 2) on $ngpus GPUs. A failing benchmark does not stop the others.
run_perf() {
    echo "$1"
    "./$2" -b 8 -e 1G -f 2 -g "$ngpus"
}

run_perf "Allreduce Test"      all_reduce_perf
run_perf "Broadcast Test"      broadcast_perf
run_perf "Reduce Test"         reduce_perf
run_perf "Reduce_scatter Test" reduce_scatter_perf
run_perf "Allgather Test"      all_gather_perf
run_perf "Send_Recv Test"      sendrecv_perf
run_perf "Scatter Test"        scatter_perf
run_perf "Gather Test"         gather_perf
run_perf "Alltoall Test"       alltoall_perf
run_perf "Alltoallv Test"      alltoallv_perf



4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# ########################################################################

# Compile common object library
set_property(SOURCE common.cu PROPERTY LANGUAGE CXX)
add_library(rccl_common OBJECT common.cu)
set_property(SOURCE common.cu timer.cc ../verifiable/verifiable.cu PROPERTY LANGUAGE CXX)
add_library(rccl_common OBJECT common.cu timer.cc ../verifiable/verifiable.cu)
if(USE_MPI)
target_link_libraries(rccl_common roc::rccl MPI::MPI_CXX)
else()
Expand Down
29 changes: 18 additions & 11 deletions src/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#
# Copyright (c) 2015-2021, NVIDIA CORPORATION. All rights reserved.
# Modifications are Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
# Modifications are Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# See LICENSE.txt for license information
#
Expand All @@ -12,22 +12,21 @@ VERBOSE ?= 0
DEBUG ?= 0
NCCL_HOME ?= ""

HIPCC = $(ROCM_PATH)/hip/bin/hipcc
HIPCC = $(ROCM_PATH)/bin/hipcc
CXX = $(HIPCC)

HIPCUFLAGS := -std=c++14
LDFLAGS :=
HIPLDFLAGS :=

ifneq ($(NCCL_HOME), "")
HIPCUFLAGS += -I$(NCCL_HOME) -I$(NCCL_HOME)/rccl/include
HIPLDFLAGS += -Wl,-rpath,$(NCCL_HOME) -L$(NCCL_HOME)
HIPCUFLAGS += -I$(NCCL_HOME)/ -I$(NCCL_HOME)/include
HIPLDFLAGS += -Wl,-rpath,$(NCCL_HOME) -L$(NCCL_HOME) -L$(NCCL_HOME)/lib
endif
HIPCUFLAGS += -I$(ROCM_PATH)/include
HIPCUFLAGS += -I$(ROCM_PATH)/include/rccl
HIPCUFLAGS += -I$(ROCM_PATH)/hip/include/hip
HIPCUFLAGS += -I$(ROCM_PATH)/include/hip
LDFLAGS += -L$(ROCM_PATH)/lib -lhsa-runtime64 -lrt
HIPLDFLAGS += $(CUSTOM_RCCL_LIB) -L$(ROCM_PATH)/lib -lhsa-runtime64 -lrt
HIPLDFLAGS += $(CUSTOM_RCCL_LIB) -L$(ROCM_PATH)/lib -lhsa-runtime64 -lrt -pthread

ifeq ($(DEBUG), 0)
HIPCUFLAGS += -O3
Expand Down Expand Up @@ -65,15 +64,23 @@ build: ${BIN_FILES}
clean:
rm -rf ${DST_DIR}

${DST_DIR}/%.o: %.cu common.h
TEST_VERIFIABLE_SRCDIR := ../verifiable
TEST_VERIFIABLE_BUILDDIR := $(BUILDDIR)/verifiable
include ../verifiable/verifiable.mk

${DST_DIR}/%.o: %.cu common.h $(TEST_VERIFIABLE_HDRS)
@printf "Compiling %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) -o $@ $(HIPCUFLAGS) -c $<"
$(HIPCC) -o $@ $(HIPCUFLAGS) -c $<

${DST_DIR}/%_perf:${DST_DIR}/%.o ${DST_DIR}/common.o
${DST_DIR}/timer.o: timer.cc timer.h
@printf "Compiling %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
$(CXX) $(CXXFLAGS) -o $@ -c timer.cc

${DST_DIR}/%_perf:${DST_DIR}/%.o ${DST_DIR}/common.o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS)
@printf "Linking %-35s > %s\n" $< $@
@mkdir -p ${DST_DIR}
echo "$(HIPCC) -o $@ $(HIPCUFLAGS) $^ ${HIPLDFLAGS}"
$(HIPCC) -o $@ $(HIPCUFLAGS) $^ ${HIPLDFLAGS}

36 changes: 12 additions & 24 deletions src/all_gather.cu
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
/*************************************************************************
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/

#include <hip/hip_runtime.h>
#include "common.h"

void print_header() {
PRINT("# %10s %12s %8s out-of-place in-place \n", "", "", "");
PRINT("# %10s %12s %8s %7s %6s %6s %5s %7s %6s %6s %5s\n", "size", "count", "type",
"time", "algbw", "busbw", "error", "time", "algbw", "busbw", "error");
PRINT("# %10s %12s %8s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "",
"(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "");
}

void print_line_header (size_t size, size_t count, const char *typeName, const char *opName, int root) {
PRINT("%12li %12li %8s", size, count, typeName);
}
#define ALIGN 4

void AllGatherGetCollByteCount(size_t *sendcount, size_t *recvcount, size_t *paramcount, size_t *sendInplaceOffset, size_t *recvInplaceOffset, size_t count, int nranks) {
*sendcount = count/nranks;
*recvcount = (count/nranks)*nranks;
*sendInplaceOffset = count/nranks;
size_t base = (count/(ALIGN*nranks))*ALIGN;
*sendcount = base;
*recvcount = base*nranks;
*sendInplaceOffset = base;
*recvInplaceOffset = 0;
*paramcount = *sendcount;
*paramcount = base;
}

testResult_t AllGatherInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, int rep, int in_place) {
Expand All @@ -35,18 +26,15 @@ testResult_t AllGatherInitData(struct threadArgs* args, ncclDataType_t type, ncc

int k=0;
for (int i=0; i<args->nGpus; i++) {
int gpuid = args->localRank*args->nThreads*args->nGpus + args->thread*args->nGpus + i;
if (args->enable_multiranks)
gpuid = gpuid % args->localNumDevices;
HIPCHECK(hipSetDevice(gpuid));
HIPCHECK(hipSetDevice(args->gpus[i]));

for (int l=0; l<args->nRanks; l++) {
int rank = ((args->proc*args->nThreads + args->thread)*args->nGpus*args->nRanks + i*args->nRanks + l);
HIPCHECK(hipMemset(args->recvbuffs[k], 0, args->expectedBytes));
void* data = in_place ? ((char*)args->recvbuffs[k])+rank*args->sendBytes : args->sendbuffs[k];
TESTCHECK(InitData(data, sendcount, type, rep, rank));
TESTCHECK(InitData(data, sendcount, 0, type, ncclSum, 33*rep + rank, 1, 0));
for (int j=0; j<nranks; j++) {
TESTCHECK(InitData(((char*)args->expected[k])+args->sendBytes*j, sendcount, type, rep, j));
TESTCHECK(InitData(((char*)args->expected[k])+args->sendBytes*j, sendcount, 0, type, ncclSum, 33*rep + j, 1, 0));
}
k++;
}
Expand Down Expand Up @@ -98,7 +86,7 @@ testResult_t AllGatherRunTest(struct threadArgs* args, int root, ncclDataType_t
}

for (int i=0; i<type_count; i++) {
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "", -1));
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "none", -1));
}
return testSuccess;
}
Expand Down
Loading