@@ -11,10 +11,11 @@ HPCX_HOME ?= /opt/hpcx
11
11
# CUDA version as "<major>.<minor>", unless already set by the caller.
# Pipeline: take the line of `nvcc --version` containing 'release', print its
# 6th whitespace-separated field, drop that field's first character, and keep
# the first two dot-separated components.
# NOTE(review): `?=` defines a recursively expanded variable, so the shell
# pipeline is re-run each time $(CUDA_VER) is expanded.
CUDA_VER ?= $(shell nvcc --version | grep 'release' | awk '{print $$6}' | cut -c2- | cut -d '.' -f1-2)
12
12
# rocBLAS branch name "rocm-<a>.<b>.<c>", unless already set by the caller.
# Pipeline: take the `dpkg -l` line(s) matching 'rocm-dev ', print the 3rd
# column, and keep its first three dot-separated components.
ROCBLAS_BRANCH ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)
13
13
14
# Targets listed under .PHONY are treated as commands rather than files: their
# recipes run whenever requested, even if a file of the same name exists.
# The added line extends the list with cuda_with_msccl and cuda_msccl.
- .PHONY : all cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed
14
+ .PHONY : all cuda_with_msccl cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_msccl rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed
15
15
16
16
# Aggregate targets. These rules have no recipe of their own; they only pull in
# their prerequisites.
# `all` builds the cuda and rocm groups. It does not include cuda_msccl or cpu.
17
17
all : cuda rocm
18
# Everything in `cuda` plus the MSCCL build (cuda_msccl), which is not a
# prerequisite of `cuda` or `all`.
+ cuda_with_msccl : cuda cuda_msccl
18
19
# CUDA benchmark group.
cuda : common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed
19
20
# ROCm benchmark group.
rocm : common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed
20
21
# CPU-only group.
# NOTE(review): neither `cpu` nor `cpu_perftest` appears in the .PHONY list
# visible in this file section - confirm whether they should be declared phony.
cpu : common cpu_perftest
@@ -188,3 +189,26 @@ megatron_deepspeed:
188
189
cd Megatron && \
189
190
python -m pip install -r requirements.txt && \
190
191
python -m pip install DeepSpeed
192
+
193
# Build MSCCL for CUDA.
#
# Builds up to three components from the ./msccl tree and installs their build
# output under $(SB_MICRO_PATH):
#   msccl-executor-nccl -> $(SB_MICRO_PATH)/lib/msccl-executor-nccl
#   msccl-scheduler     -> $(SB_MICRO_PATH)/lib/msccl-scheduler
#   msccl-tests-nccl    -> $(SB_MICRO_PATH)/bin/msccl-tests-nccl
#
# Each component is built only when its Makefile is present. The conditionals
# are evaluated when this Makefile is parsed, so the sources must already be
# checked out at that point.
#
# There must be no whitespace before the closing parenthesis of each `ifneq`:
# GNU make keeps trailing blanks in the second argument, so
# `ifneq (,$(wildcard path) )` compares "" with " " when the file is missing
# and the guard is always true.
#
# Sub-makes are invoked through $(MAKE) so that `make -n` and jobserver
# handling work through the recursion.
#
# The trailing `cd ../../..` on the build lines is retained from the original.
# It only affects the shell of that recipe line unless .ONESHELL is in effect.
cuda_msccl : sb_micro_path
# Executor (NCCL-compatible library); the scheduler and tests below build against it.
ifneq (,$(wildcard msccl/executor/msccl-executor-nccl/Makefile))
	cd ./msccl/executor/msccl-executor-nccl && \
	$(MAKE) -j4 src.build && \
	cd ../../..
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-executor-nccl && \
	cp -r -v ./msccl/executor/msccl-executor-nccl/build/* $(SB_MICRO_PATH)/lib/msccl-executor-nccl/
endif
# Scheduler: compiled with nvcc, pointed at the installed executor (BIN_HOME)
# and at the executor sources (SRC_HOME).
ifneq (,$(wildcard msccl/scheduler/msccl-scheduler/Makefile))
	cd ./msccl/scheduler/msccl-scheduler && \
	CXX=nvcc BIN_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl SRC_HOME=../../../msccl/executor/msccl-executor-nccl $(MAKE) -j4 && \
	cd ../../..
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-scheduler && \
	cp -r -v ./msccl/scheduler/msccl-scheduler/build/* $(SB_MICRO_PATH)/lib/msccl-scheduler/
endif
# Tests: built with MPI enabled, using the installed executor as NCCL_HOME.
ifneq (,$(wildcard msccl/tests/msccl-tests-nccl/Makefile))
	cd ./msccl/tests/msccl-tests-nccl && \
	$(MAKE) MPI=1 MPI_HOME=$(MPI_HOME) NCCL_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl -j4 && cd ../../..
	mkdir -p $(SB_MICRO_PATH)/bin/msccl-tests-nccl && \
	cp -r -v ./msccl/tests/msccl-tests-nccl/build/* $(SB_MICRO_PATH)/bin/msccl-tests-nccl/
endif
0 commit comments