-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathCMakeLists.txt
431 lines (392 loc) · 18.5 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
# Minimum CMake version this build script supports.
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

# Opt in to policy CMP0074 whenever the running CMake knows about it, so that
# *_ROOT variables are checked when packages are searched for.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()
# Route C, C++ and CUDA compilations through ccache when the tool can be
# found; otherwise only print a hint.
find_program(CCACHE_PROGRAM ccache)
if(NOT CCACHE_PROGRAM)
  message(STATUS "Could not find CCache. Consider installing CCache to speed up compilation.")
else()
  foreach(launcher_lang C CXX CUDA)
    set(CMAKE_${launcher_lang}_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
  endforeach()
endif()
# Top-level project: C++ only, compiled as C++14 with the standard required
# and compiler extensions switched off.
project(horovod CXX)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Configure path to modules (for find_package): the project's own module
# directories are added after whatever the caller already provided.
list(APPEND CMAKE_MODULE_PATH
     "${PROJECT_SOURCE_DIR}/cmake/Modules/"
     "${PROJECT_SOURCE_DIR}/cmake/upstream/")

# Load the project's helper script, then call create_metadata(), which is not
# defined in this file (presumably provided by Utilities.cmake).
include(cmake/Utilities.cmake)
create_metadata()
# 3rd-parties
# Header search paths of the bundled libraries. The list is assembled first and
# handed to include_directories() in one call, in this order: HTTPRequest, the
# boost components below, lbfgs.
set(hvd_bundled_include_dirs "third_party/HTTPRequest/include")
foreach(boost_component
        assert config core detail iterator lockfree mpl
        parameter predef preprocessor static_assert type_traits utility)
  list(APPEND hvd_bundled_include_dirs "third_party/boost/${boost_component}/include")
endforeach()
list(APPEND hvd_bundled_include_dirs "third_party/lbfgs/include")
include_directories(${hvd_bundled_include_dirs})
unset(hvd_bundled_include_dirs)

# Predefined Eigen and Flatbuffers headers path, they could be replaced by
# tensorflow headers path.
set(EIGEN_INCLUDE_PATH "${PROJECT_SOURCE_DIR}/third_party/eigen")
set(FLATBUFFERS_INCLUDE_PATH "${PROJECT_SOURCE_DIR}/third_party/flatbuffers/include")
# Sources
# Core translation units, written relative to horovod/common and appended to
# SOURCES as absolute paths in the order listed.
foreach(core_source
        common.cc
        controller.cc
        fusion_buffer_manager.cc
        group_table.cc
        half.cc
        logging.cc
        message.cc
        operations.cc
        parameter_manager.cc
        process_set.cc
        response_cache.cc
        stall_inspector.cc
        thread_pool.cc
        timeline.cc
        tensor_queue.cc
        ops/collective_operations.cc
        ops/operation_manager.cc
        optim/bayesian_optimization.cc
        optim/gaussian_process.cc
        utils/env_parser.cc)
  list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/${core_source}")
endforeach()
# Default Macro
add_definitions(-DEIGEN_MPL2_ONLY=1)

# Remove platform default std: strip every -std=... from the C++ flags.
string(REGEX REPLACE "-std=[^ ]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Pickup ar from environmental variable if available
if(DEFINED ENV{AR})
  set(CMAKE_AR $ENV{AR})
endif()

# Add default project CXX flags
string(APPEND CMAKE_CXX_FLAGS " -pthread -fPIC -Wall -ftree-vectorize")

# RelWithDebInfo uses -O2, prefer performance over debug info in
# RelWithDebInfo build type: rewrite -O2 to -O3 for C, C++ and CUDA.
foreach(opt_lang C CXX CUDA)
  string(REPLACE "-O2" "-O3"
         CMAKE_${opt_lang}_FLAGS_RELWITHDEBINFO
         "${CMAKE_${opt_lang}_FLAGS_RELWITHDEBINFO}")
endforeach()

# Add architecture specific optimization flags; the list is passed as one
# argument to set_build_arch_flags(), which is defined outside this file.
set(ARCH_FLAGS "-mf16c" "-mavx" "-mfma")
set_build_arch_flags("${ARCH_FLAGS}")
# Specify Horovod exports
# The shared-library linker flags restrict what the libraries export:
#  - Darwin: -undefined dynamic_lookup plus the exported-symbols list in
#    horovod.exp; CMAKE_MACOSX_RPATH is switched on as well.
#  - everywhere else: the horovod.lds version script together with
#    -Bsymbolic-functions and -z relro / -z now.
# Both linker scripts sit next to this CMakeLists.txt, so they are addressed
# through PROJECT_SOURCE_DIR. CMAKE_SOURCE_DIR would point at the parent
# project's root whenever this project is pulled in with add_subdirectory(),
# and the scripts would not be found there.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -Wl,-exported_symbols_list,${PROJECT_SOURCE_DIR}/horovod.exp")
  set(CMAKE_MACOSX_RPATH TRUE)
else()
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--version-script=${PROJECT_SOURCE_DIR}/horovod.lds -Wl,-Bsymbolic-functions -Wl,-z,relro,-z,now")
endif()
# When the C++ compiler path ends in "icpx", export the settings for a SYCL
# build into the environment of this configure run: GPU type SYCL, GPU
# operations CCL, and HOROVOD_WITHOUT_GLOO=1.
if(CMAKE_CXX_COMPILER MATCHES "icpx$")
  set(ENV{HOROVOD_GPU} "SYCL")
  set(ENV{HOROVOD_GPU_OPERATIONS} "CCL")
  set(ENV{HOROVOD_WITHOUT_GLOO} "1")
endif()
# GPU Operations
# HOROVOD_GPU and HOROVOD_GPU_OPERATIONS are read from the environment. When
# the environment variable is unset or empty, set() receives no value and the
# normal CMake variable ends up unset, which is what the DEFINED test relies on.
set(HOROVOD_GPU $ENV{HOROVOD_GPU})
set(HOROVOD_GPU_OPERATIONS $ENV{HOROVOD_GPU_OPERATIONS})
if(DEFINED HOROVOD_GPU_OPERATIONS AND NOT "${HOROVOD_GPU_OPERATIONS}" MATCHES "^(MPI|NCCL|CCL)$")
  message(FATAL_ERROR "HOROVOD_GPU_OPERATIONS=${HOROVOD_GPU_OPERATIONS} is invalid, supported values are '', 'MPI', 'CCL' and 'NCCL'.")
endif()

# One call per collective with the list of libraries allowed for it.
# set_gpu_op() is defined outside this file; presumably it resolves each
# HOROVOD_GPU_* variable from the environment and validates it against the list.
set_gpu_op(HOROVOD_GPU_ALLREDUCE "MPI;NCCL;CCL;DDL")
set_gpu_op(HOROVOD_GPU_ALLGATHER "MPI;NCCL;CCL")
set_gpu_op(HOROVOD_GPU_BROADCAST "MPI;NCCL;CCL")
set_gpu_op(HOROVOD_GPU_ALLTOALL "MPI;NCCL;CCL")
set_gpu_op(HOROVOD_GPU_REDUCESCATTER "MPI;NCCL;CCL")

# Reduce every selected library to its first letter (kept in the same
# variable) and pass the value computed by convert_to_ascii_dec() to the
# compiler as -D<VAR>=<value>.
# The keywords must be upper-case IN ITEMS: foreach() matches them
# case-sensitively, so a lower-case "in" would itself be iterated over as if
# it were one of the variable names.
foreach(VAR IN ITEMS
        HOROVOD_GPU_ALLREDUCE
        HOROVOD_GPU_ALLGATHER
        HOROVOD_GPU_BROADCAST
        HOROVOD_GPU_ALLTOALL
        HOROVOD_GPU_REDUCESCATTER)
  if(DEFINED ${VAR})
    string(SUBSTRING ${${VAR}} 0 1 ${VAR})
    convert_to_ascii_dec(ASCII_DEC ${${VAR}})
    add_definitions(-D${VAR}=${ASCII_DEC})
  endif()
endforeach()
# PYTHON
# A PYTHON_EXECUTABLE supplied by the caller is used as is; otherwise a Python
# interpreter of at least version 3.6 is looked up and required.
if(PYTHON_EXECUTABLE)
  set(PY_EXE ${PYTHON_EXECUTABLE})
else()
  find_package(Python 3.6 COMPONENTS Interpreter REQUIRED)
  set(PY_EXE ${Python_EXECUTABLE})
endif()
message(STATUS "Using command ${PY_EXE}")
# MPI
# HOROVOD_WITHOUT_MPI=1 skips the MPI lookup altogether; HOROVOD_WITH_MPI=1
# turns the lookup into a REQUIRED one.
if(NOT "$ENV{HOROVOD_WITHOUT_MPI}" STREQUAL "1")
  if("$ENV{HOROVOD_WITH_MPI}" STREQUAL "1")
    set(MPI_REQUIRED "REQUIRED")
  else()
    set(MPI_REQUIRED "")
  endif()
  find_package(MPI ${MPI_REQUIRED})

  if(MPI_FOUND)
    set(HAVE_MPI TRUE)
    add_definitions(-DHAVE_MPI=1)
    include_directories(SYSTEM ${MPI_INCLUDE_PATH})
    list(APPEND LINKER_LIBS ${MPI_LIBRARIES})
    # MPI specific sources, relative to horovod/common.
    foreach(mpi_source
            mpi/mpi_context.cc
            mpi/mpi_controller.cc
            ops/mpi_operations.cc
            ops/adasum/adasum_mpi.cc
            ops/adasum_mpi_operations.cc)
      list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/${mpi_source}")
    endforeach()
  endif()
endif()
# oneCCL
# CCL_ROOT is copied from the environment; USE_CCL is switched on whenever
# CCL_ROOT is defined afterwards.
set(CCL_ROOT $ENV{CCL_ROOT})
if(DEFINED CCL_ROOT)
  set(USE_CCL TRUE)
endif()
# CUDA and ROCM
# The CUDA host compiler is the project's C++ compiler.
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")

# Default the CUDA runtime linkage to "Static" unless the caller already chose.
# Set to "Static" or "Shared", effective from CMake 3.17.
if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
  set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
endif()

# HOROVOD_CUDA_HOME, when exported, names the installation whose nvcc is used.
if(DEFINED ENV{HOROVOD_CUDA_HOME})
  set(CMAKE_CUDA_COMPILER "$ENV{HOROVOD_CUDA_HOME}/bin/nvcc")
endif()

include(CheckLanguage)
check_language(CUDA)

# Second attempt: if no CUDA compiler is known yet, try the nvcc inside the
# CUDA Toolkit's binary directory.
if(NOT CMAKE_CUDA_COMPILER)
  find_package(CUDAToolkit)
  if(CUDAToolkit_BIN_DIR)
    message("CUDA compiler was not found in $PATH, but searching again in CUDA Toolkit binary directory")
    # need to clear this from cache, else some versions of CMake go into an infinite loop
    unset(CMAKE_CUDA_COMPILER CACHE)
    set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc")
    check_language(CUDA)
  endif()
endif()

if(CMAKE_CUDA_COMPILER)
  # GNU compiler newer than 8.0 on ppc64le: add -std=c++11 to the CUDA flags.
  if((CMAKE_CXX_COMPILER_ID MATCHES GNU)
     AND (CMAKE_SYSTEM_PROCESSOR MATCHES ppc64le)
     AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0))
    string(APPEND CMAKE_CUDA_FLAGS " -std=c++11")
  endif()
  enable_language(CUDA)
endif()
# add_cuda(): switch CUDA support on for the calling directory.
#
# Requires the CUDA Toolkit, adds its include directories as SYSTEM includes,
# appends the CUDA and generic GPU operation sources (plus the MPI GPU
# operations when HAVE_MPI is set) to SOURCES, adds -DHAVE_CUDA=1 -DHAVE_GPU=1
# and sets HAVE_CUDA. Being a macro, all of this happens in the caller's
# scope. When the caller is not the top-level source directory,
# HAVE_SUB_PROJECT_CUDA is additionally set in the caller's parent scope.
macro(ADD_CUDA)
  find_package(CUDAToolkit REQUIRED)
  include_directories(SYSTEM ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

  set(HAVE_CUDA TRUE)
  add_definitions(-DHAVE_CUDA=1 -DHAVE_GPU=1)

  list(APPEND SOURCES
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/cuda_operations.cc"
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")
  if(HAVE_MPI)
    # CUDA + MPI
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_gpu_operations.cc")
  endif()

  if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(HAVE_SUB_PROJECT_CUDA TRUE PARENT_SCOPE)
  endif()
endmacro()
# add_sycl(): switch SYCL support on for the calling directory.
#
# Requires the IntelSYCL package and oneCCL (USE_CCL), puts the SYCL compile
# flags in front of CMAKE_CXX_FLAGS, adds -DHAVE_SYCL=1 and -DHAVE_GPU=1, sets
# HAVE_SYCL and USE_GPU_CCL, and appends the SYCL and generic GPU operation
# sources to SOURCES. Being a macro, all of this happens in the caller's scope.
macro(ADD_SYCL)
  find_package(IntelSYCL REQUIRED)
  if(IntelSYCL_FOUND)
    set(HAVE_SYCL TRUE)
    add_definitions(-DHAVE_SYCL=1)
  endif()

  # The SYCL backend cannot be built without oneCCL.
  if(NOT USE_CCL)
    message(FATAL_ERROR "oneCCL is not installed.")
  endif()

  set(SYCL_COMPILE_FLAGS "-fsycl -fno-finite-math-only -fstack-protector")
  string(PREPEND CMAKE_CXX_FLAGS "${SYCL_COMPILE_FLAGS} ")

  add_definitions(-DHAVE_GPU=1)
  set(USE_GPU_CCL TRUE)
  list(APPEND SOURCES
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/sycl_operations.cc"
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")
endmacro()
# Configure a GPU backend as soon as any GPU collective has been selected.
# HOROVOD_GPU chooses it: CUDA (also used when HOROVOD_GPU is undefined), ROCM
# or SYCL; every other value aborts the configuration.
if(DEFINED HOROVOD_GPU_ALLREDUCE
   OR DEFINED HOROVOD_GPU_ALLGATHER
   OR DEFINED HOROVOD_GPU_BROADCAST
   OR DEFINED HOROVOD_GPU_ALLTOALL
   OR DEFINED HOROVOD_GPU_REDUCESCATTER)
  if(NOT DEFINED HOROVOD_GPU OR HOROVOD_GPU STREQUAL "CUDA")
    add_cuda()
  elseif(HOROVOD_GPU STREQUAL "ROCM")
    find_package(ROCM REQUIRED)
    include_directories(SYSTEM ${ROCM_INCLUDE_DIRS})
    list(APPEND LINKER_LIBS ${ROCM_LIBRARIES})
    # ROCm compile flags go in front of the existing C++ flags.
    string(PREPEND CMAKE_CXX_FLAGS "${ROCM_COMPILE_FLAGS} ")
    list(APPEND SOURCES
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/hip_operations.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")
    if(HAVE_MPI)
      list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_gpu_operations.cc")
    endif()
    add_definitions(-DHAVE_ROCM=1 -DHAVE_GPU=1)
    set(HAVE_ROCM TRUE)
  elseif(HOROVOD_GPU STREQUAL "SYCL")
    add_sycl()
  else()
    message(FATAL_ERROR "Unknown HOROVOD_GPU type: ${HOROVOD_GPU}")
  endif()
endif()
# NCCL
# Entered when at least one GPU collective carries the one-letter code "N"
# (the GPU-operations section above keeps only the first letter of the selected
# library name). Adds the NCCL operation sources, -DHAVE_NCCL=1 and HAVE_NCCL.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N" OR HOROVOD_GPU_ALLGATHER STREQUAL "N" OR HOROVOD_GPU_BROADCAST STREQUAL "N" OR HOROVOD_GPU_ALLTOALL STREQUAL "N" OR HOROVOD_GPU_REDUCESCATTER STREQUAL "N")
if(HAVE_ROCM)
# ROCm builds look up rccl and link its imported target roc::rccl.
find_package(rccl REQUIRED)
include_directories(SYSTEM ${RCCL_INCLUDE_DIRS})
list(APPEND LINKER_LIBS roc::rccl)
else()
find_package(NCCL REQUIRED)
if (NCCL_MAJOR_VERSION LESS "2")
message(FATAL_ERROR "Horovod requires NCCL 2.0 or later version please upgrade.")
endif()
# Lower-case the requested CUDA runtime linkage for comparison and take the
# file name of the NCCL library; a file name containing "static" is treated as
# a static NCCL below.
string(TOLOWER "${CMAKE_CUDA_RUNTIME_LIBRARY}" lowercase_CMAKE_CUDA_RUNTIME_LIBRARY)
get_filename_component(NCCL_LIBRARY_FILE_NAME ${NCCL_LIBRARIES} NAME)
# Static NCCL together with a shared CUDA runtime only produces a warning; the
# check is limited to CMake 3.17 and newer.
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.17
AND lowercase_CMAKE_CUDA_RUNTIME_LIBRARY STREQUAL "shared"
AND NCCL_LIBRARY_FILE_NAME MATCHES "static")
message(WARNING "Linking NCCL statically, but linking CUDA runtime library dynamically. This combination is not supported with typical builds of NCCL.")
endif()
include_directories(SYSTEM ${NCCL_INCLUDE_DIRS})
list(APPEND LINKER_LIBS ${NCCL_LIBRARIES})
if(NCCL_LIBRARY_FILE_NAME MATCHES "static" AND lowercase_CMAKE_CUDA_RUNTIME_LIBRARY STREQUAL "static")
# ensure that weak symbols from NCCL's enhcompat.cc are properly overwritten by symbols from libcudart_static.a (https://github.com/horovod/horovod/pull/3846)
# cudart_static is appended after the NCCL libraries and wrapped in
# --whole-archive / --no-whole-archive; keep these three items together and in this order.
list(APPEND LINKER_LIBS -Wl,--whole-archive cudart_static -Wl,--no-whole-archive)
endif()
endif()
list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/nccl_operations.cc")
add_definitions(-DHAVE_NCCL=1)
set(HAVE_NCCL TRUE)
endif()
# DDL
# Deprecated backend, selected when the allreduce code is "D". Emits a
# deprecation notice, links libddl.so and libddl_pack.so from the CUDA Toolkit
# library directory and adds the DDL sources.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "D")
  message(DEPRECATION "DDL backend has been deprecated. Please, start using the NCCL backend by building Horovod with "
                      "'HOROVOD_GPU_OPERATIONS=NCCL'. Will be removed in v0.21.0.")

  foreach(ddl_library ddl ddl_pack)
    list(APPEND LINKER_LIBS "${CUDAToolkit_LIBRARY_DIR}/lib${ddl_library}.so")
  endforeach()
  list(APPEND SOURCES
       "${PROJECT_SOURCE_DIR}/horovod/common/mpi/ddl_mpi_context_manager.cc"
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/ddl_operations.cc")

  add_definitions(-DHAVE_DDL=1)
  set(HAVE_DDL TRUE)
endif()
# Reject an NCCL allreduce ("N") combined with any MPI-backed ("M") allgather,
# broadcast, alltoall or reducescatter, unless the environment variable
# HOROVOD_ALLOW_MIXED_GPU_IMPL is "1".
set(HOROVOD_ALLOW_MIXED_GPU_IMPL $ENV{HOROVOD_ALLOW_MIXED_GPU_IMPL})
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N"
   AND (HOROVOD_GPU_ALLGATHER STREQUAL "M"
        OR HOROVOD_GPU_BROADCAST STREQUAL "M"
        OR HOROVOD_GPU_ALLTOALL STREQUAL "M"
        OR HOROVOD_GPU_REDUCESCATTER STREQUAL "M")
   AND NOT HOROVOD_ALLOW_MIXED_GPU_IMPL STREQUAL "1")
  message(FATAL_ERROR "You should not mix NCCL and MPI GPU due to a possible deadlock.\n"
                      "If you are sure you want to mix them, set the "
                      "HOROVOD_ALLOW_MIXED_GPU_IMPL environment variable to '1'.")
endif()
# NVTX
# HOROVOD_WITHOUT_NVTX=1 skips the NVTX lookup; HOROVOD_WITH_NVTX=1 turns the
# lookup into a REQUIRED one.
if(NOT "$ENV{HOROVOD_WITHOUT_NVTX}" STREQUAL "1")
  if("$ENV{HOROVOD_WITH_NVTX}" STREQUAL "1")
    set(NVTX_REQUIRED "REQUIRED")
  else()
    set(NVTX_REQUIRED "")
  endif()
  find_package(NVTX ${NVTX_REQUIRED})

  if(NVTX_FOUND)
    set(HAVE_NVTX TRUE)
    add_definitions(-DHAVE_NVTX=1)
    include_directories(SYSTEM ${NVTX_INCLUDE_DIRS})
    list(APPEND LINKER_LIBS ${NVTX_LIBRARIES})
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/nvtx_op_range.cc")
  endif()
endif()
# Gloo
# Built from third_party/gloo unless HOROVOD_WITHOUT_GLOO=1.
if(NOT "$ENV{HOROVOD_WITHOUT_GLOO}" STREQUAL "1")
  # USE_MPI mirrors HAVE_MPI (presumably an option read by the Gloo sub-project).
  set(USE_MPI FALSE)
  if(HAVE_MPI)
    set(USE_MPI TRUE)
  endif()

  # On Darwin the cache entry USE_LIBUV is forced to ON.
  if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    set(USE_LIBUV ON CACHE BOOL "use libuv for gloo transport" FORCE)
  endif()

  # Default CMP0074 to NEW before the sub-project is configured.
  set(CMAKE_POLICY_DEFAULT_CMP0074 NEW)
  add_subdirectory(third_party/gloo)
  include_directories(third_party/gloo)
  target_compile_definitions(gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=1)

  # Gloo specific sources, relative to horovod/common.
  foreach(gloo_source
          gloo/gloo_context.cc
          gloo/gloo_controller.cc
          gloo/http_store.cc
          gloo/memory_store.cc
          ops/gloo_operations.cc)
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/${gloo_source}")
  endforeach()

  add_definitions(-DHAVE_GLOO=1)
  set(HAVE_GLOO TRUE)
endif()
# NCCL + MPI
# The Adasum GPU operations are added only when both MPI and NCCL are enabled.
if(HAVE_MPI AND HAVE_NCCL)
  list(APPEND SOURCES
       "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum_gpu_operations.cc")
endif()
# Default CPU operations backend, taken from the HOROVOD_CPU_OPERATIONS
# environment variable. A recognised value (MPI, CCL or GLOO) is checked
# against what has been enabled above and then compiled in as
# -DHOROVOD_CPU_OPERATIONS_DEFAULT=M|C|G. MLSL is rejected because it was
# removed in favour of oneCCL.
set(HOROVOD_CPU_OPERATIONS $ENV{HOROVOD_CPU_OPERATIONS})
if(DEFINED HOROVOD_CPU_OPERATIONS)
  message(STATUS "Set default CPU operation to " ${HOROVOD_CPU_OPERATIONS})
  if(HOROVOD_CPU_OPERATIONS STREQUAL "MPI")
    if(NOT HAVE_MPI)
      message(FATAL_ERROR "MPI is not installed, try changing HOROVOD_CPU_OPERATIONS.")
    endif()
    add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=M)
  elseif(HOROVOD_CPU_OPERATIONS STREQUAL "MLSL")
    message(FATAL_ERROR "Intel(R) MLSL was removed. Upgrade to oneCCL and set HOROVOD_CPU_OPERATIONS=CCL.")
  elseif(HOROVOD_CPU_OPERATIONS STREQUAL "CCL")
    if(NOT USE_CCL)
      message(FATAL_ERROR "oneCCL is not installed, try changing HOROVOD_CPU_OPERATIONS.")
    endif()
    add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=C)
  elseif(HOROVOD_CPU_OPERATIONS STREQUAL "GLOO")
    if(NOT HAVE_GLOO)
      message(FATAL_ERROR "Cannot set both HOROVOD_WITHOUT_GLOO and HOROVOD_CPU_OPERATIONS=GLOO.")
    endif()
    add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=G)
  else()
    # Any other value adds no definition at all. Say so explicitly, without
    # failing the build, instead of leaving the STATUS line above as the only
    # (misleading) trace.
    message(WARNING "HOROVOD_CPU_OPERATIONS=${HOROVOD_CPU_OPERATIONS} is not recognised and will be ignored, supported values are 'MPI', 'GLOO' and 'CCL'.")
  endif()
endif()
# oneCCL
# With USE_CCL set, add the oneCCL headers and libccl.so from CCL_ROOT (the
# library sub-directory comes from the CCL_CONFIGURATION_PATH environment
# variable) and pick the CPU or GPU flavour of the CCL operations.
if(USE_CCL)
  set(CCL_CONFIGURATION_PATH $ENV{CCL_CONFIGURATION_PATH})
  include_directories(${CCL_ROOT}/include)
  list(APPEND LINKER_LIBS "${CCL_ROOT}/lib/${CCL_CONFIGURATION_PATH}/libccl.so")

  if(NOT USE_GPU_CCL)
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/ccl_operations.cc")
  else()
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/ccl_gpu_operations.cc")
  endif()

  add_definitions(-DHAVE_CCL=1)
endif()
# Get Python suffix
# Runs the interpreter in PY_EXE and stores the first non-empty value among
# sysconfig's EXT_SUFFIX, sysconfig's SO and the literal '.so' in Python_SUFFIX.
# Interpreter errors are suppressed (ERROR_QUIET).
execute_process(
  COMMAND ${PY_EXE} -c "import sysconfig; print(next(x for x in [sysconfig.get_config_var('EXT_SUFFIX'), sysconfig.get_config_var('SO'), '.so'] if x))"
  OUTPUT_VARIABLE Python_SUFFIX
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_QUIET)
# Framework bindings, configured in this order: TF, PyTorch, MXNet.
foreach(hvd_framework tensorflow torch mxnet)
  add_subdirectory(horovod/${hvd_framework})
endforeach()

# Correctly wrap up json format: append a final dummy entry and the closing
# brace to metadata.json.
file(APPEND "${CMAKE_LIBRARY_OUTPUT_DIRECTORY_ROOT}/metadata.json" "\"dummy\": \"none\"\n}")

# CUDA kernels
if(HAVE_CUDA OR HAVE_SUB_PROJECT_CUDA)
  add_subdirectory(horovod/common/ops/cuda)
endif()

# ROCm kernels
if(HAVE_ROCM)
  add_subdirectory(horovod/common/ops/rocm)
endif()

# SYCL kernels
if(HAVE_SYCL)
  add_subdirectory(horovod/common/ops/sycl)
endif()
# if we need compatible c++ abi
# Duplicate gloo folder and add it as a new sub-project.
# Triggered when Gloo is enabled and at least one of Tensorflow_CXX11,
# Pytorch_CXX11 or Mxnet_CXX11 is defined but false (these are set outside this
# file; presumably they report the C++11 ABI setting of each framework).
# The copy's gloo/CMakeLists.txt is rewritten so that every "gloo " becomes
# "compatible_gloo ", and the resulting target is built with
# _GLIBCXX_USE_CXX11_ABI=0.
if(HAVE_GLOO
   AND ((DEFINED Tensorflow_CXX11 AND NOT Tensorflow_CXX11)
        OR (DEFINED Pytorch_CXX11 AND NOT Pytorch_CXX11)
        OR (DEFINED Mxnet_CXX11 AND NOT Mxnet_CXX11)))
  file(COPY ${PROJECT_SOURCE_DIR}/third_party/gloo/
       DESTINATION ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo)

  file(READ ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt
       GLOO_CMAKE)
  string(REPLACE "gloo " "compatible_gloo " GLOO_CMAKE "${GLOO_CMAKE}")
  file(WRITE ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt
       "${GLOO_CMAKE}")

  add_subdirectory(third_party/compatible_gloo)
  target_compile_definitions(compatible_gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=0)
endif()
# Gloo for c++17 TF
# When Gloo is enabled and Tensorflow_CXX17 is defined and true, a second copy
# of third_party/gloo is generated under third_party/compatible17_gloo and
# added as its own sub-project. The statements below depend on their order:
# each file is read, patched and written back before add_subdirectory() runs.
if(HAVE_GLOO AND (DEFINED Tensorflow_CXX17 AND Tensorflow_CXX17))
file(COPY ${PROJECT_SOURCE_DIR}/third_party/gloo/ DESTINATION ${PROJECT_SOURCE_DIR}/third_party/compatible17_gloo)
# Patch 1: in the copy's gloo/CMakeLists.txt replace every "gloo " with
# "compatible17_gloo " (note the trailing space in the search string).
file(READ ${PROJECT_SOURCE_DIR}/third_party/compatible17_gloo/gloo/CMakeLists.txt GLOO_CMAKE)
string(REPLACE "gloo " "compatible17_gloo " GLOO_CMAKE "${GLOO_CMAKE}")
file(WRITE ${PROJECT_SOURCE_DIR}/third_party/compatible17_gloo/gloo/CMakeLists.txt "${GLOO_CMAKE}")
# Patch 2: in the copy's top-level CMakeLists.txt switch -std=c++11 to
# -std=c++17 and put set(CMAKE_CXX_STANDARD 17) on the first line.
# GLOO_CMAKE is reused as the buffer.
file(READ ${PROJECT_SOURCE_DIR}/third_party/compatible17_gloo/CMakeLists.txt GLOO_CMAKE)
string(REPLACE "-std=c++11" "-std=c++17" GLOO_CMAKE "${GLOO_CMAKE}")
string(PREPEND GLOO_CMAKE "set(CMAKE_CXX_STANDARD 17)\n")
file(WRITE ${PROJECT_SOURCE_DIR}/third_party/compatible17_gloo/CMakeLists.txt "${GLOO_CMAKE}")
add_subdirectory(third_party/compatible17_gloo)
# _GLIBCXX_USE_CXX11_ABI follows Tensorflow_CXX11: 1 when it is true, 0 otherwise.
if (Tensorflow_CXX11)
target_compile_definitions(compatible17_gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=1)
else()
target_compile_definitions(compatible17_gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=0)
endif()
endif()