|
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)

# CMP0074 (available since CMake 3.12) makes find_package() honour
# <PackageName>_ROOT variables. Older CMake releases do not know the policy,
# hence the guard.
if(POLICY CMP0074)
    cmake_policy(SET CMP0074 NEW)
endif()

project(horovod CXX)

# Require C++11 and turn compiler-specific language extensions off.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Make the project's own find-modules visible to find_package(), then pull in
# the project's Utilities.cmake script.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/")
include(cmake/Utilities.cmake)
| 17 | + |
# Header search paths of the bundled third-party libraries. The relative paths
# are resolved against the current source directory.
include_directories(
    "third_party/HTTPRequest/include"
    "third_party/boost/assert/include"
    "third_party/boost/config/include"
    "third_party/boost/core/include"
    "third_party/boost/detail/include"
    "third_party/boost/iterator/include"
    "third_party/boost/lockfree/include"
    "third_party/boost/mpl/include"
    "third_party/boost/parameter/include"
    "third_party/boost/predef/include"
    "third_party/boost/preprocessor/include"
    "third_party/boost/static_assert/include"
    "third_party/boost/type_traits/include"
    "third_party/boost/utility/include"
    "third_party/eigen"
    "third_party/flatbuffers/include"
    "third_party/lbfgs/include"
)
| 36 | + |
# Core sources that are always compiled. Backend-specific files are appended
# to SOURCES further down, depending on what gets enabled.
list(APPEND SOURCES
    "${PROJECT_SOURCE_DIR}/horovod/common/common.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/controller.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/fusion_buffer_manager.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/half.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/logging.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/message.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/operations.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/parameter_manager.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/response_cache.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/stall_inspector.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/thread_pool.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/timeline.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/tensor_queue.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/ops/collective_operations.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/ops/operation_manager.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/optim/bayesian_optimization.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/optim/gaussian_process.cc"
    "${PROJECT_SOURCE_DIR}/horovod/common/utils/env_parser.cc"
)
| 56 | + |
# Preprocessor definition applied to everything configured from here on.
add_definitions(-DEIGEN_MPL2_ONLY=1)

# Strip any -std=... option that is already present in the inherited flags;
# the language level is selected through CMAKE_CXX_STANDARD instead.
string(REGEX REPLACE "-std=[^ ]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Honour an archiver passed in through the AR environment variable.
if(DEFINED ENV{AR})
    set(CMAKE_AR $ENV{AR})
endif()

# Final flag order: CPPFLAGS from the environment, then the inherited flags,
# then the project-wide defaults.
set(CMAKE_CXX_FLAGS "$ENV{CPPFLAGS} ${CMAKE_CXX_FLAGS} -pthread -fPIC -O3 -Wall -ftree-vectorize -g")

# Architecture-specific optimisation flags are handed to set_build_arch_flags
# as a single list argument. The helper is defined outside this file
# (presumably in cmake/Utilities.cmake) - see there for how they are applied.
set(ARCH_FLAGS "-mf16c;-mavx;-mfma")
set_build_arch_flags("${ARCH_FLAGS}")
# Limit the symbols exported from Horovod's shared libraries.
#
# The export list (horovod.exp on macOS, horovod.lds elsewhere) is addressed
# through PROJECT_SOURCE_DIR rather than CMAKE_SOURCE_DIR: the two are equal
# for a stand-alone build, but CMAKE_SOURCE_DIR points at the *outer* project
# when Horovod is pulled in with add_subdirectory(), which would make the
# linker look for the file in the wrong directory.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -Wl,-exported_symbols_list,${PROJECT_SOURCE_DIR}/horovod.exp")
    set(CMAKE_MACOSX_RPATH TRUE)
else()
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--version-script=${PROJECT_SOURCE_DIR}/horovod.lds -Wl,-Bsymbolic-functions -Wl,-z,relro,-z,now")
endif()
# GPU settings are read from the environment at configure time. An unset
# environment variable leaves the corresponding CMake variable undefined.
set(HOROVOD_GPU $ENV{HOROVOD_GPU})
set(HOROVOD_GPU_OPERATIONS $ENV{HOROVOD_GPU_OPERATIONS})

# When HOROVOD_GPU_OPERATIONS is given it has to be exactly MPI or NCCL.
if(DEFINED HOROVOD_GPU_OPERATIONS)
    if(NOT "${HOROVOD_GPU_OPERATIONS}" MATCHES "^(MPI|NCCL)$")
        message(FATAL_ERROR "HOROVOD_GPU_OPERATIONS=${HOROVOD_GPU_OPERATIONS} is invalid, supported values are '', 'MPI', and 'NCCL'.")
    endif()
endif()

# Resolve the backend of each collective. set_gpu_op is defined outside this
# file; the second argument looks like the list of backends accepted for that
# collective - confirm against its definition.
set_gpu_op(HOROVOD_GPU_ALLREDUCE "MPI;NCCL;DDL")
set_gpu_op(HOROVOD_GPU_ALLGATHER "MPI;NCCL")
set_gpu_op(HOROVOD_GPU_BROADCAST "MPI;NCCL")
set_gpu_op(HOROVOD_GPU_ALLTOALL "MPI;NCCL")
| 90 | + |
# For every GPU collective that has a backend selected:
#   1. shorten the backend name to its first character (the checks further
#      down compare against "M", "N" and "D"),
#   2. let convert_to_ascii_dec (defined outside this file) turn that
#      character into ASCII_DEC,
#   3. pass the result to the compiler as -D<COLLECTIVE>=<ASCII_DEC>.
#
# The keyword must be upper-case "IN ITEMS": a lower-case "in" is not a
# keyword, so "in" and "ITEMS" would themselves be iterated as variable names.
foreach(VAR IN ITEMS HOROVOD_GPU_ALLREDUCE HOROVOD_GPU_ALLGATHER HOROVOD_GPU_BROADCAST HOROVOD_GPU_ALLTOALL)
    if(DEFINED ${VAR})
        # Quoted so the value always arrives as exactly one argument.
        string(SUBSTRING "${${VAR}}" 0 1 ${VAR})
        convert_to_ascii_dec(ASCII_DEC ${${VAR}})
        add_definitions(-D${VAR}=${ASCII_DEC})
    endif()
endforeach()
| 98 | + |
# Python: an interpreter of at least version 3.6 is mandatory.
find_package(PythonInterp "3.6" REQUIRED)

# Get the latest version installed.
# The shell snippet walks the PATH entries in order. In the first directory
# that contains an executable named python3 or python3.<n>, it prints the name
# that sorts last on the numeric field after the dot, then stops. The two
# branches differ only in the regex dialect passed to find.
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    execute_process(
        COMMAND bash -c "for p in $(echo $PATH | tr ':' ' '); do if find -E $p -maxdepth 1 -regex '.*/python3(\\.[0-9]*)?' -exec basename {} \; | sort -t. -k 2,2n | tail -1 | grep .; then break; fi done"
        OUTPUT_VARIABLE PY_EXE
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)
else()
    execute_process(
        COMMAND bash -c "for p in $(echo $PATH | tr ':' ' '); do if find $p -maxdepth 1 -regex '.*/python3\\(\\.[0-9]*\\)?' -exec basename {} \; | sort -t. -k 2,2n | tail -1 | grep .; then break; fi done"
        OUTPUT_VARIABLE PY_EXE
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)
endif()

# Nothing found by the scan: fall back to the interpreter located by
# find_package(PythonInterp).
if(NOT PY_EXE)
    set(PY_EXE ${PYTHON_EXECUTABLE})
endif()
message(STATUS "Using command ${PY_EXE}")
| 113 | + |
# MPI support.
#   HOROVOD_WITHOUT_MPI=1  -> MPI is not even searched for
#   HOROVOD_WITH_MPI=1     -> MPI becomes mandatory (find_package ... REQUIRED)
#   otherwise              -> MPI is used when it happens to be found
if(NOT "$ENV{HOROVOD_WITHOUT_MPI}" STREQUAL "1")
    if("$ENV{HOROVOD_WITH_MPI}" STREQUAL "1")
        set(MPI_REQUIRED "REQUIRED")
    else()
        set(MPI_REQUIRED "")
    endif()

    find_package(MPI ${MPI_REQUIRED})

    if(MPI_FOUND)
        add_definitions(-DHAVE_MPI=1)
        set(HAVE_MPI TRUE)

        include_directories(SYSTEM ${MPI_INCLUDE_PATH})
        list(APPEND LINKER_LIBS ${MPI_LIBRARIES})
        list(APPEND SOURCES
            "${PROJECT_SOURCE_DIR}/horovod/common/mpi/mpi_context.cc"
            "${PROJECT_SOURCE_DIR}/horovod/common/mpi/mpi_controller.cc"
            "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_operations.cc"
            "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum/adasum_mpi.cc"
            "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum_mpi_operations.cc")
    endif()
endif()
| 133 | + |
# CUDA and ROCM
#
# ADD_CUDA(): locate the CUDA toolkit and wire it into the build.
# Takes no arguments. Because it is a macro, every variable it touches
# (LINKER_LIBS, SOURCES, HAVE_CUDA, ...) is modified in the scope of the caller.
#   - HOROVOD_CUDA_HOME in the environment, when defined, is used as
#     CUDA_TOOLKIT_ROOT_DIR.
#   - Adds the CUDA include directories, libraries and the CUDA/GPU operation
#     sources, plus the MPI GPU operations when HAVE_MPI is set.
#   - Defines HAVE_CUDA=1 and HAVE_GPU=1 for the compiler and sets HAVE_CUDA.
macro(ADD_CUDA)
    option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF)
    if(DEFINED ENV{HOROVOD_CUDA_HOME})
        set(CUDA_TOOLKIT_ROOT_DIR $ENV{HOROVOD_CUDA_HOME})
    endif()
    find_package(CUDA REQUIRED)

    add_definitions(-DHAVE_CUDA=1 -DHAVE_GPU=1)
    set(HAVE_CUDA TRUE)

    include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
    list(APPEND LINKER_LIBS ${CUDA_LIBRARIES})
    list(APPEND SOURCES
        "${PROJECT_SOURCE_DIR}/horovod/common/ops/cuda_operations.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")

    # CUDA + MPI
    if(HAVE_MPI)
        list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_gpu_operations.cc")
    endif()

    # When the macro is expanded from a directory other than the top of the
    # build tree, also report CUDA to the directory scope above the caller.
    if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
        set(HAVE_SUB_PROJECT_CUDA TRUE PARENT_SCOPE)
    endif()
endmacro()
| 155 | + |
# A GPU toolkit is only needed when at least one collective is configured to
# run on the GPU. HOROVOD_GPU picks the toolkit: CUDA (also the default when
# the variable is undefined) or ROCM; any other value aborts the configure.
if(DEFINED HOROVOD_GPU_ALLREDUCE OR DEFINED HOROVOD_GPU_ALLGATHER OR DEFINED HOROVOD_GPU_BROADCAST OR DEFINED HOROVOD_GPU_ALLTOALL)
    if(NOT DEFINED HOROVOD_GPU OR HOROVOD_GPU STREQUAL "CUDA")
        add_cuda()
    elseif(HOROVOD_GPU STREQUAL "ROCM")
        find_package(ROCM REQUIRED)

        add_definitions(-DHAVE_ROCM=1 -DHAVE_GPU=1)
        set(HAVE_ROCM TRUE)

        include_directories(SYSTEM ${ROCM_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS ${ROCM_LIBRARIES})
        # The ROCm compile flags go in front of the existing flags.
        set(CMAKE_CXX_FLAGS "${ROCM_COMPILE_FLAGS} ${CMAKE_CXX_FLAGS}")
        list(APPEND SOURCES
            "${PROJECT_SOURCE_DIR}/horovod/common/ops/hip_operations.cc"
            "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")
    else()
        message(FATAL_ERROR "Unknown HOROVOD_GPU type: ${HOROVOD_GPU}")
    endif()
endif()
| 172 | + |
# NCCL: needed as soon as any collective selected it ("N" is the first
# character of the backend name). On ROCm the rccl package is used instead.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N" OR HOROVOD_GPU_ALLGATHER STREQUAL "N" OR HOROVOD_GPU_BROADCAST STREQUAL "N" OR HOROVOD_GPU_ALLTOALL STREQUAL "N")
    if(HAVE_ROCM)
        find_package(rccl REQUIRED)
        include_directories(SYSTEM ${RCCL_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS roc::rccl)
    else()
        find_package(NCCL REQUIRED)
        # Only NCCL major version 2 and newer is accepted.
        if(NCCL_MAJOR_VERSION LESS "2")
            message(FATAL_ERROR "Horovod requires NCCL 2.0 or later version please upgrade.")
        endif()
        include_directories(SYSTEM ${NCCL_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS ${NCCL_LIBRARIES})
    endif()

    add_definitions(-DHAVE_NCCL=1)
    set(HAVE_NCCL TRUE)
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/nccl_operations.cc")
endif()
| 191 | + |
# DDL (deprecated): only selectable for allreduce. Its libraries are taken
# from the lib directory below CUDA_TOOLKIT_ROOT_DIR.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "D")
    message(DEPRECATION
        "DDL backend has been deprecated. Please, start using the NCCL backend by building Horovod with "
        "'HOROVOD_GPU_OPERATIONS=NCCL'. Will be removed in v0.21.0.")

    add_definitions(-DHAVE_DDL=1)
    set(HAVE_DDL TRUE)

    list(APPEND LINKER_LIBS
        "${CUDA_TOOLKIT_ROOT_DIR}/lib/libddl.so"
        "${CUDA_TOOLKIT_ROOT_DIR}/lib/libddl_pack.so")
    list(APPEND SOURCES
        "${PROJECT_SOURCE_DIR}/horovod/common/mpi/ddl_mpi_context_manager.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/ops/ddl_operations.cc")
endif()
| 202 | + |
# oneCCL: switched on by pointing the CCL_ROOT environment variable at an
# installation; headers and libccl.so are taken from below that directory.
set(CCL_ROOT $ENV{CCL_ROOT})
if(DEFINED CCL_ROOT)
    add_definitions(-DHAVE_CCL=1)
    set(HAVE_CCL TRUE)

    include_directories(${CCL_ROOT}/include)
    list(APPEND LINKER_LIBS "${CCL_ROOT}/lib/libccl.so")
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/ccl_operations.cc")
endif()
| 212 | + |
# Refuse to combine an NCCL allreduce with MPI for any of the other GPU
# collectives, unless HOROVOD_ALLOW_MIXED_GPU_IMPL=1 is set in the environment.
set(HOROVOD_ALLOW_MIXED_GPU_IMPL $ENV{HOROVOD_ALLOW_MIXED_GPU_IMPL})
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N" AND NOT HOROVOD_ALLOW_MIXED_GPU_IMPL STREQUAL "1")
    if(HOROVOD_GPU_ALLGATHER STREQUAL "M" OR HOROVOD_GPU_BROADCAST STREQUAL "M" OR HOROVOD_GPU_ALLTOALL STREQUAL "M")
        message(FATAL_ERROR
            "You should not mix NCCL and MPI GPU due to a possible deadlock.\n"
            "If you are sure you want to mix them, set the "
            "HOROVOD_ALLOW_MIXED_GPU_IMPL environment variable to '1'.")
    endif()
endif()
| 220 | + |
# Gloo: built from the bundled sources everywhere except on Darwin, unless
# HOROVOD_WITHOUT_GLOO=1 is set in the environment.
if(NOT "$ENV{HOROVOD_WITHOUT_GLOO}" STREQUAL "1" AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    # USE_MPI is set for the gloo sub-build and mirrors whether MPI was found.
    if(HAVE_MPI)
        set(USE_MPI TRUE)
    else()
        set(USE_MPI FALSE)
    endif()

    # Default CMP0074 to NEW for the gloo sub-build.
    set(CMAKE_POLICY_DEFAULT_CMP0074 NEW)
    add_subdirectory(third_party/gloo)
    include_directories(third_party/gloo)
    # This copy of gloo is compiled with the C++11 libstdc++ ABI.
    target_compile_definitions(gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=1)

    add_definitions(-DHAVE_GLOO=1)
    set(HAVE_GLOO TRUE)

    list(APPEND SOURCES
        "${PROJECT_SOURCE_DIR}/horovod/common/gloo/gloo_context.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/gloo/gloo_controller.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/gloo/http_store.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/gloo/memory_store.cc"
        "${PROJECT_SOURCE_DIR}/horovod/common/ops/gloo_operations.cc")
endif()

# Gloo is never enabled on Darwin (see the condition above), so MPI has to be
# available there.
if(NOT HAVE_MPI AND ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    message(FATAL_ERROR "Gloo cannot be compiled on MacOS, install MPI.")
endif()
| 243 | + |
# NCCL + MPI: the Adasum GPU operations are compiled only when both backends
# are available.
if(HAVE_NCCL AND HAVE_MPI)
    list(APPEND SOURCES
        "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum_gpu_operations.cc")
endif()
| 248 | + |
# Default CPU backend, chosen through the HOROVOD_CPU_OPERATIONS environment
# variable. Accepted values are MPI, CCL and GLOO; each requires that the
# matching backend was enabled above. MLSL is rejected with an upgrade hint.
# A value that matches none of them defines no default; that case is reported
# with a warning instead of passing silently after the status line below.
set(HOROVOD_CPU_OPERATIONS $ENV{HOROVOD_CPU_OPERATIONS})
if(DEFINED HOROVOD_CPU_OPERATIONS)
    message(STATUS "Set default CPU operation to " ${HOROVOD_CPU_OPERATIONS})
    if(HOROVOD_CPU_OPERATIONS STREQUAL "MPI")
        if(NOT HAVE_MPI)
            message(FATAL_ERROR "MPI is not installed, try changing HOROVOD_CPU_OPERATIONS.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=M)
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "MLSL")
        message(FATAL_ERROR "Intel(R) MLSL was removed. Upgrade to oneCCL and set HOROVOD_CPU_OPERATIONS=CCL.")
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "CCL")
        if(NOT HAVE_CCL)
            message(FATAL_ERROR "oneCCL is not installed, try changing HOROVOD_CPU_OPERATIONS.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=C)
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "GLOO")
        if(NOT HAVE_GLOO)
            message(FATAL_ERROR "Cannot set both HOROVOD_WITHOUT_GLOO and HOROVOD_CPU_OPERATIONS=GLOO.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=G)
    else()
        # Kept non-fatal so that configurations which used to pass still do.
        message(WARNING "HOROVOD_CPU_OPERATIONS=${HOROVOD_CPU_OPERATIONS} is not recognized; no default CPU operation was set. Supported values are 'MPI', 'CCL', and 'GLOO'.")
    endif()
endif()
| 271 | + |
# Ask the selected interpreter for its extension-module suffix: the first
# non-empty value among sysconfig's EXT_SUFFIX, sysconfig's SO and the literal
# '.so'. The result is stored in Python_SUFFIX.
execute_process(
    COMMAND ${PY_EXE} -c "import sysconfig; print(next(x for x in [sysconfig.get_config_var('EXT_SUFFIX'), sysconfig.get_config_var('SO'), '.so'] if x))"
    OUTPUT_VARIABLE Python_SUFFIX
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET)
| 275 | + |
# Framework bindings, each configured from its own directory.
add_subdirectory(horovod/tensorflow)  # TensorFlow
add_subdirectory(horovod/torch)       # PyTorch
add_subdirectory(horovod/mxnet)       # MXNet

# CUDA kernels: needed when CUDA was enabled here (HAVE_CUDA) or when
# HAVE_SUB_PROJECT_CUDA was raised from a sub-directory.
if(HAVE_CUDA OR HAVE_SUB_PROJECT_CUDA)
    add_subdirectory(horovod/common/ops/cuda)
endif()
| 287 | + |
# Second gloo build for the pre-C++11 libstdc++ ABI.
# Triggered when Gloo is enabled and Tensorflow_CXX11 or Pytorch_CXX11 is
# defined with a false value (presumably set by the framework sub-directories
# above - confirm there). The bundled gloo tree is duplicated as
# "compatible_gloo", every "gloo " in its gloo/CMakeLists.txt is rewritten to
# "compatible_gloo ", and the copy is added as a sub-project compiled with
# _GLIBCXX_USE_CXX11_ABI=0.
# NOTE(review): the copy and the rewritten CMakeLists.txt are created inside
# the source tree at configure time.
if(HAVE_GLOO AND ((DEFINED Tensorflow_CXX11 AND NOT Tensorflow_CXX11) OR (DEFINED Pytorch_CXX11 AND NOT Pytorch_CXX11)))
    file(COPY ${PROJECT_SOURCE_DIR}/third_party/gloo/
         DESTINATION ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo)

    file(READ ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt GLOO_CMAKE)
    string(REPLACE "gloo " "compatible_gloo " GLOO_CMAKE "${GLOO_CMAKE}")
    file(WRITE ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt "${GLOO_CMAKE}")

    add_subdirectory(third_party/compatible_gloo)
    target_compile_definitions(compatible_gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=0)
endif()
0 commit comments