Skip to content

Commit 55a4b22

Browse files
Updating RCCL based on NCCL 2.3.7
- Contains modifications to support AMD hardware - Adds unit tests
1 parent 4861e19 commit 55a4b22

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+2675
-396
lines changed

CMakeLists.txt

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.

cmake_minimum_required(VERSION 2.8.12)

# Install under the ROCm tree by default. This is a non-FORCE cache entry, so a
# -DCMAKE_INSTALL_PREFIX=<path> given on the command line takes precedence.
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "")

# In order to support function calls within the kernel, we must use hcc as the
# compiler. The compiler has to be selected BEFORE project() enables the CXX
# language: project() runs compiler detection, and changing
# CMAKE_CXX_COMPILER afterwards leaves the detected compiler information
# describing a different compiler than the one actually used for the build.
# An explicit -DCMAKE_CXX_COMPILER=<path> from the user is respected.
if(NOT CMAKE_CXX_COMPILER)
  set(CMAKE_CXX_COMPILER "/opt/rocm/bin/hcc")
endif()

project(rccl CXX)

# Locate the ROCM CMake package; /opt/rocm is searched in addition to the
# default locations. The ROCM* modules included below are presumably provided
# by that package -- confirm against the installed ROCm version.
find_package(ROCM REQUIRED PATHS /opt/rocm)

include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers)
include(ROCMSetupVersion)
include(ROCMInstallSymlinks)
include(ROCMCreatePackage)

option(BUILD_TESTS "Build test programs" ON)
25+
26+
# Parse the version from makefiles/version.mk. NCCL_MAJOR, NCCL_MINOR,
# NCCL_PATCH and PKG_REVISION must exist and carry a numeric value; NCCL_SUFFIX
# is optional.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/makefiles/version.mk" version_mk_text)

foreach(version_field NCCL_MAJOR NCCL_MINOR NCCL_PATCH PKG_REVISION)
  # Require at least one digit ([0-9]+). With [0-9]* an assignment that has no
  # value still matches with an empty capture, so the error below could never
  # fire for a present-but-empty field.
  if("${version_mk_text}" MATCHES "${version_field} *:= *([0-9]+)")
    set(${version_field} "${CMAKE_MATCH_1}")
  else()
    message(FATAL_ERROR "Failed to parse ${version_field}")
  endif()
endforeach()

# The suffix may legitimately be absent or empty; in both cases NCCL_SUFFIX
# ends up unset.
if("${version_mk_text}" MATCHES "NCCL_SUFFIX *:= *([0-9]*)")
  set(NCCL_SUFFIX ${CMAKE_MATCH_1})
else()
  set(NCCL_SUFFIX)
endif()

# NCCL_VERSION formatting is ((X) * 1000 + (Y) * 100 + (Z)). Compute it
# arithmetically rather than by concatenating digit strings, so the result
# follows the formula for any number of digits in the minor/patch fields.
math(EXPR NCCL_VERSION
     "${NCCL_MAJOR} * 1000 + ${NCCL_MINOR} * 100 + ${NCCL_PATCH}")

rocm_setup_version(VERSION
                   "${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}-${PKG_REVISION}")
63+
64+
# Additional roots searched by find_package() for the ROCm components.
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc)

find_package(hip REQUIRED)

# Directory-scoped link items: applied to every target created after this
# point. They select the AMD GPU targets (gfx803, gfx900, gfx906) and pass
# -hc-function-calls.
link_libraries(-amdgpu-target=gfx803
               -amdgpu-target=gfx900
               -amdgpu-target=gfx906
               -hc-function-calls)

option(BUILD_SHARED_LIBS "Build as a shared library" ON)

# Generate the public header twice from the same template, once under each
# name, into the top of the build tree.
foreach(generated_header rccl.h nccl.h)
  configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/${generated_header})
endforeach()

# ${PROJECT_BINARY_DIR} comes first so the generated headers are found.
include_directories(${PROJECT_BINARY_DIR}
                    src
                    src/include
                    src/collectives
                    src/collectives/device)
86+
87+
# Sources that keep their .cu extension in the source tree.
set(CU_SOURCES
    src/bootstrap.cu
    src/collectives/all_gather.cu
    src/collectives/all_reduce.cu
    src/collectives/broadcast.cu
    src/collectives/reduce.cu
    src/collectives/reduce_scatter.cu
    src/collectives/device/functions.cu
    src/init.cu
    src/misc/enqueue.cu
    src/misc/group.cu
    src/misc/ibvwrap.cu
    src/misc/nvmlwrap_stub.cu
    src/misc/rings.cu
    src/misc/utils.cu
    src/ring.cu
    src/transport.cu
    src/transport/net.cu
    src/transport/net_ib.cu
    src/transport/net_socket.cu
    src/transport/p2p.cu
    src/transport/shm.cu)

# Copy every .cu file to a same-named .cpp file in the build tree and compile
# the copy. The copies are referenced by their full build-tree path: a relative
# path would be resolved against the source tree first, so a stray .cpp of the
# same name there would silently be compiled instead of the generated copy.
set(CPP_SOURCES)
foreach(filename ${CU_SOURCES})
  string(REPLACE ".cu" ".cpp" cpp_filename ${filename})
  set(generated_cpp "${CMAKE_CURRENT_BINARY_DIR}/${cpp_filename}")
  configure_file(${filename} "${generated_cpp}" COPYONLY)
  list(APPEND CPP_SOURCES "${generated_cpp}")
endforeach()

# Per-collective device sources that are already .cpp files.
list(APPEND CPP_SOURCES
     src/collectives/device/all_gather_0.cpp
     src/collectives/device/all_reduce_0.cpp
     src/collectives/device/all_reduce_1.cpp
     src/collectives/device/all_reduce_2.cpp
     src/collectives/device/all_reduce_3.cpp
     src/collectives/device/broadcast_0.cpp
     src/collectives/device/reduce_0.cpp
     src/collectives/device/reduce_1.cpp
     src/collectives/device/reduce_2.cpp
     src/collectives/device/reduce_3.cpp
     src/collectives/device/reduce_scatter_0.cpp
     src/collectives/device/reduce_scatter_1.cpp
     src/collectives/device/reduce_scatter_2.cpp
     src/collectives/device/reduce_scatter_3.cpp)

# Shared or static is decided by BUILD_SHARED_LIBS.
add_library(rccl ${CPP_SOURCES})
136+
137+
# Opt-in tracing: configuring with a true TRACE value (e.g. -DTRACE=ON) adds
# the ENABLE_TRACE preprocessor definition for this directory.
if(TRACE)
  add_definitions(-DENABLE_TRACE)
endif()

# Link against HIP. Fall back to hip::hip_hcc plus the hcc libraries and numa
# when the hip package does not provide the hip::device target; otherwise use
# hip::device for building rccl itself and expose hip::host to consumers.
if(NOT TARGET hip::device)
  target_link_libraries(rccl PUBLIC hip::hip_hcc ${hcc_LIBRARIES} numa)
else()
  target_link_libraries(rccl
                        PRIVATE hip::device
                        INTERFACE hip::host)
endif()
147+
148+
# Install the library under the "rccl" prefix, together with the generated
# rccl.h header.
rocm_install_targets(TARGETS rccl PREFIX rccl)
install(FILES ${PROJECT_BINARY_DIR}/rccl.h
        DESTINATION rccl/${CMAKE_INSTALL_INCLUDEDIR})

# Export the target as roc::rccl, declaring hip as a dependency of the
# exported package.
rocm_export_targets(NAMESPACE roc:: PREFIX rccl TARGETS rccl DEPENDS hip)

# Package-level runtime dependency for both the .deb and .rpm packages.
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_hcc")
set(CPACK_RPM_PACKAGE_REQUIRES "hip_hcc")

rocm_create_package(
  NAME rccl
  DESCRIPTION "Optimized primitives for collective multi-GPU communication"
  MAINTAINER "Jeff Daily <[email protected]>"
  LDCONFIG)

# NOTE(review): presumably creates symlinks from the install prefix into the
# rccl/ subdirectory -- confirm against the ROCMInstallSymlinks module.
rocm_install_symlink_subdir(rccl)

# Unit tests live in test/ and are built unless BUILD_TESTS is turned off.
if(BUILD_TESTS)
  add_subdirectory(test)
endif()

LICENSE.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
3+
Modifications Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
34

45
Redistribution and use in source and binary forms, with or without
56
modification, are permitted provided that the following conditions

NOTICES.txt

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
Notices and Licenses file
2+
_______________________________________________________________
3+
4+
Dependencies on nvidia-nccl v2.3.7-1 (BSD3)
5+
Copyright (c) 2015-2018, NVIDIA CORPORATION.
6+
Modifications Copyright (c) 2019 Advanced Micro Devices, Inc.
7+
8+
Redistribution and use in source and binary forms, with or without
9+
modification, are permitted provided that the following conditions
10+
are met:
11+
* Redistributions of source code must retain the above copyright
12+
notice, this list of conditions and the following disclaimer.
13+
* Redistributions in binary form must reproduce the above copyright
14+
notice, this list of conditions and the following disclaimer in the
15+
documentation and/or other materials provided with the distribution.
16+
* Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
17+
Laboratory, the U.S. Department of Energy, nor the names of their
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
22+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
29+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
The U.S. Department of Energy funded the development of this software
34+
under subcontract 7078610 with Lawrence Berkeley National Laboratory.
35+
36+
37+
nvidia-nccl v2.3.7-1 (BSD2)
38+
Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
39+
40+
Redistribution and use in source and binary forms, with or without
41+
modification, are permitted provided that the following conditions
42+
are met:
43+
* Redistributions of source code must retain the above copyright
44+
notice, this list of conditions and the following disclaimer.
45+
* Redistributions in binary form must reproduce the above copyright
46+
notice, this list of conditions and the following disclaimer in the
47+
documentation and/or other materials provided with the distribution.
48+
* Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
49+
Laboratory, the U.S. Department of Energy, nor the names of their
50+
contributors may be used to endorse or promote products derived
51+
from this software without specific prior written permission.
52+
53+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
54+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
57+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
61+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
62+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
63+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64+
65+
The U.S. Department of Energy funded the development of this software
66+
under subcontract 7078610 with Lawrence Berkeley National Laboratory.

README.md

+38-49
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
# NCCL
1+
# RCCL
22

3-
Optimized primitives for collective multi-GPU communication.
3+
ROCm Communication Collectives Library
44

55
## Introduction
66

7-
NCCL (pronounced "Nickel") is a stand-alone library of standard collective communication routines for GPUs, implementing all-reduce, all-gather, reduce, broadcast, and reduce-scatter. It has been optimized to achieve high bandwidth on platforms using PCIe, NVLink, NVswitch, as well as networking using InfiniBand Verbs or TCP/IP sockets. NCCL supports an arbitrary number of GPUs installed in a single node or across multiple nodes, and can be used in either single- or multi-process (e.g., MPI) applications.
8-
9-
For more information on NCCL usage, please refer to the [NCCL documentation](https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/index.html).
7+
RCCL (pronounced "Rickle") is a stand-alone library of standard collective communication routines for GPUs, implementing all-reduce, all-gather, reduce, broadcast, and reduce-scatter. It has been optimized to achieve high bandwidth on platforms using PCIe and xGMI, as well as networking using InfiniBand Verbs or TCP/IP sockets. RCCL supports an arbitrary number of GPUs installed in a single node or across multiple nodes, and can be used in either single- or multi-process (e.g., MPI) applications.
108

119
## What's inside
1210

@@ -22,71 +20,62 @@ These operations are implemented using ring algorithms and have been optimized f
2220

2321
## Requirements
2422

25-
NCCL requires at least CUDA 7.0 and Kepler or newer GPUs. For PCIe based platforms, best performance is achieved when all GPUs are located on a common PCIe root complex, but multi-socket configurations are also supported.
26-
27-
## Build
23+
1. ROCm supported GPUs
24+
2. ROCm stack installed on the system (HIP runtime & HCC)
2825

29-
Note: the official and tested builds of NCCL can be downloaded from: https://developer.nvidia.com/nccl. You can skip the following build steps if you choose to use the official builds.
26+
## Quickstart RCCL Build
3027

31-
To build the library :
32-
33-
```shell
34-
$ cd nccl
35-
$ make -j src.build
36-
```
28+
RCCL directly depends on HIP runtime & HCC C++ compiler which are part of the ROCm software stack.
29+
In addition, HC Direct Function call support needs to be present on your machine. There are binaries for hcc and HIP that need to be installed to get HC Direct Function call support. These binaries are currently packaged with roc-master, and will be included in ROCm 2.4.
3730

38-
If CUDA is not installed in the default /usr/local/cuda path, you can define the CUDA path with :
31+
The root of this repository has a helper script 'install.sh' to build and install RCCL on Ubuntu with a single command. It takes only a few options and hard-codes configuration that could otherwise be specified by invoking cmake directly, but it's a great way to get started quickly and can serve as an example of how to build/install.
3932

40-
```shell
41-
$ make src.build CUDA_HOME=<path to cuda install>
42-
```
33+
* `./install.sh` -- builds library including unit tests
34+
* `./install.sh -h` -- shows help
35+
* `./install.sh -t` -- builds library including unit tests, and also runs unit tests
4336

44-
NCCL will be compiled and installed in `build/` unless `BUILDDIR` is set.
37+
## Manual build
38+
#### To build the library :
4539

46-
By default, NCCL is compiled for all supported architectures. To accelerate the compilation and reduce the binary size, consider redefining `NVCC_GENCODE` (defined in `makefiles/common.mk`) to only include the architecture of the target platform :
4740
```shell
48-
$ make -j src.build NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70"
41+
$ git clone https://github.com/ROCmSoftwarePlatform/rccl.git
42+
$ cd rccl
43+
$ mkdir build
44+
$ cd build
45+
$ CXX=/opt/rocm/bin/hcc cmake -DCMAKE_INSTALL_PREFIX=$PWD/rccl-install ..
46+
$ make -j 8 install
4947
```
48+
You may substitute a path of your own choosing for CMAKE_INSTALL_PREFIX.
5049

51-
## Install
50+
#### To build and install the RCCL package :
5251

53-
To install NCCL on the system, create a package then install it as root.
52+
Assuming you have already cloned this repository and built the library as shown in the previous section:
5453

55-
Debian/Ubuntu :
5654
```shell
57-
$ # Install tools to create debian packages
58-
$ sudo apt install build-essential devscripts debhelper
59-
$ # Build NCCL deb package
60-
$ make pkg.debian.build
61-
$ ls build/pkg/deb/
55+
$ cd rccl/build
56+
$ make package
57+
$ sudo dpkg -i *.deb
6258
```
6359

64-
RedHat/CentOS :
65-
```shell
66-
$ # Install tools to create rpm packages
67-
$ sudo yum install rpm-build rpmdevtools
68-
$ # Build NCCL rpm package
69-
$ make pkg.redhat.build
70-
$ ls build/pkg/rpm/
71-
```
72-
73-
OS-agnostic tarball :
74-
```shell
75-
$ make pkg.txz.build
76-
$ ls build/pkg/txz/
77-
```
60+
RCCL package install requires sudo/root access because it creates a directory called "rccl" under /opt/rocm/. This is an optional step and RCCL can be used directly by including the path containing librccl.so.
7861

7962
## Tests
8063

81-
Tests for NCCL are maintained separately at https://github.com/nvidia/nccl-tests.
64+
There are unit tests implemented with the Googletest framework in RCCL, which are currently a work-in-progress. To invoke the unit tests, go to the rccl-install folder, then the test/ subfolder, and execute the appropriate unit test executable(s). Several notes for running the unit tests:
8265

66+
1. The LD_LIBRARY_PATH environment variable will need to be set to include /path/to/rccl-install/lib/ in order to run the unit tests.
67+
2. The HSA_FORCE_FINE_GRAIN_PCIE environment variable will need to be set to 1 in order to run the unit tests.
68+
69+
An example call to the unit tests:
8370
```shell
84-
$ git clone https://github.com/NVIDIA/nccl-tests.git
85-
$ cd nccl-tests
86-
$ make
87-
$ ./build/all_reduce_perf -b 8 -e 256M -f 2 -g <ngpus>
71+
$ LD_LIBRARY_PATH=rccl-install/lib/ HSA_FORCE_FINE_GRAIN_PCIE=1 rccl-install/test/UnitTests
8872
```
8973

74+
There are also other performance and error-checking tests for RCCL. These are maintained separately at https://github.com/ROCmSoftwarePlatform/rccl-tests.
75+
See the rccl-tests README for more information on how to build and run those tests.
76+
9077
## Copyright
9178

9279
All source code and accompanying documentation is copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
80+
81+
All modifications are copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.

0 commit comments

Comments
 (0)