Skip to content

Commit f11c8f6

Browse files
committed
RCCL 2.4 update
1 parent 4d579e5 commit f11c8f6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+7824
-609
lines changed

CMakeLists.txt

+206
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
2+
3+
cmake_minimum_required(VERSION 2.8.12)
4+
5+
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "")
6+
7+
project(rccl CXX)
8+
9+
find_package(ROCM
10+
REQUIRED
11+
PATHS
12+
/opt/rocm)
13+
14+
include(ROCMInstallTargets)
15+
include(ROCMPackageConfigHelpers)
16+
include(ROCMSetupVersion)
17+
include(ROCMInstallSymlinks)
18+
include(ROCMCreatePackage)
19+
20+
option(BUILD_TESTS "Build test programs" OFF)
21+
22+
# Parse the version from makefiles/version.mk. NCCL_MAJOR, NCCL_MINOR and NCCL_PATCH
23+
# must exist; NCCL_SUFFIX is optional. NCCL_VERSION is formatted as
24+
# ((X) * 1000 + (Y) * 100 + (Z)), so first detect whether the patch has one or two digits.
25+
file(READ makefiles/version.mk version_mk_text)
26+
# Require at least one digit: with "*" an empty "NCCL_MAJOR :=" would match and skip the FATAL_ERROR below.
if("${version_mk_text}" MATCHES "NCCL_MAJOR *:= *([0-9]+)")
27+
set(NCCL_MAJOR ${CMAKE_MATCH_1})
28+
else()
29+
message(FATAL_ERROR "Failed to parse NCCL_MAJOR")
30+
endif()
31+
# Require at least one digit: with "*" an empty "NCCL_MINOR :=" would match and skip the FATAL_ERROR below.
if("${version_mk_text}" MATCHES "NCCL_MINOR *:= *([0-9]+)")
32+
set(NCCL_MINOR ${CMAKE_MATCH_1})
33+
else()
34+
message(FATAL_ERROR "Failed to parse NCCL_MINOR")
35+
endif()
36+
# Require at least one digit: with "*" an empty "NCCL_PATCH :=" would match and skip the FATAL_ERROR below.
if("${version_mk_text}" MATCHES "NCCL_PATCH *:= *([0-9]+)")
37+
set(NCCL_PATCH ${CMAKE_MATCH_1})
38+
else()
39+
message(FATAL_ERROR "Failed to parse NCCL_PATCH")
40+
endif()
41+
if("${version_mk_text}" MATCHES "NCCL_SUFFIX *:= *([0-9]*)")
42+
set(NCCL_SUFFIX ${CMAKE_MATCH_1})
43+
else()
44+
set(NCCL_SUFFIX)
45+
endif()
46+
# Require at least one digit: with "*" an empty "PKG_REVISION :=" would match and skip the FATAL_ERROR below.
if("${version_mk_text}" MATCHES "PKG_REVISION *:= *([0-9]+)")
47+
set(PKG_REVISION ${CMAKE_MATCH_1})
48+
else()
49+
message(FATAL_ERROR "Failed to parse PKG_REVISION")
50+
endif()
51+
if("${NCCL_PATCH}" MATCHES "[0-9][0-9]")
52+
set(NCCL_VERSION "${NCCL_MAJOR}${NCCL_MINOR}${NCCL_PATCH}")
53+
else()
54+
set(NCCL_VERSION "${NCCL_MAJOR}${NCCL_MINOR}0${NCCL_PATCH}")
55+
endif()
56+
57+
# Setup VERSION
58+
set(VERSION_STRING "2.6.0.")
59+
60+
# Check if BUILD_NUMBER is defined in a Jenkins environment
61+
# Use the Jenkins build number only when it is a plain non-negative integer.
# Quoting keeps an unset, empty or multi-word value from being spliced into the
# if() argument list; anything non-numeric falls through to the "0" default.
if("$ENV{BUILD_NUMBER}" MATCHES "^[0-9]+$")
62+
string(CONCAT BUILD_VERSION ${VERSION_STRING} $ENV{BUILD_NUMBER})
63+
else()
64+
string(CONCAT BUILD_VERSION ${VERSION_STRING} "0")
65+
endif()
66+
67+
rocm_setup_version(VERSION ${BUILD_VERSION} NO_GIT_TAG_VERSION)
68+
69+
list(APPEND CMAKE_PREFIX_PATH
70+
/opt/rocm
71+
/opt/rocm/hip
72+
/opt/rocm/hcc)
73+
74+
find_package(hip REQUIRED)
75+
message(STATUS "HIP compiler: ${HIP_COMPILER}")
76+
message(STATUS "HIP runtime: ${HIP_RUNTIME}")
77+
78+
option(BUILD_SHARED_LIBS "Build as a shared library" ON)
79+
80+
configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/rccl.h)
81+
configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/nccl.h)
82+
83+
include_directories(${PROJECT_BINARY_DIR}) # for generated rccl.h header
84+
include_directories(src)
85+
include_directories(src/include)
86+
include_directories(src/collectives)
87+
include_directories(src/collectives/device)
88+
89+
set(CU_SOURCES
90+
src/collectives/device/all_reduce.cu
91+
src/collectives/device/all_gather.cu
92+
src/collectives/device/reduce.cu
93+
src/collectives/device/broadcast.cu
94+
src/collectives/device/reduce_scatter.cu
95+
src/collectives/device/functions.cu)
96+
97+
set(CPP_SOURCES)
98+
foreach(filename ${CU_SOURCES})
99+
string(REPLACE ".cu"
100+
".cpp"
101+
cpp_filename
102+
${filename})
103+
configure_file(${filename} ${cpp_filename} COPYONLY)
104+
list(APPEND CPP_SOURCES ${cpp_filename})
105+
endforeach(filename)
106+
107+
set(CC_SOURCES
108+
src/init.cc
109+
src/collectives/all_reduce.cc
110+
src/collectives/all_gather.cc
111+
src/collectives/reduce.cc
112+
src/collectives/broadcast.cc
113+
src/collectives/reduce_scatter.cc
114+
src/channel.cc
115+
src/misc/trees.cc
116+
src/misc/rings.cc
117+
src/misc/argcheck.cc
118+
src/misc/group.cc
119+
src/misc/utils.cc
120+
src/misc/ibvwrap.cc
121+
src/misc/nvmlwrap_stub.cc
122+
src/misc/topo.cc
123+
src/transport/net.cc
124+
src/transport/net_ib.cc
125+
src/transport/net_socket.cc
126+
src/transport/p2p.cc
127+
src/transport/shm.cc
128+
src/transport.cc
129+
src/bootstrap.cc
130+
src/enqueue.cc)
131+
132+
foreach(filename ${CC_SOURCES})
133+
list(APPEND CPP_SOURCES ${filename})
134+
endforeach(filename)
135+
136+
add_library(rccl ${CPP_SOURCES})
137+
138+
if(TRACE)
139+
add_definitions(-DENABLE_TRACE)
140+
endif()
141+
142+
if(PROFILE)
143+
add_definitions(-DENABLE_PROFILING)
144+
endif()
145+
146+
target_link_libraries(rccl
147+
PRIVATE --amdgpu-target=gfx803
148+
PRIVATE --amdgpu-target=gfx900
149+
PRIVATE --amdgpu-target=gfx906)
150+
151+
if("${HIP_COMPILER}" MATCHES "clang")
152+
target_compile_options(rccl
153+
PRIVATE --amdgpu-target=gfx803
154+
PRIVATE --amdgpu-target=gfx900
155+
PRIVATE --amdgpu-target=gfx906
156+
PRIVATE -fgpu-rdc)
157+
target_link_libraries(rccl PRIVATE -fgpu-rdc)
158+
target_include_directories(rccl PRIVATE /opt/rocm/hsa/include)
159+
endif()
160+
161+
if("${HIP_COMPILER}" MATCHES "hcc")
162+
target_link_libraries(rccl PRIVATE -hc-function-calls)
163+
endif()
164+
165+
if(TARGET hip::device)
166+
target_link_libraries(rccl PRIVATE hip::device)
167+
target_link_libraries(rccl INTERFACE hip::host)
168+
else()
169+
target_link_libraries(rccl PUBLIC hip::hip_hcc ${hcc_LIBRARIES} numa)
170+
endif()
171+
172+
rocm_install_targets(TARGETS
173+
rccl
174+
PREFIX
175+
rccl)
176+
install(FILES ${PROJECT_BINARY_DIR}/rccl.h
177+
DESTINATION rccl/${CMAKE_INSTALL_INCLUDEDIR})
178+
179+
rocm_export_targets(NAMESPACE
180+
roc::
181+
PREFIX
182+
rccl
183+
TARGETS
184+
rccl
185+
DEPENDS
186+
hip)
187+
188+
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_hcc")
189+
set(CPACK_RPM_PACKAGE_REQUIRES "hip_hcc")
190+
191+
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt" "/opt/rocm")
192+
193+
rocm_create_package(
194+
NAME
195+
rccl
196+
DESCRIPTION
197+
"Optimized primitives for collective multi-GPU communication"
198+
MAINTAINER
199+
200+
LDCONFIG)
201+
202+
rocm_install_symlink_subdir(rccl)
203+
204+
if(BUILD_TESTS)
205+
add_subdirectory(test)
206+
endif()

Jenkinsfile

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#!/usr/bin/env groovy
2+
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
3+
// This shared library is available at https://github.com/ROCmSoftwarePlatform/rccl
4+
@Library('rocJenkins@noDocker') _
5+
6+
// This file is for internal AMD use.
7+
// If you are interested in running your own Jenkins, please raise a github issue for assistance.
8+
9+
import com.amd.project.*
10+
import com.amd.docker.*
11+
12+
////////////////////////////////////////////////////////////////////////
13+
// Mostly generated from snippet generator 'properties; set job properties'
14+
// Time-based triggers added to execute nightly tests, eg '30 2 * * *' means 2:30 AM
15+
properties([
16+
pipelineTriggers([cron('0 1 * * *'), [$class: 'PeriodicFolderTrigger', interval: '5m']]),
17+
buildDiscarder(logRotator(
18+
artifactDaysToKeepStr: '',
19+
artifactNumToKeepStr: '',
20+
daysToKeepStr: '',
21+
numToKeepStr: '10')),
22+
disableConcurrentBuilds(),
23+
[$class: 'CopyArtifactPermissionProperty', projectNames: '*']
24+
])
25+
26+
27+
////////////////////////////////////////////////////////////////////////
28+
import java.nio.file.Path;
29+
30+
rcclCI:
31+
{
32+
33+
def rccl = new rocProject('rccl')
34+
// customize for project
35+
rccl.paths.build_command = './install.sh -t'
36+
37+
// Define test architectures, optional rocm version argument is available
38+
def nodes = new dockerNodes(['RCCL'], rccl)
39+
40+
boolean formatCheck = false
41+
42+
def compileCommand =
43+
{
44+
platform, project->
45+
46+
project.paths.construct_build_prefix()
47+
def command = """#!/usr/bin/env bash
48+
set -x
49+
cd ${project.paths.project_build_prefix}
50+
LD_LIBRARY_PATH=/opt/rocm/hcc/lib CXX=${project.compiler.compiler_path} ${project.paths.build_command}
51+
"""
52+
53+
sh command
54+
}
55+
56+
def testCommand =
57+
{
58+
platform, project->
59+
60+
def command = """#!/usr/bin/env bash
61+
set -x
62+
cd ${project.paths.project_build_prefix}/build/release/test
63+
HSA_FORCE_FINE_GRAIN_PCIE=1 ./UnitTests --gtest_output=xml --gtest_color=yes
64+
"""
65+
66+
sh command
67+
//junit "${project.paths.project_build_prefix}/build/release/*.xml"
68+
}
69+
70+
def packageCommand =
71+
{
72+
platform, project->
73+
74+
def command = """
75+
set -x
76+
cd ${project.paths.project_build_prefix}/build
77+
make package
78+
rm -rf package && mkdir -p package
79+
mv *.deb package/
80+
sudo dpkg -i package/*.deb
81+
"""
82+
83+
84+
// NOTE(review): unlike compileCommand and testCommand, this closure builds `command`
// but never passes it to `sh`, so the packaging steps are not executed — confirm this is intentional.
//platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/package/*.deb""")
85+
}
86+
87+
buildProjectNoDocker(rccl, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand)
88+
89+
}

LICENSE.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
3+
Modifications Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
34

45
Redistribution and use in source and binary forms, with or without
56
modification, are permitted provided that the following conditions

NOTICES.txt

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
Notices and Licenses file
2+
_______________________________________________________________
3+
4+
Dependencies on nvidia-nccl v2.3.7-1 (BSD3)
5+
Copyright (c) 2015-2018, NVIDIA CORPORATION.
6+
Modifications Copyright (c) 2019 Advanced Micro Devices, Inc.
7+
8+
Redistribution and use in source and binary forms, with or without
9+
modification, are permitted provided that the following conditions
10+
are met:
11+
* Redistributions of source code must retain the above copyright
12+
notice, this list of conditions and the following disclaimer.
13+
* Redistributions in binary form must reproduce the above copyright
14+
notice, this list of conditions and the following disclaimer in the
15+
documentation and/or other materials provided with the distribution.
16+
* Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
17+
Laboratory, the U.S. Department of Energy, nor the names of their
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
22+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
29+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
The U.S. Department of Energy funded the development of this software
34+
under subcontract 7078610 with Lawrence Berkeley National Laboratory.
35+
36+
37+
nvidia-nccl v2.3.7-1 (BSD2)
38+
Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
39+
40+
Redistribution and use in source and binary forms, with or without
41+
modification, are permitted provided that the following conditions
42+
are met:
43+
* Redistributions of source code must retain the above copyright
44+
notice, this list of conditions and the following disclaimer.
45+
* Redistributions in binary form must reproduce the above copyright
46+
notice, this list of conditions and the following disclaimer in the
47+
documentation and/or other materials provided with the distribution.
48+
* Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
49+
Laboratory, the U.S. Department of Energy, nor the names of their
50+
contributors may be used to endorse or promote products derived
51+
from this software without specific prior written permission.
52+
53+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
54+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
57+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
61+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
62+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
63+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64+
65+
The U.S. Department of Energy funded the development of this software
66+
under subcontract 7078610 with Lawrence Berkeley National Laboratory.

0 commit comments

Comments
 (0)