Skip to content

Commit f93fe9b

Browse files
committed
2.3.5-5
Add support for inter-node communication using sockets and InfiniBand/RoCE. Improve latency. Add support for aggregation. Improve LL/regular tuning. Remove tests as those are now at github.com/nvidia/nccl-tests.
1 parent 286916a commit f93fe9b

File tree

132 files changed

+12422
-9413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+12422
-9413
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
22
/build
3+
*.gcov
4+
/coverage/

LICENSE.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
2+
Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
33

44
Redistribution and use in source and binary forms, with or without
55
modification, are permitted provided that the following conditions

Makefile

+27-233
Original file line numberDiff line numberDiff line change
@@ -1,236 +1,30 @@
11
#
2-
# Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
33
#
4-
# See LICENCE.txt for license information
4+
# See LICENSE.txt for license information
55
#
6-
7-
CUDA_HOME ?= /usr/local/cuda
8-
PREFIX ?= /usr/local
9-
VERBOSE ?= 0
10-
KEEP ?= 0
11-
DEBUG ?= 0
12-
PROFAPI ?= 0
13-
BUILDDIR ?= build
14-
BUILDDIR := $(abspath $(BUILDDIR))
15-
16-
CUDA_LIB ?= $(CUDA_HOME)/lib64
17-
CUDA_INC ?= $(CUDA_HOME)/include
18-
NVCC ?= $(CUDA_HOME)/bin/nvcc
19-
20-
NVCC_GENCODE ?= -gencode=arch=compute_35,code=sm_35 \
21-
-gencode=arch=compute_50,code=sm_50 \
22-
-gencode=arch=compute_52,code=sm_52 \
23-
-gencode=arch=compute_60,code=sm_60\
24-
-gencode=arch=compute_61,code=sm_61 \
25-
-gencode=arch=compute_60,code=compute_60
26-
27-
CXXFLAGS := -I$(CUDA_INC) -fPIC -fvisibility=hidden
28-
NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -maxrregcount 96
29-
# Use addprefix so that we can specify more than one path
30-
LDFLAGS := $(addprefix -L,${CUDA_LIB}) -lcudart -lrt
31-
32-
ifeq ($(DEBUG), 0)
33-
NVCUFLAGS += -O3
34-
CXXFLAGS += -O3
35-
else
36-
NVCUFLAGS += -O0 -G
37-
CXXFLAGS += -O0 -g -ggdb3
38-
endif
39-
40-
ifneq ($(VERBOSE), 0)
41-
NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra
42-
CXXFLAGS += -Wall -Wextra
43-
else
44-
.SILENT:
45-
endif
46-
47-
ifneq ($(KEEP), 0)
48-
NVCUFLAGS += -keep
49-
endif
50-
51-
ifneq ($(PROFAPI), 0)
52-
CXXFLAGS += -DPROFAPI
53-
endif
54-
55-
NCCL_MAJOR := 1
56-
NCCL_MINOR := 3
57-
NCCL_PATCH := 5
58-
CXXFLAGS += -DNCCL_MAJOR=$(NCCL_MAJOR) -DNCCL_MINOR=$(NCCL_MINOR) -DNCCL_PATCH=$(NCCL_PATCH)
59-
60-
CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev)
61-
CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1)
62-
CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2)
63-
CXXFLAGS += -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR)
64-
65-
.PHONY : all lib staticlib clean test mpitest install deb debian debclean forlib fortest forclean
66-
.DEFAULT : all
67-
68-
INCEXPORTS := nccl.h
69-
LIBSRCFILES := libwrap.cu core.cu all_gather.cu all_reduce.cu broadcast.cu reduce.cu reduce_scatter.cu
70-
LIBNAME := libnccl.so
71-
STATICLIBNAME := libnccl_static.a
72-
73-
INCDIR := $(BUILDDIR)/include
74-
LIBDIR := $(BUILDDIR)/lib
75-
OBJDIR := $(BUILDDIR)/obj
76-
77-
INCTARGETS := $(patsubst %, $(INCDIR)/%, $(INCEXPORTS))
78-
LIBSONAME := $(patsubst %,%.$(NCCL_MAJOR),$(LIBNAME))
79-
LIBTARGET := $(patsubst %,%.$(NCCL_MAJOR).$(NCCL_MINOR).$(NCCL_PATCH),$(LIBNAME))
80-
STATICLIBTARGET := $(STATICLIBNAME)
81-
LIBLINK := $(patsubst lib%.so, -l%, $(LIBNAME))
82-
LIBOBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(filter %.cu, $(LIBSRCFILES)))
83-
DEPFILES := $(patsubst %.o, %.d, $(LIBOBJ)) $(patsubst %, %.d, $(TESTBINS)) $(patsubst %, %.d, $(MPITESTBINS))
84-
85-
all : lib staticlib
86-
87-
lib : $(INCTARGETS) $(LIBDIR)/$(LIBTARGET)
88-
89-
staticlib : $(INCTARGETS) $(LIBDIR)/$(STATICLIBTARGET)
90-
91-
-include $(DEPFILES)
92-
93-
$(LIBDIR)/$(LIBTARGET) : $(LIBOBJ)
94-
@printf "Linking %-35s > %s\n" $(LIBTARGET) $@
95-
mkdir -p $(LIBDIR)
96-
$(CXX) $(CXXFLAGS) -shared -Wl,--no-as-needed -Wl,-soname,$(LIBSONAME) -o $@ $(LDFLAGS) $(LIBOBJ)
97-
ln -sf $(LIBSONAME) $(LIBDIR)/$(LIBNAME)
98-
ln -sf $(LIBTARGET) $(LIBDIR)/$(LIBSONAME)
99-
100-
$(LIBDIR)/$(STATICLIBTARGET) : $(LIBOBJ)
101-
@printf "Archiving %-35s > %s\n" $(STATICLIBTARGET) $@
102-
mkdir -p $(LIBDIR)
103-
ar cr $@ $(LIBOBJ)
104-
105-
$(INCDIR)/%.h : src/%.h
106-
@printf "Grabbing %-35s > %s\n" $< $@
107-
mkdir -p $(INCDIR)
108-
cp -f $< $@
109-
110-
$(OBJDIR)/%.o : src/%.cu
111-
@printf "Compiling %-35s > %s\n" $< $@
112-
mkdir -p $(OBJDIR)
113-
$(NVCC) -c $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -o $@
114-
@$(NVCC) -M $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< > $(@:%.o=%.d.tmp)
115-
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%.o=%.d.tmp) > $(@:%.o=%.d)
116-
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%.o=%.d.tmp) | fmt -1 | \
117-
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%.o=%.d)
118-
@rm -f $(@:%.o=%.d.tmp)
119-
120-
clean :
121-
rm -rf $(BUILDDIR)
122-
123-
install : lib
124-
mkdir -p $(PREFIX)/lib
125-
mkdir -p $(PREFIX)/include
126-
cp -P -v $(BUILDDIR)/lib/* $(PREFIX)/lib/
127-
cp -v $(BUILDDIR)/include/* $(PREFIX)/include/
128-
129-
130-
#### TESTS ####
131-
132-
TEST_ONLY ?= 0
133-
134-
# Tests depend on lib, except in TEST_ONLY mode.
135-
ifeq ($(TEST_ONLY), 0)
136-
TSTDEP = $(INCTARGETS) $(LIBDIR)/$(LIBTARGET)
137-
endif
138-
139-
NCCL_LIB ?= $(LIBDIR)
140-
NCCL_INC ?= $(INCDIR)
141-
142-
MPI_HOME ?= /usr
143-
MPI_INC ?= $(MPI_HOME)/include
144-
MPI_LIB ?= $(MPI_HOME)/lib
145-
MPIFLAGS := -I$(MPI_INC) -L$(MPI_LIB) -lmpi
146-
147-
TESTS := all_gather_test all_gather_scan \
148-
all_reduce_test all_reduce_scan \
149-
broadcast_test broadcast_scan \
150-
reduce_test reduce_scan \
151-
reduce_scatter_test reduce_scatter_scan
152-
MPITESTS := mpi_test
153-
154-
TSTINC := -I$(NCCL_INC) -Itest/include
155-
TSTLIB := -L$(NCCL_LIB) $(LIBLINK) $(LDFLAGS)
156-
TSTDIR := $(BUILDDIR)/test/single
157-
MPITSTDIR := $(BUILDDIR)/test/mpi
158-
TESTBINS := $(patsubst %, $(TSTDIR)/%, $(TESTS))
159-
MPITESTBINS:= $(patsubst %, $(MPITSTDIR)/%, $(MPITESTS))
160-
161-
test : $(TESTBINS)
162-
163-
$(TSTDIR)/% : test/single/%.cu test/include/*.h $(TSTDEP)
164-
@printf "Building %-35s > %s\n" $< $@
165-
mkdir -p $(TSTDIR)
166-
$(NVCC) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt
167-
@$(NVCC) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
168-
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
169-
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
170-
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
171-
@rm -f $(@:%=%.d.tmp)
172-
173-
mpitest : $(MPITESTBINS)
174-
175-
$(MPITSTDIR)/% : test/mpi/%.cu $(TSTDEP)
176-
@printf "Building %-35s > %s\n" $< $@
177-
mkdir -p $(MPITSTDIR)
178-
$(NVCC) $(MPIFLAGS) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcurand
179-
@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcurand > $(@:%=%.d.tmp)
180-
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
181-
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
182-
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
183-
@rm -f $(@:%=%.d.tmp)
184-
185-
#### PACKAGING ####
186-
187-
DEBIANDIR := $(BUILDDIR)/debian
188-
189-
DEBGEN_IN := $(shell (cd debian ; ls *.in))
190-
DEBGEN := $(DEBGEN_IN:.in=)
191-
DEBFILES := compat copyright libnccl-dev.install libnccl-dev.manpages nccl.7 rules $(DEBGEN)
192-
DEBTARGETS := $(patsubst %, $(DEBIANDIR)/%, $(DEBFILES))
193-
194-
DEB_REVISION ?= 1
195-
DEB_TIMESTAMP := $(shell date -R)
196-
DEB_ARCH ?= amd64
197-
198-
debian : $(DEBTARGETS)
199-
200-
deb : lib debian
201-
@printf "Building Debian package\n"
202-
(cd $(BUILDDIR); debuild -eLD_LIBRARY_PATH -uc -us -d -b)
203-
mkdir -p $(BUILDDIR)/deb/
204-
mv $(BUILDDIR)/../libnccl*.deb $(BUILDDIR)/deb/
205-
206-
debclean :
207-
rm -Rf $(DEBIANDIR)
208-
209-
$(DEBIANDIR)/% : debian/%.in
210-
@printf "Generating %-35s > %s\n" $< $@
211-
sed -e "s/\$${nccl:Major}/$(NCCL_MAJOR)/g" \
212-
-e "s/\$${nccl:Minor}/$(NCCL_MINOR)/g" \
213-
-e "s/\$${nccl:Patch}/$(NCCL_PATCH)/g" \
214-
-e "s/\$${cuda:Major}/$(CUDA_MAJOR)/g" \
215-
-e "s/\$${cuda:Minor}/$(CUDA_MINOR)/g" \
216-
-e "s/\$${deb:Revision}/$(DEB_REVISION)/g" \
217-
-e "s/\$${deb:Timestamp}/$(DEB_TIMESTAMP)/g" \
218-
-e "s/\$${deb:Arch}/$(DEB_ARCH)/g" \
219-
$< > $@
220-
221-
$(DEBIANDIR)/% : debian/%
222-
@printf "Grabbing %-35s > %s\n" $< $@
223-
mkdir -p $(DEBIANDIR)
224-
cp -f $< $@
225-
226-
#### FORTRAN BINDINGS ####
227-
228-
export NCCL_MAJOR NCCL_MINOR NCCL_PATCH CUDA_MAJOR CUDA_MINOR LIBLINK CUDA_LIB BUILDDIR
229-
230-
forlib : lib
231-
$(MAKE) -C fortran lib
232-
fortest : forlib
233-
$(MAKE) -C fortran test
234-
forclean :
235-
$(MAKE) -C fortran clean
236-
6+
.PHONY : all default clean lic
7+
8+
default : src.build
9+
BUILDDIR ?= $(abspath ./build)
10+
ABSBUILDDIR := $(abspath $(BUILDDIR))
11+
TARGETS := src pkg
12+
clean: ${TARGETS:%=%.clean}
13+
test.build: src.build
14+
LICENSE_FILES := LICENSE.txt
15+
LICENSE_TARGETS := $(LICENSE_FILES:%=$(BUILDDIR)/%)
16+
lic: $(LICENSE_TARGETS)
17+
18+
${BUILDDIR}/%.txt: %.txt
19+
@printf "Copying %-35s > %s\n" $< $@
20+
mkdir -p ${BUILDDIR}
21+
cp $< $@
22+
23+
src.%:
24+
${MAKE} -C src $* BUILDDIR=${ABSBUILDDIR}
25+
26+
pkg.%:
27+
${MAKE} -C pkg $* BUILDDIR=${ABSBUILDDIR}
28+
29+
pkg.debian.prep: lic
30+
pkg.txz.prep: lic

0 commit comments

Comments
 (0)