Skip to content

Commit a77e959

Browse files
authored
Avoid redundant ptx generation for maximum specified compute capability (horovod#3509)
Signed-off-by: Trevor Morris <[email protected]>
1 parent 399e9ec commit a77e959

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

cmake/build_utils.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,10 @@ def get_nvcc_flags():
109109

110110
# Build native kernels for specified compute capabilities
111111
cc_list = full_cc_list if cc_list_env is None else [int(x) for x in cc_list_env.split(',')]
112-
for cc in cc_list:
113-
default_flags += ['-gencode', 'arch=compute_{cc},code=sm_{cc}'.format(cc=cc)]
112+
cc_list = sorted(cc_list)
113+
for cc in cc_list[:-1]:
114+
default_flags += ['-gencode', 'arch=compute_{cc},code=sm_{cc}'.format(cc=cc)]
114115
# Build PTX for maximum specified compute capability
115-
default_flags += ['-gencode', 'arch=compute_{cc},code=compute_{cc}'.format(cc=max(cc_list))]
116+
default_flags += ['-gencode', 'arch=compute_{cc},code=\\"sm_{cc},compute_{cc}\\"'.format(cc=cc_list[-1])]
116117

117118
return default_flags

0 commit comments

Comments
 (0)