@@ -6,8 +6,8 @@ pipeline {
6
6
agent {
7
7
docker {
8
8
alwaysPull true
9
- // WMLCE 1.7.0 has CUDA 10.2, NCCL 2.5.6, TensorFlow 2.1.0, and PyTorch 1.8.0
10
- image 'tensorflowppc64le/tensorflow-ppc64le:osuosl-ubuntu -horovod-wlmce1.7.0 -py3.7 -ppc64le'
9
+ // Open-CE 1.4.1 has CUDA 10.2, NCCL 2.8.3, TensorFlow 2.6.0, and PyTorch 1.9.1
10
+ image 'tensorflowppc64le/tensorflow-ppc64le:osuosl-ubi7 -horovod-opence1.4.1 -py3.9 -ppc64le'
11
11
args '--cap-add=SYS_PTRACE --shm-size=256g'
12
12
label 'power8-gpu'
13
13
registryCredentialsId 'TensorFlow'
@@ -25,10 +25,9 @@ pipeline {
25
25
git submodule update --init --recursive
26
26
. ${CONDA_INIT}
27
27
conda activate ${CONDA_ENV}
28
- conda install -y cmake make
29
28
set -xe
30
- HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITHOUT_PYTORCH =1 HOROVOD_WITH_TENSORFLOW=1 \
31
- HOROVOD_CUDA_HOME=$CONDA_PREFIX HOROVOD_GPU_OPERATIONS=NCCL MAKEFLAGS="-j1" \
29
+ HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_PYTORCH =1 HOROVOD_WITH_TENSORFLOW=1 \
30
+ HOROVOD_CUDA_HOME="/usr/local/cuda" HOROVOD_GPU_OPERATIONS=NCCL \
32
31
pip install -v . --no-cache-dir --no-deps
33
32
'''
34
33
}
@@ -42,12 +41,12 @@ pipeline {
42
41
set -xe
43
42
44
43
# TensorFlow unit tests
45
- horovodrun -n 2 -H localhost:2 --mpi-args="-pami_noib" pytest -k 'not multi_gpu' -v -s test/parallel/test_tensorflow.py
44
+ horovodrun -n 2 -H localhost:2 pytest -k 'not multi_gpu' -v -s test/parallel/test_tensorflow.py
46
45
# Container has only 2 GPUs, so run the 'multi_gpu' test separately on one process
47
- horovodrun -n 1 -H localhost:1 --mpi-args="-pami_noib" pytest -k 'multi_gpu' -v -s test/parallel/test_tensorflow.py
46
+ horovodrun -n 1 -H localhost:1 pytest -k 'multi_gpu' -v -s test/parallel/test_tensorflow.py
48
47
49
48
# PyTorch unit tests
50
- # horovodrun -n 2 -H localhost:2 --mpi-args="-pami_noib" pytest -v -s test/parallel/test_torch.py
49
+ horovodrun -n 2 -H localhost:2 pytest -v -s test/parallel/test_torch.py
51
50
'''
52
51
}
53
52
}
0 commit comments