@@ -99,32 +99,18 @@ run_mpi_pytest() {
  local oneccl_env=${3:-}
  oneccl_env=$(echo ${oneccl_env//:/ })

-  local exclude_keras=""
-  if [[ ${test} == *"tf2_"* ]] || [[ ${test} == *"tfhead"* ]]; then
-    # TODO: support for Keras + TF 2.0 and TF-Keras 2.0
-    exclude_keras="| sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
-  else
-    exclude_keras="| sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
-  fi
-
-  local excluded_tests="| sed 's/test_interactiverun.py//g' | sed 's/test_spark_keras.py//g' | sed 's/test_spark_torch.py//g'"
-
-  # Spark and Run test does not need to be executed with horovodrun, but we still run it below.
-  local exclude_standalone_test="| sed 's/test_spark.py//g' | sed 's/test_run.py//g' | sed 's/test_ray.py//g' | sed 's/test_ray_elastic.py//g'"
-  local standalone_tests="test_spark.py test_run.py"
-
  # pytests have 4x GPU use cases and require a separate queue
  run_test "${test}" "${queue}" \
-    ":pytest: Run PyTests (${test})" \
-    "bash -c \"${oneccl_env} cd /horovod/test && (ls -1 test_*.py ${exclude_keras} ${excluded_tests} ${exclude_standalone_test} | xargs -n 1 \\\$(cat /mpirun_command) /bin/bash /pytest.sh mpi)\"" \
+    ":pytest: MPI Parallel PyTests (${test})" \
+    "bash -c \"${oneccl_env} cd /horovod/test/parallel && (ls -1 test_*.py | xargs -n 1 \\\$(cat /mpirun_command) /bin/bash /pytest.sh mpi)\"" \
    5
  run_test "${test}" "${queue}" \
-    ":pytest: Run PyTests Standalone (${test})" \
-    "bash -c \"${oneccl_env} cd /horovod/test && pytest --forked -v --capture=fd --continue-on-collection-errors --junit-xml=/artifacts/junit.mpi.standalone.xml ${standalone_tests}\"" \
-    5
+    ":pytest: MPI Single PyTests (${test})" \
+    "bash -c \"${oneccl_env} cd /horovod/test/single && (ls -1 test_*.py | xargs -n 1 /bin/bash /pytest_standalone.sh mpi)\"" \
+    10

  run_test "${test}" "${queue}" \
-    ":pytest: Run Cluster PyTests (${test})" \
+    ":pytest: MPI Cluster PyTests (${test})" \
    "bash -c \"${oneccl_env} /etc/init.d/ssh start && cd /horovod/test/integration && pytest --forked -v --capture=fd --continue-on-collection-errors --junit-xml=/artifacts/junit.mpi.static.xml test_static_run.py\""
}

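Review note: the "Parallel PyTests" step above hands each matching test module to a separate launcher process via xargs -n 1, and the trailing number passed to run_test appears to be the per-step timeout in minutes (raised to 10 for the single-process suite). A minimal illustration of how the MPI pipeline expands, assuming a hypothetical /mpirun_command that contains "mpirun -np 2" (file names are examples only):

    # ls -1 test_*.py | xargs -n 1 $(cat /mpirun_command) /bin/bash /pytest.sh mpi
    # runs one launcher per test module in /horovod/test/parallel, e.g.:
    mpirun -np 2 /bin/bash /pytest.sh mpi test_tensorflow.py
    mpirun -np 2 /bin/bash /pytest.sh mpi test_torch.py
    # ...one invocation for every test_*.py in the directory
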
@@ -139,63 +125,63 @@ run_mpi_integration() {
    # TODO: support mpich
    run_test "${test}" "${queue}" \
      ":jupyter: Run PyTests test_interactiverun (${test})" \
-      "bash -c \"cd /horovod/test && pytest -v --capture=no --continue-on-collection-errors --junit-xml=/artifacts/junit.mpi.integration.xml test_interactiverun.py\""
+      "bash -c \"cd /horovod/test && pytest -v --capture=no --continue-on-collection-errors --junit-xml=/artifacts/junit.mpi.integration.xml integration/test_interactiverun.py\""
  fi

  # Legacy TensorFlow tests
  if [[ ${test} != *"tf2_"* ]] && [[ ${test} != *"tfhead"* ]]; then
    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow MNIST (${test})" \
+      ":tensorflow: MPI TensorFlow MNIST (${test})" \
      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/tensorflow/tensorflow_mnist.py\""

    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow Eager MNIST (${test})" \
+      ":tensorflow: MPI TensorFlow Eager MNIST (${test})" \
      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/tensorflow/tensorflow_mnist_eager.py\""

    run_test "${test}" "${queue}" \
-      ":tensorflow: Test Keras MNIST (${test})" \
+      ":tensorflow: MPI Keras MNIST (${test})" \
      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/keras/keras_mnist_advanced.py\""

    run_test "${test}" "${queue}" \
-      ":fire: Test PyTorch MNIST (${test})" \
+      ":fire: MPI PyTorch MNIST (${test})" \
      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/pytorch/pytorch_mnist.py\""
  fi

  if [[ ${test} == *"mxnet2_"* ]] || [[ ${test} == *"mxnethead"* ]]; then
    run_test "${test}" "${queue}" \
-      ":muscle: Test MXNet2 MNIST (${test})" \
+      ":muscle: MPI MXNet2 MNIST (${test})" \
      "bash -c \"${oneccl_env} OMP_NUM_THREADS=1 \\\$(cat /mpirun_command) python /horovod/examples/mxnet/mxnet2_mnist.py\""
  else
    run_test "${test}" "${queue}" \
-      ":muscle: Test MXNet MNIST (${test})" \
+      ":muscle: MPI MXNet MNIST (${test})" \
      "bash -c \"${oneccl_env} OMP_NUM_THREADS=1 \\\$(cat /mpirun_command) python /horovod/examples/mxnet/mxnet_mnist.py\""
  fi

  # tests that should be executed only with the latest release since they don't test
  # a framework-specific functionality
  if [[ ${test} == *"tf1_15_0"* ]]; then
    run_test "${test}" "${queue}" \
-      ":muscle: Test Stall (${test})" \
-      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/test/test_stall.py\""
+      ":muscle: MPI Stall (${test})" \
+      "bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/test/integration/test_stall.py\""

    if [[ ${test} == *"openmpi"* ]]; then
      run_test "${test}" "${queue}" \
-        ":terminal: Test Horovodrun (${test})" \
+        ":terminal: MPI Horovodrun (${test})" \
        "horovodrun -np 2 -H localhost:2 python /horovod/examples/tensorflow/tensorflow_mnist.py"
      run_test "${test}" "${queue}" \
-        ":terminal: Test Horovodrun (${test})" \
+        ":terminal: MPI Horovodrun (${test})" \
        "bash -c \"echo 'localhost slots=2' > hostfile && horovodrun -np 2 -hostfile hostfile python /horovod/examples/mxnet/mxnet_mnist.py\""
    fi
  fi

  # TensorFlow 2.0 tests
  if [[ ${test} == *"tf2_"* ]] || [[ ${test} == *"tfhead"* ]]; then
    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow 2.0 MNIST (${test})" \
+      ":tensorflow: MPI TensorFlow 2.0 MNIST (${test})" \
      "bash -c \"\\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_mnist.py\""

    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow 2.0 Keras MNIST (${test})" \
+      ":tensorflow: MPI TensorFlow 2.0 Keras MNIST (${test})" \
      "bash -c \"\\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py\""
  fi
}
@@ -213,33 +199,17 @@ run_gloo_pytest() {
  local test=$1
  local queue=$2

-  local exclude_keras=""
-  if [[ ${test} == *"tf2_"* ]] || [[ ${test} == *"tfhead"* ]]; then
-    # TODO: support for Keras + TF 2.0 and TF-Keras 2.0
-    exclude_keras="| sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
-  else
-    exclude_keras="| sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
-  fi
-
-  # These are tested as integration style tests.
-  local excluded_tests="| sed 's/test_interactiverun.py//g' | sed 's/test_spark_keras.py//g' | sed 's/test_spark_torch.py//g'"
-
-  # Spark and Run test does not need to be executed with horovodrun, but we still run it below.
-  local exclude_standalone_test="| sed 's/test_spark.py//g' | sed 's/test_run.py//g' | sed 's/test_ray.py//g' | sed 's/test_ray_elastic.py//g'"
-  local standalone_tests="test_spark.py test_run.py"
-  local standalone_ray_tests="test_ray.py test_ray_elastic.py"
-
  run_test "${test}" "${queue}" \
-    ":pytest: Run PyTests (${test})" \
-    "bash -c \"cd /horovod/test && (ls -1 test_*.py ${exclude_keras} ${excluded_tests} ${exclude_standalone_test} | xargs -n 1 horovodrun -np 2 -H localhost:2 --gloo /bin/bash /pytest.sh gloo)\"" \
+    ":pytest: Gloo Parallel PyTests (${test})" \
+    "bash -c \"cd /horovod/test/parallel && (ls -1 test_*.py | xargs -n 1 horovodrun -np 2 -H localhost:2 --gloo /bin/bash /pytest.sh gloo)\"" \
    5
  run_test "${test}" "${queue}" \
-    ":pytest: Run PyTests Standalone (${test})" \
-    "bash -c \"cd /horovod/test && pytest --forked -v --capture=fd --continue-on-collection-errors --junit-xml=/artifacts/junit.gloo.standalone.xml ${standalone_tests} && pytest --forked -v --capture=fd --continue-on-collection-errors --junit-xml=/artifacts/junit.gloo.standalone.xml ${standalone_ray_tests}\"" \
+    ":pytest: Gloo Single PyTests (${test})" \
+    "bash -c \"cd /horovod/test/single && (ls -1 test_*.py | xargs -n 1 /bin/bash /pytest_standalone.sh gloo)\"" \
    10

  run_test "${test}" "${queue}" \
-    ":pytest: Run Cluster PyTests (${test})" \
+    ":pytest: Gloo Cluster PyTests (${test})" \
    "bash -c \"/etc/init.d/ssh start && cd /horovod/test/integration && pytest --forked -v --capture=fd --continue-on-collection-errors --junit-xml=/artifacts/junit.gloo.static.xml test_static_run.py\""
}

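Review note: both the MPI and Gloo "Single PyTests" steps invoke a new /pytest_standalone.sh helper once per test file instead of handing pytest one long file list. That wrapper is not part of these hunks; below is a minimal sketch of what such a per-file wrapper could look like, assuming it reuses the pytest flags already seen elsewhere in this pipeline (the actual script added by the PR may differ):

    #!/bin/bash
    # hypothetical sketch of a per-file wrapper, invoked by xargs as:
    #   /bin/bash /pytest_standalone.sh <label> <test_file.py>
    set -e
    label=$1   # e.g. mpi, gloo, or spark
    file=$2    # a single test_*.py appended by xargs -n 1
    # write one junit file per module so per-file runs do not overwrite each other
    pytest --forked -v --capture=fd --continue-on-collection-errors \
      --junit-xml=/artifacts/junit.${label}.standalone.${file}.xml ${file}
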
@@ -250,33 +220,33 @@ run_gloo_integration() {
  # TensorFlow 2.0 tests
  if [[ ${test} == *"tf2_"* ]] || [[ ${test} == *"tfhead"* ]]; then
    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow 2.0 MNIST (${test})" \
+      ":tensorflow: Gloo TensorFlow 2.0 MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/tensorflow2/tensorflow2_mnist.py"

    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow 2.0 Keras MNIST (${test})" \
+      ":tensorflow: Gloo TensorFlow 2.0 Keras MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py"
  else
    run_test "${test}" "${queue}" \
-      ":tensorflow: Test TensorFlow MNIST (${test})" \
+      ":tensorflow: Gloo TensorFlow MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/tensorflow/tensorflow_mnist.py"

    run_test "${test}" "${queue}" \
-      ":tensorflow: Test Keras MNIST (${test})" \
+      ":tensorflow: Gloo Keras MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/keras/keras_mnist_advanced.py"
  fi

  run_test "${test}" "${queue}" \
-    ":fire: Test PyTorch MNIST (${test})" \
+    ":fire: Gloo PyTorch MNIST (${test})" \
    "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/pytorch/pytorch_mnist.py"

  if [[ ${test} == *"mxnet2_"* ]] || [[ ${test} == *"mxnethead"* ]]; then
    run_test "${test}" "${queue}" \
-      ":muscle: Test MXNet2 MNIST (${test})" \
+      ":muscle: Gloo MXNet2 MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/mxnet/mxnet2_mnist.py"
  else
    run_test "${test}" "${queue}" \
-      ":muscle: Test MXNet MNIST (${test})" \
+      ":muscle: Gloo MXNet MNIST (${test})" \
      "horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/mxnet/mxnet_mnist.py"
  fi

@@ -318,6 +288,12 @@ run_spark_integration() {

  # Horovod Spark Estimator tests
  if [[ ${test} != *"mpich"* && ${test} != *"oneccl"* ]]; then
+    if [[ ${queue} != *gpu* ]]; then
+      run_test "${test}" "${queue}" \
+        ":spark: Spark PyTests (${test})" \
+        "bash -c \"cd /horovod/test/integration && (ls -1 test_spark*.py | xargs -n 1 /bin/bash /pytest_standalone.sh spark)\""
+    fi
+
    if [[ ${test} != *"tf2"* && ${test} != *"tfhead"* ]]; then
      run_test "${test}" "${queue}" \
        ":spark: Spark Keras Rossmann Run (${test})" \
@@ -330,12 +306,6 @@ run_spark_integration() {
      run_test "${test}" "${queue}" \
        ":spark: Spark Keras MNIST (${test})" \
        "bash -c \"OMP_NUM_THREADS=1 python /horovod/examples/spark/keras/keras_spark_mnist.py --num-proc 2 --work-dir /work --data-dir /data --epochs 3\""
-
-      if [[ ${queue} != *gpu* ]]; then
-        run_test "${test}" "${queue}" \
-          ":spark: PyTests Spark Estimators (${test})" \
-          "bash -c \"cd /horovod/test && pytest --forked -v --capture=no --continue-on-collection-errors --junit-xml=/artifacts/junit.spark.integration.xml test_spark_keras.py test_spark_torch.py\""
-      fi
    fi

    run_test "${test}" "${queue}" \