Skip to content

Commit ee0291b

Browse files
Add multiaccelerator H100 tests to optional GPU presubmit.
PiperOrigin-RevId: 751224072
1 parent 1c3f4fa commit ee0291b

File tree

1 file changed

+38
-3
lines changed

1 file changed

+38
-3
lines changed

.github/workflows/bazel_optional_b200.yml renamed to .github/workflows/bazel_optional_h100_b200.yml

+38 -3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: CI - Bazel Optional B200 CUDA tests
1+
name: CI - Bazel Optional H100 and B200 CUDA tests
22
on:
33
# Runs on PR if label "CI Optional GPU Presubmit" is present.
44
workflow_dispatch:
@@ -36,10 +36,10 @@ jobs:
3636
uses: google-ml-infra/actions/ci_connection@main
3737
with:
3838
halt-dispatch-input: ${{ inputs.halt-for-connection }}
39-
- name: Run Bazel CUDA Tests
39+
- name: Run Bazel single B200 CUDA Tests
4040
run: |
4141
nvidia-smi
42-
bazel test --config=ci_linux_x86_64_cuda \
42+
bazel test --config=rbe_linux_x86_64_cuda \
4343
--config=resultstore \
4444
--config=rbe_cache \
4545
--repo_env=HERMETIC_CUDA_VERSION="12.8.0" \
@@ -50,6 +50,7 @@ jobs:
5050
--test_output=errors \
5151
--test_env=JAX_ACCELERATOR_COUNT=1 \
5252
--test_env=JAX_TESTS_PER_ACCELERATOR=32 \
53+
--strategy=TestRunner=local \
5354
--local_test_jobs=32 \
5455
--test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow \
5556
--test_tag_filters=-multiaccelerator \
@@ -60,4 +61,38 @@ jobs:
6061
--color=yes \
6162
//tests:gpu_tests //tests:backend_independent_tests \
6263
//tests/pallas:gpu_tests //tests/pallas:backend_independent_tests \
64+
//tests/mosaic:gpu_tests //tests/mosaic:backend_independent_tests
65+
run_multiaccelerator_tests:
66+
if: ${{ github.event.repository.fork == false && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'CI Optional GPU Presubmit')) }}
67+
runs-on: linux-x86-a3-8g-h100-8gpu
68+
container: 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-cuda12.8-cudnn9.8:latest'
69+
name: "Bazel multiple H100 CUDA tests"
70+
steps:
71+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
72+
- name: Wait For Connection
73+
uses: google-ml-infra/actions/ci_connection@main
74+
with:
75+
halt-dispatch-input: ${{ inputs.halt-for-connection }}
76+
- name: Run Bazel multiple H100 CUDA Tests
77+
run: |
78+
nvidia-smi
79+
bazel test --config=rbe_linux_x86_64_cuda \
80+
--config=resultstore \
81+
--config=rbe_cache \
82+
--repo_env=HERMETIC_CUDA_VERSION="12.8.0" \
83+
--repo_env=HERMETIC_CUDNN_VERSION="9.8.0" \
84+
--repo_env=HERMETIC_PYTHON_VERSION="3.13" \
85+
--test_env=XLA_PYTHON_CLIENT_ALLOCATOR=platform \
86+
--test_output=errors \
87+
--strategy=TestRunner=local \
88+
--local_test_jobs=8 \
89+
--test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow \
90+
--test_tag_filters=multiaccelerator \
91+
--test_env=TF_CPP_MIN_LOG_LEVEL=0 \
92+
--test_env=JAX_SKIP_SLOW_TESTS=true \
93+
--action_env=JAX_ENABLE_X64="1" \
94+
--action_env=NCCL_DEBUG=WARN \
95+
--color=yes \
96+
//tests:gpu_tests //tests:backend_independent_tests \
97+
//tests/pallas:gpu_tests //tests/pallas:backend_independent_tests \
6398
//tests/mosaic:gpu_tests //tests/mosaic:backend_independent_tests

0 commit comments

Comments
 (0)