1
- name : CI - Bazel Optional B200 CUDA tests
1
+ name : CI - Bazel Optional H100 and B200 CUDA tests
2
2
on :
3
3
# Runs on PR if label "CI Optional GPU Presubmit" is present.
4
4
workflow_dispatch :
@@ -36,10 +36,10 @@ jobs:
36
36
uses : google-ml-infra/actions/ci_connection@main
37
37
with :
38
38
halt-dispatch-input : ${{ inputs.halt-for-connection }}
39
- - name : Run Bazel CUDA Tests
39
+ - name : Run Bazel single B200 CUDA Tests
40
40
run : |
41
41
nvidia-smi
42
- bazel test --config=ci_linux_x86_64_cuda \
42
+ bazel test --config=rbe_linux_x86_64_cuda \
43
43
--config=resultstore \
44
44
--config=rbe_cache \
45
45
--repo_env=HERMETIC_CUDA_VERSION="12.8.0" \
50
50
--test_output=errors \
51
51
--test_env=JAX_ACCELERATOR_COUNT=1 \
52
52
--test_env=JAX_TESTS_PER_ACCELERATOR=32 \
53
+ --strategy=TestRunner=local \
53
54
--local_test_jobs=32 \
54
55
--test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow \
55
56
--test_tag_filters=-multiaccelerator \
60
61
--color=yes \
61
62
//tests:gpu_tests //tests:backend_independent_tests \
62
63
//tests/pallas:gpu_tests //tests/pallas:backend_independent_tests \
64
+ //tests/mosaic:gpu_tests //tests/mosaic:backend_independent_tests
65
+ run_multiaccelerator_tests :
66
+ if : ${{ github.event.repository.fork == false && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'CI Optional GPU Presubmit')) }}
67
+ runs-on : linux-x86-a3-8g-h100-8gpu
68
+ container : ' us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-cuda12.8-cudnn9.8:latest'
69
+ name : " Bazel multiple H100 CUDA tests"
70
+ steps :
71
+ - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
72
+ - name : Wait For Connection
73
+ uses : google-ml-infra/actions/ci_connection@main
74
+ with :
75
+ halt-dispatch-input : ${{ inputs.halt-for-connection }}
76
+ - name : Run Bazel multiple H100 CUDA Tests
77
+ run : |
78
+ nvidia-smi
79
+ bazel test --config=rbe_linux_x86_64_cuda \
80
+ --config=resultstore \
81
+ --config=rbe_cache \
82
+ --repo_env=HERMETIC_CUDA_VERSION="12.8.0" \
83
+ --repo_env=HERMETIC_CUDNN_VERSION="9.8.0" \
84
+ --repo_env=HERMETIC_PYTHON_VERSION="3.13" \
85
+ --test_env=XLA_PYTHON_CLIENT_ALLOCATOR=platform \
86
+ --test_output=errors \
87
+ --strategy=TestRunner=local \
88
+ --local_test_jobs=8 \
89
+ --test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow \
90
+ --test_tag_filters=multiaccelerator \
91
+ --test_env=TF_CPP_MIN_LOG_LEVEL=0 \
92
+ --test_env=JAX_SKIP_SLOW_TESTS=true \
93
+ --action_env=JAX_ENABLE_X64="1" \
94
+ --action_env=NCCL_DEBUG=WARN \
95
+ --color=yes \
96
+ //tests:gpu_tests //tests:backend_independent_tests \
97
+ //tests/pallas:gpu_tests //tests/pallas:backend_independent_tests \
63
98
//tests/mosaic:gpu_tests //tests/mosaic:backend_independent_tests
0 commit comments