From fbd07ecb35feb4dd08064ceee5fe06399e911304 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Wed, 4 Dec 2024 12:45:30 -0800 Subject: [PATCH] Migrate CI to CUDA 12.4 PyTorch stopped releasing cu121 nightlies. ghstack-source-id: 39850c42c5ec0a8898a208718f35392e98a427f9 Pull Request resolved: https://github.com/pytorch/torchtitan/pull/718 --- .ci/docker/ubuntu/Dockerfile | 2 +- .github/workflows/integration_test_4gpu.yaml | 2 +- .github/workflows/integration_test_8gpu.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index ba276c299..deb69a506 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -1,6 +1,6 @@ ARG OS_VERSION -FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu${OS_VERSION} +FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu${OS_VERSION} ARG OS_VERSION diff --git a/.github/workflows/integration_test_4gpu.yaml b/.github/workflows/integration_test_4gpu.yaml index 72cdb8af3..6c5068873 100644 --- a/.github/workflows/integration_test_4gpu.yaml +++ b/.github/workflows/integration_test_4gpu.yaml @@ -37,7 +37,7 @@ jobs: pip config --user set global.progress_bar off - python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 + python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 # install torchtitan to test the files in ./scripts, currently just for memory estimation python -m pip install -e . 
diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml index 0d8c79db7..0b8f2a1f0 100644 --- a/.github/workflows/integration_test_8gpu.yaml +++ b/.github/workflows/integration_test_8gpu.yaml @@ -36,6 +36,6 @@ jobs: pip config --user set global.progress_bar off - python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 + python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 mkdir artifacts-to-be-uploaded python ./test_runner.py artifacts-to-be-uploaded --ngpu 8