From c229994419a09ca6c198c493637960e75c65716c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 12:40:00 -0800 Subject: [PATCH 01/14] Install cuda before torch in an effort to save disk space --- docker/prod.Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docker/prod.Dockerfile b/docker/prod.Dockerfile index 0b9a99fc8..444dc1785 100644 --- a/docker/prod.Dockerfile +++ b/docker/prod.Dockerfile @@ -19,6 +19,12 @@ ENV OMNIGIBSON_ASSET_PATH /data/assets ENV GIBSON_DATASET_PATH /data/g_dataset ENV OMNIGIBSON_KEY_PATH /data/omnigibson.key +# Install cuda for compiling curobo +RUN wget -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ + sh /cuda.run --silent --toolkit && rm /cuda.run +ENV PATH=/usr/local/cuda-11.8/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH + # Install Mamba (light conda alternative) RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C / bin/micromamba ENV MAMBA_ROOT_PREFIX /micromamba @@ -30,12 +36,6 @@ RUN micromamba run -n omnigibson micromamba install \ pytorch torchvision pytorch-cuda=11.8 \ -c pytorch -c nvidia -c conda-forge -# Install cuda for compiling curobo -RUN wget -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ - sh /cuda.run --silent --toolkit && rm /cuda.run -ENV PATH=/usr/local/cuda-11.8/bin:$PATH -ENV LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH - # Install curobo. This can normally be installed when OmniGibson is pip # installed, but we need to install it beforehand here so that it doesn't # have to happen on every time a CI action is run (otherwise it's just From c08e8e21d9415d0f3d7bc2e7e78a015ceca2b1dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 12:40:14 -0800 Subject: [PATCH 02/14] Enable container build on this branch for testing --- .github/workflows/build-push-containers.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 099f02060..6b832527f 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -7,6 +7,7 @@ on: branches: - 'main' - 'og-develop' + - 'docker-cuda-install-first' jobs: docker: From f223c46848efe244ca79072553c5e3f9ad98eee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 13:40:44 -0800 Subject: [PATCH 03/14] Update build-push-containers.yml --- .github/workflows/build-push-containers.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 6b832527f..3da872329 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -49,6 +49,8 @@ jobs: name: Check disk space run: | df . -h + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 - name: Checkout uses: actions/checkout@v4 From 19dc94b99f7ec779e1b87de1e605ff88b35050e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 13:57:16 -0800 Subject: [PATCH 04/14] Remove some extra stuff --- .github/workflows/build-push-containers.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 3da872329..cd1d22787 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -23,7 +23,7 @@ jobs: sudo rm -rf \ /usr/share/dotnet /usr/local/lib/android /opt/ghc \ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ - /usr/lib/jvm || true + /usr/lib/jvm /opt/hostedtoolcache/CodeQL || true echo "some directories deleted" sudo apt install aptitude -y >/dev/null 2>&1 sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ @@ -49,8 +49,6 @@ jobs: name: Check disk space run: | df . -h - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 - name: Checkout uses: actions/checkout@v4 From 0ceda2b177897a298172376885c22956e2a20360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 14:29:41 -0800 Subject: [PATCH 05/14] Undo builds on feature branch --- .github/workflows/build-push-containers.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index cd1d22787..334f2bf47 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -7,7 +7,6 @@ on: branches: - 'main' - 'og-develop' - - 'docker-cuda-install-first' jobs: docker: From 15b987c695542b4a12a4f3258e5b018f47673172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 14:34:55 -0800 Subject: [PATCH 06/14] Update prod.Dockerfile --- docker/prod.Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker/prod.Dockerfile b/docker/prod.Dockerfile index 444dc1785..c3c08c66b 100644 --- a/docker/prod.Dockerfile +++ b/docker/prod.Dockerfile @@ -20,7 +20,7 @@ ENV GIBSON_DATASET_PATH /data/g_dataset ENV OMNIGIBSON_KEY_PATH /data/omnigibson.key # Install cuda for compiling curobo -RUN wget -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ +RUN wget --no-verbose -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ sh /cuda.run --silent --toolkit && rm /cuda.run ENV PATH=/usr/local/cuda-11.8/bin:$PATH ENV LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH @@ -42,8 +42,11 @@ RUN micromamba run -n omnigibson micromamba install \ # very slow) # Here we also compile this such that it is compatible with GPU architectures # Turing, Ampere, and Ada; which correspond to 20, 30, and 40 series GPUs. +# We also suppress the output of the installation to avoid the log limit. RUN TORCH_CUDA_ARCH_LIST='7.5;8.0;8.6+PTX' \ - micromamba run -n omnigibson pip install git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo --no-build-isolation + micromamba run -n omnigibson pip install \ + git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo \ + --no-build-isolation > /dev/null # Make sure isaac gets properly sourced every time omnigibson gets called ARG CONDA_ACT_FILE="/micromamba/envs/omnigibson/etc/conda/activate.d/env_vars.sh" From af22859c33bc04be3d0b8e7f201a26c66dc9bad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 15:26:49 -0800 Subject: [PATCH 07/14] Do cuda install, curobo build, and cuda uninstall in a single step --- docker/prod.Dockerfile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docker/prod.Dockerfile b/docker/prod.Dockerfile index c3c08c66b..94eb92e0b 100644 --- a/docker/prod.Dockerfile +++ b/docker/prod.Dockerfile @@ -19,12 +19,6 @@ ENV OMNIGIBSON_ASSET_PATH /data/assets ENV GIBSON_DATASET_PATH /data/g_dataset ENV OMNIGIBSON_KEY_PATH /data/omnigibson.key -# Install cuda for compiling curobo -RUN wget --no-verbose -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ - sh /cuda.run --silent --toolkit && rm /cuda.run -ENV PATH=/usr/local/cuda-11.8/bin:$PATH -ENV LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH - # Install Mamba (light conda alternative) RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C / bin/micromamba ENV MAMBA_ROOT_PREFIX /micromamba @@ -39,14 +33,20 @@ RUN micromamba run -n omnigibson micromamba install \ # Install curobo. This can normally be installed when OmniGibson is pip # installed, but we need to install it beforehand here so that it doesn't # have to happen on every time a CI action is run (otherwise it's just -# very slow) +# very slow). +# This also allows us to uninstall the cuda toolkit after curobo is built +# to save space (meaning curobo will not be able to be rebuilt at runtime). # Here we also compile this such that it is compatible with GPU architectures # Turing, Ampere, and Ada; which correspond to 20, 30, and 40 series GPUs. # We also suppress the output of the installation to avoid the log limit. -RUN TORCH_CUDA_ARCH_LIST='7.5;8.0;8.6+PTX' \ - micromamba run -n omnigibson pip install \ - git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo \ - --no-build-isolation > /dev/null +RUN wget --no-verbose -O /cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i /cuda-keyring.deb && rm /cuda-keyring.deb && apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y cuda-toolkit-11-8 && \ + TORCH_CUDA_ARCH_LIST='7.5;8.0;8.6+PTX' PATH=/usr/local/cuda-11.8/bin:$PATH LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH \ + micromamba run -n omnigibson pip install \ + git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo \ + --no-build-isolation > /dev/null && \ + apt-get remove -y cuda-toolkit && apt-get autoremove -y && apt-get autoclean -y && rm -rf /var/lib/apt/lists/* # Make sure isaac gets properly sourced every time omnigibson gets called ARG CONDA_ACT_FILE="/micromamba/envs/omnigibson/etc/conda/activate.d/env_vars.sh" From 2d6eca3e5920fc8159ed79d0c22222c04d98f2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 15:34:35 -0800 Subject: [PATCH 08/14] Keep running builds on branch for testing --- .github/workflows/build-push-containers.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 334f2bf47..cd1d22787 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -7,6 +7,7 @@ on: branches: - 'main' - 'og-develop' + - 'docker-cuda-install-first' jobs: docker: From 5e06cb736c8702ccd2ea171cb13c05571ee72a19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 15:57:20 -0800 Subject: [PATCH 09/14] Update prod.Dockerfile --- docker/prod.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/prod.Dockerfile b/docker/prod.Dockerfile index 94eb92e0b..7c1ad24e3 100644 --- a/docker/prod.Dockerfile +++ b/docker/prod.Dockerfile @@ -46,7 +46,7 @@ RUN wget --no-verbose -O /cuda-keyring.deb https://developer.download.nvidia.com micromamba run -n omnigibson pip install \ git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo \ --no-build-isolation > /dev/null && \ - apt-get remove -y cuda-toolkit && apt-get autoremove -y && apt-get autoclean -y && rm -rf /var/lib/apt/lists/* + apt-get remove -y cuda-toolkit-11-8 && apt-get autoremove -y && apt-get autoclean -y && rm -rf /var/lib/apt/lists/* # Make sure isaac gets properly sourced every time omnigibson gets called ARG CONDA_ACT_FILE="/micromamba/envs/omnigibson/etc/conda/activate.d/env_vars.sh" From f1a210ed8b2979df90e88260c69ef36dd3983472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 17:04:58 -0800 Subject: [PATCH 10/14] Improve caching by putting the cache on a shared tag on the registry --- .github/workflows/build-push-containers.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index cd1d22787..7122abf03 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -108,8 +108,8 @@ jobs: tags: ${{ steps.meta-prod.outputs.tags }} labels: ${{ steps.meta-prod.outputs.labels }} file: docker/prod.Dockerfile - cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop - cache-to: type=inline + cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache + cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max - name: Build and push dev image @@ -122,8 +122,8 @@ jobs: tags: ${{ steps.meta-dev.outputs.tags }} labels: ${{ steps.meta-dev.outputs.labels }} file: docker/prod.Dockerfile - cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop # OK to share cache here. - cache-to: type=inline + cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here. + cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max - name: Update vscode image Dockerfile with prod image tag run: | From afcc44e624fe053e3e6af3fde0fc5abe19c3cfb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 17:16:57 -0800 Subject: [PATCH 11/14] Add the actions image to also be built by github actions. Inception! --- .github/workflows/build-push-containers.yml | 33 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 7122abf03..f17f34112 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -98,6 +98,15 @@ jobs: tags: | type=ref,event=branch type=semver,pattern={{version}} + - + name: Metadata for actions Image + id: meta-actions + uses: docker/metadata-action@v5 + # The actions image should only be built if the push is to og-develop + if: github.ref == 'refs/heads/og-develop' + with: + images: | + stanfordvl/omnigibson-gha - name: Build and push prod image id: build-prod @@ -138,5 +147,25 @@ jobs: tags: ${{ steps.meta-vscode.outputs.tags }} labels: ${{ steps.meta-vscode.outputs.labels }} file: docker/vscode.Dockerfile - cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop # OK to share cache here. - cache-to: type=inline + cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here. + cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max + + - name: Update actions image Dockerfile with dev image tag + # The actions image should only be built if the push is to og-develop + if: github.ref == 'refs/heads/og-develop' + run: | + sed -i "s/omnigibson-dev:og-develop/omnigibson-dev@${{ steps.build-dev.outputs.digest }}/g" docker/gh-actions/Dockerfile && cat docker/gh-actions/Dockerfile + - + name: Build and push actions image + id: build-actions + uses: docker/build-push-action@v5 + # The actions image should only be built if the push is to og-develop + if: github.ref == 'refs/heads/og-develop' + with: + context: docker/gh-actions + push: true + tags: latest # here we only push from og-develop, and only to the `latest` tag + labels: ${{ steps.meta-actions.outputs.labels }} + file: docker/gh-actions/Dockerfile + cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here. + cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max \ No newline at end of file From 45b740356c5b563c76c6f78752b6c25071cd91a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 17:46:53 -0800 Subject: [PATCH 12/14] Temporarily build the actions image too for testing --- .github/workflows/build-push-containers.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index f17f34112..64e945bc0 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -103,7 +103,7 @@ jobs: id: meta-actions uses: docker/metadata-action@v5 # The actions image should only be built if the push is to og-develop - if: github.ref == 'refs/heads/og-develop' + # if: github.ref == 'refs/heads/og-develop' with: images: | stanfordvl/omnigibson-gha @@ -152,7 +152,7 @@ jobs: - name: Update actions image Dockerfile with dev image tag # The actions image should only be built if the push is to og-develop - if: github.ref == 'refs/heads/og-develop' + # if: github.ref == 'refs/heads/og-develop' run: | sed -i "s/omnigibson-dev:og-develop/omnigibson-dev@${{ steps.build-dev.outputs.digest }}/g" docker/gh-actions/Dockerfile && cat docker/gh-actions/Dockerfile - @@ -160,7 +160,7 @@ jobs: id: build-actions uses: docker/build-push-action@v5 # The actions image should only be built if the push is to og-develop - if: github.ref == 'refs/heads/og-develop' + # if: github.ref == 'refs/heads/og-develop' with: context: docker/gh-actions push: true From c67bd6e7b24fcbe07dbffe60315e67601c311d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 18:57:01 -0800 Subject: [PATCH 13/14] Update build-push-containers.yml --- .github/workflows/build-push-containers.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index 64e945bc0..a41f18444 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -107,6 +107,9 @@ jobs: with: images: | stanfordvl/omnigibson-gha + tags: | + # We only push to the latest tag for the actions image + type=raw,value=latest - name: Build and push prod image id: build-prod @@ -164,7 +167,7 @@ jobs: with: context: docker/gh-actions push: true - tags: latest # here we only push from og-develop, and only to the `latest` tag + tags: ${{ steps.meta-actions.outputs.tags }} labels: ${{ steps.meta-actions.outputs.labels }} file: docker/gh-actions/Dockerfile cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here. From 4ed25e9eae94d7668f493e6abf46e97d36351389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cem=20G=C3=B6kmen?= Date: Thu, 21 Nov 2024 19:01:16 -0800 Subject: [PATCH 14/14] Get ready to productionize --- .github/workflows/build-push-containers.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-push-containers.yml b/.github/workflows/build-push-containers.yml index a41f18444..b415fbe37 100644 --- a/.github/workflows/build-push-containers.yml +++ b/.github/workflows/build-push-containers.yml @@ -7,7 +7,6 @@ on: branches: - 'main' - 'og-develop' - - 'docker-cuda-install-first' jobs: docker: @@ -103,7 +102,7 @@ jobs: id: meta-actions uses: docker/metadata-action@v5 # The actions image should only be built if the push is to og-develop - # if: github.ref == 'refs/heads/og-develop' + if: github.ref == 'refs/heads/og-develop' with: images: | stanfordvl/omnigibson-gha @@ -155,7 +154,7 @@ jobs: - name: Update actions image Dockerfile with dev image tag # The actions image should only be built if the push is to og-develop - # if: github.ref == 'refs/heads/og-develop' + if: github.ref == 'refs/heads/og-develop' run: | sed -i "s/omnigibson-dev:og-develop/omnigibson-dev@${{ steps.build-dev.outputs.digest }}/g" docker/gh-actions/Dockerfile && cat docker/gh-actions/Dockerfile - @@ -163,7 +162,7 @@ jobs: id: build-actions uses: docker/build-push-action@v5 # The actions image should only be built if the push is to og-develop - # if: github.ref == 'refs/heads/og-develop' + if: github.ref == 'refs/heads/og-develop' with: context: docker/gh-actions push: true