From db7a111d85b29d41462b349c399f59b95e9151f0 Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 6 Oct 2025 14:44:52 +0200
Subject: [PATCH 1/9] Add support for sm120 (Blackwell / NVIDIA RTX 5090 GPU
 support)

---
 Dockerfile-cuda                    | 5 ++++-
 Dockerfile-cuda-all                | 2 +-
 backends/candle/src/compute_cap.rs | 1 +
 backends/candle/src/flash_attn.rs  | 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Dockerfile-cuda b/Dockerfile-cuda
index 537ad59f..bba1c1b5 100644
--- a/Dockerfile-cuda
+++ b/Dockerfile-cuda
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
+FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS base-builder
 
 ENV SCCACHE=0.10.0
 ENV RUSTC_WRAPPER=/usr/local/bin/sccache
@@ -58,6 +58,9 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
     elif [ ${CUDA_COMPUTE_CAP} -eq 90 ]; \
     then \
         nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
+    elif [ ${CUDA_COMPUTE_CAP} -eq 120 ]; \
+    then \
+        nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
     else \
         echo "cuda compute cap ${CUDA_COMPUTE_CAP} is not supported"; exit 1; \
     fi;

diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all
index 5dca432a..47298b71 100644
--- a/Dockerfile-cuda-all
+++ b/Dockerfile-cuda-all
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
+FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS base-builder
 
 ENV SCCACHE=0.10.0
 ENV RUSTC_WRAPPER=/usr/local/bin/sccache

diff --git a/backends/candle/src/compute_cap.rs b/backends/candle/src/compute_cap.rs
index ac79fcf1..c4a7802a 100644
--- a/backends/candle/src/compute_cap.rs
+++ b/backends/candle/src/compute_cap.rs
@@ -30,6 +30,7 @@ fn compute_cap_matching(runtime_compute_cap: usize, compile_compute_cap: usize)
         (86..=89, 80..=86) => true,
         (89, 89) => true,
         (90, 90) => true,
+        (120, 120) => true,
         (_, _) => false,
     }
 }

diff --git a/backends/candle/src/flash_attn.rs b/backends/candle/src/flash_attn.rs
index 8dbe58cf..f1b69c72 100644
--- a/backends/candle/src/flash_attn.rs
+++ b/backends/candle/src/flash_attn.rs
@@ -61,7 +61,7 @@ pub(crate) fn flash_attn_varlen(
         }
         #[cfg(not(feature = "flash-attn-v1"))]
         candle::bail!("Flash attention v1 is not installed. Use `flash-attn-v1` feature.")
-    } else if (80..90).contains(&runtime_compute_cap) || runtime_compute_cap == 90 {
+    } else if (80..90).contains(&runtime_compute_cap) || runtime_compute_cap == 90 || runtime_compute_cap == 120 {
         #[cfg(feature = "flash-attn")]
         {
             use candle_flash_attn::{flash_attn_varlen_alibi_windowed, flash_attn_varlen_windowed};

From 6c78e75bfb5ef4503f3b49e0885f3174644da489 Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 6 Oct 2025 14:48:25 +0200
Subject: [PATCH 2/9] Update docs

---
 README.md                          | 3 +++
 docs/source/en/custom_container.md | 1 +
 2 files changed, 4 insertions(+)

diff --git a/README.md b/README.md
index e9e98d0b..eeb55ed7 100644
--- a/README.md
+++ b/README.md
@@ -581,6 +581,9 @@ runtime_compute_cap=89
 # Example for H100
 runtime_compute_cap=90
 
+# Example for Nvidia GTX 5090
+runtime_compute_cap=120
+
 docker build .
-f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
 ```

diff --git a/docs/source/en/custom_container.md b/docs/source/en/custom_container.md
index c670026c..8b85262e 100644
--- a/docs/source/en/custom_container.md
+++ b/docs/source/en/custom_container.md
@@ -32,6 +32,7 @@ the examples of runtime compute capabilities for various GPU types:
 - A10 - `runtime_compute_cap=86`
 - Ada Lovelace (RTX 4000 series, ...) - `runtime_compute_cap=89`
 - H100 - `runtime_compute_cap=90`
+- Blackwell (RTX 5000 series, ...) - `runtime_compute_cap=120`
 
 Once you have determined the compute capability, set it as the `runtime_compute_cap` variable and build
 the container as shown in the example below:

From f4eeffd4a4e05fde33abf61801ecba959a66079f Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 6 Oct 2025 14:48:36 +0200
Subject: [PATCH 3/9] Update compute cap test

---
 backends/candle/src/compute_cap.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/candle/src/compute_cap.rs b/backends/candle/src/compute_cap.rs
index c4a7802a..56978f5d 100644
--- a/backends/candle/src/compute_cap.rs
+++ b/backends/candle/src/compute_cap.rs
@@ -55,6 +55,7 @@ mod tests {
         assert!(compute_cap_matching(86, 86));
         assert!(compute_cap_matching(89, 89));
         assert!(compute_cap_matching(90, 90));
+        assert!(compute_cap_matching(120, 120));
 
         assert!(compute_cap_matching(86, 80));
         assert!(compute_cap_matching(89, 80));

From d79baf5c07b7ea5ac716fb1efc43c9f44eeeb4e3 Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 6 Oct 2025 14:49:23 +0200
Subject: [PATCH 4/9] Ensure that the runtime compute cap 120 doc string is
 uniform with the rest of the related information

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index eeb55ed7..71c49156 100644
--- a/README.md
+++ b/README.md
@@ -581,7 +581,7 @@ runtime_compute_cap=89
 # Example for H100
 runtime_compute_cap=90
 
-# Example for Nvidia GTX 5090
+# Example for Blackwell (RTX 5000 series, ...)
 runtime_compute_cap=120
 
 docker build .
-f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap From 8f4b6c8170d285949a83613c04cf42de2c30688c Mon Sep 17 00:00:00 2001 From: Daniele Salvatore Albano Date: Mon, 6 Oct 2025 15:00:45 +0200 Subject: [PATCH 5/9] Add support for sm120 in Dockerfile-cuda-all --- Dockerfile-cuda-all | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 47298b71..d3908112 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -85,6 +85,15 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ fi; +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ + then \ + CUDA_COMPUTE_CAP=120 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ + else \ + CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ + fi; + COPY backends backends COPY core core COPY router router @@ -122,9 +131,18 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ fi; +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ + then \ + CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ + else \ + CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ + fi; + RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 -FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base +FROM nvidia/cuda:12.9.0-runtime-ubuntu22.04 AS base ARG DEFAULT_USE_FLASH_ATTENTION=True From a6f1bad07613f7a14da76b61fa0c2f5059823a33 Mon Sep 17 00:00:00 2001 From: Daniele Salvatore Albano Date: Mon, 13 Oct 2025 10:44:56 +0200 Subject: [PATCH 6/9] Revert changes to Dockerfile-cuda and Dockerfile-cuda-all --- Dockerfile-cuda | 5 +---- Dockerfile-cuda-all | 22 ++-------------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/Dockerfile-cuda b/Dockerfile-cuda index bba1c1b5..537ad59f 100644 --- a/Dockerfile-cuda +++ b/Dockerfile-cuda @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS base-builder +FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder ENV SCCACHE=0.10.0 ENV RUSTC_WRAPPER=/usr/local/bin/sccache @@ -58,9 +58,6 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ elif [ ${CUDA_COMPUTE_CAP} -eq 90 ]; \ then \ nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ - elif [ ${CUDA_COMPUTE_CAP} -eq 120 ]; \ - then \ - nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ else \ echo "cuda compute cap ${CUDA_COMPUTE_CAP} is not supported"; exit 1; \ fi; diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index d3908112..5dca432a 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS base-builder +FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS 
base-builder
 
 ENV SCCACHE=0.10.0
 ENV RUSTC_WRAPPER=/usr/local/bin/sccache
@@ -85,15 +85,6 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
         CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
     fi;
 
-RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
-    --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
-    if [ $VERTEX = "true" ]; \
-    then \
-        CUDA_COMPUTE_CAP=120 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
-    else \
-        CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
-    fi;
-
 COPY backends backends
 COPY core core
 COPY router router
@@ -131,18 +122,9 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
         CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
     fi;
 
-RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
-    --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
-    if [ $VERTEX = "true" ]; \
-    then \
-        CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
-    else \
-        CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
-    fi;
-
 RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90
 
-FROM nvidia/cuda:12.9.0-runtime-ubuntu22.04 AS base
+FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base
 
 ARG DEFAULT_USE_FLASH_ATTENTION=True

From 32c60396d803173530f74edb1e805132459bab85 Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 13 Oct 2025 10:45:46 +0200
Subject: [PATCH 7/9] Add a Dockerfile to build the CUDA Blackwell support

---
 Dockerfile-cuda-blackwell | 145 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 Dockerfile-cuda-blackwell

diff --git a/Dockerfile-cuda-blackwell b/Dockerfile-cuda-blackwell
new file mode 100644
index 00000000..8614df59
--- /dev/null
+++ b/Dockerfile-cuda-blackwell
@@ -0,0 +1,145 @@
+FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 AS base-builder
+
+ENV SCCACHE=0.10.0
+ENV RUSTC_WRAPPER=/usr/local/bin/sccache
+ENV PATH="/root/.cargo/bin:${PATH}"
+# aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.85-bookworm`
+ENV CARGO_CHEF=0.1.71
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    curl \
+    libssl-dev \
+    pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download and configure sccache
+RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
+    chmod +x /usr/local/bin/sccache
+
+RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
+RUN cargo install cargo-chef --version $CARGO_CHEF --locked
+
+FROM base-builder AS planner
+
+WORKDIR /usr/src
+
+COPY backends backends
+COPY core core
+COPY router router
+COPY Cargo.toml ./
+COPY Cargo.lock ./
+
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM base-builder AS builder
+
+ARG CUDA_COMPUTE_CAP=80
+ARG GIT_SHA
+ARG DOCKER_LABEL
+
+# Limit parallelism
+ARG RAYON_NUM_THREADS
+ARG CARGO_BUILD_JOBS
+ARG CARGO_BUILD_INCREMENTAL
+
+# sccache specific variables
+ARG SCCACHE_GHA_ENABLED
+
+WORKDIR /usr/src
+
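+# Prune the static cuBLAS library to the architecture requested at build time:
+# nvprune keeps only the matching device code (sm_120 for Blackwell GPUs such
+# as the RTX 5090), which keeps the statically linked router binary small.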
+RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ + then \ + nvprune --generate-code code=sm_${CUDA_COMPUTE_CAP} /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ + elif [ ${CUDA_COMPUTE_CAP} -ge 80 -a ${CUDA_COMPUTE_CAP} -lt 90 ]; \ + then \ + nvprune --generate-code code=sm_80 --generate-code code=sm_${CUDA_COMPUTE_CAP} /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ + elif [ ${CUDA_COMPUTE_CAP} -eq 90 ]; \ + then \ + nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ + elif [ ${CUDA_COMPUTE_CAP} -eq 120 ]; \ + then \ + nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ + else \ + echo "cuda compute cap ${CUDA_COMPUTE_CAP} is not supported"; exit 1; \ + fi; + +COPY --from=planner /usr/src/recipe.json recipe.json + +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ + then \ + cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s; \ + else \ + cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s; \ + fi; + +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml ./ +COPY Cargo.lock ./ + +FROM builder AS http-builder + +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ + then \ + cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F http --no-default-features && sccache -s; \ + else \ + cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && sccache -s; \ + fi; + +FROM builder AS grpc-builder + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ + curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ + unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ + unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ + rm -f $PROTOC_ZIP + +COPY proto proto + +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ + then \ + cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F grpc --no-default-features && sccache -s; \ + else \ + cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F grpc --no-default-features && sccache -s; \ + fi; + +FROM nvidia/cuda:12.9.0-base-ubuntu22.04 AS base + +ARG DEFAULT_USE_FLASH_ATTENTION=True + +ENV HUGGINGFACE_HUB_CACHE=/data \ + PORT=80 \ + USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install 
-y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+FROM base AS grpc
+
+COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
+
+ENTRYPOINT ["text-embeddings-router"]
+CMD ["--json-output"]
+
+FROM base
+
+COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
+
+ENTRYPOINT ["text-embeddings-router"]
+CMD ["--json-output"]

From 448531408388e3e23abc66c26b56e9a0a084eb2e Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 13 Oct 2025 10:51:47 +0200
Subject: [PATCH 8/9] Update README.md

---
 README.md | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 71c49156..13a0d2dc 100644
--- a/README.md
+++ b/README.md
@@ -557,6 +557,8 @@ You can build the CPU container with:
 docker build .
 ```
 
+### CUDA - Pre-Blackwell architectures
+
 To build the CUDA containers, you need to know the compute cap of the GPU you
 will be using at runtime.
 
@@ -581,10 +583,40 @@ runtime_compute_cap=89
 # Example for H100
 runtime_compute_cap=90
 
+docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
+```
+
+### CUDA - Blackwell architecture
+
+To build the CUDA containers for the Blackwell architecture, CUDA 12.9 is required: use the dedicated `Dockerfile-cuda-blackwell`
+and set the compute cap to 120.
+This Dockerfile can still be used to build for previous architectures.
+
+Commands to build the container:
+
+```shell
+# Get submodule dependencies
+git submodule update --init
+
+# Example for Turing (T4, RTX 2000 series, ...)
+runtime_compute_cap=75
+
+# Example for A100
+runtime_compute_cap=80
+
+# Example for A10
+runtime_compute_cap=86
+
+# Example for Ada Lovelace (RTX 4000 series, ...)
+runtime_compute_cap=89
+
+# Example for H100
+runtime_compute_cap=90
+
 # Example for Blackwell (RTX 5000 series, ...)
 runtime_compute_cap=120
 
-docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
+docker build . -f Dockerfile-cuda-blackwell --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
 ```
 
 ### Apple M1/M2 arm64 architectures

From c40661962348ce3b62b0f7534c663d167740c7be Mon Sep 17 00:00:00 2001
From: Daniele Salvatore Albano
Date: Mon, 13 Oct 2025 10:53:55 +0200
Subject: [PATCH 9/9] Update custom_container.md docs

---
 docs/source/en/custom_container.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/custom_container.md b/docs/source/en/custom_container.md
index 8b85262e..bbdbae27 100644
--- a/docs/source/en/custom_container.md
+++ b/docs/source/en/custom_container.md
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
 
 # Build a custom container for TEI
 
-You can build our own CPU or CUDA TEI container using Docker. To build a CPU container, run the following command in the
+You can build your own CPU or CUDA TEI container using Docker. To build a CPU container, run the following command in the
 directory containing your custom Dockerfile:
 
 ```shell
@@ -35,7 +35,8 @@ the examples of runtime compute capabilities for various GPU types:
 - Blackwell (RTX 5000 series, ...)
- `runtime_compute_cap=120`
 
 Once you have determined the compute capability, set it as the `runtime_compute_cap` variable and build
-the container as shown in the example below:
+the container using `Dockerfile-cuda` if the runtime compute cap is lower than 120; otherwise, use
+`Dockerfile-cuda-blackwell`, as shown in the example below:
 
 ```shell
 # Get submodule dependencies