cuda for arm64

separate base image; fix name; make sure onnxruntime-gpu is installed; use cuda again; add targetarch to build stage
immich-app · Sep 8, 2024 · f5b73bb · f5b73bb
1 parent d1ce9e4
commit f5b73bb
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 17 deletions.
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -108,7 +108,7 @@ jobs:
           - platforms: linux/amd64,linux/arm64
             device: cpu
 
-          - platforms: linux/amd64
+          - platforms: linux/amd64,linux/arm64
             device: cuda
             suffix: -cuda
 

diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile
@@ -17,7 +17,7 @@ RUN mkdir /opt/armnn && \
 
 FROM builder-${DEVICE} AS builder
 
-ARG DEVICE
+ARG DEVICE TARGETARCH
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     PIP_NO_CACHE_DIR=true \
@@ -32,7 +32,11 @@ RUN poetry config installer.max-workers 10 && \
 RUN python3 -m venv /opt/venv
 
 COPY poetry.lock pyproject.toml ./
-RUN poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev
+RUN if [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; then \
+    # hack to work around poetry not setting the right filename for the wheel https://github.com/python-poetry/poetry/issues/4472
+    wget -q -O onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl https://nvidia.box.com/shared/static/fy55jvniujjbigr4gwkv8z1ma6ipgspg.whl; fi && \
+    poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev && \
+    if [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; then rm onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl; fi
 
 FROM python:3.11-slim-bookworm@sha256:ed4e985674f478c90ce879e9aa224fbb772c84e39b4aed5155b9e2280f131039 AS prod-cpu
 
@@ -49,7 +53,20 @@ RUN apt-get update && \
     apt-get remove wget -yqq && \
     rm -rf /var/lib/apt/lists/*
 
-FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04@sha256:fa44193567d1908f7ca1f3abf8623ce9c63bc8cba7bcfdb32702eb04d326f7a8 AS prod-cuda
+FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS prod-cuda-amd64
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends libcudnn9-cuda-12 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS prod-cuda-arm64
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends libcudnn8 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12/compat:$LD_LIBRARY_PATH
+
+FROM prod-cuda-${TARGETARCH} AS prod-cuda
 
 COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
 COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
@@ -76,10 +93,10 @@ COPY --from=builder-armnn \
     /opt/armnn/
 
 FROM prod-${DEVICE} AS prod
-ARG DEVICE
+ARG DEVICE TARGETARCH
 
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends tini $(if ! [ "$DEVICE" = "openvino" ]; then echo "libmimalloc2.0"; fi) && \
+    apt-get install -y --no-install-recommends tini $(if ! { [ "$DEVICE" = "openvino" ] || { [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; }; }; then echo "libmimalloc2.0"; fi) && \
     apt-get autoremove -yqq && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

diff --git a/machine-learning/poetry.lock b/machine-learning/poetry.lock
diff --git a/machine-learning/pyproject.toml b/machine-learning/pyproject.toml
@@ -4,15 +4,15 @@ version = "1.114.0"
 description = ""
 authors = ["Hau Tran <[email protected]>"]
 readme = "README.md"
-packages = [{include = "app"}]
+packages = [{ include = "app" }]
 
 [tool.poetry.dependencies]
 python = ">=3.10,<4.0"
 insightface = ">=0.7.3,<1.0"
 opencv-python-headless = ">=4.7.0.72,<5.0"
 pillow = ">=9.5.0,<11.0"
 fastapi-slim = ">=0.95.2,<1.0"
-uvicorn = {extras = ["standard"], version = ">=0.22.0,<1.0"}
+uvicorn = { extras = ["standard"], version = ">=0.22.0,<1.0" }
 pydantic = "^1.10.8"
 aiocache = ">=0.12.1,<1.0"
 rich = ">=13.4.2"
@@ -45,7 +45,10 @@ onnxruntime = "^1.15.0"
 optional = true
 
 [tool.poetry.group.cuda.dependencies]
-onnxruntime-gpu = {version = "^1.17.0", source = "cuda12"}
+onnxruntime-gpu = [
+    { version = "^1.17.0", source = "cuda12", markers = "platform_machine == 'x86_64'" },
+    { python = "3.11", path = "onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl", markers = "platform_machine == 'aarch64'" }
+]
 
 [tool.poetry.group.openvino]
 optional = true

diff --git a/machine-learning/start.sh b/machine-learning/start.sh
@@ -1,19 +1,26 @@
 #!/usr/bin/env sh
 
-lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2"
 # mimalloc seems to increase memory usage dramatically with openvino, need to investigate
-if ! [ "$DEVICE" = "openvino" ]; then
+mimalloc="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2"
+if [ -f "$mimalloc" ]; then
+	export LD_PRELOAD="$mimalloc"
+fi
+
+if { [ "$DEVICE" = "cuda" ] && [ "$(arch)" = "aarch64" ]; }; then
+	lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2"
 	export LD_PRELOAD="$lib_path"
-	export LD_BIND_NOW=1
-	: "${MACHINE_LEARNING_WORKER_TIMEOUT:=120}"
-else
-	: "${MACHINE_LEARNING_WORKER_TIMEOUT:=300}"
 fi
+export LD_BIND_NOW=1
 
 : "${IMMICH_HOST:=[::]}"
 : "${IMMICH_PORT:=3003}"
 : "${MACHINE_LEARNING_WORKERS:=1}"
 : "${MACHINE_LEARNING_HTTP_KEEPALIVE_TIMEOUT_S:=2}"
+if [ "$DEVICE" = "openvino" ]; then
+	: "${MACHINE_LEARNING_WORKER_TIMEOUT:=300}"
+else
+	: "${MACHINE_LEARNING_WORKER_TIMEOUT:=120}"
+fi
 
 gunicorn app.main:app \
 	-k app.config.CustomUvicornWorker \