# Review notes. Free text ahead of the first "diff --git" header; the patch text below is unchanged.
#
# Purpose: add a linux/arm64 variant of the CUDA machine-learning image.
#
# .github/workflows/docker.yml
#   - The cuda matrix entry lists linux/amd64,linux/arm64 instead of linux/amd64 only.
#
# machine-learning/Dockerfile
#   - TARGETARCH is declared next to DEVICE in the builder and prod stages so RUN steps can branch on it.
#   - For DEVICE=cuda on arm64 the builder downloads onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl
#     before `poetry install` and removes it afterwards, all inside one RUN. The file name matches the path
#     recorded in pyproject.toml and poetry.lock (workaround for the linked poetry issue 4472).
#   - prod-cuda is now chosen through prod-cuda-${TARGETARCH}: the amd64 stage installs libcudnn9-cuda-12,
#     the arm64 stage installs libcudnn8 and prepends /usr/local/cuda-12/compat to LD_LIBRARY_PATH.
#   - libmimalloc2.0 is skipped for openvino and, newly, for cuda on arm64.
#   - NOTE(review): the previous prod-cuda base was pinned by sha256 digest; both new
#     nvidia/cuda:12.2.2-runtime-ubuntu22.04 bases are tag-only. Consider pinning them again.
#   - NOTE(review): the CUDA base tag also moves from 12.3.2 to 12.2.2 for amd64 - confirm this is intended.
#
# machine-learning/pyproject.toml and poetry.lock
#   - onnxruntime-gpu becomes a two-entry constraint: ^1.17.0 from source "cuda12" when platform_machine is
#     x86_64, and the local wheel path (python 3.11) when platform_machine is aarch64.
#   - poetry.lock gains a file-sourced onnxruntime-gpu 1.18.0 entry with a sha256 hash; content-hash is replaced.
#   - The `packages` and `uvicorn` edits only add spaces inside the inline tables.
#   - NOTE(review): the lock hunk also drops the PyYAML-6.0.1 cp312 manylinux aarch64 wheel entry, which
#     looks unrelated to this change - confirm it is not an accidental re-lock artifact.
#
# machine-learning/start.sh
#   - LD_PRELOAD is set to libmimalloc.so.2 only when that file exists.
#   - NOTE(review): the cuda/aarch64 branch that follows sets LD_PRELOAD to the same path again without the
#     -f check, while the Dockerfile skips libmimalloc2.0 for exactly that combination. This looks
#     inconsistent with the guard above - confirm intent.
#   - LD_BIND_NOW=1 is now exported unconditionally; before it was exported only when DEVICE was not openvino.
#   - MACHINE_LEARNING_WORKER_TIMEOUT keeps its defaults (300 for openvino, 120 otherwise) but is now set
#     after the other defaults.
#   - NOTE(review): the retained "mimalloc ... with openvino" comment no longer sits above a DEVICE check;
#     presumably openvino now avoids mimalloc only because the library is not installed - verify.
#
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 8a2ba9f841434..f7f2c110ef186 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -108,7 +108,7 @@ jobs: - platforms: linux/amd64,linux/arm64 device: cpu - - platforms: linux/amd64 + - platforms: linux/amd64,linux/arm64 device: cuda suffix: -cuda diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile index 12fb183c953d4..348b418e4d86b 100644 --- a/machine-learning/Dockerfile +++ b/machine-learning/Dockerfile @@ -17,7 +17,7 @@ RUN mkdir /opt/armnn && \ FROM builder-${DEVICE} AS builder -ARG DEVICE +ARG DEVICE TARGETARCH ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ PIP_NO_CACHE_DIR=true \ @@ -32,7 +32,11 @@ RUN poetry config installer.max-workers 10 && \ RUN python3 -m venv /opt/venv COPY poetry.lock pyproject.toml ./ -RUN poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev +RUN if [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; then \ + # hack to work around poetry not setting the right filename for the wheel https://github.com/python-poetry/poetry/issues/4472 + wget -q -O onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl https://nvidia.box.com/shared/static/fy55jvniujjbigr4gwkv8z1ma6ipgspg.whl; fi && \ + poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev && \ + if [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; then rm onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl; fi FROM python:3.11-slim-bookworm@sha256:ed4e985674f478c90ce879e9aa224fbb772c84e39b4aed5155b9e2280f131039 AS prod-cpu @@ -49,7 +53,20 @@ RUN apt-get update && \ apt-get remove wget -yqq && \ rm -rf /var/lib/apt/lists/* -FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04@sha256:fa44193567d1908f7ca1f3abf8623ce9c63bc8cba7bcfdb32702eb04d326f7a8 AS prod-cuda +FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS prod-cuda-amd64 +RUN apt-get update && \ + apt-get install -y 
--no-install-recommends libcudnn9-cuda-12 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS prod-cuda-arm64 +RUN apt-get update && \ + apt-get install -y --no-install-recommends libcudnn8 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* +ENV LD_LIBRARY_PATH=/usr/local/cuda-12/compat:$LD_LIBRARY_PATH + +FROM prod-cuda-${TARGETARCH} AS prod-cuda COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3 COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11 @@ -76,10 +93,10 @@ COPY --from=builder-armnn \ /opt/armnn/ FROM prod-${DEVICE} AS prod -ARG DEVICE +ARG DEVICE TARGETARCH RUN apt-get update && \ - apt-get install -y --no-install-recommends tini $(if ! [ "$DEVICE" = "openvino" ]; then echo "libmimalloc2.0"; fi) && \ + apt-get install -y --no-install-recommends tini $(if ! { [ "$DEVICE" = "openvino" ] || { [ "$DEVICE" = "cuda" ] && [ "$TARGETARCH" = "arm64" ]; }; }; then echo "libmimalloc2.0"; fi) && \ apt-get autoremove -yqq && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/machine-learning/poetry.lock b/machine-learning/poetry.lock index bd09bd8469e67..13e9f35c5420c 100644 --- a/machine-learning/poetry.lock +++ b/machine-learning/poetry.lock @@ -2003,6 +2003,28 @@ packaging = "*" protobuf = "*" sympy = "*" +[[package]] +name = "onnxruntime-gpu" +version = "1.18.0" +description = "ONNX Runtime is a runtime accelerator for Machine Learning models" +optional = false +python-versions = "*" +files = [ + {file = "onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl", hash = "sha256:7bdd6c373611235e43c8707fa528539327ff17a969448adf956ddf177d5fc8e7"}, +] + +[package.dependencies] +coloredlogs = "*" +flatbuffers = "*" +numpy = ">=1.26.4" +packaging = "*" +protobuf = "*" +sympy = "*" + +[package.source] +type = "file" +url = "onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl" + [[package]] name = "onnxruntime-gpu" version = "1.18.1" @@ -2636,7 +2658,6 
@@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3607,4 +3628,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "b2b053886ca1dd3a3305c63caf155b1976dfc4066f72f5d1ecfc42099db34aab" +content-hash = "87e047b423bdf74fa1825e509d6a31d2bac57e8e7d4419cfefcc86aa98b139a2" diff --git a/machine-learning/pyproject.toml b/machine-learning/pyproject.toml index a69fb33a8d50e..c33868f13809e 100644 --- a/machine-learning/pyproject.toml +++ b/machine-learning/pyproject.toml @@ -4,7 +4,7 @@ version = "1.114.0" description = "" authors = ["Hau Tran "] readme = "README.md" -packages = [{include = "app"}] +packages = [{ include = "app" }] [tool.poetry.dependencies] python = ">=3.10,<4.0" @@ -12,7 +12,7 @@ insightface = ">=0.7.3,<1.0" opencv-python-headless = ">=4.7.0.72,<5.0" pillow = ">=9.5.0,<11.0" fastapi-slim = ">=0.95.2,<1.0" -uvicorn = {extras = ["standard"], version = ">=0.22.0,<1.0"} +uvicorn = { extras = 
["standard"], version = ">=0.22.0,<1.0" } pydantic = "^1.10.8" aiocache = ">=0.12.1,<1.0" rich = ">=13.4.2" @@ -45,7 +45,10 @@ onnxruntime = "^1.15.0" optional = true [tool.poetry.group.cuda.dependencies] -onnxruntime-gpu = {version = "^1.17.0", source = "cuda12"} +onnxruntime-gpu = [ + { version = "^1.17.0", source = "cuda12", markers = "platform_machine == 'x86_64'" }, + { python = "3.11", path = "onnxruntime_gpu-1.18.0-cp311-cp311-manylinux_aarch64.whl", markers = "platform_machine == 'aarch64'" } +] [tool.poetry.group.openvino] optional = true diff --git a/machine-learning/start.sh b/machine-learning/start.sh index c3fda523df832..9cddd6d6eb2bc 100755 --- a/machine-learning/start.sh +++ b/machine-learning/start.sh @@ -1,19 +1,26 @@ #!/usr/bin/env sh -lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" # mimalloc seems to increase memory usage dramatically with openvino, need to investigate -if ! [ "$DEVICE" = "openvino" ]; then +mimalloc="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" +if [ -f "$mimalloc" ]; then + export LD_PRELOAD="$mimalloc" +fi + +if { [ "$DEVICE" = "cuda" ] && [ "$(arch)" = "aarch64" ]; }; then + lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" export LD_PRELOAD="$lib_path" - export LD_BIND_NOW=1 - : "${MACHINE_LEARNING_WORKER_TIMEOUT:=120}" -else - : "${MACHINE_LEARNING_WORKER_TIMEOUT:=300}" fi +export LD_BIND_NOW=1 : "${IMMICH_HOST:=[::]}" : "${IMMICH_PORT:=3003}" : "${MACHINE_LEARNING_WORKERS:=1}" : "${MACHINE_LEARNING_HTTP_KEEPALIVE_TIMEOUT_S:=2}" +if [ "$DEVICE" = "openvino" ]; then + : "${MACHINE_LEARNING_WORKER_TIMEOUT:=300}" +else + : "${MACHINE_LEARNING_WORKER_TIMEOUT:=120}" +fi gunicorn app.main:app \ -k app.config.CustomUvicornWorker \