Add a Jetson (aarch64) Docker image, a CI job that builds it, and
per-platform default CUDA architectures.

  * .github/workflows/test_docker.yml: new test_jetson_docker_image job that
    builds docker/Dockerfile.jetson on ubuntu-22.04-arm and runs
    `lmdeploy check_env` inside the resulting image.
  * CMakeLists.txt: map CMAKE_SYSTEM_PROCESSOR to ARCH and choose default
    CMAKE_CUDA_ARCHITECTURES per ARCH (72/87 on aarch64). A caller-supplied
    CMAKE_CUDA_ARCHITECTURES is always left untouched.
  * docker/Dockerfile.jetson: new image based on nvcr.io/nvidia/l4t-base:r36.2.0.
  * requirements/runtime_cuda.txt: skip triton when platform_machine contains
    "aarch64" or "arm".

diff --git a/.github/workflows/test_docker.yml b/.github/workflows/test_docker.yml
index 9702ce774c..bc8b4c4d01 100644
--- a/.github/workflows/test_docker.yml
+++ b/.github/workflows/test_docker.yml
@@ -103,3 +103,44 @@ jobs:
         with:
           image: lmdeploy:ascend
           github-token: ${{ secrets.GITHUB_TOKEN }}
+
+  test_jetson_docker_image:
+    permissions:
+      pull-requests: write
+    runs-on: ubuntu-22.04-arm
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          ref: ${{github.event.inputs.repo_ref}}
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Get docker info
+        run: |
+          docker info
+          # remove http extraheader
+          git config --local --unset "http.https://github.com/.extraheader"
+      - name: Build Docker image
+        run: |
+          docker build . -t lmdeploy:jetson -f docker/Dockerfile.jetson
+      - name: Test image with lmdeploy check_env
+        run: |
+          docker images
+          docker run --rm lmdeploy:jetson lmdeploy check_env
+      - name: Dive
+        uses: MaxymVlasov/dive-action@v1.5.0
+        with:
+          image: lmdeploy:jetson
+          github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 462d3a01cc..2b31daa76d 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -148,28 +148,72 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -ldl") # -Xptxas -v
 
-# TODO: build for sm_72 & sm_87 on aarch64 platform (Jetson devices)
-if (NOT CMAKE_CUDA_ARCHITECTURES)
-  set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11")
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) # A100
-  endif ()
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.1")
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) # 3090
-  endif ()
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.8")
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) # 4090
-  endif ()
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.0")
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 90a-real) # H100
-  endif ()
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
-  endif ()
-  if (MSVC)
-    list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
+# Normalize CMAKE_SYSTEM_PROCESSOR into ARCH: x86_64, x86, aarch64, arm, generic or ppc64le.
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  set(ARCH "x86_64")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
+  set(ARCH "x86_64")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
+  # cmake reports AMD64 on Windows, but we might be building for 32-bit.
+  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(ARCH "x86_64")
+  else()
+    set(ARCH "x86")
+  endif()
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")
+  set(ARCH "x86")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i386")
+  set(ARCH "x86")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686")
+  set(ARCH "x86")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+  set(ARCH "aarch64")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+  set(ARCH "aarch64")
+# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64e")
+  set(ARCH "aarch64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
+  set(ARCH "arm")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips")
+  # Just to avoid the “unknown processor” error.
+  set(ARCH "generic")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
+  set(ARCH "ppc64le")
+else()
+  message(FATAL_ERROR "Unknown processor: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()
+
+# Default CUDA architectures per ARCH; a caller-supplied CMAKE_CUDA_ARCHITECTURES is never overridden.
+if(ARCH STREQUAL "x86_64")
+  if (NOT CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
+    if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) # A100
+    endif ()
+    if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.1")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) # 3090
+    endif ()
+    if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.8")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) # 4090
+    endif ()
+    if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.0")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 90a-real) # H100
+    endif ()
+    if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
+      list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
+    endif ()
+    if (MSVC)
+      list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
+    endif ()
   endif ()
-endif ()
+elseif(ARCH STREQUAL "aarch64")
+  if (NOT CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 72-real 87-real) # Jetson
+  endif()
+elseif(NOT CMAKE_CUDA_ARCHITECTURES)
+  message(FATAL_ERROR "Unsupported Architecture: ${ARCH} (no default CUDA architectures; set CMAKE_CUDA_ARCHITECTURES explicitly)")
+endif()
 
 message(STATUS "Building with CUDA archs: ${CMAKE_CUDA_ARCHITECTURES}")
 
diff --git a/docker/Dockerfile.jetson b/docker/Dockerfile.jetson
new file mode 100644
index 0000000000..a0d5de81cd
--- /dev/null
+++ b/docker/Dockerfile.jetson
@@ -0,0 +1,37 @@
+# Base images
+FROM nvcr.io/nvidia/l4t-base:r36.2.0
+ENV CUDA_VER=12.6 \
+    PYTHON_VERSION=3.10 \
+    PATH=/opt/py3/bin:/root/.local/bin:/usr/local/cuda/bin:${PATH}
+
+# Install the nvidia-l4t-core and cuda-keyring .debs, then CUDA/cuDNN/Python from apt, and create the /opt/py3 venv.
+RUN --mount=type=cache,target=/root/.cache \
+    --mount=type=cache,target=/tmp/download \
+    export CUDA_SUFFIX=$(echo $CUDA_VER | sed 's/\./-/g') && \
+    cd /tmp/download && \
+    mkdir -p /opt/nvidia/l4t-packages/ && \
+    touch /opt/nvidia/l4t-packages/.nv-l4t-disable-boot-fw-update-in-preinstall && \
+    wget -q "https://repo.download.nvidia.com/jetson/t234/pool/main/n/nvidia-l4t-core/nvidia-l4t-core_36.2.0-20231218214829_arm64.deb" && \
+    wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb" && \
+    yes | dpkg -i nvidia-l4t-core_*.deb cuda-keyring_*.deb && \
+    rm -rf *.deb *.deb.* && \
+    apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+        cuda-toolkit-${CUDA_SUFFIX} cuda-compat-${CUDA_SUFFIX} libcudnn9-cuda-12 libcusparselt0 cudss \
+        git libopenblas-dev python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \
+    apt-get clean -y && \
+    rm -rf /var/lib/apt/lists/* && \
+    python${PYTHON_VERSION} -m venv /opt/py3 && \
+    mkdir -p /wheels
+
+# Should be in the lmdeploy root directory when building docker image
+COPY . /opt/lmdeploy
+WORKDIR /opt/lmdeploy
+
+# Build the lmdeploy wheel into /wheels, stamp it with the cu126 local version, then install it.
+RUN --mount=type=cache,target=/root/.cache \
+    --mount=type=cache,target=/opt/pytorch \
+    pip install build change-wheel-version && \
+    python -m build -w -o /wheels -v . && \
+    change_wheel_version --local-version cu126 --delete-old-wheel /wheels/lmdeploy*.whl && \
+    pip install -v /wheels/lmdeploy*.whl --index-url https://pypi.jetson-ai-lab.io/jp6/cu126/+simple/
diff --git a/requirements/runtime_cuda.txt b/requirements/runtime_cuda.txt
index 21502e1103..5dd0d8d110 100644
--- a/requirements/runtime_cuda.txt
+++ b/requirements/runtime_cuda.txt
@@ -24,5 +24,5 @@ tiktoken
 torch<=2.8.0,>=2.0.0
 torchvision<=0.23.0,>=0.15.0
 transformers
-triton<=3.4.0,>=3.0.0; sys_platform == "linux"
+triton<=3.4.0,>=3.0.0; sys_platform == "linux" and "aarch64" not in platform_machine and "arm" not in platform_machine
 uvicorn