Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/test_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,44 @@ jobs:
with:
image: lmdeploy:ascend
github-token: ${{ secrets.GITHUB_TOKEN }}

# Builds the Jetson image from docker/Dockerfile.jetson on the
# ubuntu-22.04-arm runner, runs `lmdeploy check_env` inside the image, and
# then runs the Dive action against it.
test_jetson_docker_image:
  permissions:
    pull-requests: write
  runs-on: ubuntu-22.04-arm
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader
        git config --local --unset "http.https://github.com/.extraheader"
    - name: Build Docker image
      run: |
        docker build . -t lmdeploy:jetson -f docker/Dockerfile.jetson
    - name: Test image with lmdeploy check_env
      run: |
        docker images
        docker run --rm lmdeploy:jetson lmdeploy check_env
    - name: Dive
      # NOTE(review): this reference was garbled by e-mail obfuscation in the
      # captured page; the action is MaxymVlasov/dive-action, but the version
      # tag below is reconstructed - confirm against the other jobs in this
      # workflow.
      uses: MaxymVlasov/dive-action@v1.4.0
      with:
        image: lmdeploy:jetson
        github-token: ${{ secrets.GITHUB_TOKEN }}
85 changes: 64 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,28 +148,71 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
# Request C++17 as the default C++ standard.
set(CMAKE_CXX_STANDARD 17)
# Extend the CUDA flags: -Wall is handed over through -Xcompiler, and -ldl is
# appended as well. The trailing "-Xptxas -v" is commented out and not applied.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -ldl") # -Xptxas -v

# TODO: build for sm_72 & sm_87 on aarch64 platform (Jetson devices)
if (NOT CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11")
list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) # A100
endif ()
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.1")
list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) # 3090
endif ()
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.8")
list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) # 4090
endif ()
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.0")
list(APPEND CMAKE_CUDA_ARCHITECTURES 90a-real) # H100
endif ()
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "12.8")
list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
endif ()
if (MSVC)
list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
# Map CMAKE_SYSTEM_PROCESSOR onto a canonical ARCH value, one of:
#   x86_64 | x86 | aarch64 | arm | generic | ppc64le
# Any processor string not listed below aborts configuration.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$")
  set(ARCH "x86_64")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
  # cmake reports AMD64 on Windows, but we might be building for 32-bit.
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(ARCH "x86_64")
  else()
    set(ARCH "x86")
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i386|i686)$")
  set(ARCH "x86")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|arm64e)$")
  # arm64e: the Apple A12 Bionic chipset (iPhone XS/XS Max/XR) uses the arm64e
  # architecture.
  set(ARCH "aarch64")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
  # Must stay after the arm64/arm64e branch above, which it would otherwise
  # shadow. The pattern is "^arm" on purpose: "^arm*" means "ar" followed by
  # zero or more "m" and would also accept unrelated names such as "arc".
  set(ARCH "arm")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips")
  # Just to avoid the "unknown processor" error.
  set(ARCH "generic")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
  set(ARCH "ppc64le")
else()
  message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()


# Pick the default CUDA architectures for the detected ARCH. A value of
# CMAKE_CUDA_ARCHITECTURES that is already set is never overridden. Only
# x86_64 and aarch64 are accepted; every other ARCH aborts configuration.
if(ARCH STREQUAL "x86_64")
  if(NOT CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
    # The compiler version is expanded inside quotes so that an empty value
    # still yields a well-formed if() expression.
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11")
      list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) # A100
    endif()
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1")
      list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) # 3090
    endif()
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.8")
      list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) # 4090
    endif()
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "12.0")
      list(APPEND CMAKE_CUDA_ARCHITECTURES 90a-real) # H100
    endif()
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "12.8")
      list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real) # 5090
    endif()
    if(MSVC)
      # 80-real and 90a-real are dropped again for MSVC builds.
      list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES 80-real 90a-real)
    endif()
  endif()
elseif(ARCH STREQUAL "aarch64")
  if(NOT CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES 72-real 87-real) # Jetson
  endif()
else()
  message(FATAL_ERROR "Unsupported Architecture:" ${ARCH})
endif()

message(STATUS "Building with CUDA archs: ${CMAKE_CUDA_ARCHITECTURES}")

Expand Down
35 changes: 35 additions & 0 deletions docker/Dockerfile.jetson
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Base images
FROM nvcr.io/nvidia/l4t-base:r36.2.0

# CUDA_VER and PYTHON_VERSION select the apt packages installed below. PATH
# puts the /opt/py3 virtualenv and the CUDA binaries in front of the defaults.
ENV CUDA_VER=12.6 \
    PYTHON_VERSION=3.10 \
    PATH=/opt/py3/bin:/root/.local/bin:/usr/local/cuda/bin:${PATH}

# Install nvidia-l4t-core and the CUDA apt keyring from downloaded .deb files,
# then the CUDA toolkit/compat packages, cuDNN, cuSPARSELt, cuDSS, git,
# OpenBLAS and the Python dev/venv packages. Finally create the virtualenv at
# /opt/py3 and the /wheels output directory.
# The marker file touched under /opt/nvidia/l4t-packages presumably disables
# the boot firmware update in the package preinstall step - confirm.
# /tmp/download is a build cache mount, so leftover .deb files (including the
# ".deb.N" names wget produces on a repeated download) are removed explicitly.
# apt-get is used throughout: apt has no stable command-line interface for
# scripted use.
RUN --mount=type=cache,target=/root/.cache \
    --mount=type=cache,target=/tmp/download \
    export CUDA_SUFFIX=$(echo $CUDA_VER | sed 's/\./-/g') && \
    cd /tmp/download && \
    mkdir -p /opt/nvidia/l4t-packages/ && \
    touch /opt/nvidia/l4t-packages/.nv-l4t-disable-boot-fw-update-in-preinstall && \
    wget -q "https://repo.download.nvidia.com/jetson/t234/pool/main/n/nvidia-l4t-core/nvidia-l4t-core_36.2.0-20231218214829_arm64.deb" && \
    wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb" && \
    yes | dpkg -i nvidia-l4t-core_*.deb cuda-keyring_*.deb && \
    rm -rf *.deb *.deb.* && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        cuda-toolkit-${CUDA_SUFFIX} cuda-compat-${CUDA_SUFFIX} libcudnn9-cuda-12 libcusparselt0 cudss \
        git libopenblas-dev python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \
    apt-get clean -y && \
    rm -rf /var/lib/apt/lists/* && \
    python${PYTHON_VERSION} -m venv /opt/py3 && \
    mkdir -p /wheels

# Should be in the lmdeploy root directory when building docker image
COPY . /opt/lmdeploy
WORKDIR /opt/lmdeploy

# Build the lmdeploy wheel into /wheels, re-tag it with the local version
# "cu126", and install it, resolving packages from the jetson-ai-lab index.
RUN --mount=type=cache,target=/root/.cache \
    --mount=type=cache,target=/opt/pytorch \
    pip install build change-wheel-version && \
    python -m build -w -o /wheels -v . && \
    change_wheel_version --local-version cu126 --delete-old-wheel /wheels/lmdeploy*.whl && \
    pip install -v /wheels/lmdeploy*.whl --index-url https://pypi.jetson-ai-lab.io/jp6/cu126/+simple/
2 changes: 1 addition & 1 deletion requirements/runtime_cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ tiktoken
torch<=2.8.0,>=2.0.0
torchvision<=0.23.0,>=0.15.0
transformers
triton<=3.4.0,>=3.0.0; sys_platform == "linux"
triton<=3.4.0,>=3.0.0; sys_platform == "linux" and "aarch64" not in platform_machine and "arm" not in platform_machine
uvicorn
Loading