diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile
index 0bc4e7ee13f66..fb72b67c2f109 100644
--- a/.devops/cuda.Dockerfile
+++ b/.devops/cuda.Dockerfile
@@ -12,13 +12,16 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 ARG CUDA_DOCKER_ARCH=default
 
-RUN apt-get update && \
-    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+RUN --mount=type=cache,target=/var/lib/apt/lists \
+    --mount=type=cache,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 ccache
 
 WORKDIR /app
 
 COPY . .
 
-RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+RUN --mount=type=cache,target=/root/.ccache \
+    if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b96e1f50acc9e..672e124fbe74c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -943,21 +943,66 @@ jobs:
 
   ubuntu-latest-cmake-cuda:
     runs-on: ubuntu-latest
-    container: nvidia/cuda:12.6.2-devel-ubuntu24.04
+
+    strategy:
+      matrix:
+        cuda:
+          # Colab and lightning.ai currently use CUDA 12.2 (test w/ `nvidia-smi | grep "CUDA Version: "`)
+          # Capabilities of GPUs are listed on https://developer.nvidia.com/cuda-gpus, can test w/ `nvidia-smi --query-gpu=compute_cap --format=csv`
+          # See available containers at https://hub.docker.com/r/nvidia/cuda/tags
+          - version: 12.2
+            container: nvidia/cuda:12.2.2-devel-ubuntu22.04
+            cap: 7.5
+            arch: 75-real
+            example: 'T4'
+            package: true
+          - version: 12.2
+            container: nvidia/cuda:12.2.2-devel-ubuntu22.04
+            cap: 8.0
+            arch: 80-real
+            example: 'A100'
+            package: true
+          - version: 12.2
+            container: nvidia/cuda:12.2.2-devel-ubuntu22.04
+            cap: 8.6
+            arch: 86-real
+            example: 'A10'
+            package: true
+          - version: 12.2
+            container: nvidia/cuda:12.2.2-devel-ubuntu22.04
+            cap: 8.9
+            arch: 89-real
+            example: 'L4, L40S'
+            package: true
+          - version: 12.2
+            container: nvidia/cuda:12.2.2-devel-ubuntu22.04
+            cap: 9.0
+            arch: 90-real
+            example: 'H100'
+            package: true
+          # Build only, don't package.
+          - version: 12.6
+            container: nvidia/cuda:12.6.2-devel-ubuntu22.04
+            cap: 8.9
+            arch: 89-real
+            package: false
+
+    container: ${{ matrix.cuda.container }}
+
+    name: ubuntu-22-cuda (${{ matrix.cuda.version }} cap ${{ matrix.cuda.cap }}, e.g. ${{ matrix.cuda.example }})
 
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
-      - name: Install dependencies
+      - name: Dependencies
+        id: depends
         env:
           DEBIAN_FRONTEND: noninteractive
         run: |
-          apt update
-          apt install -y cmake build-essential ninja-build libgomp1 git
+          apt-get update
+          apt-get install -y cmake build-essential ninja-build libcurl4-openssl-dev libgomp1 git zip
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2.16
@@ -969,13 +1014,42 @@ jobs:
         run: |
           cmake -S . -B build -G Ninja \
             -DCMAKE_BUILD_TYPE=Release \
-            -DCMAKE_CUDA_ARCHITECTURES=89-real \
+            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda.arch }} \
             -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
             -DLLAMA_FATAL_WARNINGS=ON \
             -DGGML_NATIVE=OFF \
             -DGGML_CUDA=ON
           cmake --build build
 
+      - name: Determine tag name
+        if: ${{ matrix.cuda.package }}
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+          echo "cuda_name=cu${{ matrix.cuda.version }}-cap${{ matrix.cuda.cap }}" >> $GITHUB_OUTPUT
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ matrix.cuda.package && (( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true') }}
+        run: |
+          cp LICENSE ./build/bin/
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-cuda-${{ steps.tag.outputs.cuda_name }}-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ matrix.cuda.package && (( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true') }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-cuda-${{ steps.tag.outputs.cuda_name }}-x64.zip
+          name: llama-bin-ubuntu-cuda-${{ steps.tag.outputs.cuda_name }}-x64.zip
+
 
   windows-2019-cmake-cuda:
     runs-on: windows-2019
@@ -1383,6 +1457,7 @@ jobs:
 
     needs:
       - ubuntu-cpu-cmake
+      - ubuntu-latest-cmake-cuda
       - ubuntu-22-cmake-vulkan
       - windows-latest-cmake
      - windows-2019-cmake-cuda
diff --git a/ggml/src/kompute b/ggml/src/kompute
new file mode 160000
index 0000000000000..4565194ed7c32
--- /dev/null
+++ b/ggml/src/kompute
@@ -0,0 +1 @@
+Subproject commit 4565194ed7c32d1d2efa32ceab4d3c6cae006306