diff --git a/Dockerfile b/Dockerfile index 7033c9c..9516531 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,15 @@ RUN apt-get update && \ python3-dev \ python3-pip \ wget \ + pkg-config \ + libavformat-dev \ + libavcodec-dev \ + libavdevice-dev \ + libavutil-dev \ + libswscale-dev \ + libswresample-dev \ + libavfilter-dev \ + libopus-dev \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -79,7 +88,10 @@ RUN apt-get update && apt-get install -y zstd git-lfs && rm -rf /var/lib/apt/lis COPY requirements.txt . # Run pip install with cache so we speedup subsequent rebuilds -RUN --mount=type=cache,target=/root/.cache pip install -r requirements.txt +RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \ + pip install --no-cache-dir -r requirements.txt || \ + pip install -r requirements.txt +#RUN --mount=type=cache,target=/root/.cache pip install -r requirements.txt # Install our torch ver matching cuda RUN --mount=type=cache,target=/root/.cache pip install -U torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 diff --git a/deps/ubuntu.sh b/deps/ubuntu.sh index 6e73237..303bd23 100755 --- a/deps/ubuntu.sh +++ b/deps/ubuntu.sh @@ -5,11 +5,7 @@ set -e curl https://get.docker.com | sh && sudo systemctl --now enable docker # Install NVIDIA Container Toolkit -distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) \ - && curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ - && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ - sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ - sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list +curl https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list -o /etc/apt/sources.list.d/nvidia-container-toolkit.list sudo apt-get update @@ -18,4 +14,7 @@ sudo apt-get install -y nvidia-container-toolkit sudo nvidia-ctk runtime configure --runtime=docker # Restart Docker -sudo systemctl restart docker \ No newline at end of file +sudo systemctl restart docker + +# Remind the user that a reboot is recommended to activate the NVIDIA drivers +echo "It is recommended to reboot the system to activate the NVIDIA drivers. Reboot now with: sudo reboot" \ No newline at end of file diff --git a/docs/UBU2510fixes_changes.md b/docs/UBU2510fixes_changes.md new file mode 100644 index 0000000..604db92 --- /dev/null +++ b/docs/UBU2510fixes_changes.md @@ -0,0 +1,54 @@ +# UBU2510fixes — Changes vs `main` + +This document summarizes the changes introduced on the `UBU2510fixes` branch compared to `main` for the `willow-inference-server` repository. + +## Summary +- Files modified: + - `Dockerfile` + - `deps/ubuntu.sh` + - `requirements.txt` + - `utils.sh` + +These changes are focused on improving Docker build reliability, updating dependency constraints (including NVIDIA CUDA/CUDNN packages), and simplifying the NVIDIA Container Toolkit installation script. A small change was also made to the `utils.sh` docker build invocation. 
+ +## Per-file details + +- `deps/ubuntu.sh` + - Replaced a more complex distribution-specific sequence for adding the NVIDIA container toolkit APT source with a simpler curl command that writes the stable `nvidia-container-toolkit.list` directly to `/etc/apt/sources.list.d/`. + - The remaining steps are functionally unchanged: the script still runs `apt-get update`, installs `nvidia-container-toolkit`, configures the Docker runtime with `nvidia-ctk`, and restarts Docker. + - Added a final `echo` recommending a system reboot to activate NVIDIA drivers: `It is recommended to reboot the system to activate the NVIDIA drivers. Reboot now with: sudo reboot`. + +- `Dockerfile` + - Added additional audio/video related system packages to the builder image (`pkg-config`, `libavformat-dev`, `libavcodec-dev`, `libavdevice-dev`, `libavutil-dev`, `libswscale-dev`, `libswresample-dev`, `libavfilter-dev`, `libopus-dev`). These are likely added to support media processing and related Python packages. + - Changed the `pip install -r requirements.txt` step in the runtime image to use the pip cache mount with `--mount=type=cache,id=pip-cache,target=/root/.cache/pip` and to try `pip install --no-cache-dir -r requirements.txt` first, falling back to `pip install -r requirements.txt` on failure. The old single-line step (`RUN --mount=type=cache,target=/root/.cache pip install -r requirements.txt`) is no longer active; it is kept only as a commented-out line. + +- `requirements.txt` + - Many packages had version constraints relaxed or changed to ranges (for example: `accelerate`, `aioice`, `cryptography`, `huggingface-hub`, `pyee`, `pylibsrtp`, `pyOpenSSL`, `transformers`, `typing_extensions`); `aiortc` stays pinned but moves from `1.5.0` to `1.14.0`. + - Some packages changed from pinned versions to compatible ranges (e.g., `accelerate` now `>=0.22.0,<1.0.0`). + - `av` now uses a looser constraint (`av>=12`) and several NVIDIA binary wheel packages are included (CUDA/cu12 CUDNN, cuBLAS, etc.) — indicating a move to support CUDA 12 NVIDIA runtime and associated wheels. 
+ +- `utils.sh` + - `docker build` command in `build_docker()` now uses `--no-cache --debug` by default (previously just `docker build -t "$IMAGE":"$TAG" .`). This forces a clean build and provides extra debug output during image creation. + +## Rationale / Notes +- The changes appear targeted at preparing the project for newer NVIDIA CUDA runtimes (cu12) and making Docker builds more deterministic by forcing no-cache builds, while adding a dedicated pip cache mount intended to speed up repeated installs (note that the first `pip install` attempt passes `--no-cache-dir`, so only the fallback install can use that cache). +- Loosening some requirements to ranges improves compatibility with newer packages; however, this can also bring in unexpected behavior if upstream packages change. Pinning critical packages (e.g., frameworks) may still be advisable for production images. +- The simplified `deps/ubuntu.sh` approach reduces complexity for adding the NVIDIA toolkit APT source, but it also drops the GPG keyring download and the `signed-by` rewrite, and the new `curl ... -o /etc/apt/sources.list.d/...` call is not prefixed with `sudo` — confirm that `apt-get update` still succeeds on a clean host. The newly added reboot recommendation reminds the user to reboot to activate kernel driver updates. + +## Actions / Recommendations +- After running `deps/ubuntu.sh` on a host that installed or updated NVIDIA drivers/toolkit, reboot the machine: `sudo reboot`. +- When building images locally, expect longer builds due to `--no-cache`. Consider removing `--no-cache` locally if you want incremental builds. +- After the host setup, verify that containers can access the GPU with `docker run --gpus all --rm nvcr.io/nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi` (or similar) to confirm drivers/toolkit are configured. + +## File location +The generated summary is saved here: + +`docs/UBU2510fixes_changes.md` + +Possible follow-ups: +- Create a short changelog entry in `CHANGELOG.md`. +- Open a PR with this documentation and the branch diff summary. +- Run additional checks (e.g., build a Docker image locally) and report results. + +--- +Generated by automation to summarize `origin/main..origin/UBU2510fixes` diffs. 
diff --git a/requirements.txt b/requirements.txt index 7908f92..c7349cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -accelerate==0.22.0 +accelerate>=0.22.0,<1.0.0 aiofiles==23.2.1 aiohttp==3.8.5 -aioice==0.9.0 -aiortc==1.5.0 +aioice>=0.10.1,<1.0.0 +aiortc==1.14.0 aiosignal==1.3.1 altair==5.1.1 anyio==3.7.1 @@ -10,7 +10,7 @@ appdirs==1.4.4 async-timeout==4.0.3 attrs==23.1.0 audioread==3.0.0 -av==10.0.0 +av>=12 certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 @@ -19,7 +19,7 @@ cmake==3.25.0 coloredlogs==15.0.1 colorlog==6.7.0 contourpy==1.1.0 -cryptography==41.0.3 +cryptography>=45.0.7,<47.0.0 cycler==0.11.0 datasets==2.14.5 decorator==5.1.1 @@ -44,7 +44,7 @@ h11==0.14.0 httpcore==0.17.3 httptools==0.6.0 httpx==0.24.1 -huggingface-hub==0.16.4 +huggingface-hub>=0.23.0,<1.0.0 humanfriendly==10.0 HyperPyYAML==1.2.1 idna==3.4 @@ -108,10 +108,10 @@ pycparser==2.21 pycuda==2022.2.2 pydantic==1.10.12 pydub==0.25.1 -pyee==11.0.0 +pyee>=13.0.0,<14.0.0 Pygments==2.16.1 -pylibsrtp==0.8.0 -pyOpenSSL==23.2.0 +pylibsrtp>=0.10.0,<1.0.0 +pyOpenSSL>=25.0.0,<26.0.0 pyparsing==3.0.9 pyrsistent==0.19.3 pyston-lite==2.3.5 @@ -154,10 +154,10 @@ tiktoken==0.4.0 tokenizers==0.13.3 toolz==0.12.0 tqdm==4.66.1 -transformers==4.33.1 +transformers>=4.33.0,<5.0.0 triton==2.1.0 types-PyYAML==6.0.12.11 -typing_extensions==4.7.1 +typing_extensions>=4.9.0,<5.0.0 tzdata==2023.3 uc-micro-py==1.0.2 ujson==5.8.0 @@ -171,4 +171,4 @@ wcwidth==0.2.6 websockets==11.0.3 xxhash==3.3.0 yarl==1.9.2 -zipp==3.16.2 +zipp==3.16.2 \ No newline at end of file diff --git a/utils.sh b/utils.sh index 5fa4d75..1eea3b1 100755 --- a/utils.sh +++ b/utils.sh @@ -218,7 +218,7 @@ freeze_requirements() { } build_docker() { - docker build -t "$IMAGE":"$TAG" . + docker build --no-cache --debug -t "$IMAGE":"$TAG" . } shell() {