Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0e2eba0
chore(ci): switch sccache from local disk to memcached backend
drew Mar 3, 2026
d23bb58
chore: trigger e2e workflow
drew Mar 3, 2026
cd9c65b
refactor(ci): use repo variables instead of hardcoded sccache endpoint
drew Mar 3, 2026
521fd04
fix(build): restore SCCACHE_DIR for local disk cache fallback
drew Mar 3, 2026
1a4365c
fix(build): handle empty SCCACHE_MEMCACHED_ENDPOINT in Docker builds
drew Mar 3, 2026
83de34f
fix(ci): propagate SCCACHE_MEMCACHED_ENDPOINT to docker-build and pub…
drew Mar 3, 2026
8deb0f5
fix(build): retry cargo build after cleaning stale target cache
drew Mar 3, 2026
ff01d4a
refactor(build): drop cargo-target cache mounts from Dockerfiles
drew Mar 3, 2026
206693f
refactor(build): use sccache disk cache mounts instead of cargo-targe…
drew Mar 3, 2026
5f6c275
fix(build): restore cargo-target mounts for incremental local rebuilds
drew Mar 3, 2026
da6b112
fix(build): retry cargo build after cleaning stale target cache
drew Mar 3, 2026
24bb708
wip
drew Mar 3, 2026
94a7650
ci(rust): add Swatinem/rust-cache to speed up Rust checks
drew Mar 3, 2026
a3cc534
ci(rust): pin rust-cache to allowed SHA for enterprise policy
drew Mar 3, 2026
d45c957
ci(checks): run rust and python checks on both amd64 and arm64 runners
drew Mar 3, 2026
69abc1a
ci(checks): fix arm64 CI by switching sccache to ubi backend
drew Mar 3, 2026
d1f229b
fix(mise): use github backend for sccache instead of deprecated ubi
drew Mar 3, 2026
798d150
fix(ci): set rustup default before rust-cache action
drew Mar 3, 2026
99aa7fe
fix(ci): use dotenv and GITHUB_PATH for mise env activation
drew Mar 3, 2026
69401de
fix(ci): resolve RUSTC_WRAPPER to absolute sccache path
drew Mar 3, 2026
fe5b310
fix(ci): register sccache globally before resolving binary path
drew Mar 3, 2026
344f99c
fix(mise): switch sccache to ubi backend to match shim expectations
drew Mar 3, 2026
6219fc6
fix(ci): disambiguate sccache binary for ubi backend on x86_64
drew Mar 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: "0"
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }}

permissions:
contents: read
packages: read

jobs:
rust:
name: Rust
runs-on: build-amd64
name: Rust (${{ matrix.runner }})
strategy:
fail-fast: false
matrix:
runner: [build-amd64, build-arm64]
runs-on: ${{ matrix.runner }}
container:
image: ghcr.io/nvidia/nv-agent-env/ci:latest
credentials:
Expand All @@ -26,13 +31,17 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Cache sccache
uses: actions/cache@v4
- name: Install tools
run: mise install

- name: Cache Rust target and registry
uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
with:
path: .cache/sccache
key: sccache-rust-${{ runner.arch }}-${{ hashFiles('Cargo.lock') }}
restore-keys: |
sccache-rust-${{ runner.arch }}-
# One shared cache per runner label (build-amd64 / build-arm64) so that
# artifacts built for one architecture are never restored on the other
shared-key: rust-checks-${{ matrix.runner }}
# Cache the sccache directory too
cache-directories: .cache/sccache

- name: Format
run: mise run rust:format:check
Expand All @@ -45,11 +54,15 @@ jobs:

- name: sccache stats
if: always()
run: sccache --show-stats
run: mise x -- sccache --show-stats

python:
name: Python
runs-on: build-amd64
name: Python (${{ matrix.runner }})
strategy:
fail-fast: false
matrix:
runner: [build-amd64, build-arm64]
runs-on: ${{ matrix.runner }}
container:
image: ghcr.io/nvidia/nv-agent-env/ci:latest
credentials:
Expand All @@ -58,6 +71,9 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Install tools
run: mise install

- name: Install dependencies
run: uv sync --frozen

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ on:

env:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }}

permissions:
contents: read
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
- /var/run/docker.sock:/var/run/docker.sock
env:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-west-2
Expand Down Expand Up @@ -93,6 +94,7 @@ jobs:
- /var/run/docker.sock:/var/run/docker.sock
env:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SCCACHE_MEMCACHED_ENDPOINT: ${{ vars.SCCACHE_MEMCACHED_ENDPOINT }}
NAV_PYPI_S3_BUCKET: navigator-pypi-artifacts
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
Expand Down
6 changes: 3 additions & 3 deletions architecture/build-containers.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ A k3s image with bundled Helm charts and Kubernetes manifests for single-contain

Two Dockerfiles produce Python wheels for the CLI package distribution. These are not deployed as running containers.

- **`Dockerfile.python-wheels`** -- Builds Linux amd64/arm64 wheels using Maturin with a two-pass Rust build (dependency prebuild + final wheel build), BuildKit cache mounts for cargo registry/git/target and sccache, and `cross-build.sh` for conditional cross-toolchain installation. The final build step patches workspace version inside the container layer from `NAVIGATOR_CARGO_VERSION` (computed before Docker build), preserving cacheable dependency layers and avoiding dirty working-tree edits. Output stage is `scratch` with only the `.whl` files.
- **`Dockerfile.python-wheels`** -- Builds Linux amd64/arm64 wheels using Maturin with a two-pass Rust build (dependency prebuild + final wheel build), BuildKit cache mounts for cargo registry/git/target, sccache (backed by memcached when `SCCACHE_MEMCACHED_ENDPOINT` build arg is provided), and `cross-build.sh` for conditional cross-toolchain installation. The final build step patches workspace version inside the container layer from `NAVIGATOR_CARGO_VERSION` (computed before Docker build), preserving cacheable dependency layers and avoiding dirty working-tree edits. Output stage is `scratch` with only the `.whl` files.
- **`Dockerfile.python-wheels-macos`** -- Builds macOS arm64 wheels using osxcross (cross-compiling from Linux) with the same two-pass dependency caching pattern and cargo cache mounts. Version injection uses the same in-container workspace-version patch from `NAVIGATOR_CARGO_VERSION`, avoiding host-side file edits that break Docker layer caching. Uses `crazymax/osxcross:latest` as the cross-toolchain source. The `OSXCROSS_IMAGE` build arg allows using a mirrored registry image instead of Docker Hub.

### CI Runner Image (`navigator-ci`)
Expand Down Expand Up @@ -386,7 +386,7 @@ Container builds use Docker BuildKit with local cache directories:
- `build/scripts/docker-build-component.sh` stores per-component caches in `.cache/buildkit/<component>`.
- `build/scripts/docker-build-cluster.sh` stores the cluster image cache in `.cache/buildkit/cluster`.
- `mise run python:build:multiarch` stores per-platform wheel caches in `.cache/buildkit/python-wheels/<platform>` for local builds when using a `docker-container` buildx driver.
- Rust-heavy Dockerfiles use BuildKit cache mounts for cargo registry and target directories, keyed by image name and `TARGETARCH`, with `sharing=locked` to prevent concurrent cache corruption in parallel CI builds.
- Rust-heavy Dockerfiles use BuildKit cache mounts for cargo registry, cargo target, and sccache local disk directories, keyed by image name and `TARGETARCH`, with `sharing=locked` to prevent concurrent cache corruption in parallel CI builds. The cargo target mount gives cargo a persistent `target/` directory for true incremental rebuilds on source-only changes. sccache uses memcached in CI (`SCCACHE_MEMCACHED_ENDPOINT`) and falls back to the local disk cache mount for local dev builds, providing a second layer of caching at the compilation unit level.
- When the active buildx driver is `docker` (not `docker-container`), local cache import/export flags are skipped automatically because the docker driver cannot export local caches. In CI, cache export is also skipped.
- For local single-arch builds, the scripts auto-select a builder with the native `docker` driver (matching the active Docker context) so images land directly in the Docker image store without slow tarball export.

Expand All @@ -395,7 +395,7 @@ Container builds use Docker BuildKit with local cache directories:
In CI pipelines:

- Remote BuildKit daemons (`buildkit-amd64` and `buildkit-arm64`) are used as persistent builders via `driver: remote`. Their built-in layer cache persists across builds, so no external cache (registry-backed or otherwise) is needed in CI.
- Rust lint/test jobs cache `.cache/sccache/` and `target/` with keys derived from `Cargo.lock` and Rust task config files, scoped per runner architecture.
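- Rust lint/test jobs use `Swatinem/rust-cache` to cache the cargo registry and `target/`, with a shared key scoped per runner (`rust-checks-<runner>`) on top of the action's own `Cargo.lock`-derived hashing. sccache uses a shared memcached backend (`SCCACHE_MEMCACHED_ENDPOINT`) when that repository variable is set; the local `.cache/sccache` directory is still included in the cache as a disk fallback.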
- CI sets `CARGO_INCREMENTAL=0` to favor deterministic clean builds over incremental metadata churn.
- Publish jobs mirror `crazymax/osxcross:latest` into `$CI_REGISTRY_IMAGE/third_party/osxcross:latest` (when missing) and set `OSXCROSS_IMAGE` so macOS wheel Docker builds consume the mirrored image instead of pulling from Docker Hub on each run.
- The sandbox e2e test job tags and pushes component images to the GitLab project registry (`$CI_REGISTRY_IMAGE`) and configures cluster bootstrap to pull from that remote registry with CI credentials.
Expand Down
6 changes: 6 additions & 0 deletions build/scripts/docker-build-component.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,16 @@ if [[ "${DOCKER_PUSH:-}" == "1" ]]; then
OUTPUT_FLAG="--push"
fi

# Forward the sccache memcached endpoint to the image build as a --build-arg,
# but only when the variable is set and non-empty. Otherwise the array stays
# empty and contributes no arguments to the docker buildx invocation below.
SCCACHE_ARGS=()
if [[ "${SCCACHE_MEMCACHED_ENDPOINT:-}" != "" ]]; then
  SCCACHE_ARGS+=("--build-arg" "SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}")
fi

docker buildx build \
${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} \
${DOCKER_PLATFORM:+--platform ${DOCKER_PLATFORM}} \
${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \
${SCCACHE_ARGS[@]+"${SCCACHE_ARGS[@]}"} \
-f "${DOCKERFILE}" \
-t "${IMAGE_NAME}:${IMAGE_TAG}" \
--provenance=false \
Expand Down
3 changes: 3 additions & 0 deletions build/scripts/docker-publish-multiarch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ for component in sandbox server; do
if [ "$component" = "sandbox" ]; then
BUILD_ARGS="--build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE:-release}"
fi
if [ -n "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]; then
BUILD_ARGS="${BUILD_ARGS} --build-arg SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}"
fi
DOCKERFILE=$(resolve_dockerfile "${component}")
FULL_IMAGE="${REGISTRY}/${IMAGE_PREFIX}${component}"
docker buildx build \
Expand Down
6 changes: 3 additions & 3 deletions deploy/docker/Dockerfile.python-wheels
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ ARG TARGETARCH
ARG BUILDARCH
ARG NAVIGATOR_CARGO_VERSION

ENV SCCACHE_DIR=/tmp/sccache
ARG SCCACHE_MEMCACHED_ENDPOINT

WORKDIR /build

Expand Down Expand Up @@ -55,6 +55,8 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
touch crates/navigator-bootstrap/src/lib.rs

# Build dependencies only (cached unless Cargo.toml/lock changes).
# sccache uses memcached in CI or the local disk cache mount for local dev.
# The cargo-target mount gives cargo a persistent target/ dir for incremental rebuilds.
RUN --mount=type=cache,id=cargo-registry-python-wheels-${TARGETARCH},sharing=locked,target=/root/.cargo/registry \
--mount=type=cache,id=cargo-git-python-wheels-${TARGETARCH},sharing=locked,target=/root/.cargo/git \
--mount=type=cache,id=cargo-target-python-wheels-${TARGETARCH},sharing=locked,target=/build/target \
Expand All @@ -68,8 +70,6 @@ COPY dev-sandbox-policy.yaml ./
COPY python/ python/

# Touch source files to ensure they're rebuilt (not the cached dummy).
# Touch build.rs and proto files to force proto code regeneration when the
# cargo target cache mount retains stale OUT_DIR artifacts from prior builds.
RUN touch crates/navigator-cli/src/main.rs \
crates/navigator-cli/src/lib.rs \
crates/navigator-bootstrap/src/lib.rs \
Expand Down
7 changes: 5 additions & 2 deletions deploy/docker/Dockerfile.server
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY deploy/docker/cross-build.sh /usr/local/bin/
RUN . cross-build.sh && install_cross_toolchain && install_sccache && add_rust_target

ENV SCCACHE_DIR=/tmp/sccache
ARG SCCACHE_MEMCACHED_ENDPOINT

WORKDIR /build

Expand All @@ -42,7 +42,10 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
# Copy proto files needed for build
COPY proto/ proto/

# Build dependencies only (cached unless Cargo.toml/lock changes)
# Build dependencies only (cached unless Cargo.toml/lock changes).
# sccache uses memcached in CI (SCCACHE_MEMCACHED_ENDPOINT) or the local
# disk cache mount for local dev builds. The cargo-target mount gives cargo
# a persistent target/ dir for true incremental rebuilds on source changes.
RUN --mount=type=cache,id=cargo-registry-server-${TARGETARCH},sharing=locked,target=/usr/local/cargo/registry \
--mount=type=cache,id=cargo-target-server-${TARGETARCH},sharing=locked,target=/build/target \
--mount=type=cache,id=sccache-server-${TARGETARCH},sharing=locked,target=/tmp/sccache \
Expand Down
18 changes: 17 additions & 1 deletion deploy/docker/cross-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,28 @@ export_cross_env() {
# Automatically wraps with sccache when available.
cargo_cross_build() {
    # Run `cargo build` for the current target, forwarding every argument
    # unchanged. When is_cross reports a cross build, `--target <triple>`
    # (from rust_target) is prepended. The build is attempted at most twice:
    # once normally and, if that fails, once more after `cargo clean`.
    # Returns 0 if either attempt succeeds, otherwise the retry's exit status.
    export_cross_env

    # Unset empty SCCACHE_MEMCACHED_ENDPOINT so sccache falls back to the
    # local disk cache instead of erroring on an empty endpoint string.
    if [ -z "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]; then
        unset SCCACHE_MEMCACHED_ENDPOINT 2>/dev/null || true
    fi

    # Default sccache local disk cache to /tmp/sccache (matches BuildKit
    # cache mount target in Dockerfiles) when no dir is explicitly set.
    export SCCACHE_DIR="${SCCACHE_DIR:-/tmp/sccache}"

    # Only wrap rustc when sccache is actually installed.
    if command -v sccache >/dev/null 2>&1; then
        export RUSTC_WRAPPER=sccache
    fi

    # Prepend the target flag to this function's own positional parameters.
    # This keeps every argument individually quoted and works in plain
    # POSIX sh, with no arrays and no reliance on unquoted word splitting.
    if is_cross; then
        set -- --target "$(rust_target)" "$@"
    fi

    # Single build attempt with one retry. There must be no unconditional
    # `cargo build` ahead of this block: it would build twice on success and,
    # under `set -e`, abort before the retry could run.
    #
    # BuildKit cargo-target cache mounts can retain stale .rmeta files from
    # prior builds with different dependency versions; cargo clean purges
    # them so the retry succeeds. sccache still has the compiled objects, so
    # the clean rebuild is fast.
    if ! cargo build "$@"; then
        echo "cargo build failed; cleaning stale target cache and retrying..." >&2
        cargo clean 2>/dev/null || true
        cargo build "$@"
    fi
}

# Print the directory containing the compiled binary.
Expand Down
6 changes: 4 additions & 2 deletions deploy/docker/sandbox/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY deploy/docker/cross-build.sh /usr/local/bin/
RUN . cross-build.sh && install_cross_toolchain && install_sccache && add_rust_target

ENV SCCACHE_DIR=/tmp/sccache
ARG SCCACHE_MEMCACHED_ENDPOINT

WORKDIR /build

Expand Down Expand Up @@ -44,7 +44,9 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
COPY proto/ proto/
COPY dev-sandbox-policy.rego ./

# Build dependencies only (cached unless Cargo.toml/lock changes)
# Build dependencies only (cached unless Cargo.toml/lock changes).
# sccache uses memcached in CI or the local disk cache mount for local dev.
# The cargo-target mount gives cargo a persistent target/ dir for incremental rebuilds.
RUN --mount=type=cache,id=cargo-registry-sandbox-${TARGETARCH},sharing=locked,target=/usr/local/cargo/registry \
--mount=type=cache,id=cargo-target-sandbox-${TARGETARCH},sharing=locked,target=/build/target \
--mount=type=cache,id=sccache-sandbox-${TARGETARCH},sharing=locked,target=/tmp/sccache \
Expand Down
6 changes: 4 additions & 2 deletions mise.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ kubectl = "1.35.1"
uv = "0.10.2"
protoc = "29.6"
helm = "4.1.1"
sccache = "0.14.0"
"ubi:mozilla/sccache" = { version = "0.14.0", matching = "sccache-v" }

[env]
_.path = ["{{config_root}}/scripts/bin"]
Expand All @@ -25,7 +25,9 @@ _.file = [".env"]
KUBECONFIG = "{{config_root}}/kubeconfig"
UV_CACHE_DIR = "{{config_root}}/.cache/uv"

# Enable sccache for faster Rust builds
# Enable sccache for faster Rust builds.
# Local builds use a disk cache (SCCACHE_DIR); CI sets SCCACHE_MEMCACHED_ENDPOINT
# via repository variables to use a shared memcached backend instead.
RUSTC_WRAPPER = "sccache"
SCCACHE_DIR = "{{config_root}}/.cache/sccache"

Expand Down
Loading