Skip to content

Commit 36ddfca

Browse files
committed
fix(docker): honor configured supervisor image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent 4c75b85 commit 36ddfca

4 files changed

Lines changed: 41 additions & 88 deletions

File tree

crates/openshell-driver-docker/README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into
7979
each sandbox container. Resolution order is:
8080

8181
1. `supervisor_bin` in `[openshell.drivers.docker]`.
82-
2. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
83-
3. A local Linux cargo target build for the Docker daemon architecture.
84-
4. `supervisor_image` in `[openshell.drivers.docker]`, or the
85-
release-matched default supervisor image, extracting `/openshell-sandbox`.
82+
2. `supervisor_image` in `[openshell.drivers.docker]`, extracting
83+
`/openshell-sandbox` from that image.
84+
3. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary.
85+
4. A local Linux cargo target build for the Docker daemon architecture.
86+
5. The release-matched default supervisor image, extracting `/openshell-sandbox`.
8687

8788
Release and Docker-image gateway builds bake the matching supervisor image tag
8889
into the binary at compile time. The default Docker supervisor image is not

crates/openshell-driver-docker/src/lib.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ const DOCKER_NETWORK_DRIVER: &str = "bridge";
7979

8080
/// Default image holding the Linux `openshell-sandbox` binary. The gateway
8181
/// pulls this image and extracts the binary to a host-side cache when no
82-
/// explicit `supervisor_bin` override or local build is available.
82+
/// explicit `supervisor_bin`, configured `supervisor_image`, sibling binary,
83+
/// or local build is available.
8384
const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor";
8485

8586
/// Return the default `ghcr.io/nvidia/openshell/supervisor:<tag>` reference
@@ -2960,7 +2961,14 @@ pub(crate) async fn resolve_supervisor_bin(
29602961
return Ok(path);
29612962
}
29622963

2963-
// Tier 2: sibling `openshell-sandbox` next to the running gateway
2964+
// Tier 2: explicit supervisor_image in [openshell.drivers.docker].
2965+
// A configured image should be the source of truth even when a local
2966+
// developer build is present under target/.
2967+
if let Some(image) = docker_config.supervisor_image.clone() {
2968+
return extract_supervisor_bin_from_image(docker, &image).await;
2969+
}
2970+
2971+
// Tier 3: sibling `openshell-sandbox` next to the running gateway
29642972
// (release artifact layout). Linux-only because the sibling must be a
29652973
// Linux ELF to bind-mount into a Linux container.
29662974
if cfg!(target_os = "linux") {
@@ -2977,9 +2985,9 @@ pub(crate) async fn resolve_supervisor_bin(
29772985
}
29782986
}
29792987

2980-
// Tier 3: local cargo target build (developer workflow). Preferred
2981-
// over a registry pull when available because it matches whatever the
2982-
// developer just built.
2988+
// Tier 4: local cargo target build (developer workflow). Preferred
2989+
// over the default registry image when available because it matches
2990+
// whatever the developer just built.
29832991
let target_candidates = linux_supervisor_candidates(daemon_arch);
29842992
for candidate in &target_candidates {
29852993
if candidate.is_file() {
@@ -2990,13 +2998,9 @@ pub(crate) async fn resolve_supervisor_bin(
29902998
}
29912999
}
29923000

2993-
// Tier 4: pull the supervisor image from a registry and extract the
2994-
// binary to a host-side cache keyed by image content digest. This is
2995-
// the default path for released gateway binaries.
2996-
let image = docker_config
2997-
.supervisor_image
2998-
.clone()
2999-
.unwrap_or_else(default_docker_supervisor_image);
3001+
// Tier 5: pull the release-matched default supervisor image and extract
3002+
// the binary to a host-side cache keyed by image content digest.
3003+
let image = default_docker_supervisor_image();
30003004
extract_supervisor_bin_from_image(docker, &image).await
30013005
}
30023006

docs/reference/gateway-config.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ sandbox_namespace = "docker-dev"
218218
grpc_endpoint = "https://host.openshell.internal:17670"
219219
# Skip the image-pull-and-extract step by pointing at a locally built binary.
220220
supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox"
221+
# When supervisor_bin is omitted, the Docker driver extracts /openshell-sandbox from this image.
221222
supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest"
222223
guest_tls_ca = "/etc/openshell/certs/ca.pem"
223224
guest_tls_cert = "/etc/openshell/certs/client.pem"

e2e/with-docker-gateway.sh

Lines changed: 19 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ DOCKER_NETWORK_NAME=""
7373
DOCKER_NETWORK_CONNECTED_CONTAINER=""
7474
DOCKER_NETWORK_MANAGED=0
7575
GPU_MODE="${OPENSHELL_E2E_DOCKER_GPU:-0}"
76-
DOCKER_SUPERVISOR_ARGS=()
7776

7877
# Isolate CLI/SDK gateway metadata from the developer's real config.
7978
export XDG_CONFIG_HOME="${WORKDIR}/config"
@@ -255,25 +254,6 @@ if [ "${GPU_MODE}" = "1" ]; then
255254
fi
256255
fi
257256

258-
normalize_arch() {
259-
case "$1" in
260-
x86_64|amd64) echo "amd64" ;;
261-
aarch64|arm64) echo "arm64" ;;
262-
*) echo "$1" ;;
263-
esac
264-
}
265-
266-
linux_target_triple() {
267-
case "$1" in
268-
amd64) echo "x86_64-unknown-linux-gnu" ;;
269-
arm64) echo "aarch64-unknown-linux-gnu" ;;
270-
*)
271-
echo "ERROR: unsupported Docker daemon architecture '$1'" >&2
272-
exit 2
273-
;;
274-
esac
275-
}
276-
277257
resolve_docker_supervisor_image() {
278258
if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ]; then
279259
printf '%s\n' "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE}"
@@ -296,7 +276,7 @@ resolve_docker_supervisor_image() {
296276
return 0
297277
fi
298278

299-
printf '%s\n' ""
279+
printf '%s\n' "openshell/supervisor:dev"
300280
}
301281

302282
docker_pull_with_retry() {
@@ -328,6 +308,21 @@ docker_pull_with_retry() {
328308
ensure_docker_supervisor_image() {
329309
local image=$1
330310

311+
if [ "${image}" = "openshell/supervisor:dev" ] \
312+
&& [ -z "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ] \
313+
&& [ -z "${OPENSHELL_SUPERVISOR_IMAGE:-}" ] \
314+
&& [ -z "${CI:-}" ]; then
315+
echo "Building local Docker supervisor image ${image}..."
316+
CONTAINER_ENGINE=docker IMAGE_TAG=dev \
317+
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor
318+
if docker image inspect "${image}" >/dev/null 2>&1; then
319+
return 0
320+
fi
321+
322+
echo "ERROR: expected supervisor image '${image}' after local build." >&2
323+
exit 2
324+
fi
325+
331326
if docker image inspect "${image}" >/dev/null 2>&1; then
332327
return 0
333328
fi
@@ -342,47 +337,11 @@ ensure_docker_supervisor_image() {
342337
exit 2
343338
}
344339

345-
DAEMON_ARCH="$(normalize_arch "$(docker info --format '{{.Architecture}}' 2>/dev/null || true)")"
346-
SUPERVISOR_TARGET="$(linux_target_triple "${DAEMON_ARCH}")"
347-
HOST_OS="$(uname -s)"
348-
HOST_ARCH="$(normalize_arch "$(uname -m)")"
349-
SUPERVISOR_OUT_DIR="${WORKDIR}/supervisor/${DAEMON_ARCH}"
350-
SUPERVISOR_BIN="${SUPERVISOR_OUT_DIR}/openshell-sandbox"
351-
352-
CARGO_BUILD_JOBS_ARG=()
353-
if [ -n "${CARGO_BUILD_JOBS:-}" ]; then
354-
CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}")
355-
fi
356-
357340
e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN
358341

359342
SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)"
360-
if [ -n "${SUPERVISOR_IMAGE}" ]; then
361-
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
362-
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"
363-
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}")
364-
else
365-
echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..."
366-
mkdir -p "${SUPERVISOR_OUT_DIR}"
367-
if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then
368-
rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true
369-
cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \
370-
--release -p openshell-sandbox --target "${SUPERVISOR_TARGET}"
371-
cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}"
372-
else
373-
CONTAINER_ENGINE=docker \
374-
DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \
375-
DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \
376-
bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output
377-
fi
378-
379-
if [ ! -f "${SUPERVISOR_BIN}" ]; then
380-
echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2
381-
exit 1
382-
fi
383-
chmod +x "${SUPERVISOR_BIN}"
384-
DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}")
385-
fi
343+
ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}"
344+
echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}"
386345

387346
DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest"
388347
SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}"
@@ -451,19 +410,7 @@ GATEWAY_CONFIG="${STATE_DIR}/gateway.toml"
451410
printf 'guest_tls_cert = %s\n' "$(toml_string "${PKI_DIR}/client/tls.crt")"
452411
printf 'guest_tls_key = %s\n' "$(toml_string "${PKI_DIR}/client/tls.key")"
453412
printf 'enable_bind_mounts = true\n'
454-
# DOCKER_SUPERVISOR_ARGS holds either ("--docker-supervisor-bin" "<path>")
455-
# or ("--docker-supervisor-image" "<image>"); both map to TOML keys on
456-
# the docker driver config.
457-
for ((i=0; i<${#DOCKER_SUPERVISOR_ARGS[@]}; i+=2)); do
458-
case "${DOCKER_SUPERVISOR_ARGS[$i]}" in
459-
--docker-supervisor-bin)
460-
printf 'supervisor_bin = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
461-
;;
462-
--docker-supervisor-image)
463-
printf 'supervisor_image = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")"
464-
;;
465-
esac
466-
done
413+
printf 'supervisor_image = %s\n' "$(toml_string "${SUPERVISOR_IMAGE}")"
467414
if [ -n "${GATEWAY_HOST_ALIAS_IP}" ]; then
468415
printf 'host_gateway_ip = %s\n' "$(toml_string "${GATEWAY_HOST_ALIAS_IP}")"
469416
fi

0 commit comments

Comments
 (0)