diff --git a/test/e2e-scenario/support-tests/hosted-inference.test.ts b/test/e2e-scenario/support-tests/hosted-inference.test.ts
index eaa2a60bc4..8a21df8066 100644
--- a/test/e2e-scenario/support-tests/hosted-inference.test.ts
+++ b/test/e2e-scenario/support-tests/hosted-inference.test.ts
@@ -1,10 +1,24 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
 import { describe, expect, it } from "vitest";
 
 import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";
 
+// Shell helper under test/e2e/lib that the probe wrapper script sources.
+const COMPAT_HELPER = path.join(
+  import.meta.dirname,
+  "..",
+  "..",
+  "e2e",
+  "lib",
+  "ci-compatible-inference.sh",
+);
+
 function secrets(values: Record<string, string>) {
   return {
     required: (name: string) => {
@@ -15,6 +29,73 @@ function secrets(values: Record<string, string>) {
   };
 }
 
+/** Knobs for the fake curl binary and the environment of the probe subprocess. */
+type ProbeRunOptions = {
+  /** Extra environment variables; spread last, so they override the defaults. */
+  env?: Record<string, string>;
+  /** Exit code the fake curl exits with (default 0). */
+  curlExitCode?: number;
+  /** Text the fake curl prints on stdout (default "404"). */
+  curlStatus?: string;
+};
+
+/**
+ * Sources ci-compatible-inference.sh in a bash subprocess and calls
+ * nemoclaw_e2e_probe_hosted_inference with a fake `curl` placed first on PATH.
+ *
+ * The fake curl appends each argument to a calls file as an `ARG:<value>` line,
+ * prints `curlStatus`, and exits with `curlExitCode`.
+ *
+ * @param options - Overrides for the subprocess environment and fake curl behavior.
+ * @returns The spawnSync result plus the recorded curl arguments ("" when the
+ *   calls file was never created).
+ */
+function runHostedProbe(options: ProbeRunOptions = {}) {
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-hosted-probe-"));
+  // Remove the scratch directory even if writing the stubs or spawning throws.
+  try {
+    const callsPath = path.join(tmpDir, "curl.calls");
+    const curlPath = path.join(tmpDir, "curl");
+    const scriptPath = path.join(tmpDir, "run-probe.sh");
+    const curlExitCode = options.curlExitCode ?? 0;
+    const curlStatus = options.curlStatus ?? "404";
+
+    fs.writeFileSync(
+      curlPath,
+      `#!/bin/sh
+for arg in "$@"; do
+  printf 'ARG:%s\n' "$arg" >> ${JSON.stringify(callsPath)}
+done
+printf %s ${JSON.stringify(curlStatus)}
+exit ${curlExitCode}
+`,
+      { mode: 0o755 },
+    );
+    fs.writeFileSync(
+      scriptPath,
+      `#!/usr/bin/env bash
+set -euo pipefail
+. ${JSON.stringify(COMPAT_HELPER)}
+nemoclaw_e2e_probe_hosted_inference
+`,
+      { mode: 0o755 },
+    );
+
+    const result = spawnSync("bash", [scriptPath], {
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        PATH: `${tmpDir}:${process.env.PATH ?? ""}`,
+        NVIDIA_INFERENCE_API_KEY: "hosted-compatible-key",
+        ...options.env,
+      },
+    });
+    const calls = fs.existsSync(callsPath) ? fs.readFileSync(callsPath, "utf-8") : "";
+    return { result, calls };
+  } finally {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+  }
+}
+
 describe("hosted inference E2E config", () => {
   it("uses NVIDIA_INFERENCE_API_KEY as the hosted compatible endpoint source secret", () => {
     const cfg = requireHostedInferenceConfig(
@@ -41,6 +122,50 @@ describe("hosted inference E2E config", () => {
     expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
   });
 
+  it("uses a lightweight compatible reachability probe without API or auth requests", () => {
+    const { result, calls } = runHostedProbe({
+      env: {
+        NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1",
+        NEMOCLAW_ENDPOINT_URL: "https://inference-api.nvidia.com/v1",
+      },
+    });
+
+    expect(result.status).toBe(0);
+    expect(calls).toContain("ARG:https://inference-api.nvidia.com/v1");
+    expect(calls).not.toContain("chat/completions");
+    expect(calls).not.toContain("/models");
+    expect(calls).not.toContain("Authorization");
+    expect(calls).not.toContain("Bearer");
+  });
+
+  it("uses a lightweight nvapi reachability probe without /models or auth", () => {
+    const { result, calls } = runHostedProbe({
+      env: {
+        NVIDIA_INFERENCE_API_KEY: "nvapi-test-key",
+        NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "",
+        NEMOCLAW_PROVIDER: "cloud",
+      },
+    });
+
+    expect(result.status).toBe(0);
+    expect(calls).toContain("ARG:https://inference-api.nvidia.com/v1");
+    expect(calls).not.toContain("/models");
+    expect(calls).not.toContain("Authorization");
+    expect(calls).not.toContain("Bearer");
+  });
+
+  it("fails hosted reachability when curl returns HTTP status 000", () => {
+    const { result } = runHostedProbe({ curlStatus: "000" });
+
+    expect(result.status).not.toBe(0);
+  });
+
+  it("fails hosted reachability when curl exits nonzero", () => {
+    const { result } = runHostedProbe({ curlExitCode: 7, curlStatus: "" });
+
+    expect(result.status).not.toBe(0);
+  });
+
   it("configures the custom provider route for inference-api.nvidia.com", () => {
     const cfg = requireHostedInferenceConfig(
       secrets({ NVIDIA_INFERENCE_API_KEY: "repo-hosted-key" }),
diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh
index 01b677d26c..01ae3c9932 100755
--- a/test/e2e/lib/ci-compatible-inference.sh
+++ b/test/e2e/lib/ci-compatible-inference.sh
@@ -105,27 +105,19 @@ nemoclaw_e2e_hosted_inference_model() {
 }
 
 nemoclaw_e2e_probe_hosted_inference() {
-  local base_url key
+  local base_url status
   base_url="$(nemoclaw_e2e_hosted_inference_base_url)"
-  key="$(nemoclaw_e2e_hosted_inference_key)"
-
-  if nemoclaw_e2e_using_compatible_inference; then
-    local model payload
-    model="$(nemoclaw_e2e_hosted_inference_model)"
-    payload=$(
-      printf '{"model":"%s","messages":[{"role":"user","content":"Respond with OK."}],"temperature":0,"max_tokens":8}' "$model"
-    )
-    curl -sf --max-time 30 \
-      -X POST "${base_url}/chat/completions" \
-      -H "Authorization: Bearer $key" \
-      -H "Content-Type: application/json" \
-      -d "$payload" >/dev/null 2>&1
-    return $?
-  fi
 
-  curl -sf --max-time 10 \
-    -H "Authorization: Bearer $key" \
-    "${base_url}/models" >/dev/null 2>&1
+  # This preflight is a network/TLS reachability check only. Do not spend an
+  # inference request here: full parallel nightly runs can otherwise burn CI
+  # quota or trip HTTP 429 before the scenario reaches the behavior under test.
+  # In compatible mode, NEMOCLAW_ENDPOINT_URL is a trusted repo-controlled CI
+  # input from nightly workflow env_json; this probe intentionally validates
+  # only TCP/TLS/HTTP reachability for that base URL, not provider semantics.
+  # Onboarding still performs the authenticated model/API validation with
+  # redaction and retries.
+  status=$(curl -sS --connect-timeout 10 --max-time 20 -o /dev/null -w "%{http_code}" "$base_url" 2>/dev/null) || return $?
+  [ -n "$status" ] && [ "$status" != "000" ]
 }
 
 nemoclaw_e2e_require_hosted_inference_key() {