Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions test/e2e-scenario/support-tests/hosted-inference.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { spawnSync } from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";

import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";

// Path to the shell helper that the generated probe script sources. It is
// resolved relative to this test file's directory: two levels up, then
// e2e/lib/ci-compatible-inference.sh.
const COMPAT_HELPER = path.join(
import.meta.dirname,
"..",
"..",
"e2e",
"lib",
"ci-compatible-inference.sh",
);

function secrets(values: Record<string, string | undefined>) {
return {
required: (name: string) => {
Expand All @@ -15,6 +28,55 @@ function secrets(values: Record<string, string | undefined>) {
};
}

/** Knobs for a single `runHostedProbe` invocation; every field is optional. */
type ProbeRunOptions = {
  /** Exit code the stubbed `curl` terminates with; `runHostedProbe` falls back to 0. */
  curlExitCode?: number;
  /** Text the stubbed `curl` prints on stdout; `runHostedProbe` falls back to "404". */
  curlStatus?: string;
  /** Extra environment variables, spread last over the child process environment. */
  env?: Record<string, string>;
};

/**
 * Runs `nemoclaw_e2e_probe_hosted_inference` from the compat helper in a child
 * bash process, with a stub `curl` placed first on PATH in place of the real
 * binary.
 *
 * The stub appends every argument it receives to a call log (one `ARG:<value>`
 * line each), prints `options.curlStatus` (default "404") on stdout and exits
 * with `options.curlExitCode` (default 0).
 *
 * @param options - Stub behaviour and extra environment for the child process.
 * @returns The `spawnSync` result plus the recorded stub arguments (`""` when
 *   the stub was never invoked).
 * @throws If bash cannot be spawned, or if writing/reading the fixture files fails.
 */
function runHostedProbe(options: ProbeRunOptions = {}) {
  // POSIX single-quote quoting. Unlike a JSON double-quoted string, nothing
  // inside single quotes is expanded by the shell, so `$`, backticks and
  // backslashes in paths or values are passed through literally.
  const shellQuote = (value: string): string => "'" + value.replace(/'/g, "'\\''") + "'";

  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-hosted-probe-"));
  try {
    const callsPath = path.join(tmpDir, "curl.calls");
    const curlPath = path.join(tmpDir, "curl");
    const scriptPath = path.join(tmpDir, "run-probe.sh");
    const curlExitCode = options.curlExitCode ?? 0;
    const curlStatus = options.curlStatus ?? "404";

    // Stub curl: record each argument, emit the canned status, exit as told.
    fs.writeFileSync(
      curlPath,
      [
        "#!/bin/sh",
        'for arg in "$@"; do',
        // `\\n` keeps the escape in the generated script for printf to interpret.
        `printf 'ARG:%s\\n' "$arg" >> ${shellQuote(callsPath)}`,
        "done",
        `printf %s ${shellQuote(curlStatus)}`,
        `exit ${curlExitCode}`,
        "",
      ].join("\n"),
      { mode: 0o755 },
    );
    // Driver script: source the helper under strict mode and run the probe.
    fs.writeFileSync(
      scriptPath,
      [
        "#!/usr/bin/env bash",
        "set -euo pipefail",
        `. ${shellQuote(COMPAT_HELPER)}`,
        "nemoclaw_e2e_probe_hosted_inference",
        "",
      ].join("\n"),
      { mode: 0o755 },
    );

    const result = spawnSync("bash", [scriptPath], {
      encoding: "utf-8",
      env: {
        ...process.env,
        // The temp dir comes first so the stub shadows any real curl.
        PATH: `${tmpDir}:${process.env.PATH ?? ""}`,
        NVIDIA_INFERENCE_API_KEY: "hosted-compatible-key",
        ...options.env,
      },
    });
    // A spawn failure leaves `status` as null, which would satisfy
    // `not.toBe(0)` assertions without the probe ever running.
    if (result.error) {
      throw result.error;
    }

    const calls = fs.existsSync(callsPath) ? fs.readFileSync(callsPath, "utf-8") : "";
    return { result, calls };
  } finally {
    // Always remove the fixture directory, including when a step above throws.
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}

describe("hosted inference E2E config", () => {
it("uses NVIDIA_INFERENCE_API_KEY as the hosted compatible endpoint source secret", () => {
const cfg = requireHostedInferenceConfig(
Expand All @@ -41,6 +103,50 @@ describe("hosted inference E2E config", () => {
expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
});

// With hosted inference enabled and an explicit endpoint URL, the probe must
// pass the bare base URL to curl and must not pass any inference/model path
// or credential header.
it("uses a lightweight compatible reachability probe without API or auth requests", () => {
  const probe = runHostedProbe({
    env: {
      NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1",
      NEMOCLAW_ENDPOINT_URL: "https://inference-api.nvidia.com/v1",
    },
  });

  expect(probe.result.status).toBe(0);
  expect(probe.calls).toContain("ARG:https://inference-api.nvidia.com/v1");
  for (const forbidden of ["chat/completions", "/models", "Authorization", "Bearer"]) {
    expect(probe.calls).not.toContain(forbidden);
  }
});

// With an nvapi-style key, hosted inference switched off and the cloud
// provider selected, the probe must still hit only the base URL and carry no
// /models path or credential header.
it("uses a lightweight nvapi reachability probe without /models or auth", () => {
  const probe = runHostedProbe({
    env: {
      NVIDIA_INFERENCE_API_KEY: "nvapi-test-key",
      NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "",
      NEMOCLAW_PROVIDER: "cloud",
    },
  });

  expect(probe.result.status).toBe(0);
  expect(probe.calls).toContain("ARG:https://inference-api.nvidia.com/v1");
  for (const forbidden of ["/models", "Authorization", "Bearer"]) {
    expect(probe.calls).not.toContain(forbidden);
  }
});

// curl exits 0 but reports HTTP status "000" (no response received); the
// probe must treat that as unreachable.
it("fails hosted reachability when curl returns HTTP status 000", () => {
  const { result, calls } = runHostedProbe({ curlStatus: "000" });

  // Guard against a vacuous pass: a non-zero exit only proves the point if
  // the stub curl was actually invoked, rather than the script dying earlier.
  expect(calls).toContain("ARG:");
  expect(result.status).not.toBe(0);
});

// curl itself fails (exit code 7, no status printed); the probe must
// report failure.
it("fails hosted reachability when curl exits nonzero", () => {
  const { result, calls } = runHostedProbe({ curlExitCode: 7, curlStatus: "" });

  // Guard against a vacuous pass: a non-zero exit only proves the point if
  // the stub curl was actually invoked, rather than the script dying earlier.
  expect(calls).toContain("ARG:");
  expect(result.status).not.toBe(0);
});

it("configures the custom provider route for inference-api.nvidia.com", () => {
const cfg = requireHostedInferenceConfig(
secrets({ NVIDIA_INFERENCE_API_KEY: "repo-hosted-key" }),
Expand Down
30 changes: 11 additions & 19 deletions test/e2e/lib/ci-compatible-inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,27 +105,19 @@ nemoclaw_e2e_hosted_inference_model() {
}

# Preflight reachability check for the hosted inference endpoint.
#
# Sends a single unauthenticated curl request to the base URL reported by
# nemoclaw_e2e_hosted_inference_base_url and discards the response body.
# Returns 0 when curl reports a non-empty HTTP status other than "000",
# curl's own exit code when curl fails, and non-zero otherwise.
nemoclaw_e2e_probe_hosted_inference() {
  local base_url status
  base_url="$(nemoclaw_e2e_hosted_inference_base_url)"

  # This preflight is a network/TLS reachability check only. Do not spend an
  # inference request here: full parallel nightly runs can otherwise burn CI
  # quota or trip HTTP 429 before the scenario reaches the behavior under test.
  # In compatible mode, NEMOCLAW_ENDPOINT_URL is a trusted repo-controlled CI
  # input from nightly workflow env_json; this probe intentionally validates
  # only TCP/TLS/HTTP reachability for that base URL, not provider semantics.
  # Onboarding still performs the authenticated model/API validation with
  # redaction and retries.
  status=$(curl -sS --connect-timeout 10 --max-time 20 -o /dev/null -w "%{http_code}" "$base_url" 2>/dev/null) || return $?
  # Any real HTTP status (even 4xx/5xx) proves the endpoint answered.
  [ -n "$status" ] && [ "$status" != "000" ]
}

nemoclaw_e2e_require_hosted_inference_key() {
Expand Down
Loading