diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml index f326f2b403..0b729fead7 100644 --- a/.github/workflows/e2e-script.yaml +++ b/.github/workflows/e2e-script.yaml @@ -229,8 +229,8 @@ jobs: printf 'NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1\n' printf 'NEMOCLAW_PROVIDER=custom\n' printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n' - printf 'NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b\n' - printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b\n' + printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n' + printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3\n' printf 'NEMOCLAW_PREFERRED_API=openai-completions\n' printf 'COMPATIBLE_API_KEY=%s\n' "${NVIDIA_INFERENCE_API_KEY}" } >> "$GITHUB_ENV" diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index de84f94437..a77db9295d 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -990,8 +990,8 @@ jobs: NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions run: | set -euo pipefail diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml index 65f2f7fc49..98619d67d4 100644 --- a/.github/workflows/nightly-e2e.yaml +++ b/.github/workflows/nightly-e2e.yaml @@ -468,8 +468,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -544,8 +544,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_ISSUE_4434_LIVE: "1" @@ -973,8 +973,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1271,8 +1271,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1578,8 +1578,8 @@ jobs: NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration @@ -1804,8 +1804,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1817,8 +1817,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1856,8 +1856,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1869,8 +1869,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1908,8 +1908,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1921,8 +1921,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1961,8 +1961,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -1974,8 +1974,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -2014,8 +2014,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -2028,8 +2028,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -2070,8 +2070,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -2084,8 +2084,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" @@ -2164,8 +2164,8 @@ jobs: NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 - NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b - NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_PREFERRED_API: openai-completions COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" diff --git a/src/lib/inference/onboard-probes.test.ts b/src/lib/inference/onboard-probes.test.ts index 8fbfacfd63..7230f5fef3 100644 --- a/src/lib/inference/onboard-probes.test.ts +++ b/src/lib/inference/onboard-probes.test.ts @@ -284,10 +284,19 @@ describe("OpenAI-compatible inference probes", () => { }); }); - it("keeps the default chat-completions probe minimal for other models", () => { + it("keeps the default chat-completions probe bounded for other models", () => { expect(getChatCompletionsProbePayload("nvidia/nemotron-3-super-120b-a12b")).toEqual({ model: "nvidia/nemotron-3-super-120b-a12b", messages: [{ role: "user", content: "Reply with exactly: OK" }], + max_tokens: 8, + }); + }); + + it("bounds the hosted compatible inference probe for the served Nemotron model", () => { + expect(getChatCompletionsProbePayload("nvidia/nvidia/nemotron-3-super-v3")).toEqual({ + model: "nvidia/nvidia/nemotron-3-super-v3", + messages: [{ role: "user", content: "Reply with exactly: OK" }], + max_tokens: 8, }); }); diff --git a/src/lib/inference/onboard-probes.ts b/src/lib/inference/onboard-probes.ts index e1511e5b01..a7084360cf 100644 --- a/src/lib/inference/onboard-probes.ts +++ b/src/lib/inference/onboard-probes.ts @@ -509,6 +509,7 @@ function getChatCompletionsProbePayload(model) { const payload = { model, messages: [{ role: "user", content: "Reply with exactly: OK" }], + max_tokens: 8, }; if (isDeepSeekV4ProModel(model)) { diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts index 1987048a7a..18f835cb1b 100644 --- a/src/lib/onboard/providers.ts +++ b/src/lib/onboard/providers.ts @@ -26,7 +26,7 @@ const HERMES_INFERENCE_ENDPOINT_URL = "https://inference-api.nousresearch.com/v1 const HOSTED_INFERENCE_SOURCE_ENV = "NVIDIA_INFERENCE_API_KEY"; const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY"; const HOSTED_INFERENCE_ENDPOINT_URL = "https://inference-api.nvidia.com/v1"; -const HOSTED_INFERENCE_MODEL = "nvidia/nemotron-3-super-120b-a12b"; +const HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3"; const REMOTE_PROVIDER_CONFIG = { build: { diff --git a/test/e2e-scenario/fixtures/hosted-inference.ts b/test/e2e-scenario/fixtures/hosted-inference.ts index ba5c69b75f..f46b446bf7 100644 --- a/test/e2e-scenario/fixtures/hosted-inference.ts +++ b/test/e2e-scenario/fixtures/hosted-inference.ts @@ -6,7 +6,7 @@ const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY"; const HOSTED_INFERENCE_PROVIDER = "custom"; const HOSTED_INFERENCE_PROVIDER_NAME = "compatible-endpoint"; const DEFAULT_HOSTED_INFERENCE_BASE_URL = "https://inference-api.nvidia.com/v1"; -const DEFAULT_HOSTED_INFERENCE_MODEL = "nvidia/nemotron-3-super-120b-a12b"; +const DEFAULT_HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3"; export interface HostedInferenceSecrets { required(name: string): string; diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts index a3a4111118..57e9a520d2 100644 --- a/test/e2e-script-workflow.test.ts +++ b/test/e2e-script-workflow.test.ts @@ -545,8 +545,8 @@ describe("E2E reusable workflow contract", () => { expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET); expect(runStep?.env?.NEMOCLAW_PROVIDER).toBe("custom"); expect(runStep?.env?.NEMOCLAW_ENDPOINT_URL).toBe("https://inference-api.nvidia.com/v1"); - expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nemotron-3-super-120b-a12b"); - expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nemotron-3-super-120b-a12b"); + expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3"); expect(runStep?.env?.NEMOCLAW_PREFERRED_API).toBe("openai-completions"); expect(runStep?.env?.COMPATIBLE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET); expect(runStep?.env?.GITHUB_TOKEN).toBeUndefined(); @@ -904,8 +904,8 @@ describe("E2E reusable workflow contract", () => { expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1"); expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom"); expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1"); - expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b"); - expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b"); + expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3"); + expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3"); expect(exportStep?.run).toContain("NEMOCLAW_PREFERRED_API=openai-completions"); expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s"); @@ -915,7 +915,7 @@ describe("E2E reusable workflow contract", () => { } }); - it("keeps rebuild fixture registry inference aligned with the onboard session", () => { + it("keeps rebuild fixture registry inference aligned with hosted custom inference", () => { const rebuildFixtures = [ "test/e2e/test-rebuild-openclaw.sh", "test/e2e/test-rebuild-hermes.sh", @@ -925,9 +925,10 @@ describe("E2E reusable workflow contract", () => { for (const fixture of rebuildFixtures) { const body = readFileSync(fixture, "utf8"); expect(body, fixture).toContain("provider = sess.get('provider')"); - expect(body, fixture).toContain("model = ("); + expect(body, fixture).toContain("if env_provider == 'custom'"); expect(body, fixture).toContain("'provider': provider"); expect(body, fixture).toContain("'model': model"); + expect(body, fixture).toContain("nvidia/nvidia/nemotron-3-super-v3"); expect(body, fixture).not.toContain("'provider': 'nvidia-prod'"); expect(body, fixture).not.toContain("'model': 'nvidia/nemotron-3-super-120b-a12b'"); } @@ -980,8 +981,8 @@ describe("E2E reusable workflow contract", () => { } expect(step.env?.NEMOCLAW_PROVIDER, jobName).toBe("custom"); expect(step.env?.NEMOCLAW_ENDPOINT_URL, jobName).toBe("https://inference-api.nvidia.com/v1"); - expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nemotron-3-super-120b-a12b"); - expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nemotron-3-super-120b-a12b"); + expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3"); expect(step.env?.NEMOCLAW_PREFERRED_API, jobName).toBe("openai-completions"); expect(step.env?.COMPATIBLE_API_KEY, jobName).toBe(GUARDED_HOSTED_INFERENCE_SECRET); } diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh index 09fa5e5352..01b677d26c 100755 --- a/test/e2e/lib/ci-compatible-inference.sh +++ b/test/e2e/lib/ci-compatible-inference.sh @@ -7,7 +7,7 @@ # at inference-api.nvidia.com. Keep this helper in test/e2e so the # product-facing provider/default endpoint remain unchanged. -NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b" +NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nvidia/nemotron-3-super-v3" NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT="compatible-endpoint" NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b" diff --git a/test/e2e/test-rebuild-hermes.sh b/test/e2e/test-rebuild-hermes.sh index 94fb897182..ecd6413ebf 100755 --- a/test/e2e/test-rebuild-hermes.sh +++ b/test/e2e/test-rebuild-hermes.sh @@ -253,16 +253,15 @@ try: sess = json.load(f) except Exception: sess = {} -provider = sess.get('provider') or ( - 'compatible-endpoint' - if os.environ.get('NEMOCLAW_ENDPOINT_URL') and os.environ.get('COMPATIBLE_API_KEY') - else 'nvidia-prod' -) +env_provider = (os.environ.get('NEMOCLAW_PROVIDER') or '').strip() +if env_provider == 'custom': + env_provider = 'compatible-endpoint' +provider = sess.get('provider') or env_provider or 'compatible-endpoint' model = ( sess.get('model') or os.environ.get('NEMOCLAW_MODEL') or os.environ.get('NEMOCLAW_COMPAT_MODEL') - or 'nvidia/nemotron-3-super-120b-a12b' + or 'nvidia/nvidia/nemotron-3-super-v3' ) credential_hash = hashlib.sha256('${DISCORD_FAKE_TOKEN}'.encode()).hexdigest() plan = { @@ -401,10 +400,11 @@ fi # Inference works after rebuild (proves credential chain is intact) info "Verifying inference after rebuild..." +POST_REBUILD_INFERENCE_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_COMPAT_MODEL:-nvidia/nvidia/nemotron-3-super-v3}}" INFERENCE_RESPONSE=$(openshell sandbox exec --name "${SANDBOX_NAME}" -- \ curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ - -d '{"model":"nvidia/nemotron-3-super-120b-a12b","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' \ + -d "{\"model\":\"${POST_REBUILD_INFERENCE_MODEL}\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}" \ 2>&1 || true) if echo "${INFERENCE_RESPONSE}" | python3 -c "import json,sys; r=json.load(sys.stdin); c=r['choices'][0]['message']; print(c.get('content',''))" 2>/dev/null | grep -qi "PONG"; then pass "Inference works after rebuild (NVIDIA API key + provider chain intact)" diff --git a/test/e2e/test-rebuild-openclaw.sh b/test/e2e/test-rebuild-openclaw.sh index 7bd8e2491b..1cbbcc86bc 100755 --- a/test/e2e/test-rebuild-openclaw.sh +++ b/test/e2e/test-rebuild-openclaw.sh @@ -222,16 +222,15 @@ try: sess = json.load(f) except Exception: sess = {} -provider = sess.get('provider') or ( - 'compatible-endpoint' - if os.environ.get('NEMOCLAW_ENDPOINT_URL') and os.environ.get('COMPATIBLE_API_KEY') - else 'nvidia-prod' -) +env_provider = (os.environ.get('NEMOCLAW_PROVIDER') or '').strip() +if env_provider == 'custom': + env_provider = 'compatible-endpoint' +provider = sess.get('provider') or env_provider or 'compatible-endpoint' model = ( sess.get('model') or os.environ.get('NEMOCLAW_MODEL') or os.environ.get('NEMOCLAW_COMPAT_MODEL') - or 'nvidia/nemotron-3-super-120b-a12b' + or 'nvidia/nvidia/nemotron-3-super-v3' ) reg = {'sandboxes': {'${SANDBOX_NAME}': { 'name': '${SANDBOX_NAME}', @@ -447,10 +446,11 @@ fi # Inference works after rebuild (proves credential chain is intact) info "Verifying inference after rebuild..." +POST_REBUILD_INFERENCE_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_COMPAT_MODEL:-nvidia/nvidia/nemotron-3-super-v3}}" INFERENCE_RESPONSE=$(openshell sandbox exec --name "${SANDBOX_NAME}" -- \ curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ - -d '{"model":"nvidia/nemotron-3-super-120b-a12b","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' \ + -d "{\"model\":\"${POST_REBUILD_INFERENCE_MODEL}\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}" \ 2>&1 || true) if echo "${INFERENCE_RESPONSE}" | python3 -c "import json,sys; r=json.load(sys.stdin); c=r['choices'][0]['message']; print(c.get('content',''))" 2>/dev/null | grep -qi "PONG"; then pass "Inference works after rebuild (NVIDIA API key + provider chain intact)" diff --git a/test/e2e/test-upgrade-stale-sandbox.sh b/test/e2e/test-upgrade-stale-sandbox.sh index b095126bca..9af15e7e3f 100755 --- a/test/e2e/test-upgrade-stale-sandbox.sh +++ b/test/e2e/test-upgrade-stale-sandbox.sh @@ -162,16 +162,15 @@ try: sess = json.load(f) except Exception: sess = {} -provider = sess.get('provider') or ( - 'compatible-endpoint' - if os.environ.get('NEMOCLAW_ENDPOINT_URL') and os.environ.get('COMPATIBLE_API_KEY') - else 'nvidia-prod' -) +env_provider = (os.environ.get('NEMOCLAW_PROVIDER') or '').strip() +if env_provider == 'custom': + env_provider = 'compatible-endpoint' +provider = sess.get('provider') or env_provider or 'compatible-endpoint' model = ( sess.get('model') or os.environ.get('NEMOCLAW_MODEL') or os.environ.get('NEMOCLAW_COMPAT_MODEL') - or 'nvidia/nemotron-3-super-120b-a12b' + or 'nvidia/nvidia/nemotron-3-super-v3' ) reg = {'sandboxes': {'${SANDBOX_NAME}': { 'name': '${SANDBOX_NAME}',