Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/e2e-script.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ jobs:
printf 'NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1\n'
printf 'NEMOCLAW_PROVIDER=custom\n'
printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n'
printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'
printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'
printf 'NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
printf 'NEMOCLAW_PREFERRED_API=openai-completions\n'
printf 'COMPATIBLE_API_KEY=%s\n' "${NVIDIA_INFERENCE_API_KEY}"
} >> "$GITHUB_ENV"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e-vitest-scenarios.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,8 @@ jobs:
NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
run: |
set -euo pipefail
Expand Down
72 changes: 36 additions & 36 deletions .github/workflows/nightly-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -468,8 +468,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -544,8 +544,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_ISSUE_4434_LIVE: "1"
Expand Down Expand Up @@ -973,8 +973,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -1271,8 +1271,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -1578,8 +1578,8 @@ jobs:
NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration
Expand Down Expand Up @@ -1804,8 +1804,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -1817,8 +1817,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -1856,8 +1856,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -1869,8 +1869,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -1908,8 +1908,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -1921,8 +1921,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -1961,8 +1961,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -1974,8 +1974,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -2014,8 +2014,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -2028,8 +2028,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -2070,8 +2070,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand All @@ -2084,8 +2084,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down Expand Up @@ -2164,8 +2164,8 @@ jobs:
NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
NEMOCLAW_PROVIDER: custom
NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
NEMOCLAW_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_COMPAT_MODEL: nvidia/nemotron-3-super-120b-a12b
NEMOCLAW_PREFERRED_API: openai-completions
COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
NEMOCLAW_NON_INTERACTIVE: "1"
Expand Down
2 changes: 1 addition & 1 deletion docs/inference/inference-options.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ models:
api_base: "https://integrate.api.nvidia.com"

- name: super
litellm_model: "openai/nvidia/nvidia/nemotron-3-super-v3"
litellm_model: "openai/nvidia/nemotron-3-super-120b-a12b"
cost_per_m_input_tokens: 0.10
api_base: "https://integrate.api.nvidia.com"
```
Expand Down
2 changes: 1 addition & 1 deletion src/lib/onboard/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const HERMES_INFERENCE_ENDPOINT_URL = "https://inference-api.nousresearch.com/v1
const HOSTED_INFERENCE_SOURCE_ENV = "NVIDIA_INFERENCE_API_KEY";
const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY";
const HOSTED_INFERENCE_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
const HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
const HOSTED_INFERENCE_MODEL = "nvidia/nemotron-3-super-120b-a12b";

const REMOTE_PROVIDER_CONFIG = {
build: {
Expand Down
2 changes: 1 addition & 1 deletion test/e2e-scenario/fixtures/hosted-inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY";
const HOSTED_INFERENCE_PROVIDER = "custom";
const HOSTED_INFERENCE_PROVIDER_NAME = "compatible-endpoint";
const DEFAULT_HOSTED_INFERENCE_BASE_URL = "https://inference-api.nvidia.com/v1";
const DEFAULT_HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
const DEFAULT_HOSTED_INFERENCE_MODEL = "nvidia/nemotron-3-super-120b-a12b";

export interface HostedInferenceSecrets {
required(name: string): string;
Expand Down
30 changes: 24 additions & 6 deletions test/e2e-script-workflow.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -545,8 +545,8 @@ describe("E2E reusable workflow contract", () => {
expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
expect(runStep?.env?.NEMOCLAW_PROVIDER).toBe("custom");
expect(runStep?.env?.NEMOCLAW_ENDPOINT_URL).toBe("https://inference-api.nvidia.com/v1");
expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3");
expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3");
expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nemotron-3-super-120b-a12b");
expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nemotron-3-super-120b-a12b");
expect(runStep?.env?.NEMOCLAW_PREFERRED_API).toBe("openai-completions");
expect(runStep?.env?.COMPATIBLE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
expect(runStep?.env?.GITHUB_TOKEN).toBeUndefined();
Expand Down Expand Up @@ -904,8 +904,8 @@ describe("E2E reusable workflow contract", () => {
expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1");
expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom");
expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1");
expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3");
expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3");
expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b");
expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b");
expect(exportStep?.run).toContain("NEMOCLAW_PREFERRED_API=openai-completions");
expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s");

Expand All @@ -915,6 +915,24 @@ describe("E2E reusable workflow contract", () => {
}
});

it("keeps rebuild fixture registry inference aligned with the onboard session", () => {
  // Text every rebuild fixture script is required to contain.
  const requiredSnippets = [
    "provider = sess.get('provider')",
    "model = (",
    "'provider': provider",
    "'model': model",
  ];
  // Hard-coded provider/model entries that must not appear in any fixture script.
  const forbiddenSnippets = [
    "'provider': 'nvidia-prod'",
    "'model': 'nvidia/nemotron-3-super-120b-a12b'",
  ];

  [
    "test/e2e/test-rebuild-openclaw.sh",
    "test/e2e/test-rebuild-hermes.sh",
    "test/e2e/test-upgrade-stale-sandbox.sh",
  ].forEach((fixturePath) => {
    const script = readFileSync(fixturePath, "utf8");

    // The fixture path is passed as the assertion message so a failure names the script.
    for (const snippet of requiredSnippets) {
      expect(script, fixturePath).toContain(snippet);
    }
    for (const snippet of forbiddenSnippets) {
      expect(script, fixturePath).not.toContain(snippet);
    }
  });
});

it("routes direct hosted-secret jobs through the hosted custom inference endpoint", () => {
const trustedWorkflowSecretExceptions = new Set([
"issue-4434-tui-unreachable-inference-e2e:Sanitize issue #4434 logs on failure",
Expand Down Expand Up @@ -962,8 +980,8 @@ describe("E2E reusable workflow contract", () => {
}
expect(step.env?.NEMOCLAW_PROVIDER, jobName).toBe("custom");
expect(step.env?.NEMOCLAW_ENDPOINT_URL, jobName).toBe("https://inference-api.nvidia.com/v1");
expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3");
expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3");
expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nemotron-3-super-120b-a12b");
expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nemotron-3-super-120b-a12b");
expect(step.env?.NEMOCLAW_PREFERRED_API, jobName).toBe("openai-completions");
expect(step.env?.COMPATIBLE_API_KEY, jobName).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
}
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/lib/ci-compatible-inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# at inference-api.nvidia.com. Keep this helper in test/e2e so the
# product-facing provider/default endpoint remain unchanged.

NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nvidia/nemotron-3-super-v3"
NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b"
NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT="compatible-endpoint"
NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b"

Expand Down
Loading
Loading