From 0005c90a98ad2ee33674113ac5c4907dd8ea5b97 Mon Sep 17 00:00:00 2001
From: Vishesh Tanksale
Date: Thu, 4 Sep 2025 19:48:17 +0000
Subject: [PATCH] Updating multi LLM samples without precaching

Signed-off-by: Vishesh Tanksale
---
Notes (ignored by `git am`; not part of the commit message):
- multi-llm-hf.yaml: authSecret now points at ngc-api-secret, and HF_TOKEN is
  passed as a plain env entry. Its value is a placeholder that must be
  replaced before the sample is applied.
- multi-llm-ngc.yaml: the resource is renamed and NIM_MODEL_NAME now uses an
  ngc:// model reference.

 .../nim/serving/standalone/no-precaching/multi-llm-hf.yaml  | 6 ++++--
 .../nim/serving/standalone/no-precaching/multi-llm-ngc.yaml | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/config/samples/nim/serving/standalone/no-precaching/multi-llm-hf.yaml b/config/samples/nim/serving/standalone/no-precaching/multi-llm-hf.yaml
index 2d4e92431..ba5f71d19 100644
--- a/config/samples/nim/serving/standalone/no-precaching/multi-llm-hf.yaml
+++ b/config/samples/nim/serving/standalone/no-precaching/multi-llm-hf.yaml
@@ -1,5 +1,5 @@
 ---
-# NIM Service with Multi-LLM NIM with Autoscaling
+# NIM Service with Multi-LLM NIM with model not pre-cached
 apiVersion: apps.nvidia.com/v1alpha1
 kind: NIMService
 metadata:
@@ -12,10 +12,12 @@ spec:
     pullPolicy: IfNotPresent
     pullSecrets:
       - ngc-secret
-  authSecret: hf-secret # with HF_TOKEN set
+  authSecret: ngc-api-secret # with NGC_API_KEY set
   env:
     - name: NIM_MODEL_NAME
       value: hf://meta-llama/Llama-3.2-1B-Instruct
+    - name: HF_TOKEN
+      value: "<your-hf-token>" # Replace with your actual HF token
   storage:
     pvc:
       create: true
diff --git a/config/samples/nim/serving/standalone/no-precaching/multi-llm-ngc.yaml b/config/samples/nim/serving/standalone/no-precaching/multi-llm-ngc.yaml
index b6ca9ffcf..5267bb07d 100644
--- a/config/samples/nim/serving/standalone/no-precaching/multi-llm-ngc.yaml
+++ b/config/samples/nim/serving/standalone/no-precaching/multi-llm-ngc.yaml
@@ -1,9 +1,9 @@
 ---
-# NIM Service with Multi-LLM NIM with Autoscaling
+# NIM Service with Multi-LLM NIM with model not pre-cached
 apiVersion: apps.nvidia.com/v1alpha1
 kind: NIMService
 metadata:
-  name: meta-llama-3-2-1b-instruct
+  name: meta-llama-3-8b-instruct
   namespace: nim-service
 spec:
   image:
@@ -15,7 +15,7 @@ spec:
   authSecret: ngc-api-secret
   env:
     - name: NIM_MODEL_NAME
-      value: nvidia/nemo/llama-3_2-1b-instruct
+      value: 'ngc://nvidian/nim-llm-dev/meta-llama3-8b-instruct:hf'
   storage:
     pvc:
       create: true