From 82a38de2c4b6ee549ed636c50845aceff1368775 Mon Sep 17 00:00:00 2001 From: Aaron Rueth Date: Thu, 30 Oct 2025 22:44:35 +0000 Subject: [PATCH] Updated vllm version --- .../terraform/_shared_config/inference-ref-arch_variables.tf | 4 ++-- .../model-download/huggingface/download_wait.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/platforms/gke/base/use-cases/inference-ref-arch/terraform/_shared_config/inference-ref-arch_variables.tf b/platforms/gke/base/use-cases/inference-ref-arch/terraform/_shared_config/inference-ref-arch_variables.tf index 80515abdb..a86da1c73 100644 --- a/platforms/gke/base/use-cases/inference-ref-arch/terraform/_shared_config/inference-ref-arch_variables.tf +++ b/platforms/gke/base/use-cases/inference-ref-arch/terraform/_shared_config/inference-ref-arch_variables.tf @@ -48,7 +48,7 @@ variable "ira_online_gpu_kubernetes_service_account_name" { } variable "ira_online_gpu_vllm_image_url" { - default = "docker.io/vllm/vllm-openai:v0.10.1.1" + default = "docker.io/vllm/vllm-openai:v0.11.0" description = "The URL for the GPU vLLM container image." type = string } @@ -72,7 +72,7 @@ variable "ira_online_tpu_max_diffusion_sdxl_image_url" { } variable "ira_online_tpu_vllm_image_url" { - default = "docker.io/vllm/vllm-tpu:4c409cabc2c1c432ba670029990bd59e6bbf1479" + default = "docker.io/vllm/vllm-tpu:v0.11.1" description = "The URL for the TPU vLLM container image." type = string } diff --git a/test/scripts/platforms/gke/base/use-cases/inference-ref-arch/model-download/huggingface/download_wait.sh b/test/scripts/platforms/gke/base/use-cases/inference-ref-arch/model-download/huggingface/download_wait.sh index 23caa0556..62a34c109 100755 --- a/test/scripts/platforms/gke/base/use-cases/inference-ref-arch/model-download/huggingface/download_wait.sh +++ b/test/scripts/platforms/gke/base/use-cases/inference-ref-arch/model-download/huggingface/download_wait.sh @@ -31,7 +31,7 @@ kubectl_wait(){ source "${ACP_REPO_DIR}/platforms/gke/base/use-cases/inference-ref-arch/terraform/_shared_config/scripts/set_environment_variables.sh" echo "Waiting for '${HF_MODEL_ID}'(${HF_MODEL_ID_HASH}) download" | ts "$(date +'%Y-%m-%d %H:%M:%S.%N %Z') [${HF_MODEL_ID}]" - kubectl --namespace=${huggingface_hub_downloader_kubernetes_namespace_name} wait job/${HF_MODEL_ID_HASH}-hf-model-to-gcs --for=condition=complete --timeout=14400s | ts "$(date +'%Y-%m-%d %H:%M:%S.%N %Z') [${HF_MODEL_ID}]" & + kubectl --namespace=${huggingface_hub_downloader_kubernetes_namespace_name} wait job/${HF_MODEL_ID_HASH}-hf-model-to-gcs --for=condition=complete --timeout=14400s | ts "$(date +'%Y-%m-%d %H:%M:%S.%N %Z') [${HF_MODEL_ID}]" & kubectl --namespace=${huggingface_hub_downloader_kubernetes_namespace_name} wait job/${HF_MODEL_ID_HASH}-hf-model-to-gcs --for=condition=failed --timeout=14400s | ts "$(date +'%Y-%m-%d %H:%M:%S.%N %Z') [${HF_MODEL_ID}]" && exit 1 & wait -n && \ pkill -f "kubectl --namespace=${huggingface_hub_downloader_kubernetes_namespace_name} wait job/${HF_MODEL_ID_HASH}-hf-model-to-gcs" || true