diff --git a/.dev-tools/.gitignore b/.dev-tools/.gitignore index b93b0f69e..12262726f 100644 --- a/.dev-tools/.gitignore +++ b/.dev-tools/.gitignore @@ -8,6 +8,7 @@ platforms/gke/base/core/workloads/inference_gateway/manifests/* platforms/gke/base/core/workloads/jobset/manifests/* platforms/gke/base/core/workloads/kueue/manifests/* platforms/gke/base/core/workloads/lws/manifests/* +platforms/gke/base/core/workloads/nri_device_injector/manifests/* platforms/gke/base/core/workloads/nvidia_nim/* platforms/gke/base/core/workloads/priority_class/manifests/* platforms/gke/base/kubernetes/* diff --git a/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-2.yaml b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-2.yaml new file mode 100644 index 000000000..a5d3c9f0e --- /dev/null +++ b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-2.yaml @@ -0,0 +1,96 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: gpu-rtx-pro-6000-96gb-x1-2 +spec: + activeMigration: + optimizeRulePriority: true + nodePoolConfig: + imageStreaming: + enabled: true + nodePoolAutoCreation: + enabled: true + priorities: + # Use a specific reservation + # - gpu: + # count: 1 + # driverVersion: latest + # type: nvidia-rtx-pro-6000 + # machineType: g4-standard-24 + # maxPodsPerNode: 32 + # reservations: + # affinity: Specific + # specific: + # - name: nvidia-rtx-pro-6000-specific + # reservationBlock: + # name: + # spot: false + + # Use any reservation + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-24 + maxPodsPerNode: 32 + reservations: + affinity: AnyBestEffort + spot: false + + # Use on-demand + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-24 + maxPodsPerNode: 32 + spot: false + + # Use DWS FlexStart with 7 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-24 + maxPodsPerNode: 32 + maxRunDurationSeconds: 604800 + + # Use DWS FlexStart with 1 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-24 + maxPodsPerNode: 32 + maxRunDurationSeconds: 86400 + + # Use spot + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-24 + maxPodsPerNode: 32 + spot: true diff --git a/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-4.yaml b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-4.yaml new file mode 100644 index 000000000..2fd214151 --- /dev/null +++ b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-4.yaml @@ -0,0 +1,96 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: gpu-rtx-pro-6000-96gb-x1-4 +spec: + activeMigration: + optimizeRulePriority: true + nodePoolConfig: + imageStreaming: + enabled: true + nodePoolAutoCreation: + enabled: true + priorities: + # Use a specific reservation + # - gpu: + # count: 1 + # driverVersion: latest + # type: nvidia-rtx-pro-6000 + # machineType: g4-standard-12 + # maxPodsPerNode: 32 + # reservations: + # affinity: Specific + # specific: + # - name: nvidia-rtx-pro-6000-specific + # reservationBlock: + # name: + # spot: false + + # Use any reservation + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-12 + maxPodsPerNode: 32 + reservations: + affinity: AnyBestEffort + spot: false + + # Use on-demand + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-12 + maxPodsPerNode: 32 + spot: false + + # Use DWS FlexStart with 7 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-12 + maxPodsPerNode: 32 + maxRunDurationSeconds: 604800 + + # Use DWS FlexStart with 1 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-12 + maxPodsPerNode: 32 + maxRunDurationSeconds: 86400 + + # Use spot + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-12 + maxPodsPerNode: 32 + spot: true diff --git a/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-8.yaml b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-8.yaml new file mode 100644 index 000000000..df6c0baa1 --- /dev/null +++ b/platforms/gke/base/core/custom_compute_class/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1-8.yaml @@ -0,0 +1,96 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: gpu-rtx-pro-6000-96gb-x1-8 +spec: + activeMigration: + optimizeRulePriority: true + nodePoolConfig: + imageStreaming: + enabled: true + nodePoolAutoCreation: + enabled: true + priorities: + # Use a specific reservation + # - gpu: + # count: 1 + # driverVersion: latest + # type: nvidia-rtx-pro-6000 + # machineType: g4-standard-6 + # maxPodsPerNode: 32 + # reservations: + # affinity: Specific + # specific: + # - name: nvidia-rtx-pro-6000-specific + # reservationBlock: + # name: + # spot: false + + # Use any reservation + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-6 + maxPodsPerNode: 32 + reservations: + affinity: AnyBestEffort + spot: false + + # Use on-demand + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-6 + maxPodsPerNode: 32 + spot: false + + # Use DWS FlexStart with 7 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-6 + maxPodsPerNode: 32 + maxRunDurationSeconds: 604800 + + # Use DWS FlexStart with 1 day limit + - flexStart: + enabled: true + nodeRecycling: + leadTimeSeconds: 3600 + gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-6 + maxPodsPerNode: 32 + maxRunDurationSeconds: 86400 + + # Use spot + - gpu: + count: 1 + driverVersion: latest + type: nvidia-rtx-pro-6000 + machineType: g4-standard-6 + maxPodsPerNode: 32 + spot: true diff --git a/platforms/gke/base/core/workloads/nri_device_injector/.terraform.lock.hcl b/platforms/gke/base/core/workloads/nri_device_injector/.terraform.lock.hcl new file mode 100644 index 000000000..27d625960 --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/.terraform.lock.hcl @@ -0,0 +1,42 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/google" { + version = "7.6.0" + constraints = "7.6.0" + hashes = [ + "h1:JYsO3fV5OtaNuRTdjGZC1Z3Ku1ZIrRJGwXwsBjtWudk=", + "zh:0c70c768b0a34d7a61de70d0e85cf0057820556647bbce2384972a45d7092e4e", + "zh:0cb7aab89cd435c5c8dab9231ea176d64fdf1df1125db15a6b9ead978a93c0b2", + "zh:32f25c42214bb356bb67cef6057c9904f2878cd053a7760e5ee3737619f28638", + "zh:38b05b1171ab086c88b95d379120fb6c28c9e895ae924557c11c35e138319119", + "zh:39d8206d453a614fa0be3aeac8ea3921fb3ab7ed122205cbbcc2a41ca6176cb5", + "zh:58d9059aa6b4aab5ede4fc173dcdc7b4d042d0b1a1ab55407dd345931d7f4815", + "zh:a4bc001c8ac7700d0107155296250c3b8969511e1a488f3b318f3db62362eef2", + "zh:cc75e25db4bb672ebc200a89d6cff9ff0b9911e14e188d1b4429bb3511d2b35f", + "zh:d7f7639930735f17b2b4f73814204a9a050186ea7e1c2671a52e0fa7ddf7a001", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:ff1190ae618dae9243de59caf4149abb4a9b775cb6439f119cd32a30f1a21820", + "zh:ff15b7b86787f6fd186211e7c37a72f2cc70374b284aaf063e1f989717441161", + ] +} + +provider "registry.terraform.io/hashicorp/local" { + version = "2.5.3" + constraints = "2.5.3" + hashes = [ + "h1:1Nkh16jQJMp0EuDmvP/96f5Unnir0z12WyDuoR6HjMo=", + "zh:284d4b5b572eacd456e605e94372f740f6de27b71b4e1fd49b63745d8ecd4927", + "zh:40d9dfc9c549e406b5aab73c023aa485633c1b6b730c933d7bcc2fa67fd1ae6e", + "zh:6243509bb208656eb9dc17d3c525c89acdd27f08def427a0dce22d5db90a4c8b", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:885d85869f927853b6fe330e235cd03c337ac3b933b0d9ae827ec32fa1fdcdbf", + "zh:bab66af51039bdfcccf85b25fe562cbba2f54f6b3812202f4873ade834ec201d", + "zh:c505ff1bf9442a889ac7dca3ac05a8ee6f852e0118dd9a61796a2f6ff4837f09", + "zh:d36c0b5770841ddb6eaf0499ba3de48e5d4fc99f4829b6ab66b0fab59b1aaf4f", + "zh:ddb6a407c7f3ec63efb4dad5f948b54f7f4434ee1a2607a49680d494b1776fe1", + "zh:e0dafdd4500bec23d3ff221e3a9b60621c5273e5df867bc59ef6b7e41f5c91f6", + "zh:ece8742fd2882a8fc9d6efd20e2590010d43db386b920b2a9c220cfecc18de47", + "zh:f4c6b3eb8f39105004cf720e202f04f57e3578441cfb76ca27611139bc116a82", + ] +} diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_cluster.auto.tfvars b/platforms/gke/base/core/workloads/nri_device_injector/_cluster.auto.tfvars new file mode 120000 index 000000000..4d9954e5a --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_cluster.auto.tfvars @@ -0,0 +1 @@ +../../../_shared_config/cluster.auto.tfvars \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_cluster_variables.tf b/platforms/gke/base/core/workloads/nri_device_injector/_cluster_variables.tf new file mode 120000 index 000000000..3f2c29e19 --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_cluster_variables.tf @@ -0,0 +1 @@ +../../../_shared_config/cluster_variables.tf \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_platform.auto.tfvars b/platforms/gke/base/core/workloads/nri_device_injector/_platform.auto.tfvars new file mode 120000 index 000000000..c3133e727 --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_platform.auto.tfvars @@ -0,0 +1 @@ +../../../_shared_config/platform.auto.tfvars \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_platform_variables.tf b/platforms/gke/base/core/workloads/nri_device_injector/_platform_variables.tf new file mode 120000 index 000000000..c68738baa --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_platform_variables.tf @@ -0,0 +1 @@ +../../../_shared_config/platform_variables.tf \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_workloads.auto.tfvars b/platforms/gke/base/core/workloads/nri_device_injector/_workloads.auto.tfvars new file mode 120000 index 000000000..b65551f53 --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_workloads.auto.tfvars @@ -0,0 +1 @@ +../../../_shared_config/workloads.auto.tfvars \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/_workloads_variables.tf b/platforms/gke/base/core/workloads/nri_device_injector/_workloads_variables.tf new file mode 120000 index 000000000..fec5c48ce --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/_workloads_variables.tf @@ -0,0 +1 @@ +../../../_shared_config/workloads_variables.tf \ No newline at end of file diff --git a/platforms/gke/base/core/workloads/nri_device_injector/main.tf b/platforms/gke/base/core/workloads/nri_device_injector/main.tf new file mode 100644 index 000000000..ea8b5384c --- /dev/null +++ b/platforms/gke/base/core/workloads/nri_device_injector/main.tf @@ -0,0 +1,62 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +locals { + kubeconfig_directory = "${path.module}/../../../kubernetes/kubeconfig" + kubeconfig_file = "${local.kubeconfig_directory}/${local.kubeconfig_file_name}" + + manifests_directory = "${local.namespace_directory}/kube-system" + namespace_directory = "${local.manifests_directory_root}/namespace" + version_manifests_directory = "${path.module}/manifests" +} + +data "local_file" "kubeconfig" { + filename = local.kubeconfig_file +} + +resource "terraform_data" "manifests" { + input = { + manifests_dir = local.manifests_directory + version_manifests_dir = local.version_manifests_directory + } + + provisioner "local-exec" { + command = <gpu-l4-24gb-x2
  • gpu-l4-24gb-x4
  • gpu-l4-24gb-x8
  • +
  • gpu-rtx-pro-6000-96gb-x1
  • +
  • gpu-rtx-pro-6000-96gb-x1-2
  • +
  • gpu-rtx-pro-6000-96gb-x1-4
  • +
  • gpu-rtx-pro-6000-96gb-x1-8
    • -
      diff --git a/platforms/gke/base/use-cases/inference-ref-arch/terraform/deploy-standard.sh b/platforms/gke/base/use-cases/inference-ref-arch/terraform/deploy-standard.sh index c83ae354b..d2f6a6a66 100755 --- a/platforms/gke/base/use-cases/inference-ref-arch/terraform/deploy-standard.sh +++ b/platforms/gke/base/use-cases/inference-ref-arch/terraform/deploy-standard.sh @@ -50,6 +50,7 @@ declare -a CORE_TERRASERVICES_APPLY=( "workloads/jobset" "workloads/lws" "workloads/priority_class" + "workloads/nri_device_injector" "workloads/kueue" ) CORE_TERRASERVICES_APPLY="${CORE_TERRASERVICES_APPLY[*]}" "${ACP_PLATFORM_CORE_DIR}/deploy.sh" diff --git a/platforms/gke/base/use-cases/inference-ref-arch/terraform/teardown-standard.sh b/platforms/gke/base/use-cases/inference-ref-arch/terraform/teardown-standard.sh index 1d0e1ba9a..e59cf3f97 100755 --- a/platforms/gke/base/use-cases/inference-ref-arch/terraform/teardown-standard.sh +++ b/platforms/gke/base/use-cases/inference-ref-arch/terraform/teardown-standard.sh @@ -69,6 +69,7 @@ done if [ "${ACP_TEARDOWN_CORE_PLATFORM}" = "true" ]; then declare -a CORE_TERRASERVICES_DESTROY=( "workloads/kueue" + "workloads/nri_device_injector" "workloads/priority_class" "workloads/lws" "workloads/jobset"