GoogleCloudPlatform · ferrarimarco · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/.dev-tools/.gitignore b/.dev-tools/.gitignore
@@ -8,6 +8,7 @@ platforms/gke/base/core/workloads/inference_gateway/manifests/*
 platforms/gke/base/core/workloads/jobset/manifests/*
 platforms/gke/base/core/workloads/kueue/manifests/*
 platforms/gke/base/core/workloads/lws/manifests/*
+platforms/gke/base/core/workloads/nri_device_injector/manifests/*
 platforms/gke/base/core/workloads/nvidia_nim/*
 platforms/gke/base/core/workloads/priority_class/manifests/*
 platforms/gke/base/kubernetes/*

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM hashicorp/terraform:1.5.7 AS terraform
-FROM koalaman/shellcheck:v0.10.0 AS shellcheck
-FROM mvdan/shfmt:v3.10.0 AS shfmt
+FROM hashicorp/terraform:1.14.8 AS terraform
+FROM koalaman/shellcheck:v0.11.0 AS shellcheck
+FROM mvdan/shfmt:v3.13.1 AS shfmt
 
 FROM python:3.13-bookworm AS python-builder
 

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -1,6 +1,6 @@
 {
   "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
-  "name": "Cloud Solutions devcontainer",
+  "name": "Accelerated Platforms devcontainer",
   "build": {
     "dockerfile": "Dockerfile"
   },
@@ -13,7 +13,9 @@
         "editor.wordWrap": "off",
         "files.insertFinalNewline": true,
         "files.trimFinalNewlines": true,
+        "geminicodeassist.displayInlineContextHint": false,
         "prettier.resolveGlobalModules": true,
+        "python.defaultInterpreterPath": "/venv/bin/python",
         "redhat.telemetry.enabled": false,
         "telemetry.telemetryLevel": "off",
         "[css]": {
@@ -78,6 +80,7 @@
         "ms-azuretools.vscode-containers",
         "ms-python.black-formatter",
         "ms-python.isort",
+        "ms-python.python",
         "streetsidesoftware.code-spell-checker",
         "timonwong.shellcheck"
       ]

diff --git a/.github/workflows/dictionary/python.txt b/.github/workflows/dictionary/python.txt
@@ -3,10 +3,16 @@ aiohttp
 aqtp
 asctime
 asgi
+asynccontextmanager
 asyncio
+certifi
+cffi
 classmethod
 configparser
+contextlib
 coveragerc
+dataclass
+dataclasses
 dataframe
 dbapi
 dbcommands
@@ -17,6 +23,7 @@ fastapi
 fillna
 fromarray
 frombuffer
+fromisoformat
 fsspec
 ftfy
 functools
@@ -29,11 +36,13 @@ getframerate
 getnchannels
 getnframes
 getsampwidth
+grpcio
 gunicorn
 hasattr
 hashlib
 hexdigest
 httpx
+idna
 iloc
 imgf
 inplace
@@ -59,7 +68,10 @@ pgvector
 pipreqs
 pmap
 prng
+protos
+pyasn
 pycache
+pycparser
 pydantic
 pyenv
 pylint
@@ -69,8 +81,10 @@ pythondontwritebytecode
 pythonpath
 pythonunbuffered
 qualname
+quantiles
 readframes
 removesuffix
+reqs
 rerank
 reranked
 retryable
@@ -83,13 +97,16 @@ shutil
 spacy
 splitlines
 sqlalchemy
+strftime
 tensorboard
 tensorboardx
 thejsonlogger
 tqdm
 unittests
 urllib
+urlopen
 urlretrieve
 uvicorn
 venv
 writerow
+writestr
diff --git a/.github/workflows/dictionary/sglang.txt b/.github/workflows/dictionary/sglang.txt
@@ -0,0 +1,4 @@
+lmsysorg
+musa
+nvls
+sglang
diff --git a/.github/workflows/dictionary/shell.txt b/.github/workflows/dictionary/shell.txt
@@ -16,6 +16,7 @@ nslookup
 pipefail
 pkill
 shuf
+subshell
 syscall
 xtrace
 zxvf
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,10 @@ terraform.tfstate*
 # Test
 test/log/*.log
 test/scripts/environment_files/*
+
+# Generated outputs
+*.log
+k6-*.txt
+k6-*.csv
+k6-*.jsonl
+k6-report.md
diff --git a/README.md b/README.md
@@ -73,7 +73,7 @@ the primary runtime.
 - [LLM Inference Optimization: Achieving faster Pod Startup with Google Cloud Storage](/use-cases/inferencing/cost-optimization/gcsfuse/AchievingFasterPodStartup.md)
 - [Optimizing GKE Workloads with Custom Compute Classes](/docs/guides/optimizing-gke-workloads-with-custom-compute-classes/README.md)
 
-### [Deprecated] Playground AI/ML Platform on GKE
+### \[Deprecated\] Playground AI/ML Platform on GKE
 
 The [Playground AI/ML Platform on GKE](/platforms/gke-aiml/playground/README.md)
 is a quick-start implementation of the platform that can be used to familiarize

diff --git a/container-images/cpu/k6-benchmark/Dockerfile b/container-images/cpu/k6-benchmark/Dockerfile
@@ -0,0 +1,31 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base image: Grafana's k6 image, pinned to an exact release tag so rebuilds
+# use the same k6 version.
+FROM grafana/k6:1.7.1
+
+# Temporarily become root: creating /output at the filesystem root and
+# changing ownership below require elevated privileges.
+USER root
+
+WORKDIR /app
+# Create the /output directory and ensure k6 owns it, along with /app
+RUN mkdir -p /output && chown -R k6:k6 /app /output
+
+# Copy the benchmark scripts and the wrapper entrypoint, owned by the k6 user.
+# The entrypoint gets its executable bit at copy time.
+COPY --chown=k6:k6 scripts /app/scripts
+COPY --chmod=a+x --chown=k6:k6 entrypoint.sh /app/entrypoint.sh
+
+# Switch back to the unprivileged k6 user
+USER k6
+
+# Exec-form entrypoint: arguments given to `docker run` (or CMD below) are
+# passed to entrypoint.sh unchanged.
+ENTRYPOINT ["/app/entrypoint.sh"]
+
+# Default argument when none is supplied; entrypoint.sh answers a lone
+# "--help" by printing the k6 help text instead of starting a run.
+CMD ["--help"]
diff --git a/container-images/cpu/k6-benchmark/README.md b/container-images/cpu/k6-benchmark/README.md
@@ -0,0 +1,99 @@
+# k6 Benchmark Image
+
+This container image packages the [k6](https://k6.io/) load testing tool with
+specific scripts to benchmark Machine Learning inference workloads.
+
+It is designed to run in environments like Google Kubernetes Engine (GKE) to
+generate consistent, reproducible load against target endpoints and output
+granular metrics to a JSONL file for further analysis. It also includes a Python
+script (`extract_metrics.py`) that can be run manually to process the k6 output
+and generate a price/performance report.
+
+## Usage
+
+You can run this container image via Docker or deploy it as a Job in a
+Kubernetes cluster.
+
+### Environment Variables
+
+The container accepts the following optional environment variables for metric
+output naming and processing:
+
+- `ACCELERATOR_NAME`: A string representing the target hardware (e.g., `l4`,
+  `a100`, `v5p`). If not provided, it defaults to `accelerator-not-set`.
+- `NODE_HOURLY_COST`: The hourly cost of the underlying node in USD. Used by the
+  automatic metric extraction script to compute cost per 1k images. Defaults to
+  `0.0`.
+
+The default benchmark script (`k6-diffusers-flux-2-klein-4b.js`) expects the
+following environment variables:
+
+- `TARGET_URL`: The full URL of the inference endpoint to test (e.g.,
+  `http://model-service:8000/generate`).
+- `BATCH_SIZE`: The batch size to request in the payload (default: `1`).
+- `VUS`: The number of concurrent Virtual Users to simulate (default: `1`).
+
+### Running via Docker
+
+Set the k6 script to run by setting the `CMD` to point to the script path when
+starting the container:
+
+```bash
+# Example: running a different script mounted into the container
+docker run --rm \
+  -e ACCELERATOR_NAME="custom" \
+  -v $(pwd)/custom-script.js:/app/custom-script.js \
+  -v $(pwd)/output:/output \
+  k6-benchmark:latest /app/custom-script.js
+```
+
+The k6 output will be saved in the mapped `/output` directory on your host. The
+filename will be dynamically generated in the format:
+`<name-of-k6-script>-<ACCELERATOR_NAME>-<experiment-start-timestamp>.jsonl`.
+For example: `k6-diffusers-flux-2-klein-4b-l4-20260417T120000Z.jsonl`.
+
+#### Supported Benchmarks
+
+The following benchmark scripts are included:
+
+- **`/app/scripts/k6-diffusers-flux-2-klein-4b.js`**: Benchmark the
+  FLUX.2-klein-4B image generation model.
+
+## Metrics Extraction
+
+The extraction script (`extract_metrics.py`) can be run manually after the
+benchmark finishes to generate a price/performance report.
+
+The extraction script calculates throughput (Images/sec) and latencies (p50,
+p95, p99) strictly from the `benchmark` scenario, and automatically fetches
+corresponding on-node telemetry (Peak VRAM, Avg GPU Utilization) from Google
+Cloud Monitoring if the dependencies are installed and it is running on Google
+Cloud.
+
+To ensure accurate hardware metrics when multiple deployments are running in the
+same project, the script can filter by pod, namespace, or node. If the `--pod`
+argument is omitted, the script automatically uses the `deployment_name`
+(extracted from the `TARGET_URL` hostname) as a prefix to filter for relevant
+pods.
+
+### Script Arguments
+
+- `--file`: Path to the k6 `.jsonl` output file (Required).
+- `--output-csv`: Path to the output CSV file where aggregated results are
+  stored (Optional, default: `k6-benchmark.csv`).
+- `--hourly-cost`: The hourly cost of the underlying GKE node in USD. If set to
+  `0.0`, a warning is emitted and cost metrics will be `0.0` (Optional, default:
+  `0.0`).
+- `--project-id`: Google Cloud Project ID to query DCGM metrics via Cloud
+  Monitoring. If omitted, the script dynamically fetches the project ID from the
+  Google Cloud Metadata server (Optional).
+- `--pod`: Filter metrics by a specific pod name. If omitted, the script
+  automatically uses the `deployment_name` (derived from the `TARGET_URL`
+  hostname) as a prefix filter to match all relevant pods in the deployment
+  (Optional).
+- `--namespace`: Filter metrics by a specific namespace (Optional).
+- `--node`: Filter metrics by a specific node name (Optional).
+- `--vram-metric`: The Prometheus metric string for VRAM usage (Default:
+  `prometheus.googleapis.com/DCGM_FI_DEV_FB_USED/gauge`).
+- `--util-metric`: The Prometheus metric string for GPU utilization (Default:
+  `prometheus.googleapis.com/DCGM_FI_DEV_GPU_UTIL/gauge`).
diff --git a/container-images/cpu/k6-benchmark/cloudbuild.yaml b/container-images/cpu/k6-benchmark/cloudbuild.yaml
@@ -0,0 +1,28 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Images that Cloud Build pushes after all steps succeed. _DESTINATION is a
+# user-defined substitution (leading underscore) that must be supplied when
+# the build is submitted.
+images:
+  - ${_DESTINATION}
+
+options:
+  # Store build logs in Cloud Logging only.
+  logging: CLOUD_LOGGING_ONLY
+
+steps:
+  # Single step: the args below are handed to the Docker builder image to
+  # build the current directory and tag the result with the destination name.
+  - args:
+      - build
+      - --tag=${_DESTINATION}
+      - .
+    id: "Build k6 benchmark image"
+    name: "docker.io/docker:28.3.3-dind-alpine3.22"
+    # "-" tells Cloud Build to start this step immediately, without waiting
+    # for any other step.
+    waitFor: ["-"]
diff --git a/container-images/cpu/k6-benchmark/entrypoint.sh b/container-images/cpu/k6-benchmark/entrypoint.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+
+# Default accelerator name
+ACCELERATOR="${ACCELERATOR_NAME:-accelerator-not-set}"
+
+# Find the script name from the arguments
+SCRIPT_PATH=""
+for arg in "$@"; do
+  case "$arg" in
+  *.js)
+    SCRIPT_PATH="$arg"
+    ;;
+  esac
+done
+
+if [ -n "${SCRIPT_PATH:-}" ]; then
+  SCRIPT_NAME=$(basename "$SCRIPT_PATH" .js)
+else
+  SCRIPT_NAME="unknown-script"
+fi
+
+TIMESTAMP=$(date -u +"%Y%m%dT%H%M%SZ")
+FILENAME="${SCRIPT_NAME}-${ACCELERATOR}-${TIMESTAMP}.jsonl"
+OUTPUT_FILE_PATH="/output/${FILENAME}"
+echo "Configured metrics output file: ${OUTPUT_FILE_PATH}"
+
+if [ "$*" = "--help" ]; then
+  k6 --help
+else
+  k6 run \
+    --out "json=${OUTPUT_FILE_PATH}" \
+    "$@"
+fi
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,7 @@ nslookup @@
     pipefail
     pkill
     shuf
+    subshell
     syscall
     xtrace
     zxvf