Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
5ea1972
chore: update terraform in devcontainer
ferrarimarco Apr 8, 2026
3376bfa
chore: disable code assist hint
ferrarimarco Apr 8, 2026
1096f10
chore: fix markdown title issue
ferrarimarco Apr 8, 2026
bff5f8e
chore: fix markdown links
ferrarimarco Apr 8, 2026
70fd1a6
chore: add missing node pools to details
ferrarimarco Apr 13, 2026
6bd3409
chore: update devcontainer deps
ferrarimarco Apr 13, 2026
2da31bf
fix: fallback on $0 if BASH_SOURCE is not set
ferrarimarco Apr 13, 2026
9b9f446
fix: use tr for lowercasing model id
ferrarimarco Apr 16, 2026
57fce2d
chore: don't install deps on diffusers on each req
ferrarimarco Apr 16, 2026
eaffd60
feat: support running flux.2-klein-4b
ferrarimarco Apr 16, 2026
efec638
docs: fix diffusers guide
ferrarimarco Apr 16, 2026
fe89a3d
feat: initial flux.2 benchmark implementation
ferrarimarco Apr 16, 2026
c68c7a3
feat: flux.2 benchmark deployment on l4
ferrarimarco Apr 16, 2026
0ade430
fix: load flux.2 from gcs
ferrarimarco Apr 16, 2026
411a1e2
chore: extract request parameters
ferrarimarco Apr 17, 2026
9b1beb5
feat: k6 benchmark
ferrarimarco Apr 17, 2026
88f8ef9
feat: k6 image build
ferrarimarco Apr 17, 2026
7b15662
chore: shorten var name
ferrarimarco Apr 17, 2026
928c5de
chore: update diffusers docs
ferrarimarco Apr 17, 2026
348ee33
chore: remove warmup request
ferrarimarco Apr 17, 2026
0585729
fix: fallback ok $0 if BASH_SOURCE is not defined
ferrarimarco Apr 17, 2026
7cff93f
feat: dynamic k6 output path
ferrarimarco Apr 17, 2026
705da18
feat: deploy k6 on gke
ferrarimarco Apr 17, 2026
8e4c26a
feat: k6 benchmark for diffusers
ferrarimarco Apr 20, 2026
24807a7
feat: flux.2 on rtx 6000 pro
ferrarimarco Apr 21, 2026
afc8d17
chore: minor fixes
ferrarimarco Apr 21, 2026
72c7498
chore: minor fixes
ferrarimarco Apr 21, 2026
4ecfbd6
chore: sort python dict
ferrarimarco Apr 21, 2026
eb9fcef
fix: more precise latency quantiles
ferrarimarco Apr 22, 2026
2bf684f
docs: k6 on rtx
ferrarimarco Apr 22, 2026
6add39c
fix: async-pubsub-subscriber-path
ferrarimarco Apr 22, 2026
a56e4db
feat: add more metrics
ferrarimarco Apr 22, 2026
e75222b
fix: address linting issues
ferrarimarco Apr 22, 2026
61e1265
feat: run multiple vus
ferrarimarco Apr 22, 2026
52096fd
feat: run multiple scenarios
ferrarimarco Apr 23, 2026
9ff4d99
feat: custom resolution and multiple accelerators
ferrarimarco Apr 23, 2026
860a30c
fix: check for both failed and complete jobs
ferrarimarco Apr 23, 2026
4665e4d
chore: remove resolution env vars
ferrarimarco Apr 24, 2026
a589dfc
feat: more status info
ferrarimarco Apr 24, 2026
a22b430
chore: more echoes
ferrarimarco Apr 24, 2026
0f38ed1
feat: g4 deployment
ferrarimarco Apr 27, 2026
9be5b34
chore: fix linting issues and cccs
ferrarimarco May 4, 2026
6429a8d
chore: add missing header and fix lint issue
ferrarimarco May 4, 2026
6163da1
fix: count scenarios in extract script
ferrarimarco May 5, 2026
436f01b
fix: linting issues
ferrarimarco May 5, 2026
cccdcb9
chore: update supported configs
ferrarimarco May 7, 2026
8c8ee5d
fix: init vgpu separately
ferrarimarco May 19, 2026
0b941b6
fix: fix 1/8 deployment
ferrarimarco May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dev-tools/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ platforms/gke/base/core/workloads/inference_gateway/manifests/*
platforms/gke/base/core/workloads/jobset/manifests/*
platforms/gke/base/core/workloads/kueue/manifests/*
platforms/gke/base/core/workloads/lws/manifests/*
platforms/gke/base/core/workloads/nri_device_injector/manifests/*
platforms/gke/base/core/workloads/nvidia_nim/*
platforms/gke/base/core/workloads/priority_class/manifests/*
platforms/gke/base/kubernetes/*
Expand Down
6 changes: 3 additions & 3 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM hashicorp/terraform:1.5.7 AS terraform
FROM koalaman/shellcheck:v0.10.0 AS shellcheck
FROM mvdan/shfmt:v3.10.0 AS shfmt
FROM hashicorp/terraform:1.14.8 AS terraform
FROM koalaman/shellcheck:v0.11.0 AS shellcheck
FROM mvdan/shfmt:v3.13.1 AS shfmt

FROM python:3.13-bookworm AS python-builder

Expand Down
5 changes: 4 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
"name": "Cloud Solutions devcontainer",
"name": "Accelerated Platforms devcontainer",
"build": {
"dockerfile": "Dockerfile"
},
Expand All @@ -13,7 +13,9 @@
"editor.wordWrap": "off",
"files.insertFinalNewline": true,
"files.trimFinalNewlines": true,
"geminicodeassist.displayInlineContextHint": false,
"prettier.resolveGlobalModules": true,
"python.defaultInterpreterPath": "/venv/bin/python",
"redhat.telemetry.enabled": false,
"telemetry.telemetryLevel": "off",
"[css]": {
Expand Down Expand Up @@ -78,6 +80,7 @@
"ms-azuretools.vscode-containers",
"ms-python.black-formatter",
"ms-python.isort",
"ms-python.python",
"streetsidesoftware.code-spell-checker",
"timonwong.shellcheck"
]
Expand Down
17 changes: 17 additions & 0 deletions .github/workflows/dictionary/python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@ aiohttp
aqtp
asctime
asgi
asynccontextmanager
asyncio
certifi
cffi
classmethod
configparser
contextlib
coveragerc
dataclass
dataclasses
dataframe
dbapi
dbcommands
Expand All @@ -17,6 +23,7 @@ fastapi
fillna
fromarray
frombuffer
fromisoformat
fsspec
ftfy
functools
Expand All @@ -29,11 +36,13 @@ getframerate
getnchannels
getnframes
getsampwidth
grpcio
gunicorn
hasattr
hashlib
hexdigest
httpx
idna
iloc
imgf
inplace
Expand All @@ -59,7 +68,10 @@ pgvector
pipreqs
pmap
prng
protos
pyasn
pycache
pycparser
pydantic
pyenv
pylint
Expand All @@ -69,8 +81,10 @@ pythondontwritebytecode
pythonpath
pythonunbuffered
qualname
quantiles
readframes
removesuffix
reqs
rerank
reranked
retryable
Expand All @@ -83,13 +97,16 @@ shutil
spacy
splitlines
sqlalchemy
strftime
tensorboard
tensorboardx
thejsonlogger
tqdm
unittests
urllib
urlopen
urlretrieve
uvicorn
venv
writerow
writestr
4 changes: 4 additions & 0 deletions .github/workflows/dictionary/sglang.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
lmsysorg
musa
nvls
sglang
1 change: 1 addition & 0 deletions .github/workflows/dictionary/shell.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ nslookup
pipefail
pkill
shuf
subshell
syscall
xtrace
zxvf
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,10 @@ terraform.tfstate*
# Test
test/log/*.log
test/scripts/environment_files/*

# Generated outputs
*.log
k6-*.txt
k6-*.csv
k6-*.jsonl
k6-report.md
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ the primary runtime.
- [LLM Inference Optimization: Achieving faster Pod Startup with Google Cloud Storage](/use-cases/inferencing/cost-optimization/gcsfuse/AchievingFasterPodStartup.md)
- [Optimizing GKE Workloads with Custom Compute Classes](/docs/guides/optimizing-gke-workloads-with-custom-compute-classes/README.md)

### [Deprecated] Playground AI/ML Platform on GKE
### \[Deprecated\] Playground AI/ML Platform on GKE

The [Playground AI/ML Platform on GKE](/platforms/gke-aiml/playground/README.md)
is a quick-start implementation of the platform that can be used to familiarize
Expand Down
31 changes: 31 additions & 0 deletions container-images/cpu/k6-benchmark/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base image: grafana/k6, pinned to an exact release tag.
FROM grafana/k6:1.7.1

# Run the following setup steps as root (presumably required for creating and
# chown-ing /output below); the image drops back to the k6 user afterwards.
USER root

WORKDIR /app
# Create the /output directory and ensure k6 owns it, along with /app
RUN mkdir -p /output && chown -R k6:k6 /app /output

# Copy the benchmark scripts and the wrapper entrypoint, both owned by the k6
# user; the entrypoint is additionally marked executable.
COPY --chown=k6:k6 scripts /app/scripts
COPY --chmod=a+x --chown=k6:k6 entrypoint.sh /app/entrypoint.sh

# Switch back to the unprivileged k6 user
USER k6

# All container arguments are handed to the wrapper script.
ENTRYPOINT ["/app/entrypoint.sh"]

# Default argument when the caller supplies none.
CMD ["--help"]
99 changes: 99 additions & 0 deletions container-images/cpu/k6-benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# k6 Benchmark Image

This container image packages the [k6](https://k6.io/) load testing tool with
specific scripts to benchmark Machine Learning inference workloads.

It is designed to run in environments like Google Kubernetes Engine (GKE) to
generate consistent, reproducible load against target endpoints and output
granular metrics to a JSONL file for further analysis. It also includes a Python
script (`extract_metrics.py`) that can be run manually to process the k6 output
and generate a price/performance report.

## Usage

You can run this container image via Docker or deploy it as a Job in a
Kubernetes cluster.

### Environment Variables

The container accepts the following optional environment variables for metric
output naming and processing:

- `ACCELERATOR_NAME`: A string representing the target hardware (e.g., `l4`,
`a100`, `v5p`). If not provided, it defaults to `accelerator-not-set`.
- `NODE_HOURLY_COST`: The hourly cost of the underlying node in USD. Used by the
metric extraction script to compute cost per 1k images. Defaults to
`0.0`.

The default benchmark script (`k6-diffusers-flux-2-klein-4b.js`) expects the
following environment variables:

- `TARGET_URL`: The full URL of the inference endpoint to test (e.g.,
`http://model-service:8000/generate`).
- `BATCH_SIZE`: The batch size to request in the payload (default: `1`).
- `VUS`: The number of concurrent Virtual Users to simulate (default: `1`).

### Running via Docker

Set the k6 script to run by setting the `CMD` to point to the script path when
starting the container:

```bash
# Example: running a different script mounted into the container
docker run --rm \
-e ACCELERATOR_NAME="custom" \
-v $(pwd)/custom-script.js:/app/custom-script.js \
-v $(pwd)/output:/output \
k6-benchmark:latest /app/custom-script.js
```

The k6 output will be saved in the mapped `/output` directory on your host. The
filename will be dynamically generated in the format:
`<name-of-k6-script>-<ACCELERATOR_NAME>-<experiment-start-timestamp>.jsonl`.
For example: `k6-diffusers-flux-2-klein-4b-l4-20260417T120000Z.jsonl`.

#### Supported Benchmarks

The following benchmark scripts are included:

- **`/app/scripts/k6-diffusers-flux-2-klein-4b.js`**: Benchmarks the
FLUX.2-klein-4B image generation model.

## Metrics Extraction

The extraction script (`extract_metrics.py`) can be run manually after the
benchmark finishes to generate a price/performance report.

The extraction script calculates throughput (Images/sec) and latencies (p50,
p95, p99) strictly from the `benchmark` scenario, and automatically fetches
corresponding on-node telemetry (Peak VRAM, Avg GPU Utilization) from Google
Cloud Monitoring if the dependencies are installed and it is running on Google
Cloud.

To ensure accurate hardware metrics when multiple deployments are running in the
same project, the script can filter by pod, namespace, or node. If the `--pod`
argument is omitted, the script automatically uses the `deployment_name`
(extracted from the `TARGET_URL` hostname) as a prefix to filter for relevant
pods.

### Script Arguments

- `--file`: Path to the k6 `.jsonl` output file (Required).
- `--output-csv`: Path to the output CSV file where aggregated results are
stored (Optional, default: `k6-benchmark.csv`).
- `--hourly-cost`: The hourly cost of the underlying GKE node in USD. If set to
`0.0`, a warning is emitted and cost metrics will be `0.0` (Optional, default:
`0.0`).
- `--project-id`: Google Cloud Project ID to query DCGM metrics via Cloud
Monitoring. If omitted, the script dynamically fetches the project ID from the
Google Cloud Metadata server (Optional).
- `--pod`: Filter metrics by a specific pod name. If omitted, the script
automatically uses the `deployment_name` (derived from the `TARGET_URL`
hostname) as a prefix filter to match all relevant pods in the deployment
(Optional).
- `--namespace`: Filter metrics by a specific namespace (Optional).
- `--node`: Filter metrics by a specific node name (Optional).
- `--vram-metric`: The Prometheus metric string for VRAM usage (Default:
`prometheus.googleapis.com/DCGM_FI_DEV_FB_USED/gauge`).
- `--util-metric`: The Prometheus metric string for GPU utilization (Default:
`prometheus.googleapis.com/DCGM_FI_DEV_GPU_UTIL/gauge`).
28 changes: 28 additions & 0 deletions container-images/cpu/k6-benchmark/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Image reference produced by this build; the value comes from the
# user-defined _DESTINATION substitution supplied at submit time.
images:
- ${_DESTINATION}

options:
logging: CLOUD_LOGGING_ONLY

steps:
# Single step: build the image from the current directory and tag it with
# the destination reference.
- args:
- build
- --tag=${_DESTINATION}
- .
id: "Build k6 benchmark image"
name: "docker.io/docker:28.3.3-dind-alpine3.22"
# NOTE(review): "-" is the Cloud Build convention for "no dependencies, start
# immediately" — confirm against the build config reference.
waitFor: ["-"]
49 changes: 49 additions & 0 deletions container-images/cpu/k6-benchmark/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/sh
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset

# Wrapper around k6 used as the container ENTRYPOINT.
#
# Usage:
#   entrypoint.sh --help
#   entrypoint.sh [k6 run options...] <script>.js
#
# Environment:
#   ACCELERATOR_NAME  Optional label embedded in the output file name
#                     (default: accelerator-not-set).
#
# Output:
#   k6 JSON metrics are written to
#   /output/<script-name>-<accelerator>-<UTC timestamp>.jsonl

# When the only argument is "--help", show k6's own help. No benchmark runs in
# this case, so no output file is computed or announced.
if [ "$*" = "--help" ]; then
  exec k6 --help
fi

# Default accelerator name
ACCELERATOR="${ACCELERATOR_NAME:-accelerator-not-set}"

# Find the script name from the arguments. If several arguments end in .js,
# the last one wins.
SCRIPT_PATH=""
for arg in "$@"; do
  case "$arg" in
    *.js)
      SCRIPT_PATH="$arg"
      ;;
  esac
done

if [ -n "${SCRIPT_PATH}" ]; then
  SCRIPT_NAME=$(basename "$SCRIPT_PATH" .js)
else
  SCRIPT_NAME="unknown-script"
fi

# UTC timestamp of the experiment start, e.g. 20260417T120000Z.
TIMESTAMP=$(date -u +"%Y%m%dT%H%M%SZ")
FILENAME="${SCRIPT_NAME}-${ACCELERATOR}-${TIMESTAMP}.jsonl"
OUTPUT_FILE_PATH="/output/${FILENAME}"
echo "Configured metrics output file: ${OUTPUT_FILE_PATH}"

# Replace the shell with k6 so that k6 receives container signals
# (SIGTERM/SIGINT) directly and its exit status becomes the container's.
exec k6 run \
  --out "json=${OUTPUT_FILE_PATH}" \
  "$@"
Loading
Loading