GoogleCloudPlatform · laurentgrangeau · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/README.md b/README.md
@@ -68,6 +68,8 @@ the primary runtime.
     - [Data preparation](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/data-preparation.md)
     - [Fine tuning](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/fine-tuning.md)
     - [Model evaluation](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/model-evaluation.md)
+- [Reinforcement Learning reference architecture](/docs/platforms/gke/base/use-cases/reinforcement-learning/README.md)
+  - [RL on TPU](/docs/platforms/gke/base/use-cases/reinforcement-learning/rl-on-tpu/README.md)
 
 - [Reinforcement Learning reference architecture](/docs/platforms/gke/base/use-cases/reinforcement-learning/README.md)
   - [RL on TPU](/docs/platforms/gke/base/use-cases/reinforcement-learning/single-host-tpu-grpo/README.md)

diff --git a/container-images/cpu/reinforcement-learning-dataset-downloader/Dockerfile b/container-images/cpu/reinforcement-learning-dataset-downloader/Dockerfile
@@ -0,0 +1,55 @@
+# syntax=docker.io/docker/dockerfile:1.17.1
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# --- STAGE 1: Build Stage ---
+# Install the Python dependencies here; the runtime stage copies them out of this stage
+FROM python:3.14.0-slim-trixie AS builder
+
+# Skip writing .pyc files and keep stdout/stderr unbuffered
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV APP_HOME=/usr/src/app
+
+# Create and set the working directory
+WORKDIR $APP_HOME
+
+# Copy only the requirements file (from the "primary" build context) first to leverage Docker cache
+COPY --from=primary requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# --- STAGE 2: Final Runtime Stage ---
+# Use a minimal runtime image for security and size
+FROM python:3.14.0-slim-trixie
+
+# Set environment variables for the runtime
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV APP_HOME=/usr/src/app
+
+# Create and set the working directory
+WORKDIR $APP_HOME
+
+# Copy installed dependencies from the builder stage
+COPY --from=builder /usr/local/lib/python3.14/site-packages /usr/local/lib/python3.14/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy the application script and the logging.conf it loads (by relative path) at startup
+COPY --from=primary app.py logging.conf ./
+
+# Command to run the application when the container starts
+CMD ["python", "app.py"]
diff --git a/container-images/cpu/reinforcement-learning-dataset-downloader/cloudbuild.yaml b/container-images/cpu/reinforcement-learning-dataset-downloader/cloudbuild.yaml
@@ -0,0 +1,31 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+images:
+  - ${_DESTINATION}  # image reference comes from the _DESTINATION substitution
+
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_8
+
+steps:
+  - args:
+      - build
+      - --build-context=primary=container-images/cpu/reinforcement-learning-dataset-downloader/src  # named context read by `COPY --from=primary` in the Dockerfile
+      - --file=container-images/cpu/reinforcement-learning-dataset-downloader/Dockerfile
+      - --tag=${_DESTINATION}
+      - .  # main build context: the directory the build is started from
+    id: "Build Reinforcement Learning Dataset Downloader image"
+    name: "docker.io/docker:28.3.3-dind-alpine3.22"
+    waitFor: ["-"]  # "-" declares no dependencies, so the step starts as soon as the build begins
diff --git a/container-images/cpu/reinforcement-learning-dataset-downloader/src/app.py b/container-images/cpu/reinforcement-learning-dataset-downloader/src/app.py
@@ -0,0 +1,107 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import logging
+import logging.config
+import os
+
+from datasets import load_dataset
+from google.cloud import storage
+
+# --- LOGGING CONFIGURATION ---
+logging.config.fileConfig("logging.conf", disable_existing_loggers=True)  # relative path: resolved against the process working directory
+LOG = logging.getLogger(__name__)
+
+# --- Configuration ---
+DATASET_BUCKET_NAME = os.getenv("DATASET_BUCKET_NAME")  # destination bucket; None when the variable is unset
+GCS_PREFIX = "gsm8k"  # object-name prefix used for the upload
+OUTPUT_FILENAME = "gsm8k_full.json"  # object file name placed under GCS_PREFIX
+
+
+def validate_config() -> None:
+    """Fail fast when the destination bucket has not been configured.
+
+    Raises:
+        ValueError: DATASET_BUCKET_NAME is unset or is an empty string.
+    """
+    if not DATASET_BUCKET_NAME:
+        LOG.error("❌ Error: Environment variable 'DATASET_BUCKET_NAME' is not set.")
+        raise ValueError("DATASET_BUCKET_NAME environment variable is required.")
+
+
+def prepare_and_upload_dataset() -> None:
+    """Download the GSM8K train split from Hugging Face and upload it to GCS as JSON.
+
+    All records are serialized into a single JSON array and written to
+    gs://<DATASET_BUCKET_NAME>/<GCS_PREFIX>/<OUTPUT_FILENAME>.
+
+    Raises:
+        ValueError: If DATASET_BUCKET_NAME is not set, or the bucket does not
+            exist or is inaccessible.
+        Exception: Any error raised while initializing the GCS client, loading
+            the dataset, or uploading the file is logged and re-raised.
+    """
+    validate_config()
+
+    # 1. Initialize GCS Client
+    try:
+        storage_client = storage.Client()
+        bucket = storage_client.bucket(DATASET_BUCKET_NAME)
+        bucket_found = bucket.exists()
+    except Exception as e:
+        LOG.error(f"❌ Error connecting to GCS: {e}")
+        raise
+
+    # Checked outside the try block so a missing bucket is not caught above and
+    # logged a second time as a connection error.
+    if not bucket_found:
+        LOG.error(f"❌ Error: Bucket '{DATASET_BUCKET_NAME}' is not accessible.")
+        raise ValueError(f"Bucket '{DATASET_BUCKET_NAME}' is not accessible.")
+
+    # 2. Load Dataset (GSM8K from Hugging Face)
+    LOG.info("⬇️  Downloading dataset from Hugging Face...")
+    try:
+        # Load the train split of the 'main' configuration
+        dataset = load_dataset("openai/gsm8k", "main", split="train")
+    except Exception as e:
+        # Record why the first attempt failed before retrying without a config name.
+        LOG.warning(f"⚠️  Loading the 'main' configuration failed: {e}")
+        LOG.info("Attempting alternative split loading...")
+        dataset = load_dataset("openai/gsm8k", split="train")
+
+    total_records = len(dataset)
+    LOG.info(f"✅ Dataset loaded. Total records: {total_records}")
+
+    # 3. Convert to List and Upload
+    # Define GCS path
+    blob_name = f"{GCS_PREFIX}/{OUTPUT_FILENAME}"
+    LOG.info(f"🚀 Uploading to gs://{DATASET_BUCKET_NAME}/{blob_name} ...")
+
+    try:
+        # Convert the entire dataset to a list of dicts and serialize to JSON
+        json_data = json.dumps(list(dataset), indent=2)
+
+        # Upload string directly to GCS
+        blob = bucket.blob(blob_name)
+        blob.upload_from_string(data=json_data, content_type="application/json")
+        LOG.info(f"✨ Successfully uploaded {total_records} records to {blob_name}")
+
+    except Exception as e:
+        LOG.error(f"❌ Failed to process or upload dataset: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    prepare_and_upload_dataset()  # script entry point: run the download-and-upload job once
diff --git a/container-images/cpu/reinforcement-learning-dataset-downloader/src/logging.conf b/container-images/cpu/reinforcement-learning-dataset-downloader/src/logging.conf
@@ -0,0 +1,35 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[loggers]
+keys=root
+
+[handlers]
+keys=defaultHandler
+
+[formatters]
+keys=standardFormatter
+
+[logger_root]
+level=INFO
+handlers=defaultHandler
+
+[handler_defaultHandler]
+class=StreamHandler
+level=INFO
+formatter=standardFormatter
+args=(sys.stdout,)
+
+[formatter_standardFormatter]
+format=%(asctime)s [%(levelname)s] %(name)s: %(message)s
diff --git a/container-images/cpu/reinforcement-learning-dataset-downloader/src/requirements.txt b/container-images/cpu/reinforcement-learning-dataset-downloader/src/requirements.txt
@@ -0,0 +1,2 @@
+datasets==4.5.0
+google-cloud-storage==3.8.0
diff --git a/container-images/cpu/reinforcement-learning-model-converter/Dockerfile b/container-images/cpu/reinforcement-learning-model-converter/Dockerfile
@@ -0,0 +1,74 @@
+# syntax=docker.io/docker/dockerfile:1.17.1
+
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ==========================================
+# Stage 1: Builder
+# ==========================================
+FROM python:3.12.13-slim-trixie AS builder
+
+# Install build dependencies and git (skip recommended extras to keep the layer small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+# Shallow-clone the MaxText repository (history is not needed) and remove the .git folder
+RUN git clone --depth 1 https://github.com/AI-Hypercomputer/maxtext.git . \
+    && rm -rf .git
+
+# Create a virtual environment to isolate dependencies
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Install 'uv' (the fast Python package installer)
+RUN pip install --no-cache-dir uv
+
+# Install MaxText with the [tpu] dependency group
+# (This includes absl-py, jax, etc., which are needed for conversion)
+RUN uv pip install .[tpu] --resolution=lowest
+
+# MaxText uses a custom CLI command to install unreleased dependencies from GitHub
+RUN install_maxtext_tpu_github_deps
+
+# Explicitly install CPU-only PyTorch (Required to read HF Checkpoints)
+RUN uv pip install torch --index-url https://download.pytorch.org/whl/cpu
+
+# ==========================================
+# Stage 2: Final Runtime
+# ==========================================
+FROM python:3.12.13-slim-trixie
+
+WORKDIR /workspace
+
+# Copy the pre-built virtual environment from the builder stage
+COPY --from=builder /opt/venv /opt/venv
+
+# Copy the necessary source code from the builder stage
+COPY --from=builder /workspace /workspace
+
+# Activate the virtual environment by default
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Set PYTHONPATH so Python can locate the maxtext modules (no ${PYTHONPATH} suffix: it is undefined in this stage and would leave a trailing ':')
+ENV PYTHONPATH="/workspace/src"
+
+# Set the entrypoint to the checkpoint conversion script
+ENTRYPOINT ["python", "src/maxtext/checkpoint_conversion/to_maxtext.py"]
+
+# Fallback command to display help flags
+CMD ["--help"]
diff --git a/container-images/cpu/reinforcement-learning-model-converter/cloudbuild.yaml b/container-images/cpu/reinforcement-learning-model-converter/cloudbuild.yaml
@@ -0,0 +1,30 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+images:
+  - ${_DESTINATION}  # image reference comes from the _DESTINATION substitution
+
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_8
+
+steps:
+  - args:
+      - build
+      - --file=container-images/cpu/reinforcement-learning-model-converter/Dockerfile
+      - --tag=${_DESTINATION}
+      - .  # build context: the directory the build is started from
+    id: "Build Reinforcement Learning Model Converter image"
+    name: "docker.io/docker:28.3.3-dind-alpine3.22"
+    waitFor: ["-"]  # "-" declares no dependencies, so the step starts as soon as the build begins