ROCm · mawad-amd · Sep 10, 2025 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025
@@ -14,15 +14,16 @@ on:
       - 'docs/**'
       - 'iris/**'
       - 'examples/**'
+      - '.github/workflows/docs.yml'
 
 permissions:
   contents: read
   pages: write
   id-token: write
 
 concurrency:
-  group: "pages"
-  cancel-in-progress: true
+  group: "pages-${{ github.ref }}"
+  cancel-in-progress: false
 
 jobs:
   build:
@@ -65,7 +66,7 @@ jobs:
       url: ${{ steps.deployment.outputs.page_url }}
     runs-on: ubuntu-latest
     needs: build
-    if: github.ref == 'refs/heads/main'
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
     steps:
       - name: Deploy to GitHub Pages
         id: deployment

@@ -24,13 +24,20 @@ Prefer using the convenience factory over calling the constructor directly:
 Use Iris-aware logging that automatically annotates each message with the current rank and world size. This is helpful when debugging multi-rank programs.
 
 ```{eval-rst}
+.. autofunction:: iris.logging.set_logger_level
 .. automethod:: iris.iris.Iris.debug
 .. automethod:: iris.iris.Iris.info
 .. automethod:: iris.iris.Iris.warning
 .. automethod:: iris.iris.Iris.error
 ```
 
 
+## Utility Functions
+
+```{eval-rst}
+.. autofunction:: iris.util.do_bench
+```
+
 ## Broadcast Helper
 
 Broadcast a Python scalar or small object from a source rank to all ranks. This is a convenience wrapper over the internal Torch Distributed helper.

@@ -167,7 +167,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict):
 
     def preamble():
         shmem.barrier()
-        iris.memset_tensor(tile_completed, 0)
+        tile_completed.zero_()
         shmem.barrier()
 
     def run_experiment():

@@ -163,7 +163,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict):
 
     def preamble():
         shmem.barrier()
-        iris.memset_tensor(tile_completed, 0)
+        tile_completed.zero_()
         shmem.barrier()
 
     def run_experiment():

@@ -12,7 +12,7 @@
 - Iris: Main class for multi-GPU operations
 - Atomic operations: add, sub, cas, xchg, xor, and, or, min, max
 - Memory operations: load, store, get, put
-- Utility functions: do_bench, memset_tensor
+- Utility functions: do_bench
 - HIP integration for AMD GPU support
 - Logging utilities with rank information
 
@@ -46,7 +46,6 @@
 
 from .util import (
     do_bench,
-    memset_tensor,
 )
 
 from . import hip
@@ -98,7 +97,6 @@
     "atomic_min",
     "atomic_max",
     "do_bench",
-    "memset_tensor",
     "hip",
     "set_logger_level",
     "logger",