panwudi · panwudi · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/omlx/cache/__init__.py b/omlx/cache/__init__.py
@@ -52,7 +52,6 @@
 )
 
 # Managers
-from .tiered_manager import TieredCacheManager
 from .recovery import CacheRecoveryManager
 
 # Factory
@@ -109,7 +108,6 @@
     "VisionFeatureSSDCache",
     "VisionFeatureSSDEntry",
     # Managers
-    "TieredCacheManager",
     "CacheRecoveryManager",
     # Factory
     "CacheConfig",

diff --git a/omlx/cache/boundary_snapshot_store.py b/omlx/cache/boundary_snapshot_store.py
diff --git a/omlx/cache/paged_ssd_cache.py b/omlx/cache/paged_ssd_cache.py
@@ -1280,14 +1280,19 @@ def _store_nstate_elements(prefix: str, elements):
             metadata.update(cache_list_meta)
 
             # Caller (scheduler._cleanup_finished, async store-cache path)
-            # already mx.eval's all real KV arrays on the inference thread
-            # before submitting to the omlx-store-cache executor. The tiny
+            # dispatches real KV arrays via mx.async_eval on the inference
+            # thread's generation_stream before submitting to the
+            # omlx-store-cache executor. The worker then waits on that same
+            # stream via mx.synchronize(generation_stream) (see
+            # _async_store_cache_worker) before reaching this code path,
+            # so the arrays are fully materialized by the time
+            # _extract_tensor_bytes hits the buffer protocol. The tiny
             # mx.zeros((1,)) placeholders allocated above are lazy nodes
             # whose buffer materialization happens implicitly via the buffer
-            # protocol. Skipping the explicit mx.eval here keeps save_block
+            # protocol. Skipping any explicit mx.eval here keeps save_block
             # off the Metal command-submission path when invoked from a
             # non-inference thread, which is the source of the cross-thread
-            # race tracked in #978/#1040.
+            # race tracked in #978/#1040/#1106/#1437.
             tensors_raw = {}
             for name, arr in arrays.items():
                 tensors_raw[name] = _extract_tensor_bytes(arr)

diff --git a/omlx/cache/tiered_manager.py b/omlx/cache/tiered_manager.py