diff --git a/examples/camera_viz/camera_streamer.py b/examples/camera_viz/camera_streamer.py
index 08ce48085..e3fe05104 100755
--- a/examples/camera_viz/camera_streamer.py
+++ b/examples/camera_viz/camera_streamer.py
@@ -30,7 +30,7 @@
 import yaml
 
 from pipeline import FrameSource
-from sources import build_local_camera
+from sources import PairedFrameSource, build_local_camera
 from transports import RtpH264Sender, make_encoder
 
 logger = logging.getLogger("camera_streamer")
@@ -45,16 +45,22 @@
 SUPERVISOR_TICK_S = 1.0
 
 
-def _pick_mono_source(sources: List[FrameSource], camera_name: str) -> FrameSource:
-    """Mono-only sender: exactly one source per camera. Stereo cameras
-    aren't supported here pending per-eye QuadLayer binding."""
+def _eye_sources(sources: List[FrameSource], camera_name: str) -> List[FrameSource]:
+    """Normalize ``build_local_camera`` output into a 1-or-2 element list.
+
+    Mono cameras → [src]. Stereo cameras → [left, right] unwrapped from
+    the PairedFrameSource wrapper that ``build_local_camera`` returns.
+    The streamer then fires one independent RTP stream per element."""
+    if len(sources) == 1 and isinstance(sources[0], PairedFrameSource):
+        paired = sources[0]
+        return [paired.left, paired.right]
     if len(sources) != 1:
         names = [s.spec.name for s in sources]
         raise ValueError(
             f"camera {camera_name!r} produced {len(sources)} streams {names}; "
-            "only mono cameras are supported here. Set mode/stereo to mono/false."
+            "expected 1 (mono) or a PairedFrameSource (stereo)."
         )
-    return sources[0]
+    return [sources[0]]
 
 
 class CameraSupervisor:
@@ -96,66 +102,91 @@ def stop(self) -> None:
                 return
             self._thread = None
 
-    def _build_sender(self) -> RtpH264Sender:
-        source = _pick_mono_source(build_local_camera(self._cfg), self._name)
+    def _build_senders(self) -> List[RtpH264Sender]:
+        eyes = _eye_sources(build_local_camera(self._cfg), self._name)
         rtp = self._cfg.get("rtp", {})
         if "port" not in rtp:
             raise ValueError(f"camera {self._name!r} missing rtp.port")
-        encoder = make_encoder(
-            rtp.get("encoder", self._default_encoder),
-            width=int(self._cfg["width"]),
-            height=int(self._cfg["height"]),
-            bitrate=int(rtp.get("bitrate_mbps", 15)) * 1_000_000,
-            fps=int(self._cfg.get("fps", 30)),
-            gop=int(rtp["gop"]) if "gop" in rtp else None,
-            gpu_id=int(rtp.get("gpu_id", 0)),
-        )
-        return RtpH264Sender(
-            source=source,
-            encoder=encoder,
-            host=self._host,
-            port=int(rtp["port"]),
-            width=int(self._cfg["width"]),
-            height=int(self._cfg["height"]),
-            fps=int(self._cfg.get("fps", 30)),
-            mtu=int(rtp.get("mtu", 1400)),
-        )
+        is_stereo = len(eyes) == 2
+        if is_stereo and "port_right" not in rtp:
+            raise ValueError(
+                f"camera {self._name!r}: stereo requires rtp.port_right (the "
+                "left eye goes to rtp.port, the right eye to rtp.port_right)"
+            )
+
+        def build_one(source: FrameSource, port: int) -> RtpH264Sender:
+            encoder = make_encoder(
+                rtp.get("encoder", self._default_encoder),
+                width=int(self._cfg["width"]),
+                height=int(self._cfg["height"]),
+                bitrate=int(rtp.get("bitrate_mbps", 15)) * 1_000_000,
+                fps=int(self._cfg.get("fps", 30)),
+                gop=int(rtp["gop"]) if "gop" in rtp else None,
+                gpu_id=int(rtp.get("gpu_id", 0)),
+            )
+            return RtpH264Sender(
+                source=source,
+                encoder=encoder,
+                host=self._host,
+                port=port,
+                width=int(self._cfg["width"]),
+                height=int(self._cfg["height"]),
+                fps=int(self._cfg.get("fps", 30)),
+                mtu=int(rtp.get("mtu", 1400)),
+            )
+
+        if is_stereo:
+            return [
+                build_one(eyes[0], int(rtp["port"])),
+                build_one(eyes[1], int(rtp["port_right"])),
+            ]
+        return [build_one(eyes[0], int(rtp["port"]))]
 
     def _run(self) -> None:
         attempt = 0
         while not self._stop.is_set():
             attempt += 1
-            sender: Optional[RtpH264Sender] = None
+            senders: List[RtpH264Sender] = []
             started_at: Optional[float] = None
             try:
                 logger.info("camera %r: building (attempt %d)", self._name, attempt)
-                sender = self._build_sender()
-                sender.start()
+                senders = self._build_senders()
+                for s in senders:
+                    s.start()
                 started_at = time.monotonic()
-                logger.info(
-                    "camera %r: streaming → %s:%s",
-                    self._name,
-                    self._host,
-                    self._cfg["rtp"]["port"],
-                )
-                # Poll sender liveness. If the send-loop thread dies
+                rtp = self._cfg.get("rtp", {})
+                if len(senders) == 2:
+                    logger.info(
+                        "camera %r: streaming stereo → %s:%s (L) + %s:%s (R)",
+                        self._name,
+                        self._host,
+                        rtp.get("port"),
+                        self._host,
+                        rtp.get("port_right"),
+                    )
+                else:
+                    logger.info(
+                        "camera %r: streaming → %s:%s",
+                        self._name,
+                        self._host,
+                        rtp.get("port"),
+                    )
+                # Poll sender liveness. If any send-loop thread dies
                 # after startup (GStreamer pipeline error, encoder
-                # crash, etc.) raise into the retry path; otherwise a
-                # silent-but-dead supervisor would keep the service
-                # "healthy" while nothing is being streamed.
+                # crash, etc.) raise into the retry path — for stereo
+                # we treat the pair atomically: if one eye drops, we
+                # restart both.
                 while not self._stop.is_set():
                     self._stop.wait(timeout=SUPERVISOR_TICK_S)
-                    if not sender.is_alive():
-                        raise RuntimeError("RtpH264Sender thread exited unexpectedly")
+                    dead = [s for s in senders if not s.is_alive()]
+                    if dead:
+                        raise RuntimeError(
+                            f"{len(dead)}/{len(senders)} RtpH264Sender thread(s) exited unexpectedly"
+                        )
             except KeyboardInterrupt:
-                # SIGINT during ``sender.start()`` arrives as KeyboardInterrupt
-                # in this thread; surface as a stop, not a retry.
                 self._stop.set()
                 break
             except Exception as e:
-                # ``camera_streamer`` is supposed to never exit. Log full
-                # traceback at debug and a one-liner at warning so journalctl
-                # stays readable while preserving the detail for triage.
                 uptime = (time.monotonic() - started_at) if started_at else 0.0
                 logger.warning(
                     "camera %r: failure after %.1fs uptime: %s — retrying in %.1fs",
@@ -166,9 +197,9 @@ def _run(self) -> None:
                 )
                 logger.debug("camera %r: traceback", self._name, exc_info=True)
             finally:
-                if sender is not None:
+                for s in senders:
                     try:
-                        sender.stop()
+                        s.stop()
                     except Exception:
                         logger.debug(
                             "camera %r: sender.stop() raised", self._name, exc_info=True
diff --git a/examples/camera_viz/camera_viz.py b/examples/camera_viz/camera_viz.py
index d4eb6a94c..368125991 100755
--- a/examples/camera_viz/camera_viz.py
+++ b/examples/camera_viz/camera_viz.py
@@ -20,6 +20,7 @@
 import argparse
 import signal
 import sys
+from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional, Tuple
 
@@ -29,9 +30,23 @@
 
 from pipeline import FrameSource, VizRunner
 from placements import PlacementConfig, PlacementStrategy, build as build_placement
-from sources import RtpH264Source, build_local_camera
+from sources import PairedFrameSource, RtpH264Source, build_local_camera
 
-SourceEntry = Tuple[FrameSource, Optional[PlacementStrategy]]
+
+@dataclass
+class SourceEntry:
+    """One row in the layer plan: a source + its placement + stereo cfg.
+
+    ``stereo`` and ``stereo_baseline_mm`` are pulled from the camera spec
+    (``cameras.<cam>.stereo``) and the placement spec
+    (``placements.<cam>.stereo_baseline_mm``) respectively. They drive
+    the QuadLayer Config when the layer is added to the session.
+    """
+
+    source: FrameSource
+    placement: Optional[PlacementStrategy]
+    stereo: bool = False
+    stereo_baseline_mm: float = 0.0
 
 
 def _build_placement(spec: Optional[dict], is_xr: bool) -> Optional[PlacementStrategy]:
@@ -82,19 +97,48 @@ def _placement_with_aspect(
     return _build_placement(spec, is_xr)
 
 
+def _stereo_for(cam: dict, placements_cfg: dict) -> Tuple[bool, float]:
+    """Resolve stereo + baseline for one camera.
+
+    ``stereo`` lives on the camera (so the producer side knows). The
+    rendering knob ``stereo_baseline_mm`` lives on the placement (it's
+    a display-time parameter). 0.0 means both eyes see the same world
+    quad — all parallax comes from the captured frames.
+    """
+    stereo = bool(cam.get("stereo", False))
+    pspec = placements_cfg.get(cam["name"]) or {}
+    baseline_mm = float(pspec.get("stereo_baseline_mm", 0.0))
+    return stereo, baseline_mm
+
+
 def _build_local_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]:
     """source=local: open each enabled camera directly."""
     placements_cfg = cfg.get("display", {}).get("placements", {})
     entries: List[SourceEntry] = []
     for cam in _enabled_cameras(cfg):
         placement = _placement_with_aspect(placements_cfg.get(cam["name"]), cam, is_xr)
+        stereo, baseline_mm = _stereo_for(cam, placements_cfg)
         for source in build_local_camera(cam):
-            entries.append((source, placement))
+            entries.append(
+                SourceEntry(
+                    source=source,
+                    placement=placement,
+                    stereo=stereo,
+                    stereo_baseline_mm=baseline_mm,
+                )
+            )
     return entries
 
 
 def _build_rtp_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]:
-    """source=rtp: build an RTP listener per camera using its ``rtp.port``."""
+    """source=rtp: build an RTP listener per camera using its ``rtp.port``.
+
+    Stereo cameras open TWO listeners (rtp.port for left, rtp.port_right
+    for right) and pair them via PairedFrameSource. The wire path treats
+    the two eyes as independent streams — drift is acceptable (the user
+    accepted "no sync" for RTP stereo); paired-frame atomicity at the
+    QuadLayer mailbox is what stops torn pairs from reaching the GPU.
+    """
     placements_cfg = cfg.get("display", {}).get("placements", {})
     entries: List[SourceEntry] = []
     for cam in _enabled_cameras(cfg):
@@ -104,16 +148,52 @@ def _build_rtp_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]:
                 f"camera_viz: camera {cam.get('name')!r} missing rtp.port; "
                 "required when source: rtp"
             )
-        source = RtpH264Source(
-            name=cam["name"],
-            width=int(cam["width"]),
-            height=int(cam["height"]),
-            port=int(rtp["port"]),
-            rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)),
-            gpu_id=int(rtp.get("gpu_id", 0)),
-        )
         placement = _placement_with_aspect(placements_cfg.get(cam["name"]), cam, is_xr)
-        entries.append((source, placement))
+        stereo, baseline_mm = _stereo_for(cam, placements_cfg)
+
+        if stereo:
+            if "port_right" not in rtp:
+                raise ValueError(
+                    f"camera_viz: stereo camera {cam.get('name')!r} missing "
+                    "rtp.port_right (required when stereo + source: rtp)"
+                )
+            left = RtpH264Source(
+                name=f"{cam['name']}.left",
+                width=int(cam["width"]),
+                height=int(cam["height"]),
+                port=int(rtp["port"]),
+                rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)),
+                gpu_id=int(rtp.get("gpu_id", 0)),
+            )
+            right = RtpH264Source(
+                name=f"{cam['name']}.right",
+                width=int(cam["width"]),
+                height=int(cam["height"]),
+                port=int(rtp["port_right"]),
+                rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)),
+                gpu_id=int(rtp.get("gpu_id", 0)),
+            )
+            source: FrameSource = PairedFrameSource(
+                name=cam["name"], left=left, right=right
+            )
+        else:
+            source = RtpH264Source(
+                name=cam["name"],
+                width=int(cam["width"]),
+                height=int(cam["height"]),
+                port=int(rtp["port"]),
+                rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)),
+                gpu_id=int(rtp.get("gpu_id", 0)),
+            )
+
+        entries.append(
+            SourceEntry(
+                source=source,
+                placement=placement,
+                stereo=stereo,
+                stereo_baseline_mm=baseline_mm,
+            )
+        )
     return entries
 
 
@@ -168,14 +248,19 @@ def main(argv: Optional[list[str]] = None) -> int:
 
     # Build sources, layers, and placement strategies in parallel arrays.
     sources, layers, strategies = [], [], []
-    for source, placement in entries:
-        sources.append(source)
+    for entry in entries:
+        sources.append(entry.source)
         layer_cfg = viz.QuadLayerConfig()
-        layer_cfg.name = source.spec.name
-        layer_cfg.resolution = viz.Resolution(source.spec.width, source.spec.height)
+        layer_cfg.name = entry.source.spec.name
+        layer_cfg.resolution = viz.Resolution(
+            entry.source.spec.width, entry.source.spec.height
+        )
         layer_cfg.format = viz.PixelFormat.kRGBA8
+        if entry.stereo:
+            layer_cfg.stereo = True
+            layer_cfg.stereo_baseline_mm = entry.stereo_baseline_mm
         layers.append(session.add_quad_layer(layer_cfg))
-        strategies.append(placement)
+        strategies.append(entry.placement)
 
     print(
         f"camera_viz: source={source_mode}, mode={cfg.get('display', {}).get('mode')}, "
diff --git a/examples/camera_viz/configs/synthetic_stereo.yaml b/examples/camera_viz/configs/synthetic_stereo.yaml
new file mode 100644
index 000000000..cf2ee2aa0
--- /dev/null
+++ b/examples/camera_viz/configs/synthetic_stereo.yaml
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Synthetic stereo source — exercises the stereo QuadLayer path without
+# stereo camera hardware. Same animated pattern as synthetic.yaml, but
+# with a horizontal pixel disparity between the eyes so the parallax is
+# visible in XR. Window mode renders the LEFT eye (per the documented
+# single-view fallback).
+
+source: local
+streaming:
+  host: 127.0.0.1
+encoder: auto
+
+cameras:
+  - name: synth
+    enabled: true
+    type: synthetic
+    stereo: true                  # → SyntheticStereoSource + stereo QuadLayer
+    width: 1280
+    height: 720
+    fps: 60
+    hue_speed_hz: 0.25
+    disparity_px: 20              # horizontal pixel offset between eyes
+    rtp:
+      port: 5000
+      bitrate_mbps: 15
+
+display:
+  mode: xr                        # window | xr — stereo really only shines in xr
+  window:
+    width: 1280
+    height: 720
+  xr:
+    near_z: 0.05
+    far_z: 100.0
+  clear_color: [0.05, 0.05, 0.08, 1.0]
+  placements:
+    synth:
+      lock_mode: lazy
+      distance: 1.5
+      stereo_baseline_mm: 0.0     # 0 → both eyes see the same world quad
+                                  # (parallax purely from the captured frames).
+                                  # Try 65.0 (typical IPD) to virtually push the
+                                  # screen further; -30.0 to bring it closer.
diff --git a/examples/camera_viz/pipeline/interface.py b/examples/camera_viz/pipeline/interface.py
index 6d057831e..39edb8bd3 100644
--- a/examples/camera_viz/pipeline/interface.py
+++ b/examples/camera_viz/pipeline/interface.py
@@ -35,12 +35,18 @@ class Frame:
     ``image`` is anything that exposes ``__cuda_array_interface__`` —
     CuPy / PyTorch / Numba arrays all work. ``stream`` is the producer's
     CUDA stream so the consumer can synchronize when it's not 0/default.
+
+    Stereo: when the source produces a second eye, set ``image_right``
+    to the right-eye buffer. ``image`` is the left eye. Both eyes MUST
+    come from the same capture instant — the renderer submits them as
+    an atomic pair. Mono sources leave ``image_right`` as None.
     """
 
     image: Any
     timestamp_ns: int
     source_id: str
     stream: int = 0
+    image_right: Optional[Any] = None
 
 
 class FrameSource(ABC):
diff --git a/examples/camera_viz/pipeline/runner.py b/examples/camera_viz/pipeline/runner.py
index cd9d1ec67..99768b47c 100644
--- a/examples/camera_viz/pipeline/runner.py
+++ b/examples/camera_viz/pipeline/runner.py
@@ -5,7 +5,7 @@
 VizRunner owns two threads:
 
   * **submit thread** — polls each source's ``latest()`` at ~1 kHz,
-    calls ``layer.submit_cuda_array()`` on new frames, and notifies a
+    calls ``layer.submit()`` on new frames, and notifies a
     condition variable.
   * **render thread** — waits on the condition. Wakes within ~µs of a
     new publish and calls ``session.render()``. A safety-net timeout
@@ -200,7 +200,13 @@ def _submit_loop_inner(self) -> None:
                 if not device_pinned:
                     self._pin_to_device(frame)
                     device_pinned = True
-                layer.submit_cuda_array(frame.image, stream=frame.stream)
+                # Stereo dispatch: when the source carries a second eye,
+                # hand both to QuadLayer in one atomic submit. The layer
+                # validates that its stereo-ness matches.
+                if frame.image_right is not None:
+                    layer.submit(frame.image, frame.image_right, stream=frame.stream)
+                else:
+                    layer.submit(frame.image, stream=frame.stream)
                 published_any = True
             if published_any:
                 with self._data_cond:
diff --git a/examples/camera_viz/sources/__init__.py b/examples/camera_viz/sources/__init__.py
index fe1484bb3..8243ff704 100644
--- a/examples/camera_viz/sources/__init__.py
+++ b/examples/camera_viz/sources/__init__.py
@@ -12,16 +12,19 @@
 
 from pipeline import FrameSource
 
+from ._helpers import PairedFrameSource
 from .oakd import OakdSource
 from .rtp_h264 import RtpH264Source
-from .synthetic import SyntheticSource
+from .synthetic import SyntheticSource, SyntheticStereoSource
 from .v4l2 import V4l2Source
 from .zed import ZedSource
 
 __all__ = [
     "OakdSource",
+    "PairedFrameSource",
     "RtpH264Source",
     "SyntheticSource",
+    "SyntheticStereoSource",
     "V4l2Source",
     "ZedSource",
     "build_local_camera",
@@ -31,15 +34,33 @@
 def build_local_camera(spec: dict) -> List[FrameSource]:
     """Build the local FrameSource(s) for one ``cameras:`` entry.
 
-    Returns a list because multi-stream cameras (OAK-D stereo, ZED stereo)
-    fan out to one source per stream. Single-stream cameras return [source].
+    Mono cameras return [source]. Stereo cameras (``stereo: true``)
+    return [PairedFrameSource] — a single FrameSource that emits a
+    ``Frame`` with both ``image`` (left eye) and ``image_right`` populated.
+    The camera_viz pipeline routes that to ``QuadLayer.submit(left, right)``.
+    For v4l2 the ``stereo`` toggle is rejected (USB cameras are mono);
+    OAK-D / ZED / synthetic each have their own native paths.
+
     Shared by camera_viz.py and camera_streamer.py — keep the schema stable.
     """
     kind = spec["type"]
+    stereo = bool(spec.get("stereo", False))
+    name = spec["name"]
     if kind == "synthetic":
+        if stereo:
+            return [
+                SyntheticStereoSource(
+                    name=name,
+                    width=int(spec["width"]),
+                    height=int(spec["height"]),
+                    fps=float(spec.get("fps", 60.0)),
+                    hue_speed_hz=float(spec.get("hue_speed_hz", 0.25)),
+                    disparity_px=int(spec.get("disparity_px", 20)),
+                )
+            ]
         return [
             SyntheticSource(
-                name=spec["name"],
+                name=name,
                 width=int(spec["width"]),
                 height=int(spec["height"]),
                 fps=float(spec.get("fps", 60.0)),
@@ -47,9 +68,14 @@ def build_local_camera(spec: dict) -> List[FrameSource]:
             )
         ]
     if kind == "v4l2":
+        if stereo:
+            raise ValueError(
+                f"build_local_camera: v4l2 camera {name!r} cannot be stereo "
+                "(single-stream USB / UVC). Use type: oakd or zed."
+            )
         return [
             V4l2Source(
-                name=spec["name"],
+                name=name,
                 device=spec.get("device", "/dev/video0"),
                 width=int(spec["width"]),
                 height=int(spec["height"]),
@@ -58,10 +84,13 @@ def build_local_camera(spec: dict) -> List[FrameSource]:
             )
         ]
     if kind == "oakd":
-        return list(
+        # ``stereo: true`` is a shorthand for the OAK-D ``mode: stereo``.
+        # If the user passed both, an explicit mode wins.
+        mode = spec.get("mode", "stereo" if stereo else "mono")
+        eyes = list(
             OakdSource.build(
-                base_name=spec["name"],
-                mode=spec.get("mode", "mono"),
+                base_name=name,
+                mode=mode,
                 device_id=spec.get("device_id", ""),
                 width=int(spec["width"]),
                 height=int(spec["height"]),
@@ -72,17 +101,37 @@ def build_local_camera(spec: dict) -> List[FrameSource]:
                 rgb_fps=int(spec.get("rgb_fps", 0)),
             )
         )
+        if stereo or mode in ("stereo", "stereo_rgb"):
+            # OakdSource.build returns 2 per-eye sources in stereo and 3
+            # in stereo_rgb; we pair the first two (left + right). The
+            # extra RGB stream of stereo_rgb is intentionally dropped —
+            # the QuadLayer takes exactly two eyes.
+            if len(eyes) < 2:
+                raise ValueError(
+                    f"build_local_camera: oakd {name!r} stereo mode produced {len(eyes)} "
+                    "source(s); expected at least 2"
+                )
+            return [PairedFrameSource(name=name, left=eyes[0], right=eyes[1])]
+        return eyes
     if kind == "zed":
-        return list(
+        eyes = list(
             ZedSource.build(
-                base_name=spec["name"],
+                base_name=name,
                 resolution=spec.get("resolution", "HD720"),
                 fps=int(spec.get("fps", 30)),
                 serial_number=int(spec.get("serial_number", 0)),
                 bus_type=spec.get("bus_type", "usb"),
-                stereo=bool(spec.get("stereo", False)),
+                stereo=stereo,
             )
         )
+        if stereo:
+            if len(eyes) != 2:
+                raise ValueError(
+                    f"build_local_camera: zed {name!r} stereo produced {len(eyes)} "
+                    "source(s); expected 2"
+                )
+            return [PairedFrameSource(name=name, left=eyes[0], right=eyes[1])]
+        return eyes
     raise ValueError(
         f"build_local_camera: unknown camera type {kind!r} "
         "(known: synthetic, v4l2, oakd, zed)"
diff --git a/examples/camera_viz/sources/_helpers.py b/examples/camera_viz/sources/_helpers.py
index 5d6c316b1..19f749931 100644
--- a/examples/camera_viz/sources/_helpers.py
+++ b/examples/camera_viz/sources/_helpers.py
@@ -15,7 +15,7 @@
      - kicks off an async ``cudaMemcpyAsync`` H2D + color-convert on the
        producer stream,
      - synchronizes the stream so the consumer (renderer thread) can safely
-       read the GPU buffer via ``submit_cuda_array(stream=0)`` — until the
+       read the GPU buffer via ``submit(stream=0)`` — until the
        viz binding grows cross-stream sync (M6 review item #2), this is
        the only correct way to hand the buffer across threads,
      - flips the write/publish index atomically under a short lock.
@@ -317,3 +317,81 @@ def _mark_disconnected(self) -> None:
             pass
         self._connected = False
         self._last_reconnect_attempt_s = time.monotonic()
+
+
+class PairedFrameSource(FrameSource):
+    """Pair two per-eye FrameSources into a single stereo source.
+
+    Owns no thread of its own. latest() polls both children: when
+    each has produced a fresh Frame, emits one combined Frame with the
+    left source's image in Frame.image and the right source's image
+    in Frame.image_right. Skips the publish (returns None) if either
+    eye hasn't produced — the QuadLayer mailbox keeps the previous
+    matched pair until both eyes catch up. Acceptable because both eyes
+    share the camera producer (same SDK grab cycle), so they re-sync
+    within one frame.
+
+    Both children MUST agree on (width, height, pixel_format).
+    """
+
+    def __init__(self, name: str, left: FrameSource, right: FrameSource) -> None:
+        if left.spec.width != right.spec.width or left.spec.height != right.spec.height:
+            raise ValueError(
+                f"PairedFrameSource: left/right resolution mismatch "
+                f"({left.spec.width}x{left.spec.height} vs "
+                f"{right.spec.width}x{right.spec.height})"
+            )
+        if left.spec.pixel_format != right.spec.pixel_format:
+            raise ValueError(
+                f"PairedFrameSource: left/right pixel_format mismatch "
+                f"({left.spec.pixel_format!r} vs {right.spec.pixel_format!r})"
+            )
+        self._spec = SourceSpec(
+            name=name,
+            width=left.spec.width,
+            height=left.spec.height,
+            pixel_format=left.spec.pixel_format,
+        )
+        self._left = left
+        self._right = right
+
+    @property
+    def spec(self) -> SourceSpec:
+        return self._spec
+
+    @property
+    def left(self) -> FrameSource:
+        """Per-eye left source. Used by camera_streamer.py to fan out
+        two independent RTP streams for stereo cameras (paired
+        atomicity comes back at the receiver, not on the wire)."""
+        return self._left
+
+    @property
+    def right(self) -> FrameSource:
+        return self._right
+
+    def start(self) -> None:
+        self._left.start()
+        self._right.start()
+
+    def stop(self) -> None:
+        self._left.stop()
+        self._right.stop()
+
+    def latest(self) -> Optional[Frame]:
+        # Read both. We only publish when BOTH eyes have produced —
+        # otherwise the renderer would see a mismatched pair (or an
+        # update on one eye only, which submit() would treat as the
+        # left of a new pair with the previous right). Returning None
+        # leaves the layer rendering the prior matched pair.
+        fl = self._left.latest()
+        fr = self._right.latest()
+        if fl is None or fr is None:
+            return None
+        return Frame(
+            image=fl.image,
+            image_right=fr.image,
+            timestamp_ns=fl.timestamp_ns,
+            source_id=self._spec.name,
+            stream=fl.stream,
+        )
diff --git a/examples/camera_viz/sources/synthetic.py b/examples/camera_viz/sources/synthetic.py
index e6846a876..777070f44 100644
--- a/examples/camera_viz/sources/synthetic.py
+++ b/examples/camera_viz/sources/synthetic.py
@@ -142,3 +142,134 @@ def _produce_loop(self) -> None:
                     # Coarse pacing — CuPy fill kernels at 1080p are well under
                     # the budget, so a simple sleep keeps us at target fps.
                     time.sleep(self._frame_interval_s)
+
+
+class SyntheticStereoSource(FrameSource):
+    """GPU-resident stereo source emitting a paired RGBA8 test pattern.
+
+    Same animated pattern as ``SyntheticSource``, but with a horizontal
+    pixel shift between the eyes so the disparity is visible — useful
+    for sanity-checking a stereo QuadLayer end-to-end without a real
+    camera. The left/right kernels run on the same stream and are
+    pre-synced before publish, so the renderer always reads a
+    same-instant pair.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        width: int,
+        height: int,
+        fps: float = 60.0,
+        hue_speed_hz: float = 0.25,
+        disparity_px: int = 20,
+    ) -> None:
+        try:
+            import cupy as cp
+        except ImportError as e:
+            raise RuntimeError(
+                "SyntheticStereoSource requires CuPy (cupy-cuda12x). Install via "
+                "`uv pip install cupy-cuda12x` or skip this source."
+            ) from e
+
+        self._cp = cp
+        self._spec = SourceSpec(
+            name=name, width=width, height=height, pixel_format="rgba8"
+        )
+        self._frame_interval_s = 1.0 / fps if fps > 0.0 else 0.0
+        self._hue_speed_hz = hue_speed_hz
+        self._disparity_px = int(disparity_px)
+
+        # Triple-buffer per eye. Indices stay in lock-step so latest()
+        # always returns matching (left[i], right[i]).
+        self._left = [cp.zeros((height, width, 4), dtype=cp.uint8) for _ in range(3)]
+        self._right = [cp.zeros((height, width, 4), dtype=cp.uint8) for _ in range(3)]
+        self._write_idx = 0
+        self._publish_idx: int = -1
+        self._consumed_idx: int = -2
+        self._lock = threading.Lock()
+
+        self._stop = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+        self._t0_ns = 0
+
+    @property
+    def spec(self) -> SourceSpec:
+        return self._spec
+
+    def start(self) -> None:
+        if self._thread is not None:
+            return
+        self._stop.clear()
+        self._t0_ns = time.monotonic_ns()
+        self._thread = threading.Thread(
+            target=self._produce_loop,
+            name=f"synth_stereo_{self._spec.name}",
+            daemon=False,
+        )
+        self._thread.start()
+
+    def stop(self) -> None:
+        self._stop.set()
+        if self._thread is not None:
+            self._thread.join()
+            self._thread = None
+
+    def latest(self) -> Optional[Frame]:
+        with self._lock:
+            if self._publish_idx < 0 or self._publish_idx == self._consumed_idx:
+                return None
+            idx = self._publish_idx
+            self._consumed_idx = idx
+        return Frame(
+            image=self._left[idx],
+            image_right=self._right[idx],
+            timestamp_ns=time.monotonic_ns(),
+            source_id=self._spec.name,
+            stream=0,
+        )
+
+    def _produce_loop(self) -> None:
+        cp = self._cp
+        h, w = self._spec.height, self._spec.width
+        with cp.cuda.Device(int(self._left[0].device.id)):
+            y_grid = cp.arange(h, dtype=cp.float32).reshape(h, 1)
+            x_grid_l = cp.arange(w, dtype=cp.float32).reshape(1, w)
+            # Right-eye coord shifted by disparity_px so the same content
+            # appears at a slightly different x position. Visible parallax
+            # confirms the renderer routed both eyes correctly.
+            x_grid_r = (cp.arange(w, dtype=cp.float32) - self._disparity_px).reshape(
+                1, w
+            )
+            diag_l = (x_grid_l + y_grid) / float(w + h)
+            diag_r = (x_grid_r + y_grid) / float(w + h)
+
+            while not self._stop.is_set():
+                t = (time.monotonic_ns() - self._t0_ns) * 1e-9
+                phase = (t * self._hue_speed_hz) % 1.0
+
+                def fill(buf, diag):
+                    r = (cp.sin((diag + phase) * 6.2831853) * 127.0 + 128.0).astype(
+                        cp.uint8
+                    )
+                    g = (
+                        cp.sin((diag + phase + 0.3333) * 6.2831853) * 127.0 + 128.0
+                    ).astype(cp.uint8)
+                    b = (
+                        cp.sin((diag + phase + 0.6667) * 6.2831853) * 127.0 + 128.0
+                    ).astype(cp.uint8)
+                    buf[..., 0] = r
+                    buf[..., 1] = g
+                    buf[..., 2] = b
+                    buf[..., 3] = 255
+
+                fill(self._left[self._write_idx], diag_l)
+                fill(self._right[self._write_idx], diag_r)
+                cp.cuda.Stream.null.synchronize()
+
+                with self._lock:
+                    self._publish_idx = self._write_idx
+                self._write_idx = (self._write_idx + 1) % len(self._left)
+
+                if self._frame_interval_s > 0.0:
+                    time.sleep(self._frame_interval_s)
diff --git a/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp b/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp
index 96c7ee2a4..4faacdea8 100644
--- a/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp
+++ b/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp
@@ -42,6 +42,14 @@ class VkContext;
 // bump kMaxFramesInFlight and kSlotCount together.
 //
 // Memory: kSlotCount × width × height × bpp (~40 MB at 1080p RGBA8).
+//
+// Stereo: when Config::stereo is true, each slot owns a PAIR of
+// DeviceImages (left + right). The two-arg submit() does both
+// memcpy2Ds + the cuda_done_writing signal on a single CUDA stream,
+// so stream ordering guarantees the renderer never sees a half-
+// updated pair. Memory doubles. In kXr, record() binds the left
+// descriptor for view 0 and the right for view 1; window/offscreen
+// (single view) draws the left buffer only.
 class QuadLayer : public LayerBase
 {
 public:
@@ -84,6 +92,25 @@ class QuadLayer : public LayerBase
         // Set to false to save the ~33% extra image memory on layers
         // that are always sampled at native resolution.
         bool generate_mipmaps = true;
+
+        // Stereo mode. When true, the layer owns a paired left+right
+        // mailbox; submit MUST be called with both buffers. In kXr,
+        // view 0 (left eye) samples the left buffer and view 1 (right
+        // eye) the right. In window/offscreen the left buffer is drawn
+        // and the right is allocated but unused. Memory doubles.
+        bool stereo = false;
+
+        // Horizontal disparity between the left-plane (in the left eye)
+        // and the right-plane (in the right eye), in millimeters along
+        // the placement's local +x axis. Each eye's quad center is
+        // shifted by ±stereo_baseline_mm/2 (left eye: −, right eye: +).
+        // 0 means both eyes see the same world-space quad; all stereo
+        // cues come from the captured images. Positive values let the
+        // planes splay outward (virtual screen further back); negative
+        // makes them cross (closer to viewer). Ignored when stereo is
+        // false or outside kXr. mm-scale chosen because typical real-
+        // world IPDs and camera baselines are 50–80 mm.
+        float stereo_baseline_mm = 0.0f;
     };
 
     // Hard cap on the mip chain when generate_mipmaps is enabled.
@@ -110,7 +137,17 @@ class QuadLayer : public LayerBase
     // producer wrapping its mailbox could overwrite src.data while our
     // async memcpy was still reading. Cost: ~0.5 ms per 1080p call on
     // the calling thread; the render path is unaffected.
+    //
+    // Mono layer (Config::stereo == false): use the one-arg overload.
+    // The two-arg overload throws std::logic_error.
+    //
+    // Stereo layer (Config::stereo == true): use the two-arg overload.
+    // Both buffers are copied + the single cuda_done_writing signal is
+    // emitted on the SAME ``stream``, so stream ordering guarantees
+    // the renderer never reads a half-matched pair. The one-arg
+    // overload throws std::logic_error.
     void submit(const VizBuffer& src, cudaStream_t stream = 0);
+    void submit(const VizBuffer& left, const VizBuffer& right, cudaStream_t stream = 0);
 
     // Pre-pass slot: promote latest_ -> in_use_[in_flight_slot] AND
     // (when generate_mipmaps is on) emit the mip-chain blits on the
@@ -141,8 +178,10 @@ class QuadLayer : public LayerBase
     void set_placement(std::optional<Config::Placement> placement) noexcept;
     std::optional<Config::Placement> placement() const noexcept;
 
-    // Diagnostic accessor; nullptr for slots beyond kSlotCount.
+    // Diagnostic accessor; nullptr for slots beyond kSlotCount, and
+    // device_image_right is null on mono layers.
     const DeviceImage* device_image(uint32_t slot) const noexcept;
+    const DeviceImage* device_image_right(uint32_t slot) const noexcept;
 
 private:
     void init();
@@ -178,8 +217,10 @@ class QuadLayer : public LayerBase
     // Number of mip levels per DeviceImage slot. 1 when mips disabled.
     uint32_t mip_levels_ = 1;
 
-    // One DeviceImage per mailbox slot.
+    // One DeviceImage per mailbox slot. ``slots_`` is the left/mono
+    // image; ``slots_right_`` only allocated when Config::stereo.
     std::array<std::unique_ptr<DeviceImage>, kSlotCount> slots_;
+    std::array<std::unique_ptr<DeviceImage>, kSlotCount> slots_right_;
 
     VkSampler sampler_ = VK_NULL_HANDLE;
     VkDescriptorSetLayout descriptor_set_layout_ = VK_NULL_HANDLE;
@@ -188,7 +229,9 @@ class QuadLayer : public LayerBase
 
     VkDescriptorPool descriptor_pool_ = VK_NULL_HANDLE;
     // One descriptor set per slot — record() binds the one for in_use_.
+    // ``descriptor_sets_right_`` is only populated when Config::stereo.
     std::array<VkDescriptorSet, kSlotCount> descriptor_sets_{};
+    std::array<VkDescriptorSet, kSlotCount> descriptor_sets_right_{};
 
     // Mailbox: latest_ = most recent publish. in_use_[i] = slot the
     // i-th in-flight frame is sampling. Atomic so producer and
diff --git a/src/viz/layers/cpp/quad_layer.cpp b/src/viz/layers/cpp/quad_layer.cpp
index 4678fef64..f226d8cf1 100644
--- a/src/viz/layers/cpp/quad_layer.cpp
+++ b/src/viz/layers/cpp/quad_layer.cpp
@@ -10,6 +10,7 @@
 #include <viz/shaders/textured_quad.frag.spv.h>
 #include <viz/shaders/textured_quad.vert.spv.h>
 
+#include <cmath>
 #include <cstdint>
 #include <cstring>
 #include <cuda_runtime.h>
@@ -113,6 +114,10 @@ QuadLayer::QuadLayer(const VkContext& ctx, VkRenderPass render_pass, Config conf
             throw std::invalid_argument("QuadLayer: Placement::size_meters must be > 0 in both components");
         }
     }
+    if (!std::isfinite(config_.stereo_baseline_mm))
+    {
+        throw std::invalid_argument("QuadLayer: stereo_baseline_mm must be finite");
+    }
 
     // Resolve mip count: capped chain when enabled, single level
     // otherwise. The cap (kMaxMipLevels) keeps the per-frame blit
@@ -158,6 +163,13 @@ void QuadLayer::init()
         {
             slot = DeviceImage::create(*ctx_, config_.resolution, config_.format, mip_levels_);
         }
+        if (config_.stereo)
+        {
+            for (auto& slot : slots_right_)
+            {
+                slot = DeviceImage::create(*ctx_, config_.resolution, config_.format, mip_levels_);
+            }
+        }
         create_sampler();
         create_descriptor_set_layout();
         create_pipeline_layout();
@@ -186,14 +198,20 @@ void QuadLayer::destroy()
         {
             slot.reset();
         }
+        for (auto& slot : slots_right_)
+        {
+            slot.reset();
+        }
         return;
     }
     if (descriptor_pool_ != VK_NULL_HANDLE)
     {
-        // descriptor_sets_ are freed implicitly with the pool.
+        // descriptor_sets_ + descriptor_sets_right_ are freed implicitly
+        // with the pool.
         vkDestroyDescriptorPool(device, descriptor_pool_, nullptr);
         descriptor_pool_ = VK_NULL_HANDLE;
         descriptor_sets_.fill(VK_NULL_HANDLE);
+        descriptor_sets_right_.fill(VK_NULL_HANDLE);
     }
     if (pipeline_ != VK_NULL_HANDLE)
     {
@@ -219,6 +237,10 @@ void QuadLayer::destroy()
     {
         slot.reset();
     }
+    for (auto& slot : slots_right_)
+    {
+        slot.reset();
+    }
     latest_.store(kSlotNone, std::memory_order_release);
     for (auto& e : in_use_)
     {
@@ -255,6 +277,15 @@ const DeviceImage* QuadLayer::device_image(uint32_t slot) const noexcept
     return slots_[slot].get();
 }
 
+const DeviceImage* QuadLayer::device_image_right(uint32_t slot) const noexcept
+{
+    if (slot >= kSlotCount)
+    {
+        return nullptr;
+    }
+    return slots_right_[slot].get();
+}
+
 uint8_t QuadLayer::pick_free_slot(uint8_t latest,
                                   const std::array<std::atomic<uint8_t>, kMaxFramesInFlight>& in_use) const noexcept
 {
@@ -285,25 +316,56 @@ uint8_t QuadLayer::pick_free_slot(uint8_t latest,
     return kSlotNone;
 }
 
-void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream)
+namespace
 {
-    require_alive(slots_[0], "submit");
-    if (src.space != MemorySpace::kDevice)
+// Shared per-buffer validation for the submit overloads. ``label`` is
+// the caller's tag (e.g. "src", "left", "right") so the error message
+// names which buffer failed in the stereo case.
+void validate_submit_buffer(const VizBuffer& buf, const QuadLayer::Config& cfg, const char* label)
+{
+    if (buf.space != MemorySpace::kDevice)
+    {
+        throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + " must be MemorySpace::kDevice");
+    }
+    if (buf.width != cfg.resolution.width || buf.height != cfg.resolution.height)
+    {
+        throw std::invalid_argument(std::string("QuadLayer::submit: ") + label +
+                                    " dimensions do not match layer resolution");
+    }
+    if (buf.format != cfg.format)
     {
-        throw std::invalid_argument("QuadLayer::submit: src must be MemorySpace::kDevice");
+        throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + " format does not match layer format");
     }
-    if (src.width != config_.resolution.width || src.height != config_.resolution.height)
+    if (buf.data == nullptr)
     {
-        throw std::invalid_argument("QuadLayer::submit: src dimensions do not match layer resolution");
+        throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + ".data is null");
     }
-    if (src.format != config_.format)
+}
+
+// Queue an async D2D copy of ``buf`` → ``image.cuda_array()`` on
+// ``stream``. Shared between the mono and stereo submit paths.
+void enqueue_copy(const VizBuffer& buf, DeviceImage& image, cudaStream_t stream)
+{
+    const size_t row_bytes = static_cast<size_t>(buf.width) * bytes_per_pixel(buf.format);
+    const size_t src_pitch = (buf.pitch == 0) ? row_bytes : buf.pitch;
+    const cudaError_t err = cudaMemcpy2DToArrayAsync(
+        image.cuda_array(), 0, 0, buf.data, src_pitch, row_bytes, buf.height, cudaMemcpyDeviceToDevice, stream);
+    if (err != cudaSuccess)
     {
-        throw std::invalid_argument("QuadLayer::submit: src format does not match layer format");
+        throw std::runtime_error(std::string("QuadLayer::submit: cudaMemcpy2DToArrayAsync failed: ") +
+                                 cudaGetErrorString(err));
     }
-    if (src.data == nullptr)
+}
+} // namespace
+
+void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream)
+{
+    require_alive(slots_[0], "submit");
+    if (config_.stereo)
     {
-        throw std::invalid_argument("QuadLayer::submit: src.data is null");
+        throw std::logic_error("QuadLayer::submit: this layer is stereo — use the two-arg submit(left, right) overload");
     }
+    validate_submit_buffer(src, config_, "src");
 
     const uint8_t latest = latest_.load(std::memory_order_acquire);
     const uint8_t slot = pick_free_slot(latest, in_use_);
@@ -316,11 +378,7 @@ void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream)
     DeviceImage& image = *slots_[slot];
 
     check_cuda(cudaSetDevice(ctx_->cuda_device_id()), "cudaSetDevice");
-    const size_t row_bytes = static_cast<size_t>(src.width) * bytes_per_pixel(src.format);
-    const size_t src_pitch = (src.pitch == 0) ? row_bytes : src.pitch;
-    check_cuda(cudaMemcpy2DToArrayAsync(image.cuda_array(), 0, 0, src.data, src_pitch, row_bytes, src.height,
-                                        cudaMemcpyDeviceToDevice, stream),
-               "cudaMemcpy2DToArrayAsync");
+    enqueue_copy(src, image, stream);
     image.cuda_signal_write_done(stream);
 
     // Wait for the D2D copy to complete before returning. Sources publish
@@ -335,6 +393,40 @@ void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream)
     latest_.store(slot, std::memory_order_release);
 }
 
+void QuadLayer::submit(const VizBuffer& left, const VizBuffer& right, cudaStream_t stream)
+{
+    require_alive(slots_[0], "submit");
+    if (!config_.stereo)
+    {
+        throw std::logic_error("QuadLayer::submit: this layer is mono — call submit(src) with a single buffer");
+    }
+    validate_submit_buffer(left, config_, "left");
+    validate_submit_buffer(right, config_, "right");
+
+    const uint8_t latest = latest_.load(std::memory_order_acquire);
+    const uint8_t slot = pick_free_slot(latest, in_use_);
+    if (slot == kSlotNone)
+    {
+        return;
+    }
+    DeviceImage& image_l = *slots_[slot];
+    DeviceImage& image_r = *slots_right_[slot];
+
+    check_cuda(cudaSetDevice(ctx_->cuda_device_id()), "cudaSetDevice");
+    // Both copies on the same stream + a single signal on the left's
+    // semaphore. Stream ordering guarantees the right copy completes
+    // before the signal fires, so the renderer waiting on the left's
+    // semaphore implies the right is ready too. No second semaphore
+    // needed — by construction the renderer cannot see a half-pair.
+    enqueue_copy(left, image_l, stream);
+    enqueue_copy(right, image_r, stream);
+    image_l.cuda_signal_write_done(stream);
+
+    check_cuda(cudaStreamSynchronize(stream), "cudaStreamSynchronize(submit-stereo)");
+
+    latest_.store(slot, std::memory_order_release);
+}
+
 void QuadLayer::record_mip_generation(VkCommandBuffer cmd, DeviceImage& image)
 {
     // Mip-chain regeneration via vkCmdBlitImage. The image lives in
@@ -469,10 +561,17 @@ void QuadLayer::record_pre_render_pass(VkCommandBuffer cmd, uint32_t in_flight_s
 
     // Mip generation (if configured). Reads level 0 written by CUDA,
     // writes levels 1..N-1, ends with the whole image back in
-    // SHADER_READ_ONLY for the sampler in record().
+    // SHADER_READ_ONLY for the sampler in record(). For stereo we
+    // regenerate both eyes' chains; the right image's level 0 was
+    // written by the same producer stream that signaled the left's
+    // semaphore, so the queue's TRANSFER-stage wait already covers it.
     if (mip_levels_ > 1)
     {
         record_mip_generation(cmd, *slots_[cur]);
+        if (config_.stereo)
+        {
+            record_mip_generation(cmd, *slots_right_[cur]);
+        }
     }
 }
 
@@ -494,8 +593,6 @@ void QuadLayer::record(VkCommandBuffer cmd,
     }
 
     vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_);
-    vkCmdBindDescriptorSets(
-        cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, 0, 1, &descriptor_sets_[cur], 0, nullptr);
 
     // Snapshot under lock so set_placement() can run concurrently.
     std::optional<Config::Placement> placement;
@@ -514,15 +611,45 @@ void QuadLayer::record(VkCommandBuffer cmd,
     // xr: 4-vertex triangle strip; else: 3-vertex NDC-cover triangle.
     const uint32_t vertex_count = xr_mode ? 4u : 3u;
 
+    // Stereo baseline offset along the placement's local +x axis. ±half
+    // the configured baseline, signed per eye: left eye gets the
+    // negative offset, right eye the positive. Direction in world space
+    // is the placement orientation rotating local +x. We evaluate it
+    // once outside the per-view loop since orientation is per-frame
+    // constant. Skipped when the layer is mono (baseline doesn't apply)
+    // OR outside kXr (both eyes converge to a single view, no signed
+    // disambiguation possible). Zero baseline elides to the mono MVP.
+    const bool apply_baseline = xr_mode && config_.stereo && config_.stereo_baseline_mm != 0.0f;
+    glm::vec3 baseline_axis_ws{ 0.0f };
+    if (apply_baseline)
+    {
+        baseline_axis_ws = glm::mat3_cast(placement->pose.orientation) * glm::vec3(1.0f, 0.0f, 0.0f);
+    }
+
     // Compositor pre-binds the layer's scissor; we set per-view viewport.
-    for (const auto& view : views)
+    for (size_t view_idx = 0; view_idx < views.size(); ++view_idx)
     {
+        const auto& view = views[view_idx];
         bind_view_viewport(cmd, view);
 
+        // Stereo: view 0 → left descriptor, view 1 → right descriptor.
+        // Mono OR single-view backends (window/offscreen): always left.
+        const bool sample_right = config_.stereo && view_idx == 1;
+        VkDescriptorSet ds = sample_right ? descriptor_sets_right_[cur] : descriptor_sets_[cur];
+        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, 0, 1, &ds, 0, nullptr);
+
         QuadShaderData data{};
         if (xr_mode)
         {
-            const glm::mat4 mvp = placement_mvp(*placement, view);
+            Config::Placement eye_placement = *placement;
+            if (apply_baseline)
+            {
+                const float sign = (view_idx == 0) ? -1.0f : +1.0f;
+                // 0.5 to halve the disparity per eye, 0.001 to convert mm → m
+                // (placement.pose.position is in world meters).
+                eye_placement.pose.position += sign * (config_.stereo_baseline_mm * 0.0005f) * baseline_axis_ws;
+            }
+            const glm::mat4 mvp = placement_mvp(eye_placement, view);
             std::memcpy(data.mvp, &mvp[0][0], sizeof(data.mvp));
             data.mode = 1;
         }
@@ -755,13 +882,16 @@ void QuadLayer::create_pipeline()
 
 void QuadLayer::create_descriptor_pool()
 {
+    // Stereo needs twice the sets: one per slot per eye.
+    const uint32_t set_count = config_.stereo ? (kSlotCount * 2u) : kSlotCount;
+
     VkDescriptorPoolSize pool_size{};
     pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-    pool_size.descriptorCount = kSlotCount;
+    pool_size.descriptorCount = set_count;
 
     VkDescriptorPoolCreateInfo info{};
     info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-    info.maxSets = kSlotCount;
+    info.maxSets = set_count;
     info.poolSizeCount = 1;
     info.pPoolSizes = &pool_size;
     check_vk(vkCreateDescriptorPool(ctx_->device(), &info, nullptr, &descriptor_pool_), "vkCreateDescriptorPool");
@@ -778,14 +908,24 @@ void QuadLayer::allocate_descriptor_sets()
     info.descriptorSetCount = kSlotCount;
     info.pSetLayouts = layouts.data();
     check_vk(vkAllocateDescriptorSets(ctx_->device(), &info, descriptor_sets_.data()), "vkAllocateDescriptorSets");
+    if (config_.stereo)
+    {
+        check_vk(vkAllocateDescriptorSets(ctx_->device(), &info, descriptor_sets_right_.data()),
+                 "vkAllocateDescriptorSets(right)");
+    }
 }
 
 void QuadLayer::update_descriptor_sets()
 {
-    // One write per slot, each pointing at the slot's own image view.
-    std::array<VkDescriptorImageInfo, kSlotCount> image_infos{};
-    std::array<VkWriteDescriptorSet, kSlotCount> writes{};
-    for (uint32_t i = 0; i < kSlotCount; ++i)
+    // One write per slot per eye, pointing at the eye-specific image
+    // view. Stereo doubles the write count.
+    const uint32_t per_eye = kSlotCount;
+    const uint32_t total = config_.stereo ? (per_eye * 2u) : per_eye;
+
+    std::array<VkDescriptorImageInfo, kSlotCount * 2> image_infos{};
+    std::array<VkWriteDescriptorSet, kSlotCount * 2> writes{};
+
+    for (uint32_t i = 0; i < per_eye; ++i)
     {
         image_infos[i].sampler = sampler_;
         image_infos[i].imageView = slots_[i]->vk_image_view();
@@ -799,7 +939,25 @@ void QuadLayer::update_descriptor_sets()
         writes[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
         writes[i].pImageInfo = &image_infos[i];
     }
-    vkUpdateDescriptorSets(ctx_->device(), kSlotCount, writes.data(), 0, nullptr);
+    if (config_.stereo)
+    {
+        for (uint32_t i = 0; i < per_eye; ++i)
+        {
+            const uint32_t k = per_eye + i;
+            image_infos[k].sampler = sampler_;
+            image_infos[k].imageView = slots_right_[i]->vk_image_view();
+            image_infos[k].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+            writes[k].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+            writes[k].dstSet = descriptor_sets_right_[i];
+            writes[k].dstBinding = 0;
+            writes[k].dstArrayElement = 0;
+            writes[k].descriptorCount = 1;
+            writes[k].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            writes[k].pImageInfo = &image_infos[k];
+        }
+    }
+    vkUpdateDescriptorSets(ctx_->device(), total, writes.data(), 0, nullptr);
 }
 
 } // namespace viz
diff --git a/src/viz/layers_tests/cpp/test_quad_layer.cpp b/src/viz/layers_tests/cpp/test_quad_layer.cpp
index ed540e11a..3f18bd96a 100644
--- a/src/viz/layers_tests/cpp/test_quad_layer.cpp
+++ b/src/viz/layers_tests/cpp/test_quad_layer.cpp
@@ -14,8 +14,11 @@
 #include <viz/core/vk_context.hpp>
 #include <viz/layers/quad_layer.hpp>
 
+#include <cstdint>
 #include <cuda_runtime.h>
+#include <limits>
 #include <stdexcept>
+#include <vector>
 
 using viz::DeviceImage;
 using viz::PixelFormat;
@@ -315,3 +318,308 @@ TEST_CASE("QuadLayer visibility toggle is independent of pipeline state", "[gpu]
     layer.set_visible(true);
     CHECK(layer.is_visible());
 }
+
+// ────────────────────────────────────────────────────────────────────
+// Stereo (Config::stereo == true)
+// ────────────────────────────────────────────────────────────────────
+
+TEST_CASE("QuadLayer ctor rejects non-finite stereo_baseline_mm", "[unit][quad_layer]")
+{
+    VkContext ctx;
+    QuadLayer::Config cfg;
+    cfg.resolution = { 64, 64 };
+    cfg.stereo = true;
+    cfg.stereo_baseline_mm = std::numeric_limits<float>::quiet_NaN();
+    CHECK_THROWS_AS(QuadLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument);
+}
+
+TEST_CASE("QuadLayer stereo allocates paired DeviceImages for every slot", "[gpu][quad_layer][stereo]")
+{
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 64, 64 };
+    cfg.stereo = true;
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i)
+    {
+        REQUIRE(layer.device_image(i) != nullptr);
+        REQUIRE(layer.device_image_right(i) != nullptr);
+        // Distinct backing images — submit must NOT alias the two eyes.
+        CHECK(layer.device_image(i)->vk_image() != layer.device_image_right(i)->vk_image());
+        CHECK(layer.device_image(i)->cuda_array() != layer.device_image_right(i)->cuda_array());
+    }
+}
+
+TEST_CASE("QuadLayer mono device_image_right is null", "[gpu][quad_layer][stereo]")
+{
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 32, 32 };
+    // cfg.stereo defaults to false.
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i)
+    {
+        CHECK(layer.device_image_right(i) == nullptr);
+    }
+}
+
+TEST_CASE("QuadLayer mono submit(left, right) throws", "[gpu][quad_layer][stereo]")
+{
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 32, 32 };
+    // cfg.stereo defaults to false.
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    void* dev_l = nullptr;
+    void* dev_r = nullptr;
+    REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess);
+    REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess);
+    struct CudaFreeGuard
+    {
+        void* p;
+        ~CudaFreeGuard()
+        {
+            cudaFree(p);
+        }
+    } gl{ dev_l }, gr{ dev_r };
+
+    auto make_buf = [](void* p)
+    {
+        VizBuffer b{};
+        b.data = p;
+        b.width = 32;
+        b.height = 32;
+        b.format = PixelFormat::kRGBA8;
+        b.space = viz::MemorySpace::kDevice;
+        return b;
+    };
+    CHECK_THROWS_AS(layer.submit(make_buf(dev_l), make_buf(dev_r)), std::logic_error);
+}
+
+TEST_CASE("QuadLayer stereo submit(left) throws", "[gpu][quad_layer][stereo]")
+{
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 32, 32 };
+    cfg.stereo = true;
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    void* dev = nullptr;
+    REQUIRE(cudaMalloc(&dev, 32 * 32 * 4) == cudaSuccess);
+    struct CudaFreeGuard
+    {
+        void* p;
+        ~CudaFreeGuard()
+        {
+            cudaFree(p);
+        }
+    } guard{ dev };
+
+    VizBuffer src{};
+    src.data = dev;
+    src.width = 32;
+    src.height = 32;
+    src.format = PixelFormat::kRGBA8;
+    src.space = viz::MemorySpace::kDevice;
+    CHECK_THROWS_AS(layer.submit(src), std::logic_error);
+}
+
+namespace
+{
+// Fill a CUDA RGBA8 surface with a solid 32-bit color on the calling stream.
+void fill_solid_rgba(void* dev_ptr, uint32_t w, uint32_t h, uint32_t rgba8)
+{
+    std::vector<uint32_t> host(static_cast<size_t>(w) * h, rgba8);
+    REQUIRE(cudaMemcpy(dev_ptr, host.data(), host.size() * 4, cudaMemcpyHostToDevice) == cudaSuccess);
+}
+
+// Read pixel (0,0) of a cudaArray back to host.
+uint32_t read_pixel0_from_array(cudaArray_t arr)
+{
+    uint32_t px = 0;
+    REQUIRE(cudaMemcpy2DFromArray(&px, sizeof(uint32_t), arr, 0, 0, sizeof(uint32_t), 1, cudaMemcpyDeviceToHost) ==
+            cudaSuccess);
+    return px;
+}
+} // namespace
+
+TEST_CASE("QuadLayer stereo submit lands matching L/R pair in the latest slot", "[gpu][quad_layer][stereo]")
+{
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 32, 32 };
+    cfg.stereo = true;
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    void* dev_l = nullptr;
+    void* dev_r = nullptr;
+    REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess);
+    REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess);
+    struct CudaFreeGuard
+    {
+        void* p;
+        ~CudaFreeGuard()
+        {
+            cudaFree(p);
+        }
+    } gl{ dev_l }, gr{ dev_r };
+
+    // Distinct colors so a swapped binding would be visible in the readback.
+    constexpr uint32_t kLeftRgba = 0xFF1122AAu; // ABGR-packed in memory little-endian
+    constexpr uint32_t kRightRgba = 0xFFDDCC11u;
+    fill_solid_rgba(dev_l, 32, 32, kLeftRgba);
+    fill_solid_rgba(dev_r, 32, 32, kRightRgba);
+
+    auto make_buf = [](void* p)
+    {
+        VizBuffer b{};
+        b.data = p;
+        b.width = 32;
+        b.height = 32;
+        b.format = PixelFormat::kRGBA8;
+        b.space = viz::MemorySpace::kDevice;
+        return b;
+    };
+    layer.submit(make_buf(dev_l), make_buf(dev_r));
+
+    // Find which slot got the publish (the one whose semaphore was signaled).
+    // Iterate slots looking for a non-zero done-writing value; submit() syncs
+    // the stream so the data is guaranteed visible.
+    int written_slot = -1;
+    for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i)
+    {
+        if (layer.device_image(i)->cuda_done_writing_value() != 0)
+        {
+            written_slot = static_cast<int>(i);
+            break;
+        }
+    }
+    REQUIRE(written_slot >= 0);
+
+    // Left slot must hold the left color; right slot the right color. A
+    // mis-paired submit (e.g. both eyes writing to the left array) would
+    // surface as right_px == kLeftRgba.
+    const uint32_t left_px = read_pixel0_from_array(layer.device_image(written_slot)->cuda_array());
+    const uint32_t right_px = read_pixel0_from_array(layer.device_image_right(written_slot)->cuda_array());
+    CHECK(left_px == kLeftRgba);
+    CHECK(right_px == kRightRgba);
+}
+
+TEST_CASE("QuadLayer stereo rapid submits keep every L/R pair atomic", "[gpu][quad_layer][stereo]")
+{
+    // The mailbox guarantee for stereo: for any slot the producer has
+    // written, the left and right images come from the SAME submit().
+    // We submit a sequence of (L_i, R_i) pairs where L_i and R_i are
+    // derived from the same index i — so any slot that holds L_i must
+    // also hold R_i. A cross-eye write or torn pair would surface as
+    // mismatching i.
+    if (!is_gpu_available())
+    {
+        SKIP("No Vulkan-capable GPU available");
+    }
+    VkContext ctx;
+    ctx.init({});
+    auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } });
+
+    QuadLayer::Config cfg;
+    cfg.resolution = { 32, 32 };
+    cfg.stereo = true;
+    QuadLayer layer(ctx, target->render_pass(), cfg);
+
+    void* dev_l = nullptr;
+    void* dev_r = nullptr;
+    REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess);
+    REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess);
+    struct CudaFreeGuard
+    {
+        void* p;
+        ~CudaFreeGuard()
+        {
+            cudaFree(p);
+        }
+    } gl{ dev_l }, gr{ dev_r };
+
+    auto make_buf = [](void* p)
+    {
+        VizBuffer b{};
+        b.data = p;
+        b.width = 32;
+        b.height = 32;
+        b.format = PixelFormat::kRGBA8;
+        b.space = viz::MemorySpace::kDevice;
+        return b;
+    };
+
+    // Index i ∈ [1, kPairs] is encoded in the low byte of left (= i)
+    // and the next byte of right (= 100 + i, to make eye-swap visible).
+    // Start from 1 so untouched slots (low byte == 0) are easy to skip.
+    auto left_for = [](uint32_t i) { return 0xFF000000u | (i & 0xFFu); };
+    auto right_for = [](uint32_t i) { return 0xFF000000u | ((100u + i) & 0xFFu) << 8; };
+
+    constexpr uint32_t kPairs = QuadLayer::kSlotCount + 3;
+    for (uint32_t i = 1; i <= kPairs; ++i)
+    {
+        fill_solid_rgba(dev_l, 32, 32, left_for(i));
+        fill_solid_rgba(dev_r, 32, 32, right_for(i));
+        layer.submit(make_buf(dev_l), make_buf(dev_r));
+    }
+
+    // Walk every slot. A slot is "written" iff its left low-byte index
+    // is one of the values we submitted (1..kPairs). For each such slot,
+    // its right MUST match right_for(i) — that's the atomicity proof.
+    uint32_t written = 0;
+    for (uint32_t s = 0; s < QuadLayer::kSlotCount; ++s)
+    {
+        const uint32_t lp = read_pixel0_from_array(layer.device_image(s)->cuda_array());
+        const uint32_t rp = read_pixel0_from_array(layer.device_image_right(s)->cuda_array());
+        const uint32_t i = lp & 0xFFu;
+        if (i < 1 || i > kPairs)
+        {
+            continue;
+        }
+        // Slot index decoded from left → reconstruct expected right.
+        CHECK(lp == left_for(i));
+        CHECK(rp == right_for(i));
+        ++written;
+    }
+    // The producer must have actually used the mailbox.
+    CHECK(written > 0);
+}
diff --git a/src/viz/python/bindings_helpers.hpp b/src/viz/python/bindings_helpers.hpp
index e2d91c530..e45d59670 100644
--- a/src/viz/python/bindings_helpers.hpp
+++ b/src/viz/python/bindings_helpers.hpp
@@ -12,10 +12,12 @@
 
 #include <pybind11/pybind11.h>
 #include <viz/core/viz_buffer.hpp>
+#include <viz/core/viz_types.hpp>
 
 #include <cstddef>
 #include <cstdint>
 #include <stdexcept>
+#include <string>
 
 namespace viz_py
 {
@@ -59,6 +61,135 @@ inline py::tuple shape_for(uint32_t width, uint32_t height, viz::PixelFormat for
     throw std::runtime_error("VizBuffer: unknown PixelFormat");
 }
 
+// Convert a Python object exposing ``__cuda_array_interface__`` (CuPy /
+// PyTorch / Numba / VizBuffer / any other CAI-2-or-3 producer) into a
+// VizBuffer suitable for ``QuadLayer::submit``. Validates the protocol
+// dict eagerly so silent dtype / shape / stride mismatches surface as a
+// readable error instead of corrupted pixels or a cryptic CUDA error.
+//
+// ``label`` is prefixed to every error message — the unified
+// ``QuadLayer.submit`` binding passes "submit(left)" / "submit(right)"
+// so stereo callers know which buffer failed.
+//
+// Layout requirements: row-major, tightly-packed-within-each-row.
+// ``submit`` does one ``cudaMemcpy2D`` per row at the row pitch, so
+// non-unit pixel/channel strides would silently mis-pack the texture.
+inline viz::VizBuffer cuda_array_to_viz_buffer(py::object obj,
+                                               viz::PixelFormat expected_format,
+                                               viz::Resolution expected_resolution,
+                                               const char* label)
+{
+    if (!py::hasattr(obj, "__cuda_array_interface__"))
+    {
+        throw std::runtime_error(std::string(label) + ": object does not expose __cuda_array_interface__");
+    }
+    py::dict iface = obj.attr("__cuda_array_interface__").cast<py::dict>();
+    if (!iface.contains("shape") || !iface.contains("typestr") || !iface.contains("data"))
+    {
+        throw std::runtime_error(std::string(label) +
+                                 ": __cuda_array_interface__ missing required key (shape/typestr/data)");
+    }
+
+    const char* expected_typestr = nullptr;
+    std::size_t expected_rank = 0;
+    std::size_t expected_channels = 0;
+    if (expected_format == viz::PixelFormat::kRGBA8)
+    {
+        expected_typestr = "|u1";
+        expected_rank = 3;
+        expected_channels = 4;
+    }
+    else if (expected_format == viz::PixelFormat::kD32F)
+    {
+        expected_typestr = "<f4";
+        expected_rank = 2;
+        expected_channels = 1;
+    }
+    else
+    {
+        throw std::runtime_error(std::string(label) + ": unsupported layer PixelFormat");
+    }
+
+    const std::string typestr = iface["typestr"].cast<std::string>();
+    if (typestr != expected_typestr)
+    {
+        throw std::runtime_error(std::string(label) + ": typestr '" + typestr +
+                                 "' does not match layer format (expected '" + expected_typestr + "')");
+    }
+
+    py::tuple shape = iface["shape"].cast<py::tuple>();
+    if (shape.size() != expected_rank)
+    {
+        throw std::runtime_error(std::string(label) + ": shape rank " + std::to_string(shape.size()) +
+                                 " does not match layer format (expected " + std::to_string(expected_rank) + ")");
+    }
+    const uint32_t h = shape[0].cast<uint32_t>();
+    const uint32_t w = shape[1].cast<uint32_t>();
+    if (expected_channels > 1)
+    {
+        const std::size_t c = shape[2].cast<std::size_t>();
+        if (c != expected_channels)
+        {
+            throw std::runtime_error(std::string(label) + ": channel count " + std::to_string(c) +
+                                     " does not match layer format (expected " + std::to_string(expected_channels) + ")");
+        }
+    }
+    if (h != expected_resolution.height || w != expected_resolution.width)
+    {
+        throw std::runtime_error(std::string(label) + ": shape (" + std::to_string(h) + ", " + std::to_string(w) +
+                                 ") does not match layer resolution (" + std::to_string(expected_resolution.height) +
+                                 ", " + std::to_string(expected_resolution.width) + ")");
+    }
+
+    const std::size_t bpp = viz::bytes_per_pixel(expected_format);
+    std::size_t pitch_bytes = 0;
+    if (iface.contains("strides") && !iface["strides"].is_none())
+    {
+        py::tuple strides = iface["strides"].cast<py::tuple>();
+        if (strides.size() != expected_rank)
+        {
+            throw std::runtime_error(std::string(label) + ": strides rank " + std::to_string(strides.size()) +
+                                     " does not match shape rank " + std::to_string(expected_rank));
+        }
+        const std::ptrdiff_t row_stride = strides[0].cast<std::ptrdiff_t>();
+        const std::ptrdiff_t pixel_stride = strides[1].cast<std::ptrdiff_t>();
+        if (row_stride < static_cast<std::ptrdiff_t>(w * bpp))
+        {
+            throw std::runtime_error(std::string(label) + ": row stride " + std::to_string(row_stride) +
+                                     " is less than width*bpp " + std::to_string(w * bpp) +
+                                     " — non-positive or reversed strides aren't supported");
+        }
+        if (pixel_stride != static_cast<std::ptrdiff_t>(bpp))
+        {
+            throw std::runtime_error(std::string(label) + ": pixel stride " + std::to_string(pixel_stride) +
+                                     " does not match bytes-per-pixel " + std::to_string(bpp) +
+                                     " — transposed / non-contiguous-per-pixel layout isn't supported");
+        }
+        if (expected_rank == 3)
+        {
+            const std::ptrdiff_t channel_stride = strides[2].cast<std::ptrdiff_t>();
+            if (channel_stride != 1)
+            {
+                throw std::runtime_error(std::string(label) + ": channel stride " + std::to_string(channel_stride) +
+                                         " is not 1 — non-contiguous channels aren't supported");
+            }
+        }
+        pitch_bytes = static_cast<std::size_t>(row_stride);
+    }
+
+    py::tuple data = iface["data"].cast<py::tuple>();
+    const uintptr_t ptr = data[0].cast<uintptr_t>();
+
+    viz::VizBuffer buf;
+    buf.data = reinterpret_cast<void*>(ptr);
+    buf.width = w;
+    buf.height = h;
+    buf.format = expected_format;
+    buf.pitch = pitch_bytes; // 0 = tightly packed; submit() uses effective_pitch().
+    buf.space = viz::MemorySpace::kDevice;
+    return buf;
+}
+
 // Build the dict returned by __cuda_array_interface__ / __array_interface__.
 // Version 3 of the protocol (matches what CuPy / Numba / PyTorch expect).
 // `data` is (ptr_as_int, read_only). `strides` is None for C-contiguous,
diff --git a/src/viz/python/layers_bindings.cpp b/src/viz/python/layers_bindings.cpp
index 38a815c17..dba90c76c 100644
--- a/src/viz/python/layers_bindings.cpp
+++ b/src/viz/python/layers_bindings.cpp
@@ -60,7 +60,16 @@ void bind_layers(py::module_& m)
         .def_readwrite("placement", &viz::QuadLayer::Config::placement)
         .def_readwrite("generate_mipmaps", &viz::QuadLayer::Config::generate_mipmaps,
                        "Allocate + regenerate a capped mip chain each frame; sampler "
-                       "uses trilinear filtering. Off by default.");
+                       "uses trilinear filtering. On by default.")
+        .def_readwrite("stereo", &viz::QuadLayer::Config::stereo,
+                       "Per-eye stereo. When true, submit MUST be called with both buffers; "
+                       "view 0 (left eye) samples the left buffer, view 1 (right eye) the right. "
+                       "Memory doubles. Off by default.")
+        .def_readwrite("stereo_baseline_mm", &viz::QuadLayer::Config::stereo_baseline_mm,
+                       "Horizontal disparity between left and right planes (millimeters), "
+                       "applied along the placement's local +x axis. 0 → both eyes see the "
+                       "same world quad. Ignored unless stereo + kXr. mm-scale chosen because "
+                       "typical IPDs / stereo camera baselines are 50–80 mm.");
 
     // ── QuadLayer (non-owning; session owns the lifetime) ─────────────
 
@@ -69,145 +78,53 @@ void bind_layers(py::module_& m)
 Single CUDA-fed quad layer. Owned by VizSession; the Python handle is
 non-owning (don't keep it around past the session).
 
-Call ``submit`` with a VizBuffer or any object exposing
-``__cuda_array_interface__``. Render order = insertion order.
+Render order = insertion order. Call ``submit(left, right=None, stream=0)``:
+
+  * Mono layer (Config.stereo == False): pass exactly one buffer as
+    ``left``. Passing ``right`` raises ``RuntimeError``.
+  * Stereo layer (Config.stereo == True): pass both. Missing ``right``
+    raises ``RuntimeError``. Both buffers are copied on the same CUDA
+    stream + a single semaphore signals when they're both ready, so
+    the renderer never sees a half-matched pair.
+
+Each buffer is either a ``VizBuffer`` (passed straight to C++) or any
+object exposing ``__cuda_array_interface__`` (CuPy / PyTorch / Numba /
+numpy on a CUDA device pointer); the binding converts it on the fly.
 )doc")
         .def(
-            "submit", [](viz::QuadLayer& self, const viz::VizBuffer& src) { self.submit(src); }, "src"_a,
-            py::call_guard<py::gil_scoped_release>(), "Submit a pre-built VizBuffer (kDevice).")
-        .def(
-            "submit_cuda_array",
-            [](viz::QuadLayer& self, py::object obj, uintptr_t stream)
+            "submit",
+            [](viz::QuadLayer& self, py::object left, py::object right, uintptr_t stream)
             {
-                // Accept anything exposing __cuda_array_interface__. Validate
-                // the dict before constructing a VizBuffer — silent dtype /
-                // shape / stride mismatches would surface inside the cudaMemcpy
-                // as either corrupted pixels or a cryptic CUDA error.
-                if (!py::hasattr(obj, "__cuda_array_interface__"))
-                {
-                    throw std::runtime_error("submit_cuda_array: object does not expose __cuda_array_interface__");
-                }
-                py::dict iface = obj.attr("__cuda_array_interface__").cast<py::dict>();
-                if (!iface.contains("shape") || !iface.contains("typestr") || !iface.contains("data"))
-                {
-                    throw std::runtime_error(
-                        "submit_cuda_array: __cuda_array_interface__ missing required key (shape/typestr/data)");
-                }
-
-                // Per-format expectations (typestr + channels). Must match the
-                // layer's PixelFormat exactly — submit() reinterprets memory.
-                const viz::PixelFormat fmt = self.format();
-                const char* expected_typestr = nullptr;
-                std::size_t expected_rank = 0;
-                std::size_t expected_channels = 0;
-                if (fmt == viz::PixelFormat::kRGBA8)
-                {
-                    expected_typestr = "|u1";
-                    expected_rank = 3;
-                    expected_channels = 4;
-                }
-                else if (fmt == viz::PixelFormat::kD32F)
+                // Resolve each Python arg to a VizBuffer. VizBuffer passes
+                // through; anything else goes via the cuda-array-interface
+                // converter (which validates dtype / shape / strides
+                // before constructing the buffer).
+                auto to_buf = [&self](py::object obj, const char* label) -> viz::VizBuffer
                 {
-                    expected_typestr = "<f4";
-                    expected_rank = 2;
-                    expected_channels = 1;
-                }
-                else
-                {
-                    throw std::runtime_error("submit_cuda_array: unsupported layer PixelFormat");
-                }
-
-                const std::string typestr = iface["typestr"].cast<std::string>();
-                if (typestr != expected_typestr)
-                {
-                    throw std::runtime_error(std::string("submit_cuda_array: typestr '") + typestr +
-                                             "' does not match layer format (expected '" + expected_typestr + "')");
-                }
-
-                py::tuple shape = iface["shape"].cast<py::tuple>();
-                if (shape.size() != expected_rank)
-                {
-                    throw std::runtime_error("submit_cuda_array: shape rank " + std::to_string(shape.size()) +
-                                             " does not match layer format (expected " + std::to_string(expected_rank) +
-                                             ")");
-                }
-                const uint32_t h = shape[0].cast<uint32_t>();
-                const uint32_t w = shape[1].cast<uint32_t>();
-                if (expected_channels > 1)
-                {
-                    const std::size_t c = shape[2].cast<std::size_t>();
-                    if (c != expected_channels)
+                    if (py::isinstance<viz::VizBuffer>(obj))
                     {
-                        throw std::runtime_error("submit_cuda_array: channel count " + std::to_string(c) +
-                                                 " does not match layer format (expected " +
-                                                 std::to_string(expected_channels) + ")");
+                        return obj.cast<viz::VizBuffer>();
                     }
-                }
-                const viz::Resolution res = self.resolution();
-                if (h != res.height || w != res.width)
+                    return cuda_array_to_viz_buffer(obj, self.format(), self.resolution(), label);
+                };
+
+                if (right.is_none())
                 {
-                    throw std::runtime_error("submit_cuda_array: shape (" + std::to_string(h) + ", " +
-                                             std::to_string(w) + ") does not match layer resolution (" +
-                                             std::to_string(res.height) + ", " + std::to_string(res.width) + ")");
+                    viz::VizBuffer left_buf = to_buf(left, "QuadLayer.submit(left)");
+                    py::gil_scoped_release release;
+                    self.submit(left_buf, reinterpret_cast<cudaStream_t>(stream));
                 }
-
-                // Row pitch: explicit when strides present + non-null (slice
-                // views, padded buffers); else tightly packed. We require
-                // row-major, tightly-packed-within-each-row layout because
-                // submit() does a single cudaMemcpy2D per row at row_pitch
-                // stride — non-unit pixel/channel strides would silently
-                // mis-pack the destination texture.
-                const std::size_t bpp = viz::bytes_per_pixel(fmt);
-                std::size_t pitch_bytes = 0;
-                if (iface.contains("strides") && !iface["strides"].is_none())
+                else
                 {
-                    py::tuple strides = iface["strides"].cast<py::tuple>();
-                    if (strides.size() != expected_rank)
-                    {
-                        throw std::runtime_error("submit_cuda_array: strides rank " + std::to_string(strides.size()) +
-                                                 " does not match shape rank " + std::to_string(expected_rank));
-                    }
-                    const std::ptrdiff_t row_stride = strides[0].cast<std::ptrdiff_t>();
-                    const std::ptrdiff_t pixel_stride = strides[1].cast<std::ptrdiff_t>();
-                    if (row_stride < static_cast<std::ptrdiff_t>(w * bpp))
-                    {
-                        throw std::runtime_error("submit_cuda_array: row stride " + std::to_string(row_stride) +
-                                                 " is less than width*bpp " + std::to_string(w * bpp) +
-                                                 " — non-positive or reversed strides aren't supported");
-                    }
-                    if (pixel_stride != static_cast<std::ptrdiff_t>(bpp))
-                    {
-                        throw std::runtime_error("submit_cuda_array: pixel stride " + std::to_string(pixel_stride) +
-                                                 " does not match bytes-per-pixel " + std::to_string(bpp) +
-                                                 " — transposed / non-contiguous-per-pixel layout isn't supported");
-                    }
-                    if (expected_rank == 3)
-                    {
-                        const std::ptrdiff_t channel_stride = strides[2].cast<std::ptrdiff_t>();
-                        if (channel_stride != 1)
-                        {
-                            throw std::runtime_error("submit_cuda_array: channel stride " +
-                                                     std::to_string(channel_stride) +
-                                                     " is not 1 — non-contiguous channels aren't supported");
-                        }
-                    }
-                    pitch_bytes = static_cast<std::size_t>(row_stride);
+                    viz::VizBuffer left_buf = to_buf(left, "QuadLayer.submit(left)");
+                    viz::VizBuffer right_buf = to_buf(right, "QuadLayer.submit(right)");
+                    py::gil_scoped_release release;
+                    self.submit(left_buf, right_buf, reinterpret_cast<cudaStream_t>(stream));
                 }
-
-                py::tuple data = iface["data"].cast<py::tuple>();
-                const uintptr_t ptr = data[0].cast<uintptr_t>();
-
-                viz::VizBuffer buf;
-                buf.data = reinterpret_cast<void*>(ptr);
-                buf.width = w;
-                buf.height = h;
-                buf.format = fmt;
-                buf.pitch = pitch_bytes; // 0 = tightly packed; submit() uses effective_pitch().
-                buf.space = viz::MemorySpace::kDevice;
-                py::gil_scoped_release release;
-                self.submit(buf, reinterpret_cast<cudaStream_t>(stream));
             },
-            "obj"_a, "stream"_a = 0, "Submit any object exposing __cuda_array_interface__ (CuPy / PyTorch / Numba).")
+            "left"_a, "right"_a = py::none(), "stream"_a = 0,
+            "Submit a frame. Each arg is a VizBuffer or any __cuda_array_interface__ "
+            "object. Mono layer: pass only ``left``. Stereo layer: pass both.")
         .def_property_readonly("resolution", &viz::QuadLayer::resolution)
         .def_property_readonly("format", &viz::QuadLayer::format)
         .def_property_readonly("aspect_ratio", &viz::QuadLayer::aspect_ratio)
diff --git a/src/viz/python/viz_init.py b/src/viz/python/viz_init.py
index c8c52665a..c07923b0d 100644
--- a/src/viz/python/viz_init.py
+++ b/src/viz/python/viz_init.py
@@ -22,8 +22,9 @@
     layer_cfg.resolution = viz.Resolution(1024, 1024)
     layer = session.add_quad_layer(layer_cfg)
 
-    # CuPy / PyTorch / Numba arrays (anything with __cuda_array_interface__):
-    layer.submit_cuda_array(cupy_rgba8)
+    # CuPy / PyTorch / Numba arrays (anything with __cuda_array_interface__),
+    # or a pre-built VizBuffer:
+    layer.submit(cupy_rgba8)
 
     info = session.render()
     img = session.readback_to_host()      # HostImage with __array_interface__
diff --git a/src/viz/python_tests/test_offscreen_session.py b/src/viz/python_tests/test_offscreen_session.py
index 893404974..d1f0946bc 100644
--- a/src/viz/python_tests/test_offscreen_session.py
+++ b/src/viz/python_tests/test_offscreen_session.py
@@ -92,7 +92,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface():
     layer = session.add_quad_layer(layer_cfg)
     assert layer.name == "cam"
 
-    # Solid green RGBA8 source. submit_cuda_array consumes
+    # Solid green RGBA8 source. submit consumes
     # __cuda_array_interface__ on the CuPy array.
     #
     # Build host-side first, then H2D once via cp.asarray. Avoiding
@@ -102,7 +102,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface():
     host_src[..., 1] = 200  # G
     host_src[..., 3] = 255  # A
     src = cp.asarray(host_src)
-    layer.submit_cuda_array(src)
+    layer.submit(src)
 
     info = session.render()
     assert info.frame_index == 0
@@ -114,7 +114,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface():
     r, g, b, _a = arr[cy, cx]
     assert g > r and g > b
 
-    # ── submit_cuda_array validation ──────────────────────────────────
+    # ── submit validation ──────────────────────────────────
     # Bad inputs built host-side then transferred via cp.asarray so we
     # don't depend on libnvrtc.so being present (cp.zeros / setitem
     # would JIT-compile a fill kernel and the GPU CI runner ships only
@@ -123,21 +123,131 @@ def test_quad_layer_round_trip_via_cuda_array_interface():
     # Wrong dtype: layer is RGBA8 (uint8); float32 source must reject.
     bad_dtype = cp.asarray(np.zeros((32, 32, 4), dtype=np.float32))
     with pytest.raises(RuntimeError, match="typestr"):
-        layer.submit_cuda_array(bad_dtype)
+        layer.submit(bad_dtype)
 
     # Wrong shape: doesn't match layer resolution.
     bad_shape = cp.asarray(np.zeros((16, 16, 4), dtype=np.uint8))
     with pytest.raises(RuntimeError, match="resolution"):
-        layer.submit_cuda_array(bad_shape)
+        layer.submit(bad_shape)
 
     # Wrong channel count: RGB instead of RGBA.
     bad_channels = cp.asarray(np.zeros((32, 32, 3), dtype=np.uint8))
     with pytest.raises(RuntimeError, match="channel"):
-        layer.submit_cuda_array(bad_channels)
+        layer.submit(bad_channels)
 
     # Wrong rank: 2D for an RGBA layer.
     bad_rank = cp.asarray(np.zeros((32, 32), dtype=np.uint8))
     with pytest.raises(RuntimeError, match="rank"):
-        layer.submit_cuda_array(bad_rank)
+        layer.submit(bad_rank)
+
+    session.destroy()
+
+
+def test_stereo_quad_layer_round_trip():
+    """Stereo QuadLayer in offscreen mode renders the LEFT buffer (per
+    the documented single-view fallback). Distinct L/R sources confirm
+    we're sampling left and not accidentally swapping eyes."""
+    cp = pytest.importorskip("cupy")
+    try:
+        if cp.cuda.runtime.getDeviceCount() == 0:
+            pytest.skip("no CUDA device")
+    except cp.cuda.runtime.CUDARuntimeError:
+        pytest.skip("no CUDA device")
+
+    cfg = viz.VizSessionConfig()
+    cfg.mode = viz.DisplayMode.kOffscreen
+    cfg.window_width = 64
+    cfg.window_height = 64
+    session = viz.VizSession.create(cfg)
+
+    layer_cfg = viz.QuadLayerConfig()
+    layer_cfg.name = "stereo_cam"
+    layer_cfg.resolution = viz.Resolution(32, 32)
+    layer_cfg.stereo = True
+    layer_cfg.stereo_baseline_mm = 65.0  # roughly human IPD
+    layer = session.add_quad_layer(layer_cfg)
+
+    host_left = np.zeros((32, 32, 4), dtype=np.uint8)
+    host_left[..., 1] = 200  # G — distinguishable from right
+    host_left[..., 3] = 255
+    host_right = np.zeros((32, 32, 4), dtype=np.uint8)
+    host_right[..., 2] = 200  # B — distinguishable from left
+    host_right[..., 3] = 255
+
+    left = cp.asarray(host_left)
+    right = cp.asarray(host_right)
+    layer.submit(left, right)
+
+    session.render()
+    img = session.readback_to_host()
+    arr = np.asarray(img)
+    cx, cy = 32, 32
+    r, g, b, _a = arr[cy, cx]
+    # Offscreen / single-view backends draw the LEFT buffer per docs.
+    # If we accidentally bound the right descriptor here, this pixel
+    # would be blue-dominant instead of green-dominant.
+    assert g > r and g > b, (
+        f"expected green-dominant (left buffer), got ({r}, {g}, {b})"
+    )
+
+    session.destroy()
+
+
+def test_stereo_invariants():
+    """submit's strict mono/stereo contract: one-arg on stereo throws,
+    two-arg on mono throws. Validated end-to-end through the binding."""
+    cp = pytest.importorskip("cupy")
+    try:
+        if cp.cuda.runtime.getDeviceCount() == 0:
+            pytest.skip("no CUDA device")
+    except cp.cuda.runtime.CUDARuntimeError:
+        pytest.skip("no CUDA device")
+
+    cfg = viz.VizSessionConfig()
+    cfg.mode = viz.DisplayMode.kOffscreen
+    cfg.window_width = 64
+    cfg.window_height = 64
+    session = viz.VizSession.create(cfg)
+
+    mono_cfg = viz.QuadLayerConfig()
+    mono_cfg.name = "mono"
+    mono_cfg.resolution = viz.Resolution(32, 32)
+    # mono_cfg.stereo defaults to False.
+    mono_layer = session.add_quad_layer(mono_cfg)
+
+    stereo_cfg = viz.QuadLayerConfig()
+    stereo_cfg.name = "stereo"
+    stereo_cfg.resolution = viz.Resolution(32, 32)
+    stereo_cfg.stereo = True
+    stereo_layer = session.add_quad_layer(stereo_cfg)
+
+    blank = cp.asarray(np.zeros((32, 32, 4), dtype=np.uint8))
+
+    # Mono + right buffer → reject.
+    with pytest.raises(RuntimeError, match="mono"):
+        mono_layer.submit(blank, blank)
+
+    # Stereo + missing right → reject.
+    with pytest.raises(RuntimeError, match="stereo"):
+        stereo_layer.submit(blank)
+
+    session.destroy()
+
+
+def test_stereo_invalid_baseline_rejected():
+    """Non-finite baseline values must be rejected at ctor time."""
+    cfg = viz.VizSessionConfig()
+    cfg.mode = viz.DisplayMode.kOffscreen
+    cfg.window_width = 64
+    cfg.window_height = 64
+    session = viz.VizSession.create(cfg)
+
+    layer_cfg = viz.QuadLayerConfig()
+    layer_cfg.name = "bad_baseline"
+    layer_cfg.resolution = viz.Resolution(32, 32)
+    layer_cfg.stereo = True
+    layer_cfg.stereo_baseline_mm = float("nan")
+    with pytest.raises((RuntimeError, ValueError), match="stereo_baseline_mm"):
+        session.add_quad_layer(layer_cfg)
 
     session.destroy()