diff --git a/examples/camera_viz/camera_streamer.py b/examples/camera_viz/camera_streamer.py index 08ce48085..e3fe05104 100755 --- a/examples/camera_viz/camera_streamer.py +++ b/examples/camera_viz/camera_streamer.py @@ -30,7 +30,7 @@ import yaml from pipeline import FrameSource -from sources import build_local_camera +from sources import PairedFrameSource, build_local_camera from transports import RtpH264Sender, make_encoder logger = logging.getLogger("camera_streamer") @@ -45,16 +45,22 @@ SUPERVISOR_TICK_S = 1.0 -def _pick_mono_source(sources: List[FrameSource], camera_name: str) -> FrameSource: - """Mono-only sender: exactly one source per camera. Stereo cameras - aren't supported here pending per-eye QuadLayer binding.""" +def _eye_sources(sources: List[FrameSource], camera_name: str) -> List[FrameSource]: + """Normalize ``build_local_camera`` output into a 1-or-2 element list. + + Mono cameras → [src]. Stereo cameras → [left, right] unwrapped from + the PairedFrameSource wrapper that ``build_local_camera`` returns. + The streamer then fires one independent RTP stream per element.""" + if len(sources) == 1 and isinstance(sources[0], PairedFrameSource): + paired = sources[0] + return [paired.left, paired.right] if len(sources) != 1: names = [s.spec.name for s in sources] raise ValueError( f"camera {camera_name!r} produced {len(sources)} streams {names}; " - "only mono cameras are supported here. Set mode/stereo to mono/false." + "expected 1 (mono) or a PairedFrameSource (stereo)." ) - return sources[0] + return [sources[0]] class CameraSupervisor: @@ -96,66 +102,91 @@ def stop(self) -> None: return self._thread = None - def _build_sender(self) -> RtpH264Sender: - source = _pick_mono_source(build_local_camera(self._cfg), self._name) + def _build_senders(self) -> List[RtpH264Sender]: + eyes = _eye_sources(build_local_camera(self._cfg), self._name) rtp = self._cfg.get("rtp", {}) if "port" not in rtp: raise ValueError(f"camera {self._name!r} missing rtp.port") - encoder = make_encoder( - rtp.get("encoder", self._default_encoder), - width=int(self._cfg["width"]), - height=int(self._cfg["height"]), - bitrate=int(rtp.get("bitrate_mbps", 15)) * 1_000_000, - fps=int(self._cfg.get("fps", 30)), - gop=int(rtp["gop"]) if "gop" in rtp else None, - gpu_id=int(rtp.get("gpu_id", 0)), - ) - return RtpH264Sender( - source=source, - encoder=encoder, - host=self._host, - port=int(rtp["port"]), - width=int(self._cfg["width"]), - height=int(self._cfg["height"]), - fps=int(self._cfg.get("fps", 30)), - mtu=int(rtp.get("mtu", 1400)), - ) + is_stereo = len(eyes) == 2 + if is_stereo and "port_right" not in rtp: + raise ValueError( + f"camera {self._name!r}: stereo requires rtp.port_right (the " + "left eye goes to rtp.port, the right eye to rtp.port_right)" + ) + + def build_one(source: FrameSource, port: int) -> RtpH264Sender: + encoder = make_encoder( + rtp.get("encoder", self._default_encoder), + width=int(self._cfg["width"]), + height=int(self._cfg["height"]), + bitrate=int(rtp.get("bitrate_mbps", 15)) * 1_000_000, + fps=int(self._cfg.get("fps", 30)), + gop=int(rtp["gop"]) if "gop" in rtp else None, + gpu_id=int(rtp.get("gpu_id", 0)), + ) + return RtpH264Sender( + source=source, + encoder=encoder, + host=self._host, + port=port, + width=int(self._cfg["width"]), + height=int(self._cfg["height"]), + fps=int(self._cfg.get("fps", 30)), + mtu=int(rtp.get("mtu", 1400)), + ) + + if is_stereo: + return [ + build_one(eyes[0], int(rtp["port"])), + build_one(eyes[1], int(rtp["port_right"])), + ] + return [build_one(eyes[0], int(rtp["port"]))] def _run(self) -> None: attempt = 0 while not self._stop.is_set(): attempt += 1 - sender: Optional[RtpH264Sender] = None + senders: List[RtpH264Sender] = [] started_at: Optional[float] = None try: logger.info("camera %r: building (attempt %d)", self._name, attempt) - sender = self._build_sender() - sender.start() + senders = self._build_senders() + for s in senders: + s.start() started_at = time.monotonic() - logger.info( - "camera %r: streaming → %s:%s", - self._name, - self._host, - self._cfg["rtp"]["port"], - ) - # Poll sender liveness. If the send-loop thread dies + rtp = self._cfg.get("rtp", {}) + if len(senders) == 2: + logger.info( + "camera %r: streaming stereo → %s:%s (L) + %s:%s (R)", + self._name, + self._host, + rtp.get("port"), + self._host, + rtp.get("port_right"), + ) + else: + logger.info( + "camera %r: streaming → %s:%s", + self._name, + self._host, + rtp.get("port"), + ) + # Poll sender liveness. If any send-loop thread dies # after startup (GStreamer pipeline error, encoder - # crash, etc.) raise into the retry path; otherwise a - # silent-but-dead supervisor would keep the service - # "healthy" while nothing is being streamed. + # crash, etc.) raise into the retry path — for stereo + # we treat the pair atomically: if one eye drops, we + # restart both. while not self._stop.is_set(): self._stop.wait(timeout=SUPERVISOR_TICK_S) - if not sender.is_alive(): - raise RuntimeError("RtpH264Sender thread exited unexpectedly") + dead = [s for s in senders if not s.is_alive()] + if dead: + raise RuntimeError( + f"{len(dead)}/{len(senders)} RtpH264Sender thread(s) exited unexpectedly" + ) except KeyboardInterrupt: - # SIGINT during ``sender.start()`` arrives as KeyboardInterrupt - # in this thread; surface as a stop, not a retry. self._stop.set() break except Exception as e: - # ``camera_streamer`` is supposed to never exit. Log full - # traceback at debug and a one-liner at warning so journalctl - # stays readable while preserving the detail for triage. uptime = (time.monotonic() - started_at) if started_at else 0.0 logger.warning( "camera %r: failure after %.1fs uptime: %s — retrying in %.1fs", @@ -166,9 +197,9 @@ def _run(self) -> None: ) logger.debug("camera %r: traceback", self._name, exc_info=True) finally: - if sender is not None: + for s in senders: try: - sender.stop() + s.stop() except Exception: logger.debug( "camera %r: sender.stop() raised", self._name, exc_info=True diff --git a/examples/camera_viz/camera_viz.py b/examples/camera_viz/camera_viz.py index d4eb6a94c..368125991 100755 --- a/examples/camera_viz/camera_viz.py +++ b/examples/camera_viz/camera_viz.py @@ -20,6 +20,7 @@ import argparse import signal import sys +from dataclasses import dataclass from pathlib import Path from typing import List, Optional, Tuple @@ -29,9 +30,23 @@ from pipeline import FrameSource, VizRunner from placements import PlacementConfig, PlacementStrategy, build as build_placement -from sources import RtpH264Source, build_local_camera +from sources import PairedFrameSource, RtpH264Source, build_local_camera -SourceEntry = Tuple[FrameSource, Optional[PlacementStrategy]] + +@dataclass +class SourceEntry: + """One row in the layer plan: a source + its placement + stereo cfg. + + ``stereo`` and ``stereo_baseline_mm`` are pulled from the camera spec + (``cameras..stereo``) and the placement spec + (``placements..stereo_baseline_mm``) respectively. They drive + the QuadLayer Config when the layer is added to the session. + """ + + source: FrameSource + placement: Optional[PlacementStrategy] + stereo: bool = False + stereo_baseline_mm: float = 0.0 def _build_placement(spec: Optional[dict], is_xr: bool) -> Optional[PlacementStrategy]: @@ -82,19 +97,48 @@ def _placement_with_aspect( return _build_placement(spec, is_xr) +def _stereo_for(cam: dict, placements_cfg: dict) -> Tuple[bool, float]: + """Resolve stereo + baseline for one camera. + + ``stereo`` lives on the camera (so the producer side knows). The + rendering knob ``stereo_baseline_mm`` lives on the placement (it's + a display-time parameter). 0.0 means both eyes see the same world + quad — all parallax comes from the captured frames. + """ + stereo = bool(cam.get("stereo", False)) + pspec = placements_cfg.get(cam["name"]) or {} + baseline_mm = float(pspec.get("stereo_baseline_mm", 0.0)) + return stereo, baseline_mm + + def _build_local_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]: """source=local: open each enabled camera directly.""" placements_cfg = cfg.get("display", {}).get("placements", {}) entries: List[SourceEntry] = [] for cam in _enabled_cameras(cfg): placement = _placement_with_aspect(placements_cfg.get(cam["name"]), cam, is_xr) + stereo, baseline_mm = _stereo_for(cam, placements_cfg) for source in build_local_camera(cam): - entries.append((source, placement)) + entries.append( + SourceEntry( + source=source, + placement=placement, + stereo=stereo, + stereo_baseline_mm=baseline_mm, + ) + ) return entries def _build_rtp_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]: - """source=rtp: build an RTP listener per camera using its ``rtp.port``.""" + """source=rtp: build an RTP listener per camera using its ``rtp.port``. + + Stereo cameras open TWO listeners (rtp.port for left, rtp.port_right + for right) and pair them via PairedFrameSource. The wire path treats + the two eyes as independent streams — drift is acceptable (the user + accepted "no sync" for RTP stereo); paired-frame atomicity at the + QuadLayer mailbox is what stops torn pairs from reaching the GPU. + """ placements_cfg = cfg.get("display", {}).get("placements", {}) entries: List[SourceEntry] = [] for cam in _enabled_cameras(cfg): @@ -104,16 +148,52 @@ def _build_rtp_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]: f"camera_viz: camera {cam.get('name')!r} missing rtp.port; " "required when source: rtp" ) - source = RtpH264Source( - name=cam["name"], - width=int(cam["width"]), - height=int(cam["height"]), - port=int(rtp["port"]), - rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)), - gpu_id=int(rtp.get("gpu_id", 0)), - ) placement = _placement_with_aspect(placements_cfg.get(cam["name"]), cam, is_xr) - entries.append((source, placement)) + stereo, baseline_mm = _stereo_for(cam, placements_cfg) + + if stereo: + if "port_right" not in rtp: + raise ValueError( + f"camera_viz: stereo camera {cam.get('name')!r} missing " + "rtp.port_right (required when stereo + source: rtp)" + ) + left = RtpH264Source( + name=f"{cam['name']}.left", + width=int(cam["width"]), + height=int(cam["height"]), + port=int(rtp["port"]), + rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)), + gpu_id=int(rtp.get("gpu_id", 0)), + ) + right = RtpH264Source( + name=f"{cam['name']}.right", + width=int(cam["width"]), + height=int(cam["height"]), + port=int(rtp["port_right"]), + rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)), + gpu_id=int(rtp.get("gpu_id", 0)), + ) + source: FrameSource = PairedFrameSource( + name=cam["name"], left=left, right=right + ) + else: + source = RtpH264Source( + name=cam["name"], + width=int(cam["width"]), + height=int(cam["height"]), + port=int(rtp["port"]), + rtp_buffer_size=int(rtp.get("rtp_buffer_size", 212992)), + gpu_id=int(rtp.get("gpu_id", 0)), + ) + + entries.append( + SourceEntry( + source=source, + placement=placement, + stereo=stereo, + stereo_baseline_mm=baseline_mm, + ) + ) return entries @@ -168,14 +248,19 @@ def main(argv: Optional[list[str]] = None) -> int: # Build sources, layers, and placement strategies in parallel arrays. sources, layers, strategies = [], [], [] - for source, placement in entries: - sources.append(source) + for entry in entries: + sources.append(entry.source) layer_cfg = viz.QuadLayerConfig() - layer_cfg.name = source.spec.name - layer_cfg.resolution = viz.Resolution(source.spec.width, source.spec.height) + layer_cfg.name = entry.source.spec.name + layer_cfg.resolution = viz.Resolution( + entry.source.spec.width, entry.source.spec.height + ) layer_cfg.format = viz.PixelFormat.kRGBA8 + if entry.stereo: + layer_cfg.stereo = True + layer_cfg.stereo_baseline_mm = entry.stereo_baseline_mm layers.append(session.add_quad_layer(layer_cfg)) - strategies.append(placement) + strategies.append(entry.placement) print( f"camera_viz: source={source_mode}, mode={cfg.get('display', {}).get('mode')}, " diff --git a/examples/camera_viz/configs/synthetic_stereo.yaml b/examples/camera_viz/configs/synthetic_stereo.yaml new file mode 100644 index 000000000..cf2ee2aa0 --- /dev/null +++ b/examples/camera_viz/configs/synthetic_stereo.yaml @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Synthetic stereo source — exercises the stereo QuadLayer path without +# stereo camera hardware. Same animated pattern as synthetic.yaml, but +# with a horizontal pixel disparity between the eyes so the parallax is +# visible in XR. Window mode renders the LEFT eye (per the documented +# single-view fallback). + +source: local +streaming: + host: 127.0.0.1 +encoder: auto + +cameras: + - name: synth + enabled: true + type: synthetic + stereo: true # → SyntheticStereoSource + stereo QuadLayer + width: 1280 + height: 720 + fps: 60 + hue_speed_hz: 0.25 + disparity_px: 20 # horizontal pixel offset between eyes + rtp: + port: 5000 + bitrate_mbps: 15 + +display: + mode: xr # window | xr — stereo really only shines in xr + window: + width: 1280 + height: 720 + xr: + near_z: 0.05 + far_z: 100.0 + clear_color: [0.05, 0.05, 0.08, 1.0] + placements: + synth: + lock_mode: lazy + distance: 1.5 + stereo_baseline_mm: 0.0 # 0 → both eyes see the same world quad + # (parallax purely from the captured frames). + # Try 65.0 (typical IPD) to virtually push the + # screen further; -30.0 to bring it closer. diff --git a/examples/camera_viz/pipeline/interface.py b/examples/camera_viz/pipeline/interface.py index 6d057831e..39edb8bd3 100644 --- a/examples/camera_viz/pipeline/interface.py +++ b/examples/camera_viz/pipeline/interface.py @@ -35,12 +35,18 @@ class Frame: ``image`` is anything that exposes ``__cuda_array_interface__`` — CuPy / PyTorch / Numba arrays all work. ``stream`` is the producer's CUDA stream so the consumer can synchronize when it's not 0/default. + + Stereo: when the source produces a second eye, set ``image_right`` + to the right-eye buffer. ``image`` is the left eye. Both eyes MUST + come from the same capture instant — the renderer submits them as + an atomic pair. Mono sources leave ``image_right`` as None. """ image: Any timestamp_ns: int source_id: str stream: int = 0 + image_right: Optional[Any] = None class FrameSource(ABC): diff --git a/examples/camera_viz/pipeline/runner.py b/examples/camera_viz/pipeline/runner.py index cd9d1ec67..99768b47c 100644 --- a/examples/camera_viz/pipeline/runner.py +++ b/examples/camera_viz/pipeline/runner.py @@ -5,7 +5,7 @@ VizRunner owns two threads: * **submit thread** — polls each source's ``latest()`` at ~1 kHz, - calls ``layer.submit_cuda_array()`` on new frames, and notifies a + calls ``layer.submit()`` on new frames, and notifies a condition variable. * **render thread** — waits on the condition. Wakes within ~µs of a new publish and calls ``session.render()``. A safety-net timeout @@ -200,7 +200,13 @@ def _submit_loop_inner(self) -> None: if not device_pinned: self._pin_to_device(frame) device_pinned = True - layer.submit_cuda_array(frame.image, stream=frame.stream) + # Stereo dispatch: when the source carries a second eye, + # hand both to QuadLayer in one atomic submit. The layer + # validates that its stereo-ness matches. + if frame.image_right is not None: + layer.submit(frame.image, frame.image_right, stream=frame.stream) + else: + layer.submit(frame.image, stream=frame.stream) published_any = True if published_any: with self._data_cond: diff --git a/examples/camera_viz/sources/__init__.py b/examples/camera_viz/sources/__init__.py index fe1484bb3..8243ff704 100644 --- a/examples/camera_viz/sources/__init__.py +++ b/examples/camera_viz/sources/__init__.py @@ -12,16 +12,19 @@ from pipeline import FrameSource +from ._helpers import PairedFrameSource from .oakd import OakdSource from .rtp_h264 import RtpH264Source -from .synthetic import SyntheticSource +from .synthetic import SyntheticSource, SyntheticStereoSource from .v4l2 import V4l2Source from .zed import ZedSource __all__ = [ "OakdSource", + "PairedFrameSource", "RtpH264Source", "SyntheticSource", + "SyntheticStereoSource", "V4l2Source", "ZedSource", "build_local_camera", @@ -31,15 +34,33 @@ def build_local_camera(spec: dict) -> List[FrameSource]: """Build the local FrameSource(s) for one ``cameras:`` entry. - Returns a list because multi-stream cameras (OAK-D stereo, ZED stereo) - fan out to one source per stream. Single-stream cameras return [source]. + Mono cameras return [source]. Stereo cameras (``stereo: true``) + return [PairedFrameSource] — a single FrameSource that emits a + ``Frame`` with both ``image`` (left eye) and ``image_right`` populated. + The camera_viz pipeline routes that to ``QuadLayer.submit(left, right)``. + For v4l2 the ``stereo`` toggle is rejected (USB cameras are mono); + OAK-D / ZED / synthetic each have their own native paths. + Shared by camera_viz.py and camera_streamer.py — keep the schema stable. """ kind = spec["type"] + stereo = bool(spec.get("stereo", False)) + name = spec["name"] if kind == "synthetic": + if stereo: + return [ + SyntheticStereoSource( + name=name, + width=int(spec["width"]), + height=int(spec["height"]), + fps=float(spec.get("fps", 60.0)), + hue_speed_hz=float(spec.get("hue_speed_hz", 0.25)), + disparity_px=int(spec.get("disparity_px", 20)), + ) + ] return [ SyntheticSource( - name=spec["name"], + name=name, width=int(spec["width"]), height=int(spec["height"]), fps=float(spec.get("fps", 60.0)), @@ -47,9 +68,14 @@ def build_local_camera(spec: dict) -> List[FrameSource]: ) ] if kind == "v4l2": + if stereo: + raise ValueError( + f"build_local_camera: v4l2 camera {name!r} cannot be stereo " + "(single-stream USB / UVC). Use type: oakd or zed." + ) return [ V4l2Source( - name=spec["name"], + name=name, device=spec.get("device", "/dev/video0"), width=int(spec["width"]), height=int(spec["height"]), @@ -58,10 +84,13 @@ def build_local_camera(spec: dict) -> List[FrameSource]: ) ] if kind == "oakd": - return list( + # ``stereo: true`` is a shorthand for the OAK-D ``mode: stereo``. + # If the user passed both, an explicit mode wins. + mode = spec.get("mode", "stereo" if stereo else "mono") + eyes = list( OakdSource.build( - base_name=spec["name"], - mode=spec.get("mode", "mono"), + base_name=name, + mode=mode, device_id=spec.get("device_id", ""), width=int(spec["width"]), height=int(spec["height"]), @@ -72,17 +101,37 @@ def build_local_camera(spec: dict) -> List[FrameSource]: rgb_fps=int(spec.get("rgb_fps", 0)), ) ) + if stereo or mode in ("stereo", "stereo_rgb"): + # OakdSource.build returns 2 per-eye sources in stereo and 3 + # in stereo_rgb; we pair the first two (left + right). The + # extra RGB stream of stereo_rgb is intentionally dropped — + # the QuadLayer takes exactly two eyes. + if len(eyes) < 2: + raise ValueError( + f"build_local_camera: oakd {name!r} stereo mode produced {len(eyes)} " + "source(s); expected at least 2" + ) + return [PairedFrameSource(name=name, left=eyes[0], right=eyes[1])] + return eyes if kind == "zed": - return list( + eyes = list( ZedSource.build( - base_name=spec["name"], + base_name=name, resolution=spec.get("resolution", "HD720"), fps=int(spec.get("fps", 30)), serial_number=int(spec.get("serial_number", 0)), bus_type=spec.get("bus_type", "usb"), - stereo=bool(spec.get("stereo", False)), + stereo=stereo, ) ) + if stereo: + if len(eyes) != 2: + raise ValueError( + f"build_local_camera: zed {name!r} stereo produced {len(eyes)} " + "source(s); expected 2" + ) + return [PairedFrameSource(name=name, left=eyes[0], right=eyes[1])] + return eyes raise ValueError( f"build_local_camera: unknown camera type {kind!r} " "(known: synthetic, v4l2, oakd, zed)" diff --git a/examples/camera_viz/sources/_helpers.py b/examples/camera_viz/sources/_helpers.py index 5d6c316b1..19f749931 100644 --- a/examples/camera_viz/sources/_helpers.py +++ b/examples/camera_viz/sources/_helpers.py @@ -15,7 +15,7 @@ - kicks off an async ``cudaMemcpyAsync`` H2D + color-convert on the producer stream, - synchronizes the stream so the consumer (renderer thread) can safely - read the GPU buffer via ``submit_cuda_array(stream=0)`` — until the + read the GPU buffer via ``submit(stream=0)`` — until the viz binding grows cross-stream sync (M6 review item #2), this is the only correct way to hand the buffer across threads, - flips the write/publish index atomically under a short lock. @@ -317,3 +317,81 @@ def _mark_disconnected(self) -> None: pass self._connected = False self._last_reconnect_attempt_s = time.monotonic() + + +class PairedFrameSource(FrameSource): + """Pair two per-eye FrameSources into a single stereo source. + + Owns no thread of its own. latest() polls both children: when + each has produced a fresh Frame, emits one combined Frame with the + left source's image in Frame.image and the right source's image + in Frame.image_right. Skips the publish (returns None) if either + eye hasn't produced — the QuadLayer mailbox keeps the previous + matched pair until both eyes catch up. Acceptable because both eyes + share the camera producer (same SDK grab cycle), so they re-sync + within one frame. + + Both children MUST agree on (width, height, pixel_format). + """ + + def __init__(self, name: str, left: FrameSource, right: FrameSource) -> None: + if left.spec.width != right.spec.width or left.spec.height != right.spec.height: + raise ValueError( + f"PairedFrameSource: left/right resolution mismatch " + f"({left.spec.width}x{left.spec.height} vs " + f"{right.spec.width}x{right.spec.height})" + ) + if left.spec.pixel_format != right.spec.pixel_format: + raise ValueError( + f"PairedFrameSource: left/right pixel_format mismatch " + f"({left.spec.pixel_format!r} vs {right.spec.pixel_format!r})" + ) + self._spec = SourceSpec( + name=name, + width=left.spec.width, + height=left.spec.height, + pixel_format=left.spec.pixel_format, + ) + self._left = left + self._right = right + + @property + def spec(self) -> SourceSpec: + return self._spec + + @property + def left(self) -> FrameSource: + """Per-eye left source. Used by camera_streamer.py to fan out + two independent RTP streams for stereo cameras (paired + atomicity comes back at the receiver, not on the wire).""" + return self._left + + @property + def right(self) -> FrameSource: + return self._right + + def start(self) -> None: + self._left.start() + self._right.start() + + def stop(self) -> None: + self._left.stop() + self._right.stop() + + def latest(self) -> Optional[Frame]: + # Read both. We only publish when BOTH eyes have produced — + # otherwise the renderer would see a mismatched pair (or an + # update on one eye only, which submit() would treat as the + # left of a new pair with the previous right). Returning None + # leaves the layer rendering the prior matched pair. + fl = self._left.latest() + fr = self._right.latest() + if fl is None or fr is None: + return None + return Frame( + image=fl.image, + image_right=fr.image, + timestamp_ns=fl.timestamp_ns, + source_id=self._spec.name, + stream=fl.stream, + ) diff --git a/examples/camera_viz/sources/synthetic.py b/examples/camera_viz/sources/synthetic.py index e6846a876..777070f44 100644 --- a/examples/camera_viz/sources/synthetic.py +++ b/examples/camera_viz/sources/synthetic.py @@ -142,3 +142,134 @@ def _produce_loop(self) -> None: # Coarse pacing — CuPy fill kernels at 1080p are well under # the budget, so a simple sleep keeps us at target fps. time.sleep(self._frame_interval_s) + + +class SyntheticStereoSource(FrameSource): + """GPU-resident stereo source emitting a paired RGBA8 test pattern. + + Same animated pattern as ``SyntheticSource``, but with a horizontal + pixel shift between the eyes so the disparity is visible — useful + for sanity-checking a stereo QuadLayer end-to-end without a real + camera. The left/right kernels run on the same stream and are + pre-synced before publish, so the renderer always reads a + same-instant pair. + """ + + def __init__( + self, + name: str, + width: int, + height: int, + fps: float = 60.0, + hue_speed_hz: float = 0.25, + disparity_px: int = 20, + ) -> None: + try: + import cupy as cp + except ImportError as e: + raise RuntimeError( + "SyntheticStereoSource requires CuPy (cupy-cuda12x). Install via " + "`uv pip install cupy-cuda12x` or skip this source." + ) from e + + self._cp = cp + self._spec = SourceSpec( + name=name, width=width, height=height, pixel_format="rgba8" + ) + self._frame_interval_s = 1.0 / fps if fps > 0.0 else 0.0 + self._hue_speed_hz = hue_speed_hz + self._disparity_px = int(disparity_px) + + # Triple-buffer per eye. Indices stay in lock-step so latest() + # always returns matching (left[i], right[i]). + self._left = [cp.zeros((height, width, 4), dtype=cp.uint8) for _ in range(3)] + self._right = [cp.zeros((height, width, 4), dtype=cp.uint8) for _ in range(3)] + self._write_idx = 0 + self._publish_idx: int = -1 + self._consumed_idx: int = -2 + self._lock = threading.Lock() + + self._stop = threading.Event() + self._thread: Optional[threading.Thread] = None + self._t0_ns = 0 + + @property + def spec(self) -> SourceSpec: + return self._spec + + def start(self) -> None: + if self._thread is not None: + return + self._stop.clear() + self._t0_ns = time.monotonic_ns() + self._thread = threading.Thread( + target=self._produce_loop, + name=f"synth_stereo_{self._spec.name}", + daemon=False, + ) + self._thread.start() + + def stop(self) -> None: + self._stop.set() + if self._thread is not None: + self._thread.join() + self._thread = None + + def latest(self) -> Optional[Frame]: + with self._lock: + if self._publish_idx < 0 or self._publish_idx == self._consumed_idx: + return None + idx = self._publish_idx + self._consumed_idx = idx + return Frame( + image=self._left[idx], + image_right=self._right[idx], + timestamp_ns=time.monotonic_ns(), + source_id=self._spec.name, + stream=0, + ) + + def _produce_loop(self) -> None: + cp = self._cp + h, w = self._spec.height, self._spec.width + with cp.cuda.Device(int(self._left[0].device.id)): + y_grid = cp.arange(h, dtype=cp.float32).reshape(h, 1) + x_grid_l = cp.arange(w, dtype=cp.float32).reshape(1, w) + # Right-eye coord shifted by disparity_px so the same content + # appears at a slightly different x position. Visible parallax + # confirms the renderer routed both eyes correctly. + x_grid_r = (cp.arange(w, dtype=cp.float32) - self._disparity_px).reshape( + 1, w + ) + diag_l = (x_grid_l + y_grid) / float(w + h) + diag_r = (x_grid_r + y_grid) / float(w + h) + + while not self._stop.is_set(): + t = (time.monotonic_ns() - self._t0_ns) * 1e-9 + phase = (t * self._hue_speed_hz) % 1.0 + + def fill(buf, diag): + r = (cp.sin((diag + phase) * 6.2831853) * 127.0 + 128.0).astype( + cp.uint8 + ) + g = ( + cp.sin((diag + phase + 0.3333) * 6.2831853) * 127.0 + 128.0 + ).astype(cp.uint8) + b = ( + cp.sin((diag + phase + 0.6667) * 6.2831853) * 127.0 + 128.0 + ).astype(cp.uint8) + buf[..., 0] = r + buf[..., 1] = g + buf[..., 2] = b + buf[..., 3] = 255 + + fill(self._left[self._write_idx], diag_l) + fill(self._right[self._write_idx], diag_r) + cp.cuda.Stream.null.synchronize() + + with self._lock: + self._publish_idx = self._write_idx + self._write_idx = (self._write_idx + 1) % len(self._left) + + if self._frame_interval_s > 0.0: + time.sleep(self._frame_interval_s) diff --git a/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp b/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp index 96c7ee2a4..4faacdea8 100644 --- a/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp +++ b/src/viz/layers/cpp/inc/viz/layers/quad_layer.hpp @@ -42,6 +42,14 @@ class VkContext; // bump kMaxFramesInFlight and kSlotCount together. // // Memory: kSlotCount × width × height × bpp (~40 MB at 1080p RGBA8). +// +// Stereo: when Config::stereo is true, each slot owns a PAIR of +// DeviceImages (left + right). The two-arg submit() does both +// memcpy2Ds + the cuda_done_writing signal on a single CUDA stream, +// so stream ordering guarantees the renderer never sees a half- +// updated pair. Memory doubles. In kXr, record() binds the left +// descriptor for view 0 and the right for view 1; window/offscreen +// (single view) draws the left buffer only. class QuadLayer : public LayerBase { public: @@ -84,6 +92,25 @@ class QuadLayer : public LayerBase // Set to false to save the ~33% extra image memory on layers // that are always sampled at native resolution. bool generate_mipmaps = true; + + // Stereo mode. When true, the layer owns a paired left+right + // mailbox; submit MUST be called with both buffers. In kXr, + // view 0 (left eye) samples the left buffer and view 1 (right + // eye) the right. In window/offscreen the left buffer is drawn + // and the right is allocated but unused. Memory doubles. + bool stereo = false; + + // Horizontal disparity between the left-plane (in the left eye) + // and the right-plane (in the right eye), in millimeters along + // the placement's local +x axis. Each eye's quad center is + // shifted by ±stereo_baseline_mm/2 (left eye: −, right eye: +). + // 0 means both eyes see the same world-space quad; all stereo + // cues come from the captured images. Positive values let the + // planes splay outward (virtual screen further back); negative + // makes them cross (closer to viewer). Ignored when stereo is + // false or outside kXr. mm-scale chosen because typical real- + // world IPDs and camera baselines are 50–80 mm. + float stereo_baseline_mm = 0.0f; }; // Hard cap on the mip chain when generate_mipmaps is enabled. @@ -110,7 +137,17 @@ class QuadLayer : public LayerBase // producer wrapping its mailbox could overwrite src.data while our // async memcpy was still reading. Cost: ~0.5 ms per 1080p call on // the calling thread; the render path is unaffected. + // + // Mono layer (Config::stereo == false): use the one-arg overload. + // The two-arg overload throws std::logic_error. + // + // Stereo layer (Config::stereo == true): use the two-arg overload. + // Both buffers are copied + the single cuda_done_writing signal is + // emitted on the SAME ``stream``, so stream ordering guarantees + // the renderer never reads a half-matched pair. The one-arg + // overload throws std::logic_error. void submit(const VizBuffer& src, cudaStream_t stream = 0); + void submit(const VizBuffer& left, const VizBuffer& right, cudaStream_t stream = 0); // Pre-pass slot: promote latest_ -> in_use_[in_flight_slot] AND // (when generate_mipmaps is on) emit the mip-chain blits on the @@ -141,8 +178,10 @@ class QuadLayer : public LayerBase void set_placement(std::optional placement) noexcept; std::optional placement() const noexcept; - // Diagnostic accessor; nullptr for slots beyond kSlotCount. + // Diagnostic accessor; nullptr for slots beyond kSlotCount, and + // device_image_right is null on mono layers. const DeviceImage* device_image(uint32_t slot) const noexcept; + const DeviceImage* device_image_right(uint32_t slot) const noexcept; private: void init(); @@ -178,8 +217,10 @@ class QuadLayer : public LayerBase // Number of mip levels per DeviceImage slot. 1 when mips disabled. uint32_t mip_levels_ = 1; - // One DeviceImage per mailbox slot. + // One DeviceImage per mailbox slot. ``slots_`` is the left/mono + // image; ``slots_right_`` only allocated when Config::stereo. std::array, kSlotCount> slots_; + std::array, kSlotCount> slots_right_; VkSampler sampler_ = VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_ = VK_NULL_HANDLE; @@ -188,7 +229,9 @@ class QuadLayer : public LayerBase VkDescriptorPool descriptor_pool_ = VK_NULL_HANDLE; // One descriptor set per slot — record() binds the one for in_use_. + // ``descriptor_sets_right_`` is only populated when Config::stereo. std::array descriptor_sets_{}; + std::array descriptor_sets_right_{}; // Mailbox: latest_ = most recent publish. in_use_[i] = slot the // i-th in-flight frame is sampling. Atomic so producer and diff --git a/src/viz/layers/cpp/quad_layer.cpp b/src/viz/layers/cpp/quad_layer.cpp index 4678fef64..f226d8cf1 100644 --- a/src/viz/layers/cpp/quad_layer.cpp +++ b/src/viz/layers/cpp/quad_layer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -113,6 +114,10 @@ QuadLayer::QuadLayer(const VkContext& ctx, VkRenderPass render_pass, Config conf throw std::invalid_argument("QuadLayer: Placement::size_meters must be > 0 in both components"); } } + if (!std::isfinite(config_.stereo_baseline_mm)) + { + throw std::invalid_argument("QuadLayer: stereo_baseline_mm must be finite"); + } // Resolve mip count: capped chain when enabled, single level // otherwise. The cap (kMaxMipLevels) keeps the per-frame blit @@ -158,6 +163,13 @@ void QuadLayer::init() { slot = DeviceImage::create(*ctx_, config_.resolution, config_.format, mip_levels_); } + if (config_.stereo) + { + for (auto& slot : slots_right_) + { + slot = DeviceImage::create(*ctx_, config_.resolution, config_.format, mip_levels_); + } + } create_sampler(); create_descriptor_set_layout(); create_pipeline_layout(); @@ -186,14 +198,20 @@ void QuadLayer::destroy() { slot.reset(); } + for (auto& slot : slots_right_) + { + slot.reset(); + } return; } if (descriptor_pool_ != VK_NULL_HANDLE) { - // descriptor_sets_ are freed implicitly with the pool. + // descriptor_sets_ + descriptor_sets_right_ are freed implicitly + // with the pool. vkDestroyDescriptorPool(device, descriptor_pool_, nullptr); descriptor_pool_ = VK_NULL_HANDLE; descriptor_sets_.fill(VK_NULL_HANDLE); + descriptor_sets_right_.fill(VK_NULL_HANDLE); } if (pipeline_ != VK_NULL_HANDLE) { @@ -219,6 +237,10 @@ void QuadLayer::destroy() { slot.reset(); } + for (auto& slot : slots_right_) + { + slot.reset(); + } latest_.store(kSlotNone, std::memory_order_release); for (auto& e : in_use_) { @@ -255,6 +277,15 @@ const DeviceImage* QuadLayer::device_image(uint32_t slot) const noexcept return slots_[slot].get(); } +const DeviceImage* QuadLayer::device_image_right(uint32_t slot) const noexcept +{ + if (slot >= kSlotCount) + { + return nullptr; + } + return slots_right_[slot].get(); +} + uint8_t QuadLayer::pick_free_slot(uint8_t latest, const std::array, kMaxFramesInFlight>& in_use) const noexcept { @@ -285,25 +316,56 @@ uint8_t QuadLayer::pick_free_slot(uint8_t latest, return kSlotNone; } -void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream) +namespace { - require_alive(slots_[0], "submit"); - if (src.space != MemorySpace::kDevice) +// Shared per-buffer validation for the submit overloads. ``label`` is +// the caller's tag (e.g. "src", "left", "right") so the error message +// names which buffer failed in the stereo case. +void validate_submit_buffer(const VizBuffer& buf, const QuadLayer::Config& cfg, const char* label) +{ + if (buf.space != MemorySpace::kDevice) + { + throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + " must be MemorySpace::kDevice"); + } + if (buf.width != cfg.resolution.width || buf.height != cfg.resolution.height) + { + throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + + " dimensions do not match layer resolution"); + } + if (buf.format != cfg.format) { - throw std::invalid_argument("QuadLayer::submit: src must be MemorySpace::kDevice"); + throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + " format does not match layer format"); } - if (src.width != config_.resolution.width || src.height != config_.resolution.height) + if (buf.data == nullptr) { - throw std::invalid_argument("QuadLayer::submit: src dimensions do not match layer resolution"); + throw std::invalid_argument(std::string("QuadLayer::submit: ") + label + ".data is null"); } - if (src.format != config_.format) +} + +// Queue an async D2D copy of ``buf`` → ``image.cuda_array()`` on +// ``stream``. Shared between the mono and stereo submit paths. +void enqueue_copy(const VizBuffer& buf, DeviceImage& image, cudaStream_t stream) +{ + const size_t row_bytes = static_cast(buf.width) * bytes_per_pixel(buf.format); + const size_t src_pitch = (buf.pitch == 0) ? row_bytes : buf.pitch; + const cudaError_t err = cudaMemcpy2DToArrayAsync( + image.cuda_array(), 0, 0, buf.data, src_pitch, row_bytes, buf.height, cudaMemcpyDeviceToDevice, stream); + if (err != cudaSuccess) { - throw std::invalid_argument("QuadLayer::submit: src format does not match layer format"); + throw std::runtime_error(std::string("QuadLayer::submit: cudaMemcpy2DToArrayAsync failed: ") + + cudaGetErrorString(err)); } - if (src.data == nullptr) +} +} // namespace + +void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream) +{ + require_alive(slots_[0], "submit"); + if (config_.stereo) { - throw std::invalid_argument("QuadLayer::submit: src.data is null"); + throw std::logic_error("QuadLayer::submit: this layer is stereo — use the two-arg submit(left, right) overload"); } + validate_submit_buffer(src, config_, "src"); const uint8_t latest = latest_.load(std::memory_order_acquire); const uint8_t slot = pick_free_slot(latest, in_use_); @@ -316,11 +378,7 @@ void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream) DeviceImage& image = *slots_[slot]; check_cuda(cudaSetDevice(ctx_->cuda_device_id()), "cudaSetDevice"); - const size_t row_bytes = static_cast(src.width) * bytes_per_pixel(src.format); - const size_t src_pitch = (src.pitch == 0) ? row_bytes : src.pitch; - check_cuda(cudaMemcpy2DToArrayAsync(image.cuda_array(), 0, 0, src.data, src_pitch, row_bytes, src.height, - cudaMemcpyDeviceToDevice, stream), - "cudaMemcpy2DToArrayAsync"); + enqueue_copy(src, image, stream); image.cuda_signal_write_done(stream); // Wait for the D2D copy to complete before returning. Sources publish @@ -335,6 +393,40 @@ void QuadLayer::submit(const VizBuffer& src, cudaStream_t stream) latest_.store(slot, std::memory_order_release); } +void QuadLayer::submit(const VizBuffer& left, const VizBuffer& right, cudaStream_t stream) +{ + require_alive(slots_[0], "submit"); + if (!config_.stereo) + { + throw std::logic_error("QuadLayer::submit: this layer is mono — call submit(src) with a single buffer"); + } + validate_submit_buffer(left, config_, "left"); + validate_submit_buffer(right, config_, "right"); + + const uint8_t latest = latest_.load(std::memory_order_acquire); + const uint8_t slot = pick_free_slot(latest, in_use_); + if (slot == kSlotNone) + { + return; + } + DeviceImage& image_l = *slots_[slot]; + DeviceImage& image_r = *slots_right_[slot]; + + check_cuda(cudaSetDevice(ctx_->cuda_device_id()), "cudaSetDevice"); + // Both copies on the same stream + a single signal on the left's + // semaphore. Stream ordering guarantees the right copy completes + // before the signal fires, so the renderer waiting on the left's + // semaphore implies the right is ready too. No second semaphore + // needed — by construction the renderer cannot see a half-pair. + enqueue_copy(left, image_l, stream); + enqueue_copy(right, image_r, stream); + image_l.cuda_signal_write_done(stream); + + check_cuda(cudaStreamSynchronize(stream), "cudaStreamSynchronize(submit-stereo)"); + + latest_.store(slot, std::memory_order_release); +} + void QuadLayer::record_mip_generation(VkCommandBuffer cmd, DeviceImage& image) { // Mip-chain regeneration via vkCmdBlitImage. The image lives in @@ -469,10 +561,17 @@ void QuadLayer::record_pre_render_pass(VkCommandBuffer cmd, uint32_t in_flight_s // Mip generation (if configured). Reads level 0 written by CUDA, // writes levels 1..N-1, ends with the whole image back in - // SHADER_READ_ONLY for the sampler in record(). + // SHADER_READ_ONLY for the sampler in record(). For stereo we + // regenerate both eyes' chains; the right image's level 0 was + // written by the same producer stream that signaled the left's + // semaphore, so the queue's TRANSFER-stage wait already covers it. if (mip_levels_ > 1) { record_mip_generation(cmd, *slots_[cur]); + if (config_.stereo) + { + record_mip_generation(cmd, *slots_right_[cur]); + } } } @@ -494,8 +593,6 @@ void QuadLayer::record(VkCommandBuffer cmd, } vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_); - vkCmdBindDescriptorSets( - cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, 0, 1, &descriptor_sets_[cur], 0, nullptr); // Snapshot under lock so set_placement() can run concurrently. std::optional placement; @@ -514,15 +611,45 @@ void QuadLayer::record(VkCommandBuffer cmd, // xr: 4-vertex triangle strip; else: 3-vertex NDC-cover triangle. const uint32_t vertex_count = xr_mode ? 4u : 3u; + // Stereo baseline offset along the placement's local +x axis. ±half + // the configured baseline, signed per eye: left eye gets the + // negative offset, right eye the positive. Direction in world space + // is the placement orientation rotating local +x. We evaluate it + // once outside the per-view loop since orientation is per-frame + // constant. Skipped when the layer is mono (baseline doesn't apply) + // OR outside kXr (both eyes converge to a single view, no signed + // disambiguation possible). Zero baseline elides to the mono MVP. + const bool apply_baseline = xr_mode && config_.stereo && config_.stereo_baseline_mm != 0.0f; + glm::vec3 baseline_axis_ws{ 0.0f }; + if (apply_baseline) + { + baseline_axis_ws = glm::mat3_cast(placement->pose.orientation) * glm::vec3(1.0f, 0.0f, 0.0f); + } + // Compositor pre-binds the layer's scissor; we set per-view viewport. - for (const auto& view : views) + for (size_t view_idx = 0; view_idx < views.size(); ++view_idx) { + const auto& view = views[view_idx]; bind_view_viewport(cmd, view); + // Stereo: view 0 → left descriptor, view 1 → right descriptor. + // Mono OR single-view backends (window/offscreen): always left. + const bool sample_right = config_.stereo && view_idx == 1; + VkDescriptorSet ds = sample_right ? descriptor_sets_right_[cur] : descriptor_sets_[cur]; + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, 0, 1, &ds, 0, nullptr); + QuadShaderData data{}; if (xr_mode) { - const glm::mat4 mvp = placement_mvp(*placement, view); + Config::Placement eye_placement = *placement; + if (apply_baseline) + { + const float sign = (view_idx == 0) ? -1.0f : +1.0f; + // 0.5 to halve the disparity per eye, 0.001 to convert mm → m + // (placement.pose.position is in world meters). + eye_placement.pose.position += sign * (config_.stereo_baseline_mm * 0.0005f) * baseline_axis_ws; + } + const glm::mat4 mvp = placement_mvp(eye_placement, view); std::memcpy(data.mvp, &mvp[0][0], sizeof(data.mvp)); data.mode = 1; } @@ -755,13 +882,16 @@ void QuadLayer::create_pipeline() void QuadLayer::create_descriptor_pool() { + // Stereo needs twice the sets: one per slot per eye. + const uint32_t set_count = config_.stereo ? (kSlotCount * 2u) : kSlotCount; + VkDescriptorPoolSize pool_size{}; pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_size.descriptorCount = kSlotCount; + pool_size.descriptorCount = set_count; VkDescriptorPoolCreateInfo info{}; info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - info.maxSets = kSlotCount; + info.maxSets = set_count; info.poolSizeCount = 1; info.pPoolSizes = &pool_size; check_vk(vkCreateDescriptorPool(ctx_->device(), &info, nullptr, &descriptor_pool_), "vkCreateDescriptorPool"); @@ -778,14 +908,24 @@ void QuadLayer::allocate_descriptor_sets() info.descriptorSetCount = kSlotCount; info.pSetLayouts = layouts.data(); check_vk(vkAllocateDescriptorSets(ctx_->device(), &info, descriptor_sets_.data()), "vkAllocateDescriptorSets"); + if (config_.stereo) + { + check_vk(vkAllocateDescriptorSets(ctx_->device(), &info, descriptor_sets_right_.data()), + "vkAllocateDescriptorSets(right)"); + } } void QuadLayer::update_descriptor_sets() { - // One write per slot, each pointing at the slot's own image view. - std::array image_infos{}; - std::array writes{}; - for (uint32_t i = 0; i < kSlotCount; ++i) + // One write per slot per eye, pointing at the eye-specific image + // view. Stereo doubles the write count. + const uint32_t per_eye = kSlotCount; + const uint32_t total = config_.stereo ? (per_eye * 2u) : per_eye; + + std::array image_infos{}; + std::array writes{}; + + for (uint32_t i = 0; i < per_eye; ++i) { image_infos[i].sampler = sampler_; image_infos[i].imageView = slots_[i]->vk_image_view(); @@ -799,7 +939,25 @@ void QuadLayer::update_descriptor_sets() writes[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[i].pImageInfo = &image_infos[i]; } - vkUpdateDescriptorSets(ctx_->device(), kSlotCount, writes.data(), 0, nullptr); + if (config_.stereo) + { + for (uint32_t i = 0; i < per_eye; ++i) + { + const uint32_t k = per_eye + i; + image_infos[k].sampler = sampler_; + image_infos[k].imageView = slots_right_[i]->vk_image_view(); + image_infos[k].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + writes[k].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[k].dstSet = descriptor_sets_right_[i]; + writes[k].dstBinding = 0; + writes[k].dstArrayElement = 0; + writes[k].descriptorCount = 1; + writes[k].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[k].pImageInfo = &image_infos[k]; + } + } + vkUpdateDescriptorSets(ctx_->device(), total, writes.data(), 0, nullptr); } } // namespace viz diff --git a/src/viz/layers_tests/cpp/test_quad_layer.cpp b/src/viz/layers_tests/cpp/test_quad_layer.cpp index ed540e11a..3f18bd96a 100644 --- a/src/viz/layers_tests/cpp/test_quad_layer.cpp +++ b/src/viz/layers_tests/cpp/test_quad_layer.cpp @@ -14,8 +14,11 @@ #include #include +#include #include +#include #include +#include using viz::DeviceImage; using viz::PixelFormat; @@ -315,3 +318,308 @@ TEST_CASE("QuadLayer visibility toggle is independent of pipeline state", "[gpu] layer.set_visible(true); CHECK(layer.is_visible()); } + +// ──────────────────────────────────────────────────────────────────── +// Stereo (Config::stereo == true) +// ──────────────────────────────────────────────────────────────────── + +TEST_CASE("QuadLayer ctor rejects non-finite stereo_baseline_mm", "[unit][quad_layer]") +{ + VkContext ctx; + QuadLayer::Config cfg; + cfg.resolution = { 64, 64 }; + cfg.stereo = true; + cfg.stereo_baseline_mm = std::numeric_limits::quiet_NaN(); + CHECK_THROWS_AS(QuadLayer(ctx, VK_NULL_HANDLE, cfg), std::invalid_argument); +} + +TEST_CASE("QuadLayer stereo allocates paired DeviceImages for every slot", "[gpu][quad_layer][stereo]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 64, 64 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 64, 64 }; + cfg.stereo = true; + QuadLayer layer(ctx, target->render_pass(), cfg); + + for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i) + { + REQUIRE(layer.device_image(i) != nullptr); + REQUIRE(layer.device_image_right(i) != nullptr); + // Distinct backing images — submit must NOT alias the two eyes. + CHECK(layer.device_image(i)->vk_image() != layer.device_image_right(i)->vk_image()); + CHECK(layer.device_image(i)->cuda_array() != layer.device_image_right(i)->cuda_array()); + } +} + +TEST_CASE("QuadLayer mono device_image_right is null", "[gpu][quad_layer][stereo]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 32, 32 }; + // cfg.stereo defaults to false. + QuadLayer layer(ctx, target->render_pass(), cfg); + + for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i) + { + CHECK(layer.device_image_right(i) == nullptr); + } +} + +TEST_CASE("QuadLayer mono submit(left, right) throws", "[gpu][quad_layer][stereo]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 32, 32 }; + // cfg.stereo defaults to false. + QuadLayer layer(ctx, target->render_pass(), cfg); + + void* dev_l = nullptr; + void* dev_r = nullptr; + REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess); + struct CudaFreeGuard + { + void* p; + ~CudaFreeGuard() + { + cudaFree(p); + } + } gl{ dev_l }, gr{ dev_r }; + + auto make_buf = [](void* p) + { + VizBuffer b{}; + b.data = p; + b.width = 32; + b.height = 32; + b.format = PixelFormat::kRGBA8; + b.space = viz::MemorySpace::kDevice; + return b; + }; + CHECK_THROWS_AS(layer.submit(make_buf(dev_l), make_buf(dev_r)), std::logic_error); +} + +TEST_CASE("QuadLayer stereo submit(left) throws", "[gpu][quad_layer][stereo]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 32, 32 }; + cfg.stereo = true; + QuadLayer layer(ctx, target->render_pass(), cfg); + + void* dev = nullptr; + REQUIRE(cudaMalloc(&dev, 32 * 32 * 4) == cudaSuccess); + struct CudaFreeGuard + { + void* p; + ~CudaFreeGuard() + { + cudaFree(p); + } + } guard{ dev }; + + VizBuffer src{}; + src.data = dev; + src.width = 32; + src.height = 32; + src.format = PixelFormat::kRGBA8; + src.space = viz::MemorySpace::kDevice; + CHECK_THROWS_AS(layer.submit(src), std::logic_error); +} + +namespace +{ +// Fill a CUDA RGBA8 surface with a solid 32-bit color on the calling stream. +void fill_solid_rgba(void* dev_ptr, uint32_t w, uint32_t h, uint32_t rgba8) +{ + std::vector host(static_cast(w) * h, rgba8); + REQUIRE(cudaMemcpy(dev_ptr, host.data(), host.size() * 4, cudaMemcpyHostToDevice) == cudaSuccess); +} + +// Read pixel (0,0) of a cudaArray back to host. +uint32_t read_pixel0_from_array(cudaArray_t arr) +{ + uint32_t px = 0; + REQUIRE(cudaMemcpy2DFromArray(&px, sizeof(uint32_t), arr, 0, 0, sizeof(uint32_t), 1, cudaMemcpyDeviceToHost) == + cudaSuccess); + return px; +} +} // namespace + +TEST_CASE("QuadLayer stereo submit lands matching L/R pair in the latest slot", "[gpu][quad_layer][stereo]") +{ + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 32, 32 }; + cfg.stereo = true; + QuadLayer layer(ctx, target->render_pass(), cfg); + + void* dev_l = nullptr; + void* dev_r = nullptr; + REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess); + struct CudaFreeGuard + { + void* p; + ~CudaFreeGuard() + { + cudaFree(p); + } + } gl{ dev_l }, gr{ dev_r }; + + // Distinct colors so a swapped binding would be visible in the readback. + constexpr uint32_t kLeftRgba = 0xFF1122AAu; // ABGR-packed in memory little-endian + constexpr uint32_t kRightRgba = 0xFFDDCC11u; + fill_solid_rgba(dev_l, 32, 32, kLeftRgba); + fill_solid_rgba(dev_r, 32, 32, kRightRgba); + + auto make_buf = [](void* p) + { + VizBuffer b{}; + b.data = p; + b.width = 32; + b.height = 32; + b.format = PixelFormat::kRGBA8; + b.space = viz::MemorySpace::kDevice; + return b; + }; + layer.submit(make_buf(dev_l), make_buf(dev_r)); + + // Find which slot got the publish (the one whose semaphore was signaled). + // Iterate slots looking for a non-zero done-writing value; submit() syncs + // the stream so the data is guaranteed visible. + int written_slot = -1; + for (uint32_t i = 0; i < QuadLayer::kSlotCount; ++i) + { + if (layer.device_image(i)->cuda_done_writing_value() != 0) + { + written_slot = static_cast(i); + break; + } + } + REQUIRE(written_slot >= 0); + + // Left slot must hold the left color; right slot the right color. A + // mis-paired submit (e.g. both eyes writing to the left array) would + // surface as right_px == kLeftRgba. + const uint32_t left_px = read_pixel0_from_array(layer.device_image(written_slot)->cuda_array()); + const uint32_t right_px = read_pixel0_from_array(layer.device_image_right(written_slot)->cuda_array()); + CHECK(left_px == kLeftRgba); + CHECK(right_px == kRightRgba); +} + +TEST_CASE("QuadLayer stereo rapid submits keep every L/R pair atomic", "[gpu][quad_layer][stereo]") +{ + // The mailbox guarantee for stereo: for any slot the producer has + // written, the left and right images come from the SAME submit(). + // We submit a sequence of (L_i, R_i) pairs where L_i and R_i are + // derived from the same index i — so any slot that holds L_i must + // also hold R_i. A cross-eye write or torn pair would surface as + // mismatching i. + if (!is_gpu_available()) + { + SKIP("No Vulkan-capable GPU available"); + } + VkContext ctx; + ctx.init({}); + auto target = RenderTarget::create(ctx, RenderTarget::Config{ Resolution{ 32, 32 } }); + + QuadLayer::Config cfg; + cfg.resolution = { 32, 32 }; + cfg.stereo = true; + QuadLayer layer(ctx, target->render_pass(), cfg); + + void* dev_l = nullptr; + void* dev_r = nullptr; + REQUIRE(cudaMalloc(&dev_l, 32 * 32 * 4) == cudaSuccess); + REQUIRE(cudaMalloc(&dev_r, 32 * 32 * 4) == cudaSuccess); + struct CudaFreeGuard + { + void* p; + ~CudaFreeGuard() + { + cudaFree(p); + } + } gl{ dev_l }, gr{ dev_r }; + + auto make_buf = [](void* p) + { + VizBuffer b{}; + b.data = p; + b.width = 32; + b.height = 32; + b.format = PixelFormat::kRGBA8; + b.space = viz::MemorySpace::kDevice; + return b; + }; + + // Index i ∈ [1, kPairs] is encoded in the low byte of left (= i) + // and the next byte of right (= 100 + i, to make eye-swap visible). + // Start from 1 so untouched slots (low byte == 0) are easy to skip. + auto left_for = [](uint32_t i) { return 0xFF000000u | (i & 0xFFu); }; + auto right_for = [](uint32_t i) { return 0xFF000000u | ((100u + i) & 0xFFu) << 8; }; + + constexpr uint32_t kPairs = QuadLayer::kSlotCount + 3; + for (uint32_t i = 1; i <= kPairs; ++i) + { + fill_solid_rgba(dev_l, 32, 32, left_for(i)); + fill_solid_rgba(dev_r, 32, 32, right_for(i)); + layer.submit(make_buf(dev_l), make_buf(dev_r)); + } + + // Walk every slot. A slot is "written" iff its left low-byte index + // is one of the values we submitted (1..kPairs). For each such slot, + // its right MUST match right_for(i) — that's the atomicity proof. + uint32_t written = 0; + for (uint32_t s = 0; s < QuadLayer::kSlotCount; ++s) + { + const uint32_t lp = read_pixel0_from_array(layer.device_image(s)->cuda_array()); + const uint32_t rp = read_pixel0_from_array(layer.device_image_right(s)->cuda_array()); + const uint32_t i = lp & 0xFFu; + if (i < 1 || i > kPairs) + { + continue; + } + // Slot index decoded from left → reconstruct expected right. + CHECK(lp == left_for(i)); + CHECK(rp == right_for(i)); + ++written; + } + // The producer must have actually used the mailbox. + CHECK(written > 0); +} diff --git a/src/viz/python/bindings_helpers.hpp b/src/viz/python/bindings_helpers.hpp index e2d91c530..e45d59670 100644 --- a/src/viz/python/bindings_helpers.hpp +++ b/src/viz/python/bindings_helpers.hpp @@ -12,10 +12,12 @@ #include #include +#include #include #include #include +#include namespace viz_py { @@ -59,6 +61,135 @@ inline py::tuple shape_for(uint32_t width, uint32_t height, viz::PixelFormat for throw std::runtime_error("VizBuffer: unknown PixelFormat"); } +// Convert a Python object exposing ``__cuda_array_interface__`` (CuPy / +// PyTorch / Numba / VizBuffer / any other CAI-2-or-3 producer) into a +// VizBuffer suitable for ``QuadLayer::submit``. Validates the protocol +// dict eagerly so silent dtype / shape / stride mismatches surface as a +// readable error instead of corrupted pixels or a cryptic CUDA error. +// +// ``label`` is prefixed to every error message — the unified +// ``QuadLayer.submit`` binding passes "submit(left)" / "submit(right)" +// so stereo callers know which buffer failed. +// +// Layout requirements: row-major, tightly-packed-within-each-row. +// ``submit`` does one ``cudaMemcpy2D`` per row at the row pitch, so +// non-unit pixel/channel strides would silently mis-pack the texture. +inline viz::VizBuffer cuda_array_to_viz_buffer(py::object obj, + viz::PixelFormat expected_format, + viz::Resolution expected_resolution, + const char* label) +{ + if (!py::hasattr(obj, "__cuda_array_interface__")) + { + throw std::runtime_error(std::string(label) + ": object does not expose __cuda_array_interface__"); + } + py::dict iface = obj.attr("__cuda_array_interface__").cast(); + if (!iface.contains("shape") || !iface.contains("typestr") || !iface.contains("data")) + { + throw std::runtime_error(std::string(label) + + ": __cuda_array_interface__ missing required key (shape/typestr/data)"); + } + + const char* expected_typestr = nullptr; + std::size_t expected_rank = 0; + std::size_t expected_channels = 0; + if (expected_format == viz::PixelFormat::kRGBA8) + { + expected_typestr = "|u1"; + expected_rank = 3; + expected_channels = 4; + } + else if (expected_format == viz::PixelFormat::kD32F) + { + expected_typestr = "(); + if (typestr != expected_typestr) + { + throw std::runtime_error(std::string(label) + ": typestr '" + typestr + + "' does not match layer format (expected '" + expected_typestr + "')"); + } + + py::tuple shape = iface["shape"].cast(); + if (shape.size() != expected_rank) + { + throw std::runtime_error(std::string(label) + ": shape rank " + std::to_string(shape.size()) + + " does not match layer format (expected " + std::to_string(expected_rank) + ")"); + } + const uint32_t h = shape[0].cast(); + const uint32_t w = shape[1].cast(); + if (expected_channels > 1) + { + const std::size_t c = shape[2].cast(); + if (c != expected_channels) + { + throw std::runtime_error(std::string(label) + ": channel count " + std::to_string(c) + + " does not match layer format (expected " + std::to_string(expected_channels) + ")"); + } + } + if (h != expected_resolution.height || w != expected_resolution.width) + { + throw std::runtime_error(std::string(label) + ": shape (" + std::to_string(h) + ", " + std::to_string(w) + + ") does not match layer resolution (" + std::to_string(expected_resolution.height) + + ", " + std::to_string(expected_resolution.width) + ")"); + } + + const std::size_t bpp = viz::bytes_per_pixel(expected_format); + std::size_t pitch_bytes = 0; + if (iface.contains("strides") && !iface["strides"].is_none()) + { + py::tuple strides = iface["strides"].cast(); + if (strides.size() != expected_rank) + { + throw std::runtime_error(std::string(label) + ": strides rank " + std::to_string(strides.size()) + + " does not match shape rank " + std::to_string(expected_rank)); + } + const std::ptrdiff_t row_stride = strides[0].cast(); + const std::ptrdiff_t pixel_stride = strides[1].cast(); + if (row_stride < static_cast(w * bpp)) + { + throw std::runtime_error(std::string(label) + ": row stride " + std::to_string(row_stride) + + " is less than width*bpp " + std::to_string(w * bpp) + + " — non-positive or reversed strides aren't supported"); + } + if (pixel_stride != static_cast(bpp)) + { + throw std::runtime_error(std::string(label) + ": pixel stride " + std::to_string(pixel_stride) + + " does not match bytes-per-pixel " + std::to_string(bpp) + + " — transposed / non-contiguous-per-pixel layout isn't supported"); + } + if (expected_rank == 3) + { + const std::ptrdiff_t channel_stride = strides[2].cast(); + if (channel_stride != 1) + { + throw std::runtime_error(std::string(label) + ": channel stride " + std::to_string(channel_stride) + + " is not 1 — non-contiguous channels aren't supported"); + } + } + pitch_bytes = static_cast(row_stride); + } + + py::tuple data = iface["data"].cast(); + const uintptr_t ptr = data[0].cast(); + + viz::VizBuffer buf; + buf.data = reinterpret_cast(ptr); + buf.width = w; + buf.height = h; + buf.format = expected_format; + buf.pitch = pitch_bytes; // 0 = tightly packed; submit() uses effective_pitch(). + buf.space = viz::MemorySpace::kDevice; + return buf; +} + // Build the dict returned by __cuda_array_interface__ / __array_interface__. // Version 3 of the protocol (matches what CuPy / Numba / PyTorch expect). // `data` is (ptr_as_int, read_only). `strides` is None for C-contiguous, diff --git a/src/viz/python/layers_bindings.cpp b/src/viz/python/layers_bindings.cpp index 38a815c17..dba90c76c 100644 --- a/src/viz/python/layers_bindings.cpp +++ b/src/viz/python/layers_bindings.cpp @@ -60,7 +60,16 @@ void bind_layers(py::module_& m) .def_readwrite("placement", &viz::QuadLayer::Config::placement) .def_readwrite("generate_mipmaps", &viz::QuadLayer::Config::generate_mipmaps, "Allocate + regenerate a capped mip chain each frame; sampler " - "uses trilinear filtering. Off by default."); + "uses trilinear filtering. On by default.") + .def_readwrite("stereo", &viz::QuadLayer::Config::stereo, + "Per-eye stereo. When true, submit MUST be called with both buffers; " + "view 0 (left eye) samples the left buffer, view 1 (right eye) the right. " + "Memory doubles. Off by default.") + .def_readwrite("stereo_baseline_mm", &viz::QuadLayer::Config::stereo_baseline_mm, + "Horizontal disparity between left and right planes (millimeters), " + "applied along the placement's local +x axis. 0 → both eyes see the " + "same world quad. Ignored unless stereo + kXr. mm-scale chosen because " + "typical IPDs / stereo camera baselines are 50–80 mm."); // ── QuadLayer (non-owning; session owns the lifetime) ───────────── @@ -69,145 +78,53 @@ void bind_layers(py::module_& m) Single CUDA-fed quad layer. Owned by VizSession; the Python handle is non-owning (don't keep it around past the session). -Call ``submit`` with a VizBuffer or any object exposing -``__cuda_array_interface__``. Render order = insertion order. +Render order = insertion order. Call ``submit(left, right=None, stream=0)``: + + * Mono layer (Config.stereo == False): pass exactly one buffer as + ``left``. Passing ``right`` raises ``RuntimeError``. + * Stereo layer (Config.stereo == True): pass both. Missing ``right`` + raises ``RuntimeError``. Both buffers are copied on the same CUDA + stream + a single semaphore signals when they're both ready, so + the renderer never sees a half-matched pair. + +Each buffer is either a ``VizBuffer`` (passed straight to C++) or any +object exposing ``__cuda_array_interface__`` (CuPy / PyTorch / Numba / +numpy on a CUDA device pointer); the binding converts it on the fly. )doc") .def( - "submit", [](viz::QuadLayer& self, const viz::VizBuffer& src) { self.submit(src); }, "src"_a, - py::call_guard(), "Submit a pre-built VizBuffer (kDevice).") - .def( - "submit_cuda_array", - [](viz::QuadLayer& self, py::object obj, uintptr_t stream) + "submit", + [](viz::QuadLayer& self, py::object left, py::object right, uintptr_t stream) { - // Accept anything exposing __cuda_array_interface__. Validate - // the dict before constructing a VizBuffer — silent dtype / - // shape / stride mismatches would surface inside the cudaMemcpy - // as either corrupted pixels or a cryptic CUDA error. - if (!py::hasattr(obj, "__cuda_array_interface__")) - { - throw std::runtime_error("submit_cuda_array: object does not expose __cuda_array_interface__"); - } - py::dict iface = obj.attr("__cuda_array_interface__").cast(); - if (!iface.contains("shape") || !iface.contains("typestr") || !iface.contains("data")) - { - throw std::runtime_error( - "submit_cuda_array: __cuda_array_interface__ missing required key (shape/typestr/data)"); - } - - // Per-format expectations (typestr + channels). Must match the - // layer's PixelFormat exactly — submit() reinterprets memory. - const viz::PixelFormat fmt = self.format(); - const char* expected_typestr = nullptr; - std::size_t expected_rank = 0; - std::size_t expected_channels = 0; - if (fmt == viz::PixelFormat::kRGBA8) - { - expected_typestr = "|u1"; - expected_rank = 3; - expected_channels = 4; - } - else if (fmt == viz::PixelFormat::kD32F) + // Resolve each Python arg to a VizBuffer. VizBuffer passes + // through; anything else goes via the cuda-array-interface + // converter (which validates dtype / shape / strides + // before constructing the buffer). + auto to_buf = [&self](py::object obj, const char* label) -> viz::VizBuffer { - expected_typestr = "(); - if (typestr != expected_typestr) - { - throw std::runtime_error(std::string("submit_cuda_array: typestr '") + typestr + - "' does not match layer format (expected '" + expected_typestr + "')"); - } - - py::tuple shape = iface["shape"].cast(); - if (shape.size() != expected_rank) - { - throw std::runtime_error("submit_cuda_array: shape rank " + std::to_string(shape.size()) + - " does not match layer format (expected " + std::to_string(expected_rank) + - ")"); - } - const uint32_t h = shape[0].cast(); - const uint32_t w = shape[1].cast(); - if (expected_channels > 1) - { - const std::size_t c = shape[2].cast(); - if (c != expected_channels) + if (py::isinstance(obj)) { - throw std::runtime_error("submit_cuda_array: channel count " + std::to_string(c) + - " does not match layer format (expected " + - std::to_string(expected_channels) + ")"); + return obj.cast(); } - } - const viz::Resolution res = self.resolution(); - if (h != res.height || w != res.width) + return cuda_array_to_viz_buffer(obj, self.format(), self.resolution(), label); + }; + + if (right.is_none()) { - throw std::runtime_error("submit_cuda_array: shape (" + std::to_string(h) + ", " + - std::to_string(w) + ") does not match layer resolution (" + - std::to_string(res.height) + ", " + std::to_string(res.width) + ")"); + viz::VizBuffer left_buf = to_buf(left, "QuadLayer.submit(left)"); + py::gil_scoped_release release; + self.submit(left_buf, reinterpret_cast(stream)); } - - // Row pitch: explicit when strides present + non-null (slice - // views, padded buffers); else tightly packed. We require - // row-major, tightly-packed-within-each-row layout because - // submit() does a single cudaMemcpy2D per row at row_pitch - // stride — non-unit pixel/channel strides would silently - // mis-pack the destination texture. - const std::size_t bpp = viz::bytes_per_pixel(fmt); - std::size_t pitch_bytes = 0; - if (iface.contains("strides") && !iface["strides"].is_none()) + else { - py::tuple strides = iface["strides"].cast(); - if (strides.size() != expected_rank) - { - throw std::runtime_error("submit_cuda_array: strides rank " + std::to_string(strides.size()) + - " does not match shape rank " + std::to_string(expected_rank)); - } - const std::ptrdiff_t row_stride = strides[0].cast(); - const std::ptrdiff_t pixel_stride = strides[1].cast(); - if (row_stride < static_cast(w * bpp)) - { - throw std::runtime_error("submit_cuda_array: row stride " + std::to_string(row_stride) + - " is less than width*bpp " + std::to_string(w * bpp) + - " — non-positive or reversed strides aren't supported"); - } - if (pixel_stride != static_cast(bpp)) - { - throw std::runtime_error("submit_cuda_array: pixel stride " + std::to_string(pixel_stride) + - " does not match bytes-per-pixel " + std::to_string(bpp) + - " — transposed / non-contiguous-per-pixel layout isn't supported"); - } - if (expected_rank == 3) - { - const std::ptrdiff_t channel_stride = strides[2].cast(); - if (channel_stride != 1) - { - throw std::runtime_error("submit_cuda_array: channel stride " + - std::to_string(channel_stride) + - " is not 1 — non-contiguous channels aren't supported"); - } - } - pitch_bytes = static_cast(row_stride); + viz::VizBuffer left_buf = to_buf(left, "QuadLayer.submit(left)"); + viz::VizBuffer right_buf = to_buf(right, "QuadLayer.submit(right)"); + py::gil_scoped_release release; + self.submit(left_buf, right_buf, reinterpret_cast(stream)); } - - py::tuple data = iface["data"].cast(); - const uintptr_t ptr = data[0].cast(); - - viz::VizBuffer buf; - buf.data = reinterpret_cast(ptr); - buf.width = w; - buf.height = h; - buf.format = fmt; - buf.pitch = pitch_bytes; // 0 = tightly packed; submit() uses effective_pitch(). - buf.space = viz::MemorySpace::kDevice; - py::gil_scoped_release release; - self.submit(buf, reinterpret_cast(stream)); }, - "obj"_a, "stream"_a = 0, "Submit any object exposing __cuda_array_interface__ (CuPy / PyTorch / Numba).") + "left"_a, "right"_a = py::none(), "stream"_a = 0, + "Submit a frame. Each arg is a VizBuffer or any __cuda_array_interface__ " + "object. Mono layer: pass only ``left``. Stereo layer: pass both.") .def_property_readonly("resolution", &viz::QuadLayer::resolution) .def_property_readonly("format", &viz::QuadLayer::format) .def_property_readonly("aspect_ratio", &viz::QuadLayer::aspect_ratio) diff --git a/src/viz/python/viz_init.py b/src/viz/python/viz_init.py index c8c52665a..c07923b0d 100644 --- a/src/viz/python/viz_init.py +++ b/src/viz/python/viz_init.py @@ -22,8 +22,9 @@ layer_cfg.resolution = viz.Resolution(1024, 1024) layer = session.add_quad_layer(layer_cfg) - # CuPy / PyTorch / Numba arrays (anything with __cuda_array_interface__): - layer.submit_cuda_array(cupy_rgba8) + # CuPy / PyTorch / Numba arrays (anything with __cuda_array_interface__), + # or a pre-built VizBuffer: + layer.submit(cupy_rgba8) info = session.render() img = session.readback_to_host() # HostImage with __array_interface__ diff --git a/src/viz/python_tests/test_offscreen_session.py b/src/viz/python_tests/test_offscreen_session.py index 893404974..d1f0946bc 100644 --- a/src/viz/python_tests/test_offscreen_session.py +++ b/src/viz/python_tests/test_offscreen_session.py @@ -92,7 +92,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface(): layer = session.add_quad_layer(layer_cfg) assert layer.name == "cam" - # Solid green RGBA8 source. submit_cuda_array consumes + # Solid green RGBA8 source. submit consumes # __cuda_array_interface__ on the CuPy array. # # Build host-side first, then H2D once via cp.asarray. Avoiding @@ -102,7 +102,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface(): host_src[..., 1] = 200 # G host_src[..., 3] = 255 # A src = cp.asarray(host_src) - layer.submit_cuda_array(src) + layer.submit(src) info = session.render() assert info.frame_index == 0 @@ -114,7 +114,7 @@ def test_quad_layer_round_trip_via_cuda_array_interface(): r, g, b, _a = arr[cy, cx] assert g > r and g > b - # ── submit_cuda_array validation ────────────────────────────────── + # ── submit validation ────────────────────────────────── # Bad inputs built host-side then transferred via cp.asarray so we # don't depend on libnvrtc.so being present (cp.zeros / setitem # would JIT-compile a fill kernel and the GPU CI runner ships only @@ -123,21 +123,131 @@ def test_quad_layer_round_trip_via_cuda_array_interface(): # Wrong dtype: layer is RGBA8 (uint8); float32 source must reject. bad_dtype = cp.asarray(np.zeros((32, 32, 4), dtype=np.float32)) with pytest.raises(RuntimeError, match="typestr"): - layer.submit_cuda_array(bad_dtype) + layer.submit(bad_dtype) # Wrong shape: doesn't match layer resolution. bad_shape = cp.asarray(np.zeros((16, 16, 4), dtype=np.uint8)) with pytest.raises(RuntimeError, match="resolution"): - layer.submit_cuda_array(bad_shape) + layer.submit(bad_shape) # Wrong channel count: RGB instead of RGBA. bad_channels = cp.asarray(np.zeros((32, 32, 3), dtype=np.uint8)) with pytest.raises(RuntimeError, match="channel"): - layer.submit_cuda_array(bad_channels) + layer.submit(bad_channels) # Wrong rank: 2D for an RGBA layer. bad_rank = cp.asarray(np.zeros((32, 32), dtype=np.uint8)) with pytest.raises(RuntimeError, match="rank"): - layer.submit_cuda_array(bad_rank) + layer.submit(bad_rank) + + session.destroy() + + +def test_stereo_quad_layer_round_trip(): + """Stereo QuadLayer in offscreen mode renders the LEFT buffer (per + the documented single-view fallback). Distinct L/R sources confirm + we're sampling left and not accidentally swapping eyes.""" + cp = pytest.importorskip("cupy") + try: + if cp.cuda.runtime.getDeviceCount() == 0: + pytest.skip("no CUDA device") + except cp.cuda.runtime.CUDARuntimeError: + pytest.skip("no CUDA device") + + cfg = viz.VizSessionConfig() + cfg.mode = viz.DisplayMode.kOffscreen + cfg.window_width = 64 + cfg.window_height = 64 + session = viz.VizSession.create(cfg) + + layer_cfg = viz.QuadLayerConfig() + layer_cfg.name = "stereo_cam" + layer_cfg.resolution = viz.Resolution(32, 32) + layer_cfg.stereo = True + layer_cfg.stereo_baseline_mm = 65.0 # roughly human IPD + layer = session.add_quad_layer(layer_cfg) + + host_left = np.zeros((32, 32, 4), dtype=np.uint8) + host_left[..., 1] = 200 # G — distinguishable from right + host_left[..., 3] = 255 + host_right = np.zeros((32, 32, 4), dtype=np.uint8) + host_right[..., 2] = 200 # B — distinguishable from left + host_right[..., 3] = 255 + + left = cp.asarray(host_left) + right = cp.asarray(host_right) + layer.submit(left, right) + + session.render() + img = session.readback_to_host() + arr = np.asarray(img) + cx, cy = 32, 32 + r, g, b, _a = arr[cy, cx] + # Offscreen / single-view backends draw the LEFT buffer per docs. + # If we accidentally bound the right descriptor here, this pixel + # would be blue-dominant instead of green-dominant. + assert g > r and g > b, ( + f"expected green-dominant (left buffer), got ({r}, {g}, {b})" + ) + + session.destroy() + + +def test_stereo_invariants(): + """submit's strict mono/stereo contract: one-arg on stereo throws, + two-arg on mono throws. Validated end-to-end through the binding.""" + cp = pytest.importorskip("cupy") + try: + if cp.cuda.runtime.getDeviceCount() == 0: + pytest.skip("no CUDA device") + except cp.cuda.runtime.CUDARuntimeError: + pytest.skip("no CUDA device") + + cfg = viz.VizSessionConfig() + cfg.mode = viz.DisplayMode.kOffscreen + cfg.window_width = 64 + cfg.window_height = 64 + session = viz.VizSession.create(cfg) + + mono_cfg = viz.QuadLayerConfig() + mono_cfg.name = "mono" + mono_cfg.resolution = viz.Resolution(32, 32) + # mono_cfg.stereo defaults to False. + mono_layer = session.add_quad_layer(mono_cfg) + + stereo_cfg = viz.QuadLayerConfig() + stereo_cfg.name = "stereo" + stereo_cfg.resolution = viz.Resolution(32, 32) + stereo_cfg.stereo = True + stereo_layer = session.add_quad_layer(stereo_cfg) + + blank = cp.asarray(np.zeros((32, 32, 4), dtype=np.uint8)) + + # Mono + right buffer → reject. + with pytest.raises(RuntimeError, match="mono"): + mono_layer.submit(blank, blank) + + # Stereo + missing right → reject. + with pytest.raises(RuntimeError, match="stereo"): + stereo_layer.submit(blank) + + session.destroy() + + +def test_stereo_invalid_baseline_rejected(): + """Non-finite baseline values must be rejected at ctor time.""" + cfg = viz.VizSessionConfig() + cfg.mode = viz.DisplayMode.kOffscreen + cfg.window_width = 64 + cfg.window_height = 64 + session = viz.VizSession.create(cfg) + + layer_cfg = viz.QuadLayerConfig() + layer_cfg.name = "bad_baseline" + layer_cfg.resolution = viz.Resolution(32, 32) + layer_cfg.stereo = True + layer_cfg.stereo_baseline_mm = float("nan") + with pytest.raises((RuntimeError, ValueError), match="stereo_baseline_mm"): + session.add_quad_layer(layer_cfg) session.destroy()