From 11669059b0e9388b7577ddf3cb7c03477916da91 Mon Sep 17 00:00:00 2001
From: Jammy2211 <JNightingale2211@gmail.com>
Date: Thu, 28 May 2026 10:45:08 +0100
Subject: [PATCH] feat: first-class af.Nautilus search profiling + A100 HPC
 submit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the raw nautilus.Sampler wrappers in searches/ with a first-class
af.Nautilus profile that exercises the full PyAutoFit lifecycle:
visualization, samples I/O, search.summary, latent variables.

- Sweep matrix: (sampler × dataset_class × model × instrument × hardware ×
  precision). Sampler registry in _samplers.py is ready for Dynesty/Emcee/
  BlackJAX additions as one-function changes.
- Per-model n_live matches the SLaM canonical phases (200 for mge /
  point-source / parametric; 150 for pixelization / Delaunay / datacube).
- Datacube uses af.FactorGraphModel to combine N AnalysisInterferometer
  factors, mirroring autolens_workspace/scripts/multi/modeling.py.
- _metrics.attach_viz_timer wraps every visualize-family hook so the JSON
  splits total_wall_s into sampler_wall_s + viz_wall_s.
- force_pickle_overwrite=True + unique path_prefix per cell defeat the
  .completed-file resume that would otherwise return cached results
  across repeated sweep iterations.
- sweep.py: resume-by-default with --force override.
- aggregate.py: walks the 4-level (sampler/ds/model/instrument) tree and
  emits comparison.{json,png} per cell.
- hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64: SLURM submit for the
  HST MGE fp64 cell on A100, modeled on the existing likelihood-profiling
  submits in z_projects/profiling/hpc/batch_gpu/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hpc/batch_gpu/error/.gitignore                |   2 +
 hpc/batch_gpu/output/.gitignore               |   2 +
 .../submit_imaging_mge_a100_hst_fp64          |  49 ++
 searches/README.md                            | 185 ++++--
 searches/_metrics.py                          | 236 +++++---
 searches/_runner.py                           | 269 +++++++++
 searches/_samplers.py                         |  96 +++
 searches/_setup.py                            | 557 ++++++++++++++++--
 searches/aggregate.py                         | 256 ++++++++
 searches/nautilus/README.md                   |  46 --
 searches/nautilus/datacube/delaunay.py        |  28 +
 searches/nautilus/imaging/delaunay.py         |  24 +
 searches/nautilus/imaging/mge.py              |  24 +
 searches/nautilus/imaging/pixelization.py     |  24 +
 searches/nautilus/interferometer/delaunay.py  |  19 +
 searches/nautilus/interferometer/mge.py       |  24 +
 .../nautilus/interferometer/pixelization.py   |  19 +
 searches/nautilus/jax.py                      | 227 -------
 searches/nautilus/point_source/image_plane.py |  23 +
 .../nautilus/point_source/source_plane.py     |  23 +
 searches/nautilus/simple.py                   | 198 -------
 searches/sweep.py                             | 359 +++++++++++
 22 files changed, 2039 insertions(+), 651 deletions(-)
 create mode 100644 hpc/batch_gpu/error/.gitignore
 create mode 100644 hpc/batch_gpu/output/.gitignore
 create mode 100755 hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64
 create mode 100644 searches/_runner.py
 create mode 100644 searches/_samplers.py
 create mode 100644 searches/aggregate.py
 delete mode 100644 searches/nautilus/README.md
 create mode 100644 searches/nautilus/datacube/delaunay.py
 create mode 100644 searches/nautilus/imaging/delaunay.py
 create mode 100644 searches/nautilus/imaging/mge.py
 create mode 100644 searches/nautilus/imaging/pixelization.py
 create mode 100644 searches/nautilus/interferometer/delaunay.py
 create mode 100644 searches/nautilus/interferometer/mge.py
 create mode 100644 searches/nautilus/interferometer/pixelization.py
 delete mode 100644 searches/nautilus/jax.py
 create mode 100644 searches/nautilus/point_source/image_plane.py
 create mode 100644 searches/nautilus/point_source/source_plane.py
 delete mode 100644 searches/nautilus/simple.py
 create mode 100644 searches/sweep.py

diff --git a/hpc/batch_gpu/error/.gitignore b/hpc/batch_gpu/error/.gitignore
new file mode 100644
index 0000000..c1aefd2
--- /dev/null
+++ b/hpc/batch_gpu/error/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/hpc/batch_gpu/output/.gitignore b/hpc/batch_gpu/output/.gitignore
new file mode 100644
index 0000000..c1aefd2
--- /dev/null
+++ b/hpc/batch_gpu/output/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64 b/hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64
new file mode 100755
index 0000000..5f0f3ee
--- /dev/null
+++ b/hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64
@@ -0,0 +1,49 @@
+#!/bin/bash -l
+#
+# A100 first-class search profiling: searches/nautilus/imaging/mge × hst × fp64.
+#
+# Drives af.Nautilus end-to-end (visualization, samples I/O, search.summary)
+# on the HST imaging MGE model from the autolens_profiling/searches package.
+# Mirrors the resource budget of the sibling likelihood profiling submit
+# (z_projects/profiling/hpc/batch_gpu/submit_imaging_mge_a100_hst_fp64) but
+# allocates more wall time because a first-class fit runs the full Nautilus
+# convergence loop, not a one-shot likelihood evaluation.
+
+#SBATCH -J search_nautilus_imaging_mge_hst_fp64
+#SBATCH --partition=gpu
+#SBATCH --gres=gpu:1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=64gb
+#SBATCH --time=2:00:00
+#SBATCH -o output/output.%A.out
+#SBATCH -e error/error.%A.err
+#SBATCH --mail-type=END,FAIL
+#SBATCH --mail-user=james.w.nightingale@durham.ac.uk
+
+export AP_ROOT=/mnt/ral/jnightin/autolens_profiling
+source /mnt/ral/jnightin/PyAutoNSS/PyAutoNSS/bin/activate
+
+export JAX_PLATFORM_NAME=cuda
+export JAX_PLATFORMS=cuda,cpu
+export XLA_PYTHON_CLIENT_PREALLOCATE=false
+export JAX_ENABLE_X64=True
+export NUMBA_CACHE_DIR=/tmp/numba_cache
+export MPLCONFIGDIR=/tmp/matplotlib
+
+nvidia-smi
+
+echo "=========================================="
+date
+echo "Cell:       searches/nautilus/imaging/mge"
+echo "Instrument: hst"
+echo "Precision:  fp64"
+
+cd "$AP_ROOT" || exit 1
+python3 searches/nautilus/imaging/mge.py \
+    --instrument hst \
+    --config-name hpc_a100_fp64 \
+    --output-dir "$AP_ROOT/results/searches/nautilus/imaging/mge/hst"
+status=$?
+echo "Finished (exit status $status)."; date
+exit "$status"  # propagate the Python exit code so SLURM / --mail-type=FAIL see a failed fit
diff --git a/searches/README.md b/searches/README.md
index 6ab7d3c..26285d4 100644
--- a/searches/README.md
+++ b/searches/README.md
@@ -1,72 +1,159 @@
-# searches
+# `searches/` — first-class search profiling
 
-Sampler / search profiling for the PyAutoLens HST MGE lens-modelling likelihood. Each subfolder drives a single sampler family directly against the real likelihood — bypassing `af.NonLinearSearch` — so the per-sampler convergence characteristics (wall time, likelihood evaluations, posterior ESS, evals/time to ML) can be compared on identical footing.
+This section profiles **first-class PyAutoFit search objects** end-to-end:
+`af.Nautilus` today, with the registry shape ready for `af.DynestyStatic`,
+`af.BlackJAXNUTS`, `af.Emcee`, etc. Unlike `likelihood_runtime/` (which
+profiles `analysis.log_likelihood_function` in isolation), every cell here
+runs `search.fit(model=model, analysis=analysis)` — so visualization,
+samples I/O, `samples_info.json`, latent variables, and every other piece
+of PyAutoFit machinery is exercised and measured.
 
-## Why bypass `af.NonLinearSearch`?
+## Design
 
-`af.NonLinearSearch` adds caching, multi-process forking, output formatting, and result hierarchies that are valuable for production fits but obscure the underlying sampler's cost. The scripts in this section call the sampler library directly and instrument every likelihood evaluation through a shared `MLTracker`. The result is a clean apples-to-apples comparison of:
+| Dimension      | Values                                                                    |
+|----------------|---------------------------------------------------------------------------|
+| Sampler        | `nautilus` (more to come via `_samplers.SAMPLER_BUILDERS`)                 |
+| Dataset class  | `imaging`, `interferometer`, `point_source`, `datacube`                   |
+| Model type     | `mge`, `pixelization`, `delaunay`, `image_plane`, `source_plane`          |
+| Instrument     | per-dataset-class (HST/Euclid/JWST/AO; SMA/ALMA/ALMA-high/JVLA; simple)   |
+| Hardware       | `local_cpu`, `local_gpu`, `hpc_a100` (external dispatch)                  |
+| Precision      | `fp64`, `mp` (mixed precision via `al.Settings(use_mixed_precision=...)`) |
 
-- Wall time and likelihood-evaluation count to **Nautilus's default convergence** (`n_eff=10000`, `f_live=0.01`).
-- Per-evaluation likelihood cost (NumPy baseline vs JAX-JIT'd path).
-- Evals-to-ML and time-to-ML — the eval index and wall time at which the running max log L first came within 1 nat of the final maximum.
-
-## Shared helpers
-
-| File | Role |
-|------|------|
-| [`_setup.py`](./_setup.py) | Builds the HST imaging dataset, the MGE + Isothermal + ExternalShear lens model with an MGE source bulge, and the `AnalysisImaging` object. The dataset, mask, and model mirror the reference setup in [`likelihood/imaging/mge.py`](../likelihood/imaging/mge.py) so likelihood values are directly comparable across the two sections. |
-| [`_metrics.py`](./_metrics.py) | `MLTracker` — records the log-likelihood and wall time of every evaluation, computes evals-to-ML and time-to-ML headline numbers. Also offers `MLTracker.from_log_l_history` for samplers that JIT their likelihood and only expose log-L per dead/live point post hoc. |
-
-## Supported samplers
-
-| Sampler | Folder | Status | Notes |
-|---------|--------|--------|-------|
-| Nautilus | [`nautilus/`](./nautilus/README.md) | ✓ profiled | Both NumPy (`simple.py`) and JAX-JIT (`jax.py`) variants. |
-| Dynesty | _planned_ | not yet mirrored | Static nested sampling; reference scripts at `autolens_workspace_developer/searches_minimal/dynesty_simple.py`. |
-| Emcee | _planned_ | not yet mirrored | Affine-invariant ensemble MCMC. |
-| BlackJAX (NUTS, SMC) | _planned_ | not yet mirrored | Pure-JAX HMC family. Gradient pathology surfaced in upstream `sweep_findings.md`; HMC viability depends on first fixing NaN-gradient hot spots. |
-| NumPyro (ESS) | _planned_ | not yet mirrored | Ensemble slice sampler under JAX. |
-| PocoMC | _planned_ | not yet mirrored | Preconditioned Monte Carlo. |
-| NSS (simple, jit, grad) | _planned_ | not yet mirrored | Nested slice sampler; `nss_jit.py` shows VRAM ceiling on consumer GPUs (see `sweep_findings.md`). |
-| LBFGS | _planned_ | not yet mirrored | Not a sampler; serves as the maximum-likelihood reference point. |
-
-Each row above corresponds to one or more scripts under `autolens_workspace_developer/searches_minimal/`; the mirror migration here under their own follow-up prompts.
-
-## Versioned artifacts
-
-Each script writes a JSON + PNG pair to:
+Layout:
 
 ```
-results/searches/<sampler>/<script>_summary_v<al.__version__>.{json,png}
+searches/
+  README.md                 # this file
+  _setup.py                 # dataset/model/analysis dispatchers
+  _samplers.py              # sampler registry + per-(ds, model) n_live
+  _metrics.py               # viz wall-time interception + result reader
+  _runner.py                # shared driver (every leaf calls run_search)
+  sweep.py                  # matrix driver, resume-by-default
+  aggregate.py              # comparison.json + comparison.png per cell
+  nautilus/
+    imaging/{mge, pixelization, delaunay}.py
+    interferometer/{mge, pixelization, delaunay}.py
+    point_source/{image_plane, source_plane}.py
+    datacube/delaunay.py
 ```
 
-The JSON carries the structured timings + sampler config + best-fit summary. The PNG is a bar chart of the headline timings (wall time, time per eval, time to ML; plus JIT compile time on JAX scripts).
+## Key design choices
+
+**First-class only.** No more wrapping `nautilus.Sampler` directly. The
+old `simple.py` / `jax.py` scripts are deleted. Every cell goes through
+`af.Nautilus.fit(model, analysis)`, so visualization, output writes,
+sample I/O, and latent-variable computation are part of the profile.
+
+**SLaM-matched `n_live`.** Per `autolens_workspace/scripts/guides/modeling/
+slam_start_here.py`: MGE / point-source / parametric phases use
+`n_live=200` (matches `source_lp[1]`); pixelization / Delaunay phases
+use `n_live=150` (matches `source_pix[1]`).
+
+**`number_of_cores=1` always.** This profile measures per-evaluation
+end-to-end cost. Production scaling via `number_of_cores > 1` is a
+separate axis a future sweep can introduce.
+
+**JAX rows force `force_x1_cpu=True` and `use_jax_vmap=True`.** This is
+mandatory: `nautilus.Sampler` forking under multiprocessing corrupts
+JAX state. The trade-off is one batched evaluation per Nautilus step.
+
+**Visualization wall-time is split out.** `_metrics.attach_viz_timer`
+wraps every visualize-family hook on the analysis (`visualize`,
+`visualize_combined`, `visualize_before_fit`,
+`visualize_before_fit_combined`) plus the search's `plot_results`. The
+JSON reports `total_wall_s`, `viz_wall_s` and the derived
+`sampler_wall_s = total_wall_s - viz_wall_s` so you can ask both "how
+long did the full first-class fit take?" and "how much was viz?".
+
+**`force_pickle_overwrite=True` on every search.** Defeats the
+`.completed`-file resume that would otherwise return cached results
+the second time you run the same `path_prefix`. Combined with
+unique-per-(sampler, ds, model, instrument, config) `path_prefix`, this
+keeps repeated sweep runs honest.
+
+## Datacube multi-channel fitting
+
+`datacube/delaunay.py` fits `_DATACUBE_N_CHANNELS` (default 4) identical
+interferometer channels via `af.FactorGraphModel`. Each channel becomes
+its own `al.AnalysisInterferometer`, wrapped in an `af.AnalysisFactor`
+paired with `model.copy()`, then combined under a single global model —
+the same pattern documented in
+`autolens_workspace/scripts/multi/modeling.py`. The N channels are
+identical copies of the per-instrument dataset; the profile measures
+cube-cost scaling, not band-wavelength variation.
+
+To change the channel count, edit `_DATACUBE_N_CHANNELS` in `_setup.py`
+(34 matches the existing ALMA cube fiducial; 4 keeps profiling
+turnaround sane).
+
+## What this *doesn't* profile (yet)
+
+- **Pool scaling.** `number_of_cores > 1` sweeps are future work.
+- **Adapt-image regeneration across phases.** Pixelization / Delaunay
+  cells use a truth-derived `lensed_source.fits` cached next to the
+  dataset. Production SLaM regenerates this between phases.
+- **A100 dispatch.** The local sweep generates only CPU and laptop-GPU
+  rows. The `hpc_a100_fp64` / `hpc_a100_mp` config names exist in
+  `sweep.py` for parity with `likelihood_runtime/`; the actual dispatch
+  to RAL HPC happens externally (same mechanism as the likelihood
+  sweep).
+- **Samplers other than Nautilus.** The registry is in place; adding
+  `dynesty`, `blackjax_nuts`, `emcee`, etc. is one function per sampler
+  in `_samplers.py`.
+
+## Running
+
+Single cell (CPU NumPy, fastest path):
 
-Old versions are retained alongside new ones; Phase 4's dashboard surfaces the latest per axis.
-
-## Running a script
+```bash
+python searches/nautilus/imaging/mge.py \
+    --instrument hst --config-name local_cpu_fp64
+```
 
-From the repo root (cwd matters because `_setup.build_dataset()` resolves `dataset/imaging/hst/` relative to the repo root via `Path(__file__).resolve().parent.parent`):
+Single cell (laptop GPU, JAX-vmap):
 
 ```bash
-cd autolens_profiling
-python searches/nautilus/simple.py
-python searches/nautilus/jax.py
+JAX_PLATFORM_NAME=cuda JAX_PLATFORMS=cuda,cpu \
+XLA_PYTHON_CLIENT_MEM_FRACTION=0.5 \
+python searches/nautilus/imaging/mge.py \
+    --instrument hst --config-name local_gpu_fp64
 ```
 
-Or as modules:
+Full sweep (every cell × instrument × config) — warning, this is long:
 
 ```bash
-python -m searches.nautilus.simple
-python -m searches.nautilus.jax
+python searches/sweep.py
 ```
 
-Both invocation styles work — each script injects the repo root into `sys.path` before importing `searches._{setup,metrics}` for robustness.
+Iteration sweep (one cell, one instrument, CPU only):
 
-**Requirements:** `nautilus-sampler` for the Nautilus scripts (`pip install nautilus-sampler`). The JAX variant additionally needs a working JAX install.
+```bash
+python searches/sweep.py \
+    --only nautilus/imaging/mge \
+    --instrument hst \
+    --skip-gpu --skip-mp
+```
 
-**Codex / sandboxed runs:**
+Aggregate post-sweep:
 
 ```bash
-NUMBA_CACHE_DIR=/tmp/numba_cache MPLCONFIGDIR=/tmp/matplotlib python searches/nautilus/simple.py
+python searches/aggregate.py
+```
+
+## Output layout
+
 ```
+results/searches/
+  <sampler>/<dataset_class>/<model>/<instrument>/
+    <config_name>.json         # per-config headline metrics
+    <config_name>.png          # per-config bar chart
+    <config_name>.log          # subprocess stdout/stderr (sweep only)
+    comparison.json            # cross-config aggregation (aggregate.py)
+    comparison.png             # cross-config bar chart (aggregate.py)
+```
+
+The PyAutoFit search itself writes its own output (`samples.csv`,
+`samples_info.json`, `search.summary`, visualization, ...) to the
+autoconf `output_path` under `path_prefix=searches/<sampler>/
+<dataset_class>/<model>/<instrument>`. The metric JSON+PNG above live
+separately under `results/searches/`.
diff --git a/searches/_metrics.py b/searches/_metrics.py
index 6edb6e4..a5e4470 100644
--- a/searches/_metrics.py
+++ b/searches/_metrics.py
@@ -1,81 +1,165 @@
-"""
-Shared per-evaluation tracker used by every script in this folder.
-
-Wrap a log-likelihood callable with ``MLTracker.wrap`` (or call
-``tracker.record(log_l)`` manually) and the tracker stores the log L and
-wall-clock time of every evaluation. After the run, ``finalise`` returns
-the eval index and wall time at which the running max log L first came
-within ``tolerance`` nats of the final maximum -- the "evals to ML" /
-"time to ML" headline numbers used in the comparison.
-
-For JAX paths where the likelihood runs inside ``jax.jit`` (and a Python
-callback is impossible without forcing a host round-trip), use
-``MLTracker.from_log_l_history`` instead with the full per-eval log L
-sequence reconstructed from the sampler's dead-point + live-point state.
+"""Metrics collected during a first-class PyAutoFit search profiling run.
+
+The runner wraps an analysis instance via ``attach_viz_timer``, runs the
+search, then calls ``collect_metrics`` to assemble the per-cell result dict.
+
+Two metric sources:
+
+1. **Visualization wall-time** — accumulated across every call to the
+   analysis's visualize-family methods plus the search's
+   ``plot_results``. The framework writes a per-update visualization
+   time into ``search.summary`` but only the *last* update's value, so
+   accumulating in-process is the only way to get a total.
+
+2. **Sampler/search statistics** — read post-hoc from the returned
+   ``Result.samples`` (log_evidence, max log L, posterior count, total
+   samples). The framework already persists these to disk; we just
+   surface them in the JSON.
+
+Viz wall-time is intentionally *separate* from total search wall-time so
+the JSON can answer both questions: "how long did the full first-class
+fit take?" and "how much of that was visualization?".
 """
 
 from __future__ import annotations
 
 import time
-from typing import Callable, Optional, Sequence
-
-
-class MLTracker:
-    """Record per-evaluation log L and wall time, compute evals/time to ML."""
-
-    def __init__(self):
-        self.t0 = time.time()
-        self.history_log_l: list[float] = []
-        self.history_wall: list[float] = []
-
-    def record(self, log_l: float) -> None:
-        self.history_log_l.append(float(log_l))
-        self.history_wall.append(time.time() - self.t0)
-
-    def wrap(self, fn: Callable) -> Callable:
-        """Decorate a log-likelihood callable so every call is recorded."""
-
-        def wrapped(*args, **kwargs):
-            log_l = fn(*args, **kwargs)
-            self.record(log_l)
-            return log_l
-
-        return wrapped
-
-    def finalise(
-        self, max_log_l: Optional[float] = None, tolerance: float = 1.0
-    ) -> tuple[Optional[int], Optional[float]]:
-        """Return (evals_to_ml, time_to_ml) — the eval index and wall time at
-        which the running max first came within ``tolerance`` nats of the
-        final maximum. ``(None, None)`` if no evaluations were recorded."""
-        if not self.history_log_l:
-            return None, None
-        if max_log_l is None:
-            max_log_l = max(self.history_log_l)
-        target = max_log_l - tolerance
-        for i, log_l in enumerate(self.history_log_l):
-            if log_l >= target:
-                return i + 1, self.history_wall[i]
-        return None, None
-
-    @staticmethod
-    def from_log_l_history(
-        log_l_history: Sequence[float],
-        total_sampling_time: float,
-        tolerance: float = 1.0,
-    ) -> tuple[Optional[int], Optional[float]]:
-        """Variant for samplers that run their likelihood inside JIT and only
-        expose log L per dead/live point post hoc. ``time_to_ml`` is linearly
-        interpolated from the total sampling time -- evaluations are assumed
-        evenly distributed over the run, which is a reasonable approximation
-        for nested sampling (each step is roughly the same cost)."""
-        if not log_l_history:
-            return None, None
-        max_log_l = max(log_l_history)
-        target = max_log_l - tolerance
-        for i, log_l in enumerate(log_l_history):
-            if log_l >= target:
-                evals_to_ml = i + 1
-                time_to_ml = total_sampling_time * (evals_to_ml / len(log_l_history))
-                return evals_to_ml, time_to_ml
-        return None, None
+import types
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class VizTimer:
+    """Accumulate wall-time spent inside wrapped visualize callables.
+
+    Used as a re-entrant context manager: every ``__enter__`` pushes a
+    start time onto a stack and only the outermost ``__exit__`` adds to
+    ``total_s`` / ``n_calls``, so nested ``visualize_*`` paths (combined →
+    individual) are counted once rather than twice.
+    """
+
+    total_s: float = 0.0  # accumulated seconds, outermost calls only
+    n_calls: int = 0  # number of outermost calls accumulated
+    _stack: list[float] = field(default_factory=list)  # perf_counter() start per open call
+
+    def __enter__(self) -> "VizTimer":
+        self._stack.append(time.perf_counter())
+        return self
+
+    def __exit__(self, exc_type, exc, tb) -> None:
+        if not self._stack:
+            return
+        start = self._stack.pop()
+        # Only the outermost frame contributes to the accumulator so we
+        # don't double-count if visualize_combined() internally calls
+        # visualize().
+        if not self._stack:
+            self.total_s += time.perf_counter() - start
+            self.n_calls += 1
+
+
+def _wrap_method(target: Any, attr: str, timer: VizTimer) -> None:
+    """Wrap ``target.attr`` so each call accumulates wall-time into ``timer``.
+
+    No-op if the attribute does not exist (older PyAutoLens analyses may
+    not implement every visualize-family hook).
+    """
+    fn = getattr(target, attr, None)
+    if fn is None:
+        return
+
+    def wrapped(self, *args, **kwargs):  # ``self`` is ignored: ``fn`` was fetched from ``target``
+        with timer:
+            return fn(*args, **kwargs)
+
+    setattr(target, attr, types.MethodType(wrapped, target))  # rebinds the attribute on ``target`` itself
+
+
+def attach_viz_timer(analysis: Any, search: Any) -> VizTimer:
+    """Wrap every visualize-family hook on ``analysis`` and ``search``.
+
+    Hooks captured:
+
+    - ``analysis.visualize_before_fit`` and
+      ``analysis.visualize_before_fit_combined`` — fire once at the
+      start of the search, *outside* the SearchUpdater's per-update
+      timer.
+    - ``analysis.visualize`` and ``analysis.visualize_combined`` — fire
+      every full update during the sampling loop.
+    - ``search.plot_results`` — search-specific plots (e.g. Nautilus
+      corner plots via anesthetic), called from the SearchUpdater.
+
+    Returns the timer; read ``total_s`` / ``n_calls`` after the fit completes.
+    """
+    timer = VizTimer()
+    for attr in (
+        "visualize_before_fit",
+        "visualize_before_fit_combined",
+        "visualize",
+        "visualize_combined",
+    ):
+        _wrap_method(analysis, attr, timer)  # skipped when the hook is absent
+    _wrap_method(search, "plot_results", timer)
+    return timer
+
+
+@dataclass
+class RunMetrics:
+    """Headline numbers a profiling cell writes to its JSON."""
+
+    total_wall_s: float  # seconds measured around search.fit()
+    viz_wall_s: float  # seconds accumulated inside the wrapped visualize hooks
+    sampler_wall_s: float  # total_wall_s - viz_wall_s, clamped at 0
+    likelihood_evals: int  # samples.total_samples
+    time_per_eval_ms: float  # sampler_wall_s per evaluation, in ms; NaN with no evaluations
+    log_evidence: float  # NaN when the samples do not provide it
+    max_log_likelihood: float  # NaN when the samples do not provide it
+    posterior_samples: int  # len(samples.parameter_lists); 0 when unavailable
+
+
+def collect_metrics(
+    *,
+    result: Any,
+    total_wall_s: float,
+    viz_wall_s: float,
+) -> RunMetrics:
+    """Assemble the headline metric block from a finished ``search.fit`` result.
+
+    ``sampler_wall_s`` is ``total_wall_s - viz_wall_s`` clamped at zero.
+    Statistics missing from ``result.samples`` fall back to NaN (or 0 for
+    ``posterior_samples``); ``time_per_eval_ms`` is NaN with no samples.
+    """
+    samples = result.samples
+    total_samples = int(samples.total_samples)
+
+    try:
+        log_evidence = float(samples.log_evidence)
+    except (AttributeError, TypeError):
+        log_evidence = float("nan")
+
+    try:
+        max_log_likelihood = float(samples.max_log_likelihood_sample.log_likelihood)
+    except AttributeError:
+        max_log_likelihood = float("nan")
+
+    try:
+        posterior_samples = int(len(samples.parameter_lists))
+    except (AttributeError, TypeError):
+        posterior_samples = 0
+
+    sampler_wall_s = max(total_wall_s - viz_wall_s, 0.0)
+    # NaN rather than a division by zero when the sampler reports no samples.
+    per_eval_s = sampler_wall_s / total_samples if total_samples > 0 else float("nan")
+    time_per_eval_ms = per_eval_s * 1e3
+
+    return RunMetrics(
+        total_wall_s=total_wall_s,
+        viz_wall_s=viz_wall_s,
+        sampler_wall_s=sampler_wall_s,
+        likelihood_evals=total_samples,
+        time_per_eval_ms=time_per_eval_ms,
+        log_evidence=log_evidence,
+        max_log_likelihood=max_log_likelihood,
+        posterior_samples=posterior_samples,
+    )
diff --git a/searches/_runner.py b/searches/_runner.py
new file mode 100644
index 0000000..1e1626e
--- /dev/null
+++ b/searches/_runner.py
@@ -0,0 +1,269 @@
+"""Shared driver for a single first-class search profiling cell.
+
+Every leaf script under ``searches/<sampler>/<dataset_class>/<model>.py``
+calls :func:`run_search` with its cell identity; this module handles
+everything else — CLI parsing, smoke short-circuit, dataset/model/analysis
+build, viz-time instrumentation, ``search.fit()``, metric collection, and
+JSON+PNG output.
+
+The split between this runner and the per-leaf scripts is deliberate: every
+sampler × cell combination shares the same plumbing, so the leaf script is
+two lines (import + call) and adding a new sampler is one entry in
+``_samplers.SAMPLER_BUILDERS``.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Any
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt  # noqa: E402
+
+import autolens as al  # noqa: E402
+
+_WORKSPACE_ROOT = Path(__file__).resolve().parents[1]
+if str(_WORKSPACE_ROOT) not in sys.path:
+    sys.path.insert(0, str(_WORKSPACE_ROOT))
+
+from _profile_cli import (  # noqa: E402
+    device_info_dict,
+    parse_profile_cli,
+    resolve_output_paths,
+)
+from searches._metrics import attach_viz_timer, collect_metrics  # noqa: E402
+from searches._samplers import SAMPLER_BUILDERS, n_live_for  # noqa: E402
+from searches._setup import build_for_cell, format_best_fit  # noqa: E402
+
+
+_DEFAULT_INSTRUMENTS: dict[str, str] = {  # dataset_class -> instrument used when none is supplied
+    "imaging": "hst",
+    "interferometer": "sma",
+    "point_source": "simple",
+    "datacube": "sma",
+}
+
+
+def run_search(
+    *,
+    sampler: str,
+    dataset_class: str,
+    model_type: str,
+    default_instrument: str | None = None,
+) -> None:
+    """Run one (sampler, dataset_class, model_type, instrument, config) cell.
+
+    Designed to be called from a leaf script with no extra plumbing. All
+    behavioural toggles come from CLI flags parsed by ``parse_profile_cli``.
+    """
+    if os.environ.get("AUTOLENS_PROFILING_SMOKE") == "1":
+        # Phase-5 lint smoke: confirm imports + module setup succeed
+        # without paying for dataset + sampling.
+        print(
+            f"[smoke] searches/{sampler}/{dataset_class}/{model_type}.py: "
+            f"imports + module setup OK; exiting."
+        )
+        return
+
+    cli = parse_profile_cli()
+    instrument = (
+        cli.instrument or default_instrument or _DEFAULT_INSTRUMENTS[dataset_class]
+    )
+    config_name = cli.config_name or "default"
+    use_jax = _decide_use_jax()
+
+    print(
+        f"\n--- searches/{sampler}/{dataset_class}/{model_type}"
+        f" [{instrument}, {config_name}, use_jax={use_jax},"
+        f" mp={cli.use_mixed_precision}] ---"
+    )
+    print(f"  n_live: {n_live_for(dataset_class, model_type)}")
+
+    print("  Building dataset / model / analysis...")
+    dataset, model, analysis = build_for_cell(
+        dataset_class=dataset_class,
+        model_type=model_type,
+        instrument=instrument,
+        use_jax=use_jax,
+        use_mixed_precision=cli.use_mixed_precision,
+    )
+    print(f"  Model free parameters: {model.total_free_parameters}")
+
+    builder = SAMPLER_BUILDERS[sampler]
+    search = builder(
+        sampler=sampler,
+        dataset_class=dataset_class,
+        model_type=model_type,
+        instrument=instrument,
+        config_name=config_name,
+        use_jax=use_jax,
+    )
+
+    # Capture visualization wall-time across the full fit (pre-fit + every
+    # update + search-side plot_results).
+    viz_timer = attach_viz_timer(analysis, search)
+
+    print("  Running search.fit() ...")
+    t0 = time.perf_counter()  # monotonic, the same clock VizTimer accumulates with
+    result = search.fit(model=model, analysis=analysis)
+    total_wall_s = time.perf_counter() - t0
+
+    # FactorGraphModel fits (datacube) return a list of per-factor Result
+    # objects, all backed by the same global posterior — take the first
+    # for sample stats, then summarise the per-channel best fit from the
+    # global instance.
+    primary_result = result[0] if isinstance(result, list) else result
+
+    metrics = collect_metrics(
+        result=primary_result,
+        total_wall_s=total_wall_s,
+        viz_wall_s=viz_timer.total_s,
+    )
+
+    try:
+        best_instance = primary_result.samples.max_log_likelihood_sample.instance
+        best_fit = format_best_fit(best_instance)
+    except Exception as exc:
+        best_fit = f"(unavailable: {exc!r})"
+
+    summary = _build_summary(
+        sampler=sampler,
+        dataset_class=dataset_class,
+        model_type=model_type,
+        instrument=instrument,
+        config_name=config_name,
+        cli=cli,
+        use_jax=use_jax,
+        n_free_params=int(model.total_free_parameters),
+        n_live=n_live_for(dataset_class, model_type),
+        metrics=metrics,
+        viz_n_calls=viz_timer.n_calls,
+        best_fit=best_fit,
+    )
+
+    _print_summary(summary, metrics)
+
+    default_dir = (
+        _WORKSPACE_ROOT
+        / "results"
+        / "searches"
+        / sampler
+        / dataset_class
+        / model_type
+        / instrument
+    )
+    json_path, png_path = resolve_output_paths(
+        cli, default_dir=default_dir, default_basename=config_name
+    )
+    json_path.write_text(json.dumps(summary, indent=2))
+    print(f"\n  Results JSON saved to: {json_path}")
+
+    _render_png(metrics, summary, png_path)
+    print(f"  Bar chart saved to:    {png_path}")
+
+
+def _decide_use_jax() -> bool:
+    """Return True unless ``PYAUTO_DISABLE_JAX=1`` is set in the environment.
+
+    Mirrors the gate already in PyAutoFit (`PYAUTO_DISABLE_JAX=1`). The
+    search-profiling sweep usually wants JAX on for every config except a
+    pure-NumPy CPU baseline, which a sweep config can request by setting
+    the env var (not currently default-on).
+    """
+    return os.environ.get("PYAUTO_DISABLE_JAX") != "1"
+
+
+def _build_summary(
+    *,
+    sampler: str,
+    dataset_class: str,
+    model_type: str,
+    instrument: str,
+    config_name: str,
+    cli: Any,
+    use_jax: bool,
+    n_free_params: int,
+    n_live: int,
+    metrics: Any,
+    viz_n_calls: int,
+    best_fit: str,
+) -> dict:
+    return {
+        "sampler": sampler,
+        "dataset_class": dataset_class,
+        "model": model_type,
+        "instrument": instrument,
+        "config_name": config_name,
+        "version": al.__version__,
+        "device": device_info_dict(),
+        "use_mixed_precision": bool(cli.use_mixed_precision),
+        "sampler_config": {  # NOTE(review): restated here, not read back from the search object
+            "n_live": n_live,
+            "n_batch": 100,  # NOTE(review): hard-coded; confirm it matches the sampler builder
+            "number_of_cores": 1,  # NOTE(review): hard-coded; confirm it matches the sampler builder
+            "use_jax_vmap": use_jax,
+            "force_x1_cpu": use_jax,
+            "iterations_per_update": 3 * n_live,  # NOTE(review): confirm against the sampler builder
+        },
+        "model_summary": {
+            "free_parameters": n_free_params,
+            "best_fit": best_fit,
+        },
+        "results": {
+            "log_evidence": metrics.log_evidence,
+            "max_log_likelihood": metrics.max_log_likelihood,
+            "posterior_samples": metrics.posterior_samples,
+        },
+        "performance": {
+            "total_wall_s": metrics.total_wall_s,
+            "viz_wall_s": metrics.viz_wall_s,
+            "viz_n_calls": viz_n_calls,
+            "sampler_wall_s": metrics.sampler_wall_s,
+            "likelihood_evals": metrics.likelihood_evals,
+            "time_per_eval_ms": metrics.time_per_eval_ms,
+        },
+    }
+
+
+def _print_summary(summary: dict, metrics: Any) -> None:
+    print("\n" + "=" * 70)
+    print(
+        f"SEARCH SUMMARY — {summary['sampler']}/{summary['dataset_class']}/"
+        f"{summary['model']} [{summary['instrument']}, {summary['config_name']}]"
+    )
+    print("=" * 70)
+    print(f"  Best fit:           {summary['model_summary']['best_fit']}")
+    print(f"  Log evidence:       {metrics.log_evidence:.4f}")  # a NaN value prints as "nan"
+    print(f"  Max log L:          {metrics.max_log_likelihood:.4f}")  # a NaN value prints as "nan"
+    print(f"  Posterior samples:  {metrics.posterior_samples}")
+    print(f"  Likelihood evals:   {metrics.likelihood_evals}")
+    print(f"  Total wall:         {metrics.total_wall_s:.2f} s")
+    print(f"  Viz wall:           {metrics.viz_wall_s:.2f} s")
+    print(f"  Sampler wall:       {metrics.sampler_wall_s:.2f} s")
+    print(f"  Time per eval:      {metrics.time_per_eval_ms:.3f} ms")
+
+
+def _render_png(metrics: Any, summary: dict, png_path: Path) -> None:
+    labels = ["total_wall (s)", "sampler_wall (s)", "viz_wall (s)", "time_per_eval (ms)"]
+    values = [
+        metrics.total_wall_s,
+        metrics.sampler_wall_s,
+        metrics.viz_wall_s,
+        metrics.time_per_eval_ms,
+    ]
+    fig, ax = plt.subplots(figsize=(8, 3))  # one shared axis: the s and ms bars use the same scale
+    ax.barh(labels, values, color=["#4C72B0", "#55A868", "#C44E52", "#8172B2"])
+    ax.set_title(
+        f"{summary['sampler']} {summary['dataset_class']}/{summary['model']} "
+        f"[{summary['instrument']}, {summary['config_name']}] — v{summary['version']}",
+        fontsize=10,
+    )
+    fig.tight_layout()
+    fig.savefig(png_path, dpi=120)
+    plt.close(fig)  # release the figure once it has been written
diff --git a/searches/_samplers.py b/searches/_samplers.py
new file mode 100644
index 0000000..8aebb20
--- /dev/null
+++ b/searches/_samplers.py
@@ -0,0 +1,96 @@
+"""Sampler factories for ``searches/``.
+
+A small registry that maps sampler name → factory function. Every factory
+returns a first-class PyAutoFit search object (``af.Nautilus`` today,
+``af.DynestyStatic`` / ``af.Emcee`` / ``af.BlackJAXNUTS`` / ... in future).
+
+The runner imports ``SAMPLER_BUILDERS`` and dispatches without per-sampler
+branching elsewhere. Adding a new sampler is a single function + one dict
+row.
+
+The per-(dataset_class, model_type) ``n_live`` values mirror the SLaM
+pipeline canonical settings in
+``autolens_workspace/scripts/guides/modeling/slam_start_here.py`` —
+``source_lp[1]`` uses ``n_live=200`` (MGE / parametric sources) and
+``source_pix[1]`` uses ``n_live=150`` (pixelization / Delaunay). Point-
+source phases are parametric like ``source_lp[1]`` so use 200; datacube
+Delaunay matches imaging Delaunay at 150.
+"""
+
+from __future__ import annotations
+
+from typing import Callable
+
+import autofit as af
+
+
+# (dataset_class, model_type) -> n_live. Matches the SLaM defaults so a
+# profiling row is comparable to a real source phase.
+_N_LIVE: dict[tuple[str, str], int] = {
+    ("imaging", "mge"): 200,
+    ("imaging", "pixelization"): 150,
+    ("imaging", "delaunay"): 150,
+    ("interferometer", "mge"): 200,
+    ("interferometer", "pixelization"): 150,
+    ("interferometer", "delaunay"): 150,
+    ("point_source", "image_plane"): 200,
+    ("point_source", "source_plane"): 200,
+    ("datacube", "delaunay"): 150,  # same value as the imaging / interferometer Delaunay rows
+}
+
+
+def n_live_for(dataset_class: str, model_type: str) -> int:
+    """Return the ``_N_LIVE`` preset for a cell; raise ``KeyError`` naming the pair if it is absent."""
+    try:
+        return _N_LIVE[(dataset_class, model_type)]
+    except KeyError as exc:
+        raise KeyError(  # re-raised with a message that says where to add the missing row
+            f"No n_live preset for ({dataset_class!r}, {model_type!r}). "
+            f"Add a row to ``_N_LIVE`` in ``searches/_samplers.py``."
+        ) from exc
+
+
+def build_nautilus(
+    *,
+    sampler: str,
+    dataset_class: str,
+    model_type: str,
+    instrument: str,
+    config_name: str,
+    use_jax: bool,
+) -> af.Nautilus:
+    """Construct a first-class ``af.Nautilus`` search for one profiling cell.
+
+    Profiling-specific choices:
+
+    - ``number_of_cores=1`` for every config so what's measured is per-
+      evaluation cost, not pool throughput. Production scaling via
+      ``number_of_cores > 1`` is a separate sweep axis.
+    - ``force_x1_cpu=use_jax`` because ``nautilus.Sampler`` would fork a
+      multiprocessing pool and corrupt JAX state otherwise.
+    - ``use_jax_vmap=use_jax`` so JAX rows get the batched-evaluation
+      win and NumPy rows get the standard per-sample path.
+    - ``force_pickle_overwrite=True`` so re-running the same cell does
+      not short-circuit via a stale ``.completed`` file from a prior
+      sweep iteration.
+    - ``iterations_per_update`` set explicitly so the visualization
+      cadence does not silently change across PyAutoFit versions.
+    """
+    n_live = n_live_for(dataset_class, model_type)  # raises KeyError for a cell with no preset
+    return af.Nautilus(
+        name=config_name,
+        path_prefix=f"searches/{sampler}/{dataset_class}/{model_type}/{instrument}",
+        n_live=n_live,
+        n_batch=100,  # NOTE(review): fixed for every cell and not covered by the docstring — confirm
+        number_of_cores=1,
+        force_x1_cpu=use_jax,
+        use_jax_vmap=use_jax,
+        force_pickle_overwrite=True,
+        iterations_per_update=3 * n_live,  # update cadence scales with the live-point count
+    )
+
+
+SamplerBuilder = Callable[..., af.NonLinearSearch]  # factories take keyword arguments, as build_nautilus does
+SAMPLER_BUILDERS: dict[str, SamplerBuilder] = {
+    "nautilus": build_nautilus,  # sampler name -> factory
+}
diff --git a/searches/_setup.py b/searches/_setup.py
index 197f9a6..ea19886 100644
--- a/searches/_setup.py
+++ b/searches/_setup.py
@@ -1,108 +1,555 @@
 """
-Shared setup for the ``searches/`` profiling scripts.
+Shared dataset/model/analysis builders for the ``searches/`` profiling scripts.
 
-Builds the HST imaging dataset, the MGE + Isothermal + ExternalShear lens model
-with an MGE source bulge, and the ``AnalysisImaging`` object used by every
-sampler in this section. The dataset, mask, and model mirror the reference setup
-in ``likelihood/imaging/mge.py`` so the likelihood value is directly comparable
-across the two sections.
+Generalises across the cells defined in
+``autolens_profiling/instruments/{imaging,interferometer}.py`` and the
+point-source presets in ``simulators/point_source.py``, with model-type
+dispatch across ``mge`` / ``pixelization`` / ``delaunay`` (and point-source
+``image_plane`` / ``source_plane``).
 
-Usage
------
+The builders use **uniform priors** rather than the ``GaussianPrior``-near-truth
+pattern that the ``likelihood_runtime/`` scripts use. The likelihood scripts are
+profiling deterministic per-call cost at the truth; the search scripts need the
+sampler to actually search a realistic prior volume so its convergence cost
+reflects production use.
 
-    from searches._setup import build_dataset, build_model, build_analysis
+Pixelization / Delaunay sources consume a truth-derived adapt image cached
+next to the dataset as ``lensed_source.fits`` (built by
+``_adapt_image_util.adapt_image_for_dataset`` on first call). This is a
+profiling-convenience simplification — production SLaM regenerates the adapt
+image across phases.
 
-    dataset = build_dataset()
-    model = build_model(mask_radius=3.5)
-    analysis = build_analysis(dataset, use_jax=False)
+Usage::
+
+    from searches._setup import build_for_cell
+
+    dataset, model, analysis = build_for_cell(
+        dataset_class="imaging",
+        model_type="mge",
+        instrument="hst",
+        use_jax=True,
+        use_mixed_precision=False,
+    )
 """
 
-from pathlib import Path
+from __future__ import annotations
 
-import numpy as np
+import sys
+from pathlib import Path
+from typing import Any, Optional
 
 import autofit as af
 import autolens as al
 
-_WORKSPACE_ROOT = Path(__file__).resolve().parent.parent
-_DATASET_SUBPATH = Path("dataset") / "imaging" / "hst"
+_WORKSPACE_ROOT = Path(__file__).resolve().parents[1]  # autolens_profiling/
+
+# ``_adapt_image_util``, ``_profile_cli``, ``instruments`` and ``simulators`` are imported from the workspace root.
+if str(_WORKSPACE_ROOT) not in sys.path:
+    sys.path.insert(0, str(_WORKSPACE_ROOT))
+from _adapt_image_util import adapt_image_for_dataset  # noqa: E402
+from _profile_cli import auto_simulate_if_missing  # noqa: E402
+from instruments.imaging import INSTRUMENTS as _IMAGING_INSTRUMENTS  # noqa: E402
+from instruments.interferometer import (  # noqa: E402
+    INSTRUMENTS as _INTERFEROMETER_INSTRUMENTS,
+)
+from simulators.point_source import INSTRUMENTS as _POINT_SOURCE_INSTRUMENTS  # noqa: E402
+
+
+_PIXELIZATION_MESH_SHAPE: tuple[int, int] = (39, 39)  # 1521 source pixels — production fiducial
+_HILBERT_PIXELS: int = 1500  # ``pixels`` passed to the Hilbert image mesh in ``_delaunay_model``
+_MGE_TOTAL_GAUSSIANS: int = 20  # ``source_lp[1]`` SLaM fiducial; lighter than likelihood_runtime's 60
+_DATACUBE_N_CHANNELS: int = 4  # matches the "quick iteration" value in likelihood_runtime/datacube/delaunay.py
+
+
+# -----------------------------------------------------------------------------
+# Top-level dispatcher
+# -----------------------------------------------------------------------------
+
+
+def build_for_cell(
+    *,
+    dataset_class: str,
+    model_type: str,
+    instrument: str,
+    use_jax: bool,
+    use_mixed_precision: bool = False,
+) -> tuple[Any, Any, Any]:
+    """Build dataset, model and analysis for one profiling cell.
+
+    Returns ``(dataset, model, analysis)`` with all per-cell plumbing already
+    attached: adapt images for pix/delaunay and the point_source solver on the
+    analysis, the transformer choice on the interferometer dataset.
 
-PIXEL_SCALE = 0.05
-MASK_RADIUS = 3.5
+    Datacube cells return ``(dataset_list, factor_graph.global_prior_model,
+    factor_graph)`` — the search treats the factor graph as both the model
+    source and the analysis, per the multi-dataset pattern in
+    ``autolens_workspace/scripts/multi/modeling.py``.
+    """
+    if dataset_class == "datacube":  # multi-channel cells take the factor-graph path
+        return _build_for_datacube(
+            model_type=model_type,
+            instrument=instrument,
+            use_jax=use_jax,
+            use_mixed_precision=use_mixed_precision,
+        )
+
+    dataset, dataset_path = _build_dataset(dataset_class, instrument)
+    mask_radius = _mask_radius_for(dataset_class, instrument)
+    model = _build_model(dataset_class, model_type, mask_radius=mask_radius)
+    adapt_images = _adapt_images_for(  # None unless the source is a pixelization / Delaunay mesh
+        dataset_class, model_type, dataset_path=dataset_path, dataset=dataset
+    )
+    analysis = _build_analysis(
+        dataset_class=dataset_class,
+        model_type=model_type,
+        dataset=dataset,
+        use_jax=use_jax,
+        use_mixed_precision=use_mixed_precision,
+        adapt_images=adapt_images,
+    )
+    return dataset, model, analysis
 
 
-def build_dataset(mask_radius: float = MASK_RADIUS) -> al.Imaging:
-    """Load the HST imaging dataset with mask + radial-bin over-sampling applied."""
-    dataset_path = _DATASET_SUBPATH
+def _build_for_datacube(
+    *,
+    model_type: str,
+    instrument: str,
+    use_jax: bool,
+    use_mixed_precision: bool,
+) -> tuple[list, Any, Any]:
+    """Multi-channel datacube fit via ``af.FactorGraphModel``.
 
-    if al.util.dataset.should_simulate(str(dataset_path)):
-        raise FileNotFoundError(
-            f"Input dataset missing at '{dataset_path}'. The autolens_profiling "
-            f"repo mirrors only the curated datasets needed for default smoke "
-            f"runs. To regenerate, use the source-of-truth script at "
-            f"autolens_workspace_developer/jax_profiling/dataset_setup/imaging.py "
-            f"and copy the result into autolens_profiling/dataset/."
+    Mirrors ``autolens_workspace/scripts/multi/modeling.py``: build N
+    per-channel interferometer datasets, wrap each in an
+    ``AnalysisInterferometer``, pair each with a copy of the shared model
+    via ``af.AnalysisFactor``, then combine into an ``af.FactorGraphModel``.
+
+    The N channels are identical copies of the per-instrument dataset (the
+    profiling concern is cube-cost scaling, not band-wavelength variation),
+    so the adapt image is computed once and shared across every channel.
+    Returns ``(dataset_list, factor_graph.global_prior_model, factor_graph)``.
+    """
+    dataset_list, dataset_path = _build_datacube_channels(instrument)
+    mask_radius = _mask_radius_for("datacube", instrument)
+    model = _build_model("datacube", model_type, mask_radius=mask_radius)
+
+    adapt_images = _adapt_images_for(
+        "datacube",
+        model_type,
+        dataset_path=dataset_path,
+        dataset=dataset_list[0],
+    )
+
+    analysis_list = [
+        # Shared builder: channel analyses cannot drift from _build_analysis.
+        _build_analysis(
+            dataset_class="datacube",
+            model_type=model_type,
+            dataset=ds,
+            use_jax=use_jax,
+            use_mixed_precision=use_mixed_precision,
+            adapt_images=adapt_images,
         )
+        for ds in dataset_list
+    ]
+
+    # One AnalysisFactor per channel, each with its own copy of the model so
+    # PyAutoFit's factor graph treats them as independent likelihood factors
+    # sharing the same global parameters.
+    analysis_factor_list = [
+        af.AnalysisFactor(prior_model=model.copy(), analysis=analysis)
+        for analysis in analysis_list
+    ]
+    factor_graph = af.FactorGraphModel(*analysis_factor_list, use_jax=use_jax)
+    return dataset_list, factor_graph.global_prior_model, factor_graph
+
+
+# -----------------------------------------------------------------------------
+# Dataset construction
+# -----------------------------------------------------------------------------
+
+
+def _mask_radius_for(dataset_class: str, instrument: str) -> float:
+    if dataset_class == "imaging":
+        return float(_IMAGING_INSTRUMENTS[instrument]["mask_radius"])
+    if dataset_class in ("interferometer", "datacube"):  # datacube reuses the interferometer presets
+        return float(_INTERFEROMETER_INSTRUMENTS[instrument]["mask_radius"])
+    if dataset_class == "point_source":
+        # Point-source mask radius isn't applied to a 2D image; reuse the
+        # imaging value so MGE/source-bulge priors share a sensible scale.
+        return 3.5  # NOTE(review): hard-coded, not read from _IMAGING_INSTRUMENTS — confirm it matches
+    raise ValueError(f"Unknown dataset_class: {dataset_class!r}")
 
+
+def _build_dataset(dataset_class: str, instrument: str) -> tuple[Any, Path]:
+    if dataset_class == "imaging":
+        return _build_imaging(instrument)
+    if dataset_class == "interferometer":
+        return _build_interferometer(instrument)
+    if dataset_class == "datacube":
+        # build_for_cell routes datacube cells to _build_for_datacube, so
+        # this branch only serves direct callers of _build_dataset; it
+        # returns the first channel only.
+        dataset_list, dataset_path = _build_datacube_channels(instrument)
+        return dataset_list[0], dataset_path
+    if dataset_class == "point_source":
+        return _build_point_source(instrument)
+    raise ValueError(f"Unknown dataset_class: {dataset_class!r}")
+
+
+def _build_datacube_channels(instrument: str) -> tuple[list, Path]:
+    """Build ``_DATACUBE_N_CHANNELS`` identical-channel interferometer datasets.
+
+    Channels are identical copies of one per-instrument dataset (the profile
+    targets cube-cost scaling, not band-wavelength variation). Each is built
+    by a fresh ``_build_interferometer`` call so the analyses don't share
+    mutable dataset state, as in ``likelihood_runtime/datacube/delaunay.py``.
+    Raises ``ValueError`` if ``_DATACUBE_N_CHANNELS`` is below 1.
+    """
+    if _DATACUBE_N_CHANNELS < 1:
+        # Explicit raise rather than ``assert`` so the guard survives ``python -O``.
+        raise ValueError("_DATACUBE_N_CHANNELS must be >= 1")
+    built = [_build_interferometer(instrument) for _ in range(_DATACUBE_N_CHANNELS)]
+    dataset_list = [ds for ds, _ in built]
+    dataset_path = built[0][1]  # every channel is built from the same instrument directory
+    return dataset_list, dataset_path
+
+
+def _build_imaging(instrument: str) -> tuple[al.Imaging, Path]:
+    cfg = _IMAGING_INSTRUMENTS[instrument]
+    pixel_scale = cfg["pixel_scale"]
+    mask_radius = cfg["mask_radius"]
+    dataset_path = Path("dataset") / "imaging" / instrument  # NOTE(review): relative path — resolved against the CWD?
+    auto_simulate_if_missing(
+        dataset_path,
+        dataset_type="imaging",
+        instrument=instrument,
+        workspace_root=_WORKSPACE_ROOT,
+    )
     dataset = al.Imaging.from_fits(
         data_path=dataset_path / "data.fits",
         psf_path=dataset_path / "psf.fits",
         noise_map_path=dataset_path / "noise_map.fits",
-        pixel_scales=PIXEL_SCALE,
+        pixel_scales=pixel_scale,
     )
-
     mask = al.Mask2D.circular(
         shape_native=dataset.shape_native,
         pixel_scales=dataset.pixel_scales,
         radius=mask_radius,
     )
     dataset = dataset.apply_mask(mask=mask)
-    dataset = dataset.apply_over_sampling(over_sample_size_lp=4)
-
+    dataset = dataset.apply_over_sampling(  # first pass: one uniform light-profile over-sample size
+        over_sample_size_lp=4,
+        over_sample_size_pixelization=1,
+    )
     over_sample_size = al.util.over_sample.over_sample_size_via_radial_bins_from(
         grid=dataset.grid,
         sub_size_list=[4, 2, 1],
         radial_list=[0.3, 0.6],
         centre_list=[(0.0, 0.0)],
     )
-    dataset = dataset.apply_over_sampling(over_sample_size_lp=over_sample_size)
-    return dataset
+    dataset = dataset.apply_over_sampling(  # second pass: the radial-bin sizes computed just above
+        over_sample_size_lp=over_sample_size,
+        over_sample_size_pixelization=1,
+    )
+    return dataset, dataset_path
 
 
-def build_model(
-    mask_radius: float = MASK_RADIUS, total_gaussians: int = 20
-) -> af.Collection:
-    """Build the lens + source model used in ``jax_profiling/imaging/mge.py``."""
-    lens_bulge = al.model_util.mge_model_from(
-        mask_radius=mask_radius,
-        total_gaussians=total_gaussians,
-        centre_prior_is_uniform=True,
+def _build_interferometer(instrument: str) -> tuple[al.Interferometer, Path]:
+    cfg = _INTERFEROMETER_INSTRUMENTS[instrument]
+    pixel_scale = cfg["pixel_scale"]
+    mask_radius = cfg["mask_radius"]
+    real_space_shape = cfg["real_space_shape"]
+    transformer_kind = cfg["transformer"]
+    chunk_size = cfg.get("transformer_chunk_size")  # optional key; only read by the NUFFT branch below
+    dataset_path = Path("dataset") / "interferometer" / instrument
+    auto_simulate_if_missing(
+        dataset_path,
+        dataset_type="interferometer",
+        instrument=instrument,
+        workspace_root=_WORKSPACE_ROOT,
+    )
+    real_space_mask = al.Mask2D.circular(
+        shape_native=real_space_shape,
+        pixel_scales=pixel_scale,
+        radius=mask_radius,
+    )
+
+    if transformer_kind == "dft":
+        transformer_class: Any = al.TransformerDFT
+    elif transformer_kind == "nufft":
+        # Inject per-instrument chunk_size into TransformerNUFFT — required
+        # for alma_high / jvla to cap the nufftax gather buffer (see
+        # PyAutoArray#330 and the same idiom in
+        # likelihood_runtime/datacube/delaunay.py).
+        def _build_transformer(uv_wavelengths, real_space_mask):
+            return al.TransformerNUFFT(
+                uv_wavelengths=uv_wavelengths,
+                real_space_mask=real_space_mask,
+                chunk_size=chunk_size,
+            )
+
+        transformer_class = _build_transformer  # a factory function standing in for the class
+    else:
+        raise ValueError(
+            f"Unknown transformer kind {transformer_kind!r} for instrument {instrument!r}"
+        )
+
+    dataset = al.Interferometer.from_fits(
+        data_path=dataset_path / "data.fits",
+        noise_map_path=dataset_path / "noise_map.fits",
+        uv_wavelengths_path=dataset_path / "uv_wavelengths.fits",
+        real_space_mask=real_space_mask,
+        transformer_class=transformer_class,
+    )
+    dataset = dataset.apply_sparse_operator(use_jax=True, show_progress=False)  # NOTE(review): use_jax=True even for NumPy cells — confirm
+    return dataset, dataset_path
+
+
+def _build_point_source(instrument: str) -> tuple[Any, Path]:
+    cfg = _POINT_SOURCE_INSTRUMENTS[instrument]
+    dataset_path = Path("dataset") / "point_source" / instrument
+    auto_simulate_if_missing(
+        dataset_path,
+        dataset_type="point_source",
+        instrument=instrument,
+        workspace_root=_WORKSPACE_ROOT,
     )
+    dataset = al.from_json(
+        file_path=dataset_path / "point_dataset_positions_only.json",
+    )
+    # Stash the per-instrument PointSolver geometry alongside the dataset so
+    # _build_analysis can construct it without re-reading the instrument dict.
+    dataset._profiling_solver_kwargs = {  # type: ignore[attr-defined]
+        "grid_shape": cfg["grid_shape"],
+        "pixel_scale": cfg["pixel_scale"],
+        "pixel_scale_precision": cfg["pixel_scale_precision"],
+        "magnification_threshold": cfg["magnification_threshold"],
+    }
+    return dataset, dataset_path  # the dataset now carries the private solver-kwargs attribute
+
+
+# -----------------------------------------------------------------------------
+# Model construction
+# -----------------------------------------------------------------------------
+
+
+def _build_model(dataset_class: str, model_type: str, *, mask_radius: float) -> af.Collection:
+    if model_type == "mge":  # dispatch is on model_type only; dataset_class is unused here
+        return _mge_model(mask_radius=mask_radius)
+    if model_type == "pixelization":
+        return _pixelization_model(mask_radius=mask_radius)
+    if model_type == "delaunay":
+        return _delaunay_model(mask_radius=mask_radius)
+    if model_type in ("image_plane", "source_plane"):
+        return _point_source_model()  # both point-source variants share one model; mask_radius unused
+    raise ValueError(f"Unknown model_type: {model_type!r}")
+
+
+def _lens_mass_and_shear() -> tuple[af.Model, af.Model]:
+    """Isothermal + ExternalShear with uniform default priors — used by every
+    model builder (the point-source model discards the shear).
+    """
     mass = af.Model(al.mp.Isothermal)
     shear = af.Model(al.mp.ExternalShear)
-    lens = af.Model(al.Galaxy, redshift=0.5, bulge=lens_bulge, mass=mass, shear=shear)
+    return mass, shear
 
+
+def _mge_model(*, mask_radius: float) -> af.Collection:
+    lens_bulge = al.model_util.mge_model_from(  # lens light: MGE with a uniform centre prior
+        mask_radius=mask_radius,
+        total_gaussians=_MGE_TOTAL_GAUSSIANS,
+        centre_prior_is_uniform=True,
+    )
+    mass, shear = _lens_mass_and_shear()
+    lens = af.Model(
+        al.Galaxy, redshift=0.5, bulge=lens_bulge, mass=mass, shear=shear
+    )
     source_bulge = al.model_util.mge_model_from(
         mask_radius=mask_radius,
-        total_gaussians=total_gaussians,
+        total_gaussians=_MGE_TOTAL_GAUSSIANS,
         centre_prior_is_uniform=False,
     )
     source = af.Model(al.Galaxy, redshift=1.0, bulge=source_bulge)
+    return af.Collection(galaxies=af.Collection(lens=lens, source=source))  # keys: galaxies.lens / galaxies.source
+
 
+def _pixelization_model(*, mask_radius: float) -> af.Collection:
+    """RectangularAdaptImage source, mirrors ``source_pix[1]`` init mesh.
+
+    The lens light is an MGE, so the Gaussian columns and the source-mesh
+    columns go through one linear inversion together, the same path a real
+    ``source_pix`` phase would take.
+    """
+    lens_bulge = al.model_util.mge_model_from(
+        mask_radius=mask_radius,
+        total_gaussians=_MGE_TOTAL_GAUSSIANS,
+        centre_prior_is_uniform=True,
+    )
+    mass, shear = _lens_mass_and_shear()
+    lens = af.Model(
+        al.Galaxy, redshift=0.5, bulge=lens_bulge, mass=mass, shear=shear
+    )
+    pixelization = af.Model(
+        al.Pixelization,
+        mesh=al.mesh.RectangularAdaptImage(
+            shape=_PIXELIZATION_MESH_SHAPE,  # (39, 39) -> 1521 source pixels
+            weight_power=1.0,
+            weight_floor=0.0,
+        ),
+        regularization=al.reg.Constant,
+    )
+    source = af.Model(al.Galaxy, redshift=1.0, pixelization=pixelization)
     return af.Collection(galaxies=af.Collection(lens=lens, source=source))
 
 
-def build_analysis(dataset: al.Imaging, use_jax: bool = False) -> al.AnalysisImaging:
-    """Build the analysis object. Set ``use_jax=True`` for the pure-JAX path."""
-    return al.AnalysisImaging(dataset=dataset, use_jax=use_jax)
+def _delaunay_model(*, mask_radius: float) -> af.Collection:
+    """Hilbert image_mesh + Delaunay mesh + ConstantSplit regularization.
+
+    Matches the ``source_pix[2]``-style production pipeline shape, with the
+    Hilbert vertex count fixed at ``_HILBERT_PIXELS``. The lens light is an
+    MGE for parity with the pixelization cell.
+    """
+    lens_bulge = al.model_util.mge_model_from(
+        mask_radius=mask_radius,
+        total_gaussians=_MGE_TOTAL_GAUSSIANS,
+        centre_prior_is_uniform=True,
+    )
+    mass, shear = _lens_mass_and_shear()
+    lens = af.Model(
+        al.Galaxy, redshift=0.5, bulge=lens_bulge, mass=mass, shear=shear
+    )
+    pixelization = af.Model(
+        al.Pixelization,
+        image_mesh=al.image_mesh.Hilbert(  # an instance with fixed values, unlike the mesh / regularization classes below
+            pixels=_HILBERT_PIXELS, weight_power=1.0, weight_floor=0.0
+        ),
+        mesh=al.mesh.Delaunay,
+        regularization=al.reg.ConstantSplit,
+    )
+    source = af.Model(al.Galaxy, redshift=1.0, pixelization=pixelization)
+    return af.Collection(galaxies=af.Collection(lens=lens, source=source))
+
+
+def _point_source_model() -> af.Collection:
+    mass, _ = _lens_mass_and_shear()  # No shear for the point-source profile.
+    lens = af.Model(al.Galaxy, redshift=0.5, mass=mass)
+    point_0 = af.Model(al.ps.PointFlux)  # NOTE(review): the dataset file is positions-only — is PointFlux intended?
+    source = af.Model(al.Galaxy, redshift=1.0, point_0=point_0)
+    return af.Collection(galaxies=af.Collection(lens=lens, source=source))
 
 
-def format_best_fit(instance) -> str:
-    """Terse one-line summary of the lens mass + shear of a best-fit instance."""
-    mass = instance.galaxies.lens.mass
-    shear = instance.galaxies.lens.shear
-    return (
-        f"lens.mass.einstein_radius={mass.einstein_radius:.4f}  "
-        f"lens.mass.centre=({mass.centre[0]:.3f}, {mass.centre[1]:.3f})  "
-        f"shear=({shear.gamma_1:.4f}, {shear.gamma_2:.4f})"
+# -----------------------------------------------------------------------------
+# Adapt image (pix/delaunay only)
+# -----------------------------------------------------------------------------
+
+
+def _adapt_images_for(
+    dataset_class: str,
+    model_type: str,
+    *,
+    dataset_path: Path,
+    dataset: Any,
+) -> Optional[al.AdaptImages]:
+    if model_type not in ("pixelization", "delaunay"):
+        return None  # only the pixelization / Delaunay sources get an adapt image
+    if dataset_class not in ("imaging", "interferometer", "datacube"):
+        return None  # e.g. point_source: no adapt image is built
+    adapt_image = adapt_image_for_dataset(dataset_path=dataset_path, dataset=dataset)
+    # The galaxy-name-keyed dict is what survives an in-search instance
+    # reconstruction; the Galaxy-object-keyed dict is for eager comparison
+    # paths and isn't strictly needed for a fresh search.
+    return al.AdaptImages(
+        galaxy_name_image_dict={"('galaxies', 'source')": adapt_image},
     )
+
+
+# -----------------------------------------------------------------------------
+# Analysis construction
+# -----------------------------------------------------------------------------
+
+
+def _build_analysis(
+    *,
+    dataset_class: str,
+    model_type: str,
+    dataset: Any,
+    use_jax: bool,
+    use_mixed_precision: bool,
+    adapt_images: Optional[al.AdaptImages],
+) -> Any:
+    if dataset_class == "imaging":
+        return al.AnalysisImaging(
+            dataset=dataset,
+            adapt_images=adapt_images,
+            settings=al.Settings(
+                use_border_relocator=model_type in ("pixelization", "delaunay"),  # mesh sources only
+                use_mixed_precision=use_mixed_precision,
+            ),
+            use_jax=use_jax,
+        )
+    if dataset_class in ("interferometer", "datacube"):  # a datacube channel is an interferometer dataset
+        return al.AnalysisInterferometer(
+            dataset=dataset,
+            adapt_images=adapt_images,
+            settings=al.Settings(
+                use_border_relocator=model_type in ("pixelization", "delaunay"),
+                use_mixed_precision=use_mixed_precision,
+            ),
+            use_jax=use_jax,
+        )
+    if dataset_class == "point_source":
+        solver_kwargs = getattr(dataset, "_profiling_solver_kwargs", None)  # set by _build_point_source
+        if solver_kwargs is None:
+            raise RuntimeError(
+                "point_source dataset is missing the solver kwargs stash; "
+                "construct it via _build_point_source first."
+            )
+        grid = al.Grid2D.uniform(
+            shape_native=solver_kwargs["grid_shape"],
+            pixel_scales=solver_kwargs["pixel_scale"],
+        )
+        solver = al.PointSolver.for_grid(
+            grid=grid,
+            pixel_scale_precision=solver_kwargs["pixel_scale_precision"],
+            magnification_threshold=solver_kwargs["magnification_threshold"],
+        )
+        fit_positions_cls = (  # any model_type other than "image_plane" gets the source-plane fit
+            al.FitPositionsImagePairAll
+            if model_type == "image_plane"
+            else al.FitPositionsSource
+        )
+        return al.AnalysisPoint(  # adapt_images and use_mixed_precision are not used on this path
+            dataset=dataset,
+            solver=solver,
+            fit_positions_cls=fit_positions_cls,
+            use_jax=use_jax,
+        )
+    raise ValueError(f"Unknown dataset_class: {dataset_class!r}")
+
+
+# -----------------------------------------------------------------------------
+# Misc helpers
+# -----------------------------------------------------------------------------
+
+
+def format_best_fit(instance: Any) -> str:
+    """One-line summary of an instance's lens mass + shear (best-effort).
+
+    Works across mge / pix / delaunay / point-source models; returns
+    ``repr(instance)`` when the lens mass fields are missing.
+    """
+    try:
+        mass = instance.galaxies.lens.mass
+        out = (
+            f"lens.mass.einstein_radius={mass.einstein_radius:.4f}  "
+            f"lens.mass.centre=({mass.centre[0]:.3f}, {mass.centre[1]:.3f})"
+        )
+    except AttributeError:  # only missing attributes are handled; other errors propagate
+        return repr(instance)
+    try:
+        shear = instance.galaxies.lens.shear
+        out += f"  shear=({shear.gamma_1:.4f}, {shear.gamma_2:.4f})"
+    except AttributeError:
+        pass  # shear is optional: the point-source model's lens has none
+    return out
diff --git a/searches/aggregate.py b/searches/aggregate.py
new file mode 100644
index 0000000..abe5bbf
--- /dev/null
+++ b/searches/aggregate.py
@@ -0,0 +1,256 @@
+"""Aggregate per-config JSONs for swept search cells into comparison.{json,png}.
+
+Walks the four-level layout written by ``searches/sweep.py``::
+
+    <output_root>/<sampler>/<dataset_class>/<model>/<instrument>/<config_name>.json
+
+For each ``<instrument>`` directory, emits a ``comparison.json`` (per-config
+dict) and a ``comparison.png`` (grouped bar chart of the headline metrics
+across configs: total_wall_s, viz_wall_s, sampler_wall_s, time_per_eval_ms).
+
+Usage::
+
+    # All cells under the default output root
+    python searches/aggregate.py
+
+    # One cell only
+    python searches/aggregate.py --cell nautilus/imaging/mge/hst
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt  # noqa: E402
+import numpy as np  # noqa: E402
+
+
+_REPO_ROOT = Path(__file__).resolve().parents[1]  # directory one level above this file's folder
+_DEFAULT_OUTPUT_ROOT = _REPO_ROOT / "results" / "searches"  # default value of --output-root
+
+# Known config names: output ordering, and the JSON stems that mark a directory as a cell.
+_CONFIG_ORDER = (
+    "local_cpu_fp64",
+    "local_cpu_mp",
+    "local_gpu_fp64",
+    "local_gpu_mp",
+    "hpc_a100_fp64",
+    "hpc_a100_mp",
+)
+
+# (performance key, axis label) pairs; each pair becomes one row of the bar chart.
+_METRICS_FOR_BAR_CHART: tuple[tuple[str, str], ...] = (
+    ("total_wall_s", "Total wall (s)"),
+    ("sampler_wall_s", "Sampler wall (s)"),
+    ("viz_wall_s", "Viz wall (s)"),
+    ("time_per_eval_ms", "Per-eval (ms)"),
+)
+
+
+def _parse_args() -> argparse.Namespace:
+    """Build the CLI parser (``--output-root``, ``--cell``) and parse the command line."""
+    # The description shown by --help is the first paragraph of the module docstring.
+    parser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
+    parser.add_argument(
+        "--output-root",
+        type=Path,
+        default=_DEFAULT_OUTPUT_ROOT,
+        help=f"Root output dir. Default: {_DEFAULT_OUTPUT_ROOT}",
+    )
+    cell_help = "Only aggregate these cells; default = auto-discover under --output-root."
+    parser.add_argument(
+        "--cell",
+        nargs="+",
+        default=None,
+        metavar="SAMPLER/CLASS/MODEL/INSTRUMENT",
+        help=cell_help,
+    )
+    return parser.parse_args()
+
+
+def _discover_cells(output_root: Path) -> list[tuple[str, str, str, str]]:
+    """Return (sampler, dataset_class, model, instrument) for each populated cell.
+
+    A cell is populated when its instrument directory holds at least one
+    ``*.json`` file whose stem appears in ``_CONFIG_ORDER``. Directories are
+    visited in sorted order at every level, and a missing ``output_root``
+    yields an empty list.
+    """
+    if not output_root.exists():
+        return []
+
+    def _subdirs(parent: Path) -> list[Path]:
+        return [child for child in sorted(parent.iterdir()) if child.is_dir()]
+
+    found: list[tuple[str, str, str, str]] = []
+    for sampler in _subdirs(output_root):
+        for ds in _subdirs(sampler):
+            for model in _subdirs(ds):
+                for inst in _subdirs(model):
+                    if any(j.stem in _CONFIG_ORDER for j in inst.glob("*.json")):
+                        found.append(
+                            (sampler.name, ds.name, model.name, inst.name)
+                        )
+    return found
+
+
+def _read_config(json_path: Path) -> dict:  # load and parse one per-config JSON file
+    data = json.loads(json_path.read_text())
+    data.setdefault("config_name", json_path.stem)  # the file stem is used only if the key is absent
+    return data
+
+
+def _aggregate_cell(cell_dir: Path) -> dict:
+    """Collect every per-config JSON in ``cell_dir`` into ``{"configs": {...}}``.
+
+    ``comparison.json`` is ignored; files that fail to load are reported on
+    stderr and skipped. Names in ``_CONFIG_ORDER`` come first (in that order),
+    followed by any other names in sorted order.
+    """
+    loaded: dict[str, dict] = {}
+    for json_path in sorted(cell_dir.glob("*.json")):
+        if json_path.name != "comparison.json":
+            try:
+                loaded[json_path.stem] = _read_config(json_path)
+            except Exception as exc:
+                sys.stderr.write(f"  warn: failed to read {json_path}: {exc}\n")
+
+    known = [name for name in _CONFIG_ORDER if name in loaded]
+    extras = sorted(name for name in loaded if name not in _CONFIG_ORDER)
+    return {"configs": {name: loaded[name] for name in known + extras}}
+
+
+def _format_seconds(t: float | None) -> str:
+    """Format ``t`` seconds with an s / ms / μs unit, or "—" if None or non-finite."""
+    if t is None or not np.isfinite(t):
+        return "—"
+    # Smallest bucket first: anything below one millisecond is shown in μs.
+    if t < 1e-3:
+        return f"{t * 1e6:.0f}μs"
+    return f"{t:.2f}s" if t >= 1.0 else f"{t * 1e3:.1f}ms"
+
+
+def _get_perf(cfg: dict, key: str) -> float:
+    """Look up a ``performance`` metric as a float; NaN if missing or not a finite number."""
+    raw = cfg.get("performance", {}).get(key)
+    is_number = isinstance(raw, (int, float)) and np.isfinite(raw)
+    # NaN rather than None, so callers can always treat the result as a float.
+    return float(raw) if is_number else float("nan")
+
+
+def _render_table(comparison: dict, cell_id: str) -> str:
+    """Render the cell's configs as an aligned text table; absent metrics show as "—"."""
+    rows = [("config", "backend", "total", "sampler", "viz", "per_eval", "log_evidence")]
+    for name, cfg in comparison["configs"].items():
+        per_eval_ms = _get_perf(cfg, "time_per_eval_ms")
+        log_evidence = cfg.get("results", {}).get("log_evidence")
+        rows.append(
+            (
+                name,
+                str(cfg.get("device", {}).get("backend", "?")),
+                _format_seconds(_get_perf(cfg, "total_wall_s")),
+                _format_seconds(_get_perf(cfg, "sampler_wall_s")),
+                _format_seconds(_get_perf(cfg, "viz_wall_s")),
+                # _get_perf yields NaN for a missing metric: print "—", not "nanms".
+                f"{per_eval_ms:.2f}ms" if np.isfinite(per_eval_ms) else "—",
+                f"{log_evidence:.4f}" if isinstance(log_evidence, (int, float)) else "—",
+            )
+        )
+    col_w = [max(len(r[i]) for r in rows) for i in range(len(rows[0]))]
+    body = ["  " + "  ".join(s.ljust(w) for s, w in zip(r, col_w)) for r in rows]
+    return "\n".join([f"=== {cell_id} ===", *body])
+
+
+def _render_png(comparison: dict, cell_id: str, png_path: Path) -> None:
+    """Save a grouped horizontal bar chart of the headline metrics to ``png_path``.
+
+    One row per ``_METRICS_FOR_BAR_CHART`` entry, one bar per config; no-op if no configs.
+    """
+    per_config = comparison["configs"]
+    if not per_config:
+        return
+
+    metric_keys = [key for key, _label in _METRICS_FOR_BAR_CHART]
+    metric_labels = [label for _key, label in _METRICS_FOR_BAR_CHART]
+    rows = np.arange(len(metric_keys))
+    thickness = 0.8 / len(per_config)
+    centre = (len(per_config) - 1) / 2
+
+    fig, ax = plt.subplots(figsize=(11, max(3.5, 0.35 * len(metric_keys) + 1.5)))
+    palette = plt.get_cmap("tab10")
+    for idx, (cfg_name, cfg) in enumerate(per_config.items()):
+        ax.barh(
+            rows + (idx - centre) * thickness,
+            [_get_perf(cfg, key) for key in metric_keys],
+            height=thickness,
+            label=cfg_name,
+            color=palette(idx % palette.N),
+            edgecolor="white",
+        )
+
+    ax.set_yticks(rows)
+    ax.set_yticklabels(metric_labels, fontsize=9)
+    ax.invert_yaxis()
+    ax.set_xscale("log")
+    ax.set_xlabel("Value (log scale; seconds or ms per the row)")
+    ax.set_title(f"{cell_id}  — search profiling comparison", fontsize=11, fontweight="bold")
+    ax.legend(loc="lower right", fontsize=8)
+    ax.grid(True, axis="x", linestyle=":", alpha=0.5)
+    fig.tight_layout()
+    fig.savefig(png_path, dpi=150)
+    plt.close(fig)
+
+
+def main() -> int:
+    """Aggregate each requested or discovered cell; return the process exit code.
+
+    Exit codes: 0 done, 1 no cells to process, 2 malformed ``--cell`` value.
+    """
+    args = _parse_args()
+
+    if not args.cell:
+        cells = _discover_cells(args.output_root)
+    else:
+        cells = []
+        for spec in args.cell:
+            parts = tuple(spec.split("/"))
+            if len(parts) != 4:
+                sys.stderr.write(
+                    f"bad --cell argument: {spec!r} "
+                    f"(expected sampler/class/model/instrument)\n"
+                )
+                return 2
+            cells.append(parts)
+
+    if not cells:
+        sys.stderr.write(f"no cells found under {args.output_root}\n")
+        return 1
+
+    for cell in cells:
+        cell_id = "/".join(cell)
+        cell_dir = args.output_root.joinpath(*cell)
+        if not cell_dir.exists():
+            sys.stderr.write(f"  skipping {cell_id}: dir missing\n")
+            continue
+        comparison = _aggregate_cell(cell_dir)
+        if not comparison["configs"]:
+            sys.stderr.write(f"  skipping {cell_id}: no per-config JSONs found\n")
+            continue
+        json_out, png_out = cell_dir / "comparison.json", cell_dir / "comparison.png"
+        json_out.write_text(json.dumps(comparison, indent=2, default=str))
+        _render_png(comparison, cell_id, png_out)
+        print(_render_table(comparison, cell_id))
+        print(f"  -> {json_out}")
+        print(f"  -> {png_out}\n")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/searches/nautilus/README.md b/searches/nautilus/README.md
deleted file mode 100644
index 1995caf..0000000
--- a/searches/nautilus/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# searches/nautilus
-
-[Nautilus](https://github.com/johannesulf/nautilus) is a production nested-importance sampler that combines neural-network-based importance sampling with traditional nested sampling. It's gradient-free, so it sidesteps the JAX-gradient pathologies that affect HMC-family samplers on this likelihood, and is a strong baseline for end-to-end "what does a real sampler do on this lens model" timing.
-
-These scripts drive Nautilus directly against the HST MGE imaging likelihood, bypassing `af.NonLinearSearch`. They are wiring tests + run-time profiling, **not** converged science fits — `n_live=200` is below what you'd use in production, but enough to see per-evaluation cost and reach the default `n_eff=10000` / `f_live=0.01` termination on this MGE setup.
-
-## Scripts
-
-| Script | Likelihood backend | What it profiles |
-|--------|--------------------|------------------|
-| [`simple.py`](./simple.py) | NumPy (`use_jax=False`) | Baseline: pure-NumPy log-likelihood passed straight to Nautilus. Highest per-evaluation cost; useful as the reference point against which JAX speedup is measured. |
-| [`jax.py`](./jax.py) | JAX JIT (`use_jax=True`, `jax.jit`-compiled likelihood) | Reports JIT compile time separately. Per-evaluation cost is JAX kernel + a single Python ↔ JAX boundary crossing per call (Nautilus itself is NumPy-only). Compare versus a pure-JAX nested sampler like NSS-JIT (not yet mirrored) for the no-boundary-crossing variant. |
-
-Both share the same Nautilus configuration so timings are directly comparable: `n_live=200`, default `n_eff=10000`, default `f_live=0.01`. Both use the shared `_setup` / `_metrics` from one folder up.
-
-## What each script reports
-
-- **Best fit**: max-likelihood lens mass / shear parameters (one-line summary).
-- **Max log L** and **log evidence**.
-- **Wall time** for the sampling phase (excluding JIT compile for `jax.py`).
-- **JIT compile time** (one-shot warmup; `jax.py` only).
-- **Likelihood evaluations** and **time per eval** (ms).
-- **ESS** (effective sample size) and **posterior samples**.
-- **Convergence** indicator (Nautilus's `n_eff` / `f_live` defaults are reached).
-- **Evals to ML** and **time to ML** via the shared `MLTracker`.
-
-The headline JSON+PNG pair is written to `results/searches/nautilus/` per the [section README](../README.md#versioned-artifacts) convention.
-
-## Headline run-times (latest per script)
-
-Auto-generated by `scripts/build_readme.py` from the latest `*_summary_v<version>.json` artifacts under `results/searches/nautilus/`.
-
-<!-- BEGIN auto-table:searches-nautilus -->
-_No data yet — run `searches/nautilus/{simple,jax}.py` to populate. See section README._
-<!-- END auto-table:searches-nautilus -->
-
-## Expected behaviour
-
-For reference: prior sweep runs on this exact MGE setup (recorded in `autolens_workspace_developer/searches_minimal/sweep_findings.md`) put converged log-evidence at around **logZ ≈ -169k**. A non-converged early-stop reading of `logZ ≈ -191k` is roughly what you'll see after a few minutes of sampling. The likelihood landscape anneals slowly — fully converged runs at `n_live=100` take ~30–60 minutes on GPU.
-
-The JAX variant's wall time is dominated by the NumPy/JAX boundary crossings, not the JAX kernel. A future NSS-JIT mirror will surface the no-boundary-crossing alternative.
-
-## Caveats
-
-- **`use_jax=True` and JIT compile**: `_setup.build_analysis(dataset, use_jax=True)` returns an analysis object that the Nautilus wrapper feeds via `jax.jit`. If the underlying JAX-jitted likelihood path has an upstream regression (see [PyAutoLens#514](https://github.com/PyAutoLabs/PyAutoLens/issues/514)), the `jax.py` script may produce different log evidence than `simple.py` — that's an upstream issue, not a Nautilus issue.
-- **GPU memory**: on a 6 GB consumer GPU (e.g. RTX 2060), `jax.py` with `n_live=200` fits comfortably for Nautilus (gradient-free, no curvature storage). The same `n_live` causes OOM in NSS-JIT (which stores curvature for HMC-style moves) per the upstream sweep findings.
diff --git a/searches/nautilus/datacube/delaunay.py b/searches/nautilus/datacube/delaunay.py
new file mode 100644
index 0000000..2b463cc
--- /dev/null
+++ b/searches/nautilus/datacube/delaunay.py
@@ -0,0 +1,28 @@
+"""First-class af.Nautilus search profiling — datacube Delaunay.
+
+Multi-channel cube fit via ``af.FactorGraphModel``: N identical channel
+datasets, each wrapped in ``al.AnalysisInterferometer`` + ``af.AnalysisFactor``,
+combined under a single global model — mirrors
+``autolens_workspace/scripts/multi/modeling.py``. The channel count comes
+from ``_DATACUBE_N_CHANNELS`` in ``searches/_setup.py`` (default 4 to
+match the existing ``likelihood_runtime/datacube/delaunay.py`` quick-
+iteration value).
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="datacube",
+    model_type="delaunay",
+    default_instrument="sma",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/imaging/delaunay.py b/searches/nautilus/imaging/delaunay.py
new file mode 100644
index 0000000..dcb4429
--- /dev/null
+++ b/searches/nautilus/imaging/delaunay.py
@@ -0,0 +1,24 @@
+"""First-class af.Nautilus search profiling — imaging Delaunay.
+
+Drives a full ``af.Nautilus`` fit on an MGE lens + Hilbert image-mesh Delaunay
+source imaging model. Uses a truth-derived adapt image cached next to the
+dataset. See ``searches/README.md`` for caveats.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="imaging",
+    model_type="delaunay",
+    default_instrument="hst",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/imaging/mge.py b/searches/nautilus/imaging/mge.py
new file mode 100644
index 0000000..fcc1b02
--- /dev/null
+++ b/searches/nautilus/imaging/mge.py
@@ -0,0 +1,24 @@
+"""First-class af.Nautilus search profiling — imaging MGE.
+
+Drives a full ``af.Nautilus`` fit on an MGE lens + MGE source imaging model
+across the canonical instruments (hst / euclid / jwst / ao). See
+``searches/README.md`` for design and the sweep workflow.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # autolens_profiling/ (three directories above this file's folder)
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="imaging",
+    model_type="mge",
+    default_instrument="hst",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/imaging/pixelization.py b/searches/nautilus/imaging/pixelization.py
new file mode 100644
index 0000000..6529b1a
--- /dev/null
+++ b/searches/nautilus/imaging/pixelization.py
@@ -0,0 +1,24 @@
+"""First-class af.Nautilus search profiling — imaging pixelization.
+
+Drives a full ``af.Nautilus`` fit on an MGE lens + RectangularAdaptImage
+pixelization source imaging model. Uses a truth-derived adapt image cached
+next to the dataset. See ``searches/README.md`` for caveats.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="imaging",
+    model_type="pixelization",
+    default_instrument="hst",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/interferometer/delaunay.py b/searches/nautilus/interferometer/delaunay.py
new file mode 100644
index 0000000..4b9774d
--- /dev/null
+++ b/searches/nautilus/interferometer/delaunay.py
@@ -0,0 +1,19 @@
+"""First-class af.Nautilus search profiling — interferometer Delaunay."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="interferometer",
+    model_type="delaunay",
+    default_instrument="sma",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/interferometer/mge.py b/searches/nautilus/interferometer/mge.py
new file mode 100644
index 0000000..a45f882
--- /dev/null
+++ b/searches/nautilus/interferometer/mge.py
@@ -0,0 +1,24 @@
+"""First-class af.Nautilus search profiling — interferometer MGE.
+
+Drives a full ``af.Nautilus`` fit on an MGE lens + MGE source interferometer
+model across the canonical instruments (sma / alma / alma_high / jvla).
+See ``searches/README.md`` for the sweep workflow.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="interferometer",
+    model_type="mge",
+    default_instrument="sma",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/interferometer/pixelization.py b/searches/nautilus/interferometer/pixelization.py
new file mode 100644
index 0000000..da35343
--- /dev/null
+++ b/searches/nautilus/interferometer/pixelization.py
@@ -0,0 +1,19 @@
+"""First-class af.Nautilus search profiling — interferometer pixelization."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="interferometer",
+    model_type="pixelization",
+    default_instrument="sma",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/jax.py b/searches/nautilus/jax.py
deleted file mode 100644
index 05435ba..0000000
--- a/searches/nautilus/jax.py
+++ /dev/null
@@ -1,227 +0,0 @@
-"""
-Minimal Nautilus Example — pure-JAX HST MGE likelihood
-------------------------------------------------------
-
-Drives the Nautilus nested sampler against the HST MGE imaging likelihood
-running fully under ``jax.jit``. The analysis is built with ``use_jax=True``
-and the closure is passed through ``jax.jit`` once, ahead of sampling, so
-the JIT compile cost is reported separately from the sampling wall time.
-
-Nautilus itself is a NumPy sampler, so the wrapper does
-``np.asarray(jit_loglike(jnp.asarray(params)))`` per call -- the JAX kernel
-runs but every evaluation crosses the Python <-> JAX boundary.
-
-``n_live`` is kept at the smoke-test values used by ``simple.py`` — this
-is a wiring test, not a converged posterior.
-
-Compare versus ``simple.py`` (NumPy likelihood under the same sampler) and
-``likelihood/imaging/mge.py`` (single-likelihood JIT profiling of the same
-MGE setup).
-
-Requirements:
-    pip install nautilus-sampler
-"""
-
-import json
-import sys
-import time
-from pathlib import Path
-
-import numpy as np
-import jax
-import jax.numpy as jnp
-
-import matplotlib
-
-
-# AUTOLENS_PROFILING_SMOKE=1 short-circuit (Phase 5 / CI lint smoke).
-# Verifies the import graph + module-level setup succeeded without running
-# the full profiling pipeline. Skipped entirely when the env var is unset.
-import os as _smoke_os
-import sys as _smoke_sys
-if _smoke_os.environ.get("AUTOLENS_PROFILING_SMOKE") == "1":
-    print(f"[smoke] {__file__}: imports + module setup OK; exiting.")
-    _smoke_sys.exit(0)
-
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-
-import autolens as al
-
-# Make ``from searches._{setup,metrics}`` importable regardless of how the
-# script is invoked (``python searches/nautilus/jax.py``, ``python -m
-# searches.nautilus.jax``, or a CI runner).
-_REPO_ROOT = Path(__file__).resolve().parents[2]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-
-from searches._metrics import MLTracker
-from searches._setup import (
-    build_analysis,
-    build_dataset,
-    build_model,
-    format_best_fit,
-)
-
-dataset = build_dataset()
-model = build_model()
-analysis = build_analysis(dataset, use_jax=True)
-
-print(f"Model free parameters: {model.total_free_parameters}")
-
-from nautilus import Sampler
-
-
-def log_likelihood_jax(params):
-    """Pure-JAX log likelihood: flat parameter vector -> scalar log L."""
-    instance = model.instance_from_vector(vector=params, xp=jnp)
-    return analysis.log_likelihood_function(instance=instance)
-
-
-jit_log_likelihood = jax.jit(log_likelihood_jax)
-
-# Warm up the JIT once so the compile cost is measured separately.
-warmup_unit = [0.5] * model.prior_count
-warmup_physical = jnp.asarray(model.vector_from_unit_vector(warmup_unit))
-print("JIT-compiling MGE likelihood (one-shot)...", flush=True)
-t_jit_start = time.time()
-_ = float(jax.block_until_ready(jit_log_likelihood(warmup_physical)))
-t_jit = time.time() - t_jit_start
-print(f"  Compiled in {t_jit:.2f} s", flush=True)
-
-
-def prior_transform(cube):
-    """Map a unit cube to physical parameters via the model's priors."""
-    return np.array(model.vector_from_unit_vector(cube))
-
-
-n_likelihood_calls = 0
-tracker = MLTracker()
-
-
-def log_likelihood(params):
-    """Adapter: NumPy in, JIT'd JAX likelihood, Python float out."""
-    global n_likelihood_calls
-    n_likelihood_calls += 1
-    log_l = float(jit_log_likelihood(jnp.asarray(params)))
-    tracker.record(log_l)
-    return log_l
-
-
-n_live = 200
-
-sampler = Sampler(
-    prior=prior_transform,
-    likelihood=log_likelihood,
-    n_dim=model.prior_count,
-    n_live=n_live,
-)
-
-t_start = time.time()
-# Run to Nautilus's default convergence (n_eff=10000, f_live=0.01) on the
-# JAX-jitted MGE likelihood. JIT compile is paid once above; per-call cost
-# inside sampling is the JAX kernel + Python<->JAX boundary.
-sampler.run(verbose=True)
-t_elapsed = time.time() - t_start
-
-points, log_w, log_l = sampler.posterior()
-best_idx = np.argmax(log_l)
-best_instance = model.instance_from_vector(vector=list(points[best_idx]))
-max_logl = float(np.max(log_l))
-
-evals_to_ml, time_to_ml = tracker.finalise(max_log_l=max_logl, tolerance=1.0)
-
-# ---------------------------------------------------------------------------
-# Print human-readable summary
-# ---------------------------------------------------------------------------
-
-summary = f"""\
---- Nautilus (JAX JIT) Results ---
-Best fit:        {format_best_fit(best_instance)}
-Max log L:       {max_logl:.4f}
-Log evidence:    {float(sampler.log_z):.4f}
-
---- Performance ---
-Wall time:           {t_elapsed:.2f} s     (excludes JIT compile, run ahead of time)
-JIT compile time:    {t_jit:.2f} s     (one-shot warm-up before sampling)
-Likelihood evals:    {n_likelihood_calls}
-Time per eval:       {t_elapsed / max(n_likelihood_calls, 1) * 1e3:.3f} ms
-ESS:                 {float(sampler.n_eff):.1f}
-Posterior samples:   {len(points)}
-Sampler config:      n_live={n_live}, default n_eff=10000, f_live=0.01
-
---- Convergence ---
-Converged:           yes (Nautilus default n_eff / f_live)
-Evals to ML:         {evals_to_ml if evals_to_ml is not None else 'n/a'}     (first eval within 1 nat of max log L)
-Time to ML:          {f'{time_to_ml:.2f} s' if time_to_ml is not None else 'n/a'}
-"""
-
-print()
-print(summary)
-
-# ---------------------------------------------------------------------------
-# Write versioned JSON + PNG to results/searches/nautilus/
-# ---------------------------------------------------------------------------
-
-al_version = al.__version__
-result_dict = {
-    "sampler": "nautilus",
-    "backend": "jax_jit",
-    "instrument": "hst",
-    "model": {
-        "type": "MGE+Isothermal+ExternalShear",
-        "free_parameters": int(model.total_free_parameters),
-    },
-    "sampler_config": {
-        "n_live": n_live,
-        "n_eff_target": 10000,
-        "f_live": 0.01,
-    },
-    "results": {
-        "max_log_likelihood": max_logl,
-        "log_evidence": float(sampler.log_z),
-        "best_fit_summary": format_best_fit(best_instance),
-    },
-    "performance": {
-        "wall_time_s": t_elapsed,
-        "jit_compile_s": t_jit,
-        "likelihood_evals": int(n_likelihood_calls),
-        "time_per_eval_ms": t_elapsed / max(n_likelihood_calls, 1) * 1e3,
-        "ess": float(sampler.n_eff),
-        "posterior_samples": int(len(points)),
-    },
-    "convergence": {
-        "converged": True,
-        "evals_to_ml": int(evals_to_ml) if evals_to_ml is not None else None,
-        "time_to_ml_s": float(time_to_ml) if time_to_ml is not None else None,
-    },
-    "version": al_version,
-}
-
-results_dir = _REPO_ROOT / "results" / "searches" / "nautilus"
-results_dir.mkdir(parents=True, exist_ok=True)
-json_path = results_dir / f"jax_summary_v{al_version}.json"
-json_path.write_text(json.dumps(result_dict, indent=2))
-print(f"  Results JSON saved to: {json_path}")
-
-# Bar chart of the headline timings
-fig, ax = plt.subplots(figsize=(8, 3))
-labels = [
-    "jit_compile (s)",
-    "wall_time (s)",
-    "time_per_eval (ms)",
-    "time_to_ml (s)" if time_to_ml is not None else "time_to_ml (n/a)",
-]
-times = [
-    t_jit,
-    t_elapsed,
-    t_elapsed / max(n_likelihood_calls, 1) * 1e3,
-    float(time_to_ml) if time_to_ml is not None else 0.0,
-]
-ax.barh(labels, times, color=["#8172B2", "#4C72B0", "#55A868", "#C44E52"])
-ax.set_title(f"Nautilus (JAX JIT) — HST MGE — v{al_version}")
-fig.tight_layout()
-png_path = results_dir / f"jax_summary_v{al_version}.png"
-fig.savefig(png_path, dpi=120)
-plt.close(fig)
-print(f"  Bar chart saved to:    {png_path}")
diff --git a/searches/nautilus/point_source/image_plane.py b/searches/nautilus/point_source/image_plane.py
new file mode 100644
index 0000000..d89994c
--- /dev/null
+++ b/searches/nautilus/point_source/image_plane.py
@@ -0,0 +1,23 @@
+"""First-class af.Nautilus search profiling — point-source image-plane fit.
+
+Image-plane fit uses ``al.FitPositionsImagePairAll`` (chi-squared on the
+solver-reproduced multiple-image positions).
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="point_source",
+    model_type="image_plane",
+    default_instrument="simple",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/point_source/source_plane.py b/searches/nautilus/point_source/source_plane.py
new file mode 100644
index 0000000..5aa9b46
--- /dev/null
+++ b/searches/nautilus/point_source/source_plane.py
@@ -0,0 +1,23 @@
+"""First-class af.Nautilus search profiling — point-source source-plane fit.
+
+Source-plane fit uses ``al.FitPositionsSource`` (chi-squared on traced
+positions back to the source plane).
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]  # three directories above this file's folder
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # so `searches` imports when this file is run as a script
+
+from searches._runner import run_search  # noqa: E402
+
+run_search(  # module-level call with no __main__ guard: importing this file also triggers it
+    sampler="nautilus",
+    dataset_class="point_source",
+    model_type="source_plane",
+    default_instrument="simple",  # presumably used when no instrument is chosen elsewhere; confirm in _runner
+)
diff --git a/searches/nautilus/simple.py b/searches/nautilus/simple.py
deleted file mode 100644
index 0cce08c..0000000
--- a/searches/nautilus/simple.py
+++ /dev/null
@@ -1,198 +0,0 @@
-"""
-Minimal Nautilus Example (HST MGE lens likelihood)
---------------------------------------------------
-
-Drives the Nautilus nested sampler directly against the HST MGE imaging
-likelihood, bypassing ``af.NonLinearSearch``. Useful as a fast end-to-end
-smoke test of the real PyAutoLens likelihood under a production sampler.
-
-``n_live`` is kept small so the search finishes in a few minutes — this is
-a wiring test, not a converged posterior.
-
-Compare versus ``likelihood/imaging/mge.py`` (single-likelihood profiling
-of the same MGE setup) and ``jax.py`` (JAX-JIT'd likelihood under the same
-sampler).
-
-Requirements:
-    pip install nautilus-sampler
-"""
-
-import json
-import sys
-import time
-from pathlib import Path
-
-import numpy as np
-
-import matplotlib
-
-
-# AUTOLENS_PROFILING_SMOKE=1 short-circuit (Phase 5 / CI lint smoke).
-# Verifies the import graph + module-level setup succeeded without running
-# the full profiling pipeline. Skipped entirely when the env var is unset.
-import os as _smoke_os
-import sys as _smoke_sys
-if _smoke_os.environ.get("AUTOLENS_PROFILING_SMOKE") == "1":
-    print(f"[smoke] {__file__}: imports + module setup OK; exiting.")
-    _smoke_sys.exit(0)
-
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-
-import autolens as al
-
-# Make ``from searches._{setup,metrics}`` importable regardless of how the
-# script is invoked (``python searches/nautilus/simple.py``, ``python -m
-# searches.nautilus.simple``, or a CI runner).
-_REPO_ROOT = Path(__file__).resolve().parents[2]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-
-from searches._metrics import MLTracker
-from searches._setup import (
-    build_analysis,
-    build_dataset,
-    build_model,
-    format_best_fit,
-)
-
-dataset = build_dataset()
-model = build_model()
-analysis = build_analysis(dataset, use_jax=False)
-
-print(f"Model free parameters: {model.total_free_parameters}")
-
-from nautilus import Sampler
-
-
-def prior_transform(cube):
-    """Map a unit cube to physical parameters via the model's priors."""
-    return np.array(model.vector_from_unit_vector(cube))
-
-
-n_likelihood_calls = 0
-tracker = MLTracker()
-
-
-def log_likelihood(params):
-    global n_likelihood_calls
-    n_likelihood_calls += 1
-    instance = model.instance_from_vector(vector=list(params))
-    log_l = float(analysis.log_likelihood_function(instance=instance))
-    tracker.record(log_l)
-    return log_l
-
-
-n_live = 200
-
-sampler = Sampler(
-    prior=prior_transform,
-    likelihood=log_likelihood,
-    n_dim=model.prior_count,
-    n_live=n_live,
-)
-
-t_start = time.time()
-# Run to Nautilus's default convergence (n_eff=10000, f_live=0.01). This
-# may take many thousands of likelihood evaluations against the NumPy MGE
-# -- expect long wall times.
-sampler.run(verbose=True)
-t_elapsed = time.time() - t_start
-
-points, log_w, log_l = sampler.posterior()
-best_idx = np.argmax(log_l)
-best_instance = model.instance_from_vector(vector=list(points[best_idx]))
-max_logl = float(np.max(log_l))
-
-evals_to_ml, time_to_ml = tracker.finalise(max_log_l=max_logl, tolerance=1.0)
-
-# ---------------------------------------------------------------------------
-# Print human-readable summary
-# ---------------------------------------------------------------------------
-
-summary = f"""\
---- Nautilus (NumPy) Results ---
-Best fit:        {format_best_fit(best_instance)}
-Max log L:       {max_logl:.4f}
-Log evidence:    {float(sampler.log_z):.4f}
-
---- Performance ---
-Wall time:           {t_elapsed:.2f} s
-Likelihood evals:    {n_likelihood_calls}
-Time per eval:       {t_elapsed / max(n_likelihood_calls, 1) * 1e3:.3f} ms
-ESS:                 {float(sampler.n_eff):.1f}
-Posterior samples:   {len(points)}
-Sampler config:      n_live={n_live}, default n_eff=10000, f_live=0.01
-
---- Convergence ---
-Converged:           yes (Nautilus default n_eff / f_live)
-Evals to ML:         {evals_to_ml if evals_to_ml is not None else 'n/a'}     (first eval within 1 nat of max log L)
-Time to ML:          {f'{time_to_ml:.2f} s' if time_to_ml is not None else 'n/a'}
-"""
-
-print()
-print(summary)
-
-# ---------------------------------------------------------------------------
-# Write versioned JSON + PNG to results/searches/nautilus/
-# ---------------------------------------------------------------------------
-
-al_version = al.__version__
-result_dict = {
-    "sampler": "nautilus",
-    "backend": "numpy",
-    "instrument": "hst",
-    "model": {
-        "type": "MGE+Isothermal+ExternalShear",
-        "free_parameters": int(model.total_free_parameters),
-    },
-    "sampler_config": {
-        "n_live": n_live,
-        "n_eff_target": 10000,
-        "f_live": 0.01,
-    },
-    "results": {
-        "max_log_likelihood": max_logl,
-        "log_evidence": float(sampler.log_z),
-        "best_fit_summary": format_best_fit(best_instance),
-    },
-    "performance": {
-        "wall_time_s": t_elapsed,
-        "likelihood_evals": int(n_likelihood_calls),
-        "time_per_eval_ms": t_elapsed / max(n_likelihood_calls, 1) * 1e3,
-        "ess": float(sampler.n_eff),
-        "posterior_samples": int(len(points)),
-    },
-    "convergence": {
-        "converged": True,
-        "evals_to_ml": int(evals_to_ml) if evals_to_ml is not None else None,
-        "time_to_ml_s": float(time_to_ml) if time_to_ml is not None else None,
-    },
-    "version": al_version,
-}
-
-results_dir = _REPO_ROOT / "results" / "searches" / "nautilus"
-results_dir.mkdir(parents=True, exist_ok=True)
-json_path = results_dir / f"simple_summary_v{al_version}.json"
-json_path.write_text(json.dumps(result_dict, indent=2))
-print(f"  Results JSON saved to: {json_path}")
-
-# Bar chart of the headline timings
-fig, ax = plt.subplots(figsize=(8, 3))
-labels = [
-    "wall_time (s)",
-    "time_per_eval (ms)",
-    "time_to_ml (s)" if time_to_ml is not None else "time_to_ml (n/a)",
-]
-times = [
-    t_elapsed,
-    t_elapsed / max(n_likelihood_calls, 1) * 1e3,
-    float(time_to_ml) if time_to_ml is not None else 0.0,
-]
-ax.barh(labels, times, color=["#4C72B0", "#55A868", "#C44E52"])
-ax.set_title(f"Nautilus (NumPy) — HST MGE — v{al_version}")
-fig.tight_layout()
-png_path = results_dir / f"simple_summary_v{al_version}.png"
-fig.savefig(png_path, dpi=120)
-plt.close(fig)
-print(f"  Bar chart saved to:    {png_path}")
diff --git a/searches/sweep.py b/searches/sweep.py
new file mode 100644
index 0000000..7cebe70
--- /dev/null
+++ b/searches/sweep.py
@@ -0,0 +1,359 @@
+"""Multi-config first-class search profiling driver.
+
+Runs each in-scope cell across the CPU/GPU × fp64/mp matrix (4 configs per
+cell locally; HPC A100 configs are dispatched separately via the same
+external mechanism used by ``likelihood_runtime/sweep.py``).
+
+Each cell is a ``(sampler, dataset_class, model, instrument)`` quadruple.
+Per-config JSONs land at::
+
+    <output_root>/<sampler>/<dataset_class>/<model>/<instrument>/<config_name>.json
+    <output_root>/<sampler>/<dataset_class>/<model>/<instrument>/<config_name>.png
+    <output_root>/<sampler>/<dataset_class>/<model>/<instrument>/<config_name>.log
+
+Resume-by-default: if a per-config JSON already exists, that config's run
+is skipped. Pass ``--force`` to re-run.
+
+Usage::
+
+    # All in-scope cells × instruments × configs (warning: long)
+    python searches/sweep.py
+
+    # One cell, one instrument, CPU only (fast iteration)
+    python searches/sweep.py \\
+        --only nautilus/imaging/mge --instrument hst --skip-gpu --skip-mp
+
+    # Force re-run of one cell (bypass resume)
+    python searches/sweep.py --only nautilus/imaging/mge --instrument hst --force
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+
+_REPO_ROOT = Path(__file__).resolve().parents[1]                 # autolens_profiling/
+_DEFAULT_OUTPUT_ROOT = _REPO_ROOT / "results" / "searches"       # default for --output-root
+_DEFAULT_PYTHON = sys.executable                                 # default for --python
+
+
+# Canonical default instruments per dataset class (from the instrument dicts),
+# keyed by dataset_class. --instrument narrows a set; it cannot add to it.
+_INSTRUMENT_SETS: dict[str, tuple[str, ...]] = {
+    "imaging": ("hst", "euclid", "jwst", "ao"),
+    "interferometer": ("sma", "alma", "alma_high", "jvla"),
+    "point_source": ("simple",),
+    "datacube": ("sma",),
+}
+
+
+# (sampler, dataset_class, model) -> searches/<sampler>/<dataset_class>/<model>.py.
+# Ordered roughly cheapest -> heaviest so failures surface quickly during iteration.
+CELLS: list[tuple[str, str, str]] = [
+    ("nautilus", "point_source",   "image_plane"),
+    ("nautilus", "point_source",   "source_plane"),
+    ("nautilus", "imaging",        "mge"),
+    ("nautilus", "imaging",        "pixelization"),
+    ("nautilus", "imaging",        "delaunay"),
+    ("nautilus", "interferometer", "mge"),
+    ("nautilus", "interferometer", "pixelization"),
+    ("nautilus", "interferometer", "delaunay"),
+    ("nautilus", "datacube",       "delaunay"),
+]
+
+
+@dataclass(frozen=True)
+class SweepConfig:  # one hardware/precision configuration that each cell is run under
+    name: str  # config label; also the stem of the per-config .json / .log files
+    env_overrides: dict[str, str]  # env vars layered over os.environ for the child process
+    extra_args: tuple[str, ...]  # extra CLI flags appended to the cell script's command line
+    is_gpu: bool  # True for GPU configs; what --skip-cpu / --skip-gpu filter on
+
+
+# Local matrix: {cpu, gpu} x {fp64, mixed precision}. CPU configs explicitly pin
+# platform to cpu. GPU configs explicitly pin to cuda so a missing CUDA device
+# fails loudly rather than silently producing a CPU number.
+CONFIGS: list[SweepConfig] = [
+    SweepConfig(
+        name="local_cpu_fp64",
+        env_overrides={"JAX_PLATFORM_NAME": "cpu", "JAX_PLATFORMS": "cpu"},
+        extra_args=(),
+        is_gpu=False,
+    ),
+    SweepConfig(
+        name="local_cpu_mp",
+        env_overrides={"JAX_PLATFORM_NAME": "cpu", "JAX_PLATFORMS": "cpu"},
+        extra_args=("--use-mixed-precision",),  # this flag is what --skip-mp matches on
+        is_gpu=False,
+    ),
+    SweepConfig(
+        name="local_gpu_fp64",
+        env_overrides={"JAX_PLATFORM_NAME": "cuda", "JAX_PLATFORMS": "cuda,cpu"},
+        extra_args=(),
+        is_gpu=True,
+    ),
+    SweepConfig(
+        name="local_gpu_mp",
+        env_overrides={"JAX_PLATFORM_NAME": "cuda", "JAX_PLATFORMS": "cuda,cpu"},
+        extra_args=("--use-mixed-precision",),  # this flag is what --skip-mp matches on
+        is_gpu=True,
+    ),
+]
+
+
+def _parse_args() -> argparse.Namespace:
+    """Parse the sweep command line.
+
+    Options come in three groups: cell filters, config filters, and run
+    controls (resume, output root, interpreter, dry-run).
+    """
+    parser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
+    add = parser.add_argument
+    cell_list = {"nargs": "+", "metavar": "SAMPLER/CLASS/MODEL"}
+
+    # --- which cells ---
+    add(
+        "--only",
+        default=None,
+        help="Only run these cells (e.g. nautilus/imaging/mge).",
+        **cell_list,
+    )
+    add(
+        "--skip",
+        default=(),
+        help="Skip these cells (applied after --only).",
+        **cell_list,
+    )
+    add(
+        "--sampler",
+        nargs="+",
+        default=None,
+        help="Restrict to these samplers (e.g. --sampler nautilus).",
+    )
+    add(
+        "--dataset-class",
+        nargs="+",
+        default=None,
+        help="Restrict to these dataset classes (imaging / interferometer / ...).",
+    )
+    add(
+        "--instrument",
+        nargs="+",
+        default=None,
+        help="Restrict to these instruments. Default: every instrument valid for "
+        "the dataset class of each cell.",
+    )
+
+    # --- which configs ---
+    add("--skip-cpu", action="store_true", help="Skip local_cpu_* configs.")
+    add("--skip-gpu", action="store_true", help="Skip local_gpu_* configs.")
+    add("--skip-mp", action="store_true", help="Skip the use_mixed_precision rows (just fp64).")
+
+    # --- how to run ---
+    add(
+        "--force",
+        action="store_true",
+        help="Re-run cells whose per-config JSON already exists. "
+        "Default behaviour is to resume (skip on JSON present).",
+    )
+    add(
+        "--output-root",
+        type=Path,
+        default=_DEFAULT_OUTPUT_ROOT,
+        help=f"Root output dir. Default: {_DEFAULT_OUTPUT_ROOT}",
+    )
+    add(
+        "--python",
+        default=_DEFAULT_PYTHON,
+        help=f"Python interpreter to invoke per subprocess. Default: {_DEFAULT_PYTHON}",
+    )
+    add("--dry-run", action="store_true",
+        help="Print the planned subprocess commands but don't execute.")
+    return parser.parse_args()
+
+
+def _resolve_cells(args: argparse.Namespace) -> list[tuple[str, str, str, str]]:
+    """Filter CELLS by the CLI options, then emit one tuple per (cell, instrument)."""
+    cell_id = "/".join  # (sampler, ds_class, model) -> "sampler/ds_class/model"
+    cells = list(CELLS)
+    if args.only:
+        requested = set(args.only)
+        cells = [cell for cell in cells if cell_id(cell) in requested]
+        unknown = requested.difference(map(cell_id, cells))
+        if unknown:
+            sys.stderr.write(f"warning: --only includes unknown cells: {sorted(unknown)}\n")
+    excluded = set(args.skip)
+    samplers = set(args.sampler) if args.sampler else None
+    classes = set(args.dataset_class) if args.dataset_class else None
+    cells = [
+        cell for cell in cells
+        if cell_id(cell) not in excluded
+        and (samplers is None or cell[0] in samplers)
+        and (classes is None or cell[1] in classes)
+    ]
+    instruments = set(args.instrument) if args.instrument else None
+    return [
+        (sampler, ds_class, model, instrument)
+        for sampler, ds_class, model in cells
+        for instrument in _INSTRUMENT_SETS.get(ds_class, ())
+        if not instruments or instrument in instruments
+    ]
+
+
+def _resolve_configs(args: argparse.Namespace) -> list[SweepConfig]:
+    """Return the CONFIGS entries that survive --skip-cpu / --skip-gpu / --skip-mp."""
+    def _keep(cfg: SweepConfig) -> bool:
+        if args.skip_gpu if cfg.is_gpu else args.skip_cpu:
+            return False
+        # Mixed-precision rows are recognised by the flag they forward.
+        return not (args.skip_mp and "--use-mixed-precision" in cfg.extra_args)
+
+    return [cfg for cfg in CONFIGS if _keep(cfg)]
+
+
+def _script_path(sampler: str, ds_class: str, model: str) -> Path:
+    return _REPO_ROOT.joinpath("searches", sampler, ds_class, f"{model}.py")  # per-cell driver
+
+
+def _run_one(
+    *,
+    python: str,
+    script_path: Path,
+    config: SweepConfig,
+    instrument: str,
+    out_dir: Path,
+    dry_run: bool,
+    force: bool,
+) -> tuple[bool, float, str]:
+    """Run one (cell, instrument, config) triple as a subprocess.
+
+    Returns (ok, elapsed, log_path). ``ok=True`` with ``elapsed=0`` and an empty
+    log path is returned when the run is skipped (JSON already exists and
+    ``force`` is unset) or when ``dry_run`` is set; a dry run creates no files.
+    """
+    json_path = out_dir / f"{config.name}.json"
+    log_path = out_dir / f"{config.name}.log"
+
+    if json_path.exists() and not force:
+        print(f"    SKIP: {json_path.name} exists (use --force to re-run)")
+        return True, 0.0, ""
+
+    cmd = [
+        python,
+        str(script_path),
+        "--config-name", config.name,
+        "--output-dir", str(out_dir),
+        "--instrument", instrument,
+        *config.extra_args,
+    ]
+    # Child environment: parent env, then config overrides; cache dirs are defaults only.
+    env = dict(os.environ)
+    env.update(config.env_overrides)
+    env.setdefault("NUMBA_CACHE_DIR", "/tmp/numba_cache")
+    env.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
+
+    print(f"\n--- [{config.name}] {script_path.relative_to(_REPO_ROOT)} [{instrument}] ---")
+    print(f"    cmd: {' '.join(cmd)}")
+    print(f"    env: {config.env_overrides}")
+
+    if dry_run:
+        return True, 0.0, ""
+
+    out_dir.mkdir(parents=True, exist_ok=True)  # real runs only: --dry-run must not touch disk
+    t0 = time.time()
+    try:
+        with open(log_path, "w") as log:
+            proc = subprocess.run(
+                cmd,
+                env=env,
+                stdout=log,
+                stderr=subprocess.STDOUT,
+                check=False,
+            )
+        elapsed = time.time() - t0
+        ok = proc.returncode == 0
+        print(
+            f"    {'OK ' if ok else 'FAIL'} ({elapsed:.1f}s, exit={proc.returncode})"
+            f" -> {log_path.name}"
+        )
+        return ok, elapsed, str(log_path)
+    except KeyboardInterrupt:
+        elapsed = time.time() - t0
+        print(f"    INTERRUPTED after {elapsed:.1f}s; partial log -> {log_path}")
+        raise
+
+
+def main() -> int:
+    """Drive the sweep: returns 0 if all runs passed, 1 on any failure, 130 if interrupted."""
+    args = _parse_args()
+    cells = _resolve_cells(args)
+    configs = _resolve_configs(args)
+
+    print(
+        f"sweep_searches: {len(cells)} (cell,instrument) × {len(configs)} configs "
+        f"= {len(cells) * len(configs)} runs"
+    )
+    print(f"  cells:    {['/'.join(cell) for cell in cells]}")
+    print(f"  configs:  {[c.name for c in configs]}")
+    print(f"  output:   {args.output_root}")
+    print(f"  python:   {args.python}")
+    print(f"  resume:   {'OFF (--force)' if args.force else 'ON (default)'}")
+    if args.dry_run:
+        print("  (dry-run)")
+
+    # One (cell_id, config name, ok, elapsed seconds) row per planned run.
+    rows: list[tuple[str, str, bool, float]] = []
+    started = time.time()
+    for cell in cells:
+        sampler, ds_class, model, instrument = cell
+        cell_id = "/".join(cell)
+        script_path = _script_path(sampler, ds_class, model)
+        if not script_path.exists():
+            # Every config of a cell without a driver script is recorded as failed.
+            print(f"\n!!! missing script: {script_path}")
+            rows.extend((cell_id, cfg.name, False, 0.0) for cfg in configs)
+            continue
+
+        out_dir = args.output_root.joinpath(*cell)
+        for cfg in configs:
+            try:
+                ok, elapsed, _ = _run_one(
+                    python=args.python,
+                    script_path=script_path,
+                    config=cfg,
+                    instrument=instrument,
+                    out_dir=out_dir,
+                    dry_run=args.dry_run,
+                    force=args.force,
+                )
+            except KeyboardInterrupt:
+                print("\n\nsweep interrupted by user")
+                return 130
+            rows.append((cell_id, cfg.name, ok, elapsed))
+
+    total = time.time() - started
+    rule = "=" * 80
+    print("\n" + rule)
+    print(f"sweep_searches summary  ({total:.1f}s total)")
+    print(rule)
+    print(f"  {'cell':<46}{'config':<22}{'ok':<6}{'elapsed':>10}")
+    print(f"  {'-'*46}{'-'*22}{'-'*6}{'-'*10}")
+    for cell, cfg, ok, t in rows:
+        flag = "OK" if ok else "FAIL"
+        print(f"  {cell:<46}{cfg:<22}{flag:<6}{t:>9.1f}s")
+    failures = sum(1 for row in rows if not row[2])
+    if failures:
+        print(f"\n  {failures} run(s) FAILED — check the .log files in each cell's output dir.")
+        return 1
+    print("\n  All runs OK.")
+    return 0
+
+
+if __name__ == "__main__":  # script entry point; nothing runs on import
+    raise SystemExit(main())  # main()'s return value becomes the process exit status