diff --git a/scripts/cluster/csv_api.py b/scripts/cluster/csv_api.py new file mode 100644 index 000000000..6a72ac789 --- /dev/null +++ b/scripts/cluster/csv_api.py @@ -0,0 +1,451 @@ +""" +CSV API: Cluster +================ + +Cluster lens modelling uses a small family of CSV files as its canonical +input format. Rather than composing a tracer and a model inline in Python, +the cluster workflow stores every component — main galaxies, dark-matter +halo, source light, source point components, scaling-tier members — in +spreadsheet-editable CSVs that are then loaded into PyAutoLens objects. + +This guide walks through every CSV in the cluster surface, in the order +they are usually consumed: + + 1. **Point dataset CSV** — ``point_datasets.csv`` — the observational data + (multiple-image positions per source). Loaded *before* modelling, as is + the autolens convention. Produced by ``simulator.py`` for the bundled + cluster dataset; this guide builds an illustrative one to show the + schema. + 2. **Model CSVs (this PR)** — ``mass.csv``, ``light.csv``, ``point.csv`` + — one CSV per profile family, each row carrying a galaxy name, an + attribute name, a profile class, and the constructor parameters of + that profile. The lens galaxies, the host dark-matter halo, and the + background sources all share the same three CSVs (grouping is by + profile family, not by tier). Joined across the family CSVs to + produce a full ``Galaxy`` (or ``af.Model[Galaxy]``). + 3. **Scaling galaxies CSV** — ``scaling_galaxies.csv`` — the legacy + 3-column ``y, x, luminosity`` schema for the scaling-relation tier + that drove the original CSV-first cluster work. Kept as-is because + naming each scaling-tier member would add overhead with no signal. + +Everything this guide writes goes into ``dataset/cluster/csv_api_example/`` +(a scratch folder dedicated to this script). The canonical cluster dataset +at ``dataset/cluster/simple/`` is produced by ``simulator.py`` and is what +``modeling.py`` and ``start_here.py`` consume. + +__Why CSVs?__ + +At cluster scale you might have tens of main galaxies, hundreds of member +galaxies, and a dozen background sources. Building all of that inline in +Python becomes unwieldy and error-prone. CSVs make every component: + + - **Spreadsheet-editable** — open in Excel / LibreOffice / any text + editor, tweak a value, save, re-run. No Python edit needed. + - **Diff-friendly** — git diff on a CSV is line-by-line, so a change to + one galaxy's mass is one line in the diff. + - **Easy to scale up** — adding more galaxies is a row append, not a + Python loop edit. The number of free parameters in the model does not + grow with the population size (see scaling galaxies below). + - **Round-trippable** — write a Python model out, read it back, get the + same Python model. This guide demonstrates that explicitly. + +__Contents__ + +- **Imports & Output Path** — set up the scratch folder. +- **Point Dataset CSV** — write and load ``point_datasets.csv``. +- **Galaxy Model CSV API** — write, read, and round-trip family CSVs. +- **Scaling Galaxies CSV** — the legacy scaling-tier schema. +- **af.Model[Galaxy] from CSVs** — building modelling-ready objects. +- **Wrap Up** — where each CSV lives in the canonical cluster workflow. +""" + +from autoconf import jax_wrapper # Sets JAX environment before other imports + +# from autoconf import setup_notebook; setup_notebook() + +from pathlib import Path + +import autofit as af +import autolens as al + +""" +__Imports & Output Path__ + +Everything this guide writes goes into ``dataset/cluster/csv_api_example/``, +a scratch folder dedicated to this script. The canonical cluster dataset +lives at ``dataset/cluster/simple/`` and is produced by ``simulator.py``; +this guide is illustrative and does not pair to that dataset. +""" +output_path = Path("dataset") / "cluster" / "csv_api_example" +output_path.mkdir(exist_ok=True, parents=True) + + +""" +__Point Dataset CSV__ + +We begin with the observational data CSV because convention is "load data +before modelling". For point-source cluster lensing the observational data +is the image-plane positions of each multiply-imaged source. + +``al.PointDataset`` carries a source's ``name`` (which pairs to a ``Point`` +component in the model), its ``positions`` (image-plane (y, x) coordinates +of the multiple images), a ``positions_noise_map`` (per-position +positional uncertainty), and an optional ``redshift``. A real cluster +typically has one ``PointDataset`` per background source. + +The CSV schema is one row per observed image, grouped by ``name``: + + ``name, y, x, positions_noise, redshift?`` + +All rows sharing a ``name`` belong to the same source. The ``redshift`` +column is consistent within a group (validated on load). +""" +point_dataset_0 = al.PointDataset( + name="point_0", + positions=al.Grid2DIrregular( + [ + (-9.0, -19.3), + (0.04, -0.08), + (1.84, 23.3), + ] + ), + positions_noise_map=0.005, + redshift=1.0, +) + +point_dataset_1 = al.PointDataset( + name="point_1", + positions=al.Grid2DIrregular( + [ + (-15.96, 18.99), + (0.68, -0.51), + (13.65, -14.32), + ] + ), + positions_noise_map=0.005, + redshift=2.0, +) + +""" +Write the combined dataset to a single CSV. +""" +al.output_to_csv( + datasets=[point_dataset_0, point_dataset_1], + file_path=output_path / "point_datasets.csv", +) + +""" +Read it back. ``al.list_from_csv`` returns a ``List[PointDataset]`` with +each entry carrying the per-source positions, noise, and redshift. +""" +loaded_point_datasets = al.list_from_csv(file_path=output_path / "point_datasets.csv") + +print("=" * 64) +print("Point datasets loaded from point_datasets.csv:") +print("=" * 64) +for dataset in loaded_point_datasets: + print(f" name={dataset.name!r} redshift={dataset.redshift}") + print(f" positions={dataset.positions.in_list}") + print(f" noise={dataset.positions_noise_map}") + print() + + +""" +__Galaxy Model CSV API__ + +The model CSV API stores every galaxy component (main galaxies, the dark +matter halo, sources) in three family-level CSVs: + + - ``mass.csv`` — all mass profiles (``dPIEMassSph``, ``NFWMCRLudlowSph``, etc.) + - ``light.csv`` — all light profiles (``SersicSph``, ``SersicCore``, etc.) + - ``point.csv`` — all point-source components (``Point``) + +Each row carries: + + - ``galaxy`` — galaxy name. Rows sharing this name compose into one + ``Galaxy``. + - ``attr_name`` — attribute name to bind under on the Galaxy (e.g. + ``mass``, ``bulge``, ``point_0``). + - ``profile_class``— the concrete class name (looked up via ``getattr`` + against the family namespace ``al.mp`` / ``al.lp`` / + ``al.ps``). + - ```` — the profile's constructor arguments. Tuples like + ``centre`` split into ``y`` / ``x`` columns; other + tuples (e.g. ``ell_comps``) into ``_0`` / + ``_1``. + - ``redshift`` — optional per-row Galaxy redshift (consistent across + every row for the same galaxy or all blank). + +**Sparse columns are supported.** Different profile classes inside the +same family CSV can use disjoint parameter columns; cells unused by a +row's class are simply blank. This is what lets one ``main_lens_mass.csv`` +carry both 2 ``dPIEMassSph`` cluster members and 1 ``NFWMCRLudlowSph`` +host halo even though their parameter sets don't overlap. + +We start by building a small illustrative cluster in Python. +""" +redshift_lens = 0.5 +source_redshifts = [1.0, 2.0] + +# One dict per profile family. Each maps {galaxy_name: {attr_name: profile_instance}}. +# Mass family carries the 2 main-lens dPIE profiles + the host-halo NFW; light family +# carries the 2 main-lens Sersic bulges + the 2 source-galaxy SersicCore bulges; point +# family carries the 2 source-galaxy Point components. + +mass_profiles = { + "lens_0": {"mass": al.mp.dPIEMassSph(centre=(0.0, 0.0), ra=8.0, rs=20.0, b0=3.0)}, + "lens_1": {"mass": al.mp.dPIEMassSph(centre=(10.0, 8.0), ra=5.0, rs=12.0, b0=1.2)}, + "host_halo": { + "dark": al.mp.NFWMCRLudlowSph( + centre=(0.0, 0.0), + mass_at_200=10**15.3, + redshift_object=redshift_lens, + redshift_source=max(source_redshifts), + ) + }, +} + +light_profiles = { + "lens_0": { + "bulge": al.lp.SersicSph( + centre=(0.0, 0.0), intensity=1.5, effective_radius=3.0, sersic_index=4.0 + ) + }, + "lens_1": { + "bulge": al.lp.SersicSph( + centre=(10.0, 8.0), intensity=0.8, effective_radius=1.5, sersic_index=3.5 + ) + }, + "source_0": { + "bulge": al.lp.SersicCore( + centre=(0.3, 0.5), + ell_comps=al.convert.ell_comps_from(axis_ratio=0.8, angle=60.0), + intensity=2.0, + effective_radius=0.3, + sersic_index=1.0, + ) + }, + "source_1": { + "bulge": al.lp.SersicCore( + centre=(-0.8, 1.2), + ell_comps=al.convert.ell_comps_from(axis_ratio=0.8, angle=90.0), + intensity=2.0, + effective_radius=0.3, + sersic_index=1.0, + ) + }, +} + +point_profiles = { + "source_0": {"point_0": al.ps.Point(centre=(0.3, 0.5))}, + "source_1": {"point_1": al.ps.Point(centre=(-0.8, 1.2))}, +} + +""" +We now have a complete cluster model expressed as nested dicts. Time to +write each profile family to its own CSV. + +The ``redshifts`` argument is a flat ``{galaxy_name: redshift}`` mapping +so the writer can emit the per-row ``redshift`` column. Mains + halo sit +at the lens redshift; sources carry their per-source redshift. +""" +mass_csv = output_path / "mass.csv" +light_csv = output_path / "light.csv" +point_csv = output_path / "point.csv" + +redshifts_by_galaxy = { + "lens_0": redshift_lens, + "lens_1": redshift_lens, + "host_halo": redshift_lens, + "source_0": source_redshifts[0], + "source_1": source_redshifts[1], +} + +al.galaxy_models_to_csv( + profiles_by_galaxy=mass_profiles, + file_path=mass_csv, + family="mass", + redshifts=redshifts_by_galaxy, +) + +al.galaxy_models_to_csv( + profiles_by_galaxy=light_profiles, + file_path=light_csv, + family="light", + redshifts=redshifts_by_galaxy, +) + +al.galaxy_models_to_csv( + profiles_by_galaxy=point_profiles, + file_path=point_csv, + family="point", + redshifts=redshifts_by_galaxy, +) + +""" +Read each family CSV back. ``al.galaxy_models_from_csv`` returns a typed +``GalaxyModelTable`` with one ``GalaxyModelRow`` per CSV row: each row +carries the resolved ``profile_class`` (now a Python class, not a string) +and the parameter dict it needs. +""" +mass_table = al.galaxy_models_from_csv(mass_csv, family="mass") +light_table = al.galaxy_models_from_csv(light_csv, family="light") +point_table = al.galaxy_models_from_csv(point_csv, family="point") + +for label, table in [ + ("mass.csv", mass_table), + ("light.csv", light_table), + ("point.csv", point_table), +]: + print("=" * 64) + print(f"{label}:") + print("=" * 64) + for row in table.rows: + print( + f" galaxy={row.galaxy!r} attr_name={row.attr_name!r} " + f"class={row.profile_class.__name__} redshift={row.redshift}" + ) + print(f" params={row.params}") + print() + + +""" +Join the family tables into ``Galaxy`` instances. ``al.galaxies_from_csv_tables`` +takes one or more ``GalaxyModelTable``s and groups rows by the ``galaxy`` +column, attaching each profile under its ``attr_name``. Per-galaxy redshift +consistency is enforced across every family CSV that mentions the galaxy. +""" +galaxies = al.galaxies_from_csv_tables( + mass_table, + light_table, + point_table, +) + +print("=" * 64) +print("Galaxies built from CSVs (al.galaxies_from_csv_tables):") +print("=" * 64) +for name, galaxy in galaxies.items(): + attr_summary = ", ".join( + f"{a}={type(getattr(galaxy, a)).__name__}" + for a in vars(galaxy) + if not a.startswith("_") and a != "redshift" + ) + print(f" {name}: redshift={galaxy.redshift} attrs=[{attr_summary}]") +print() + + +""" +__af.Model[Galaxy] from CSVs__ + +The concrete ``Galaxy`` instances above are useful for visualisation and +for the simulator (which needs concrete parameter values to produce a +truth tracer). For *modelling*, what you want is an ``af.Model[Galaxy]`` +where some parameters are free (priors) and others are fixed. + +``al.galaxy_af_models_from_csv_tables`` returns the same dict keyed by +galaxy name, but each value is an ``af.Model[Galaxy]`` with concrete CSV +values as fixed defaults. To free a parameter for the non-linear search, +mutate the returned model: + +```python +galaxy_models["lens_0"].mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) +galaxy_models["lens_0"].mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) +galaxy_models["lens_0"].mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) +``` + +This is the same composition pattern as ``af.Model(al.Galaxy, mass=...)`` +elsewhere in the workspace — only the construction step is replaced by +loading from CSVs. +""" +galaxy_models = al.galaxy_af_models_from_csv_tables( + mass_table, + light_table, + point_table, +) + +# Mutate selected params on the main-lens mass profiles into priors. +for name in ("lens_0", "lens_1"): + galaxy_models[name].mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) + galaxy_models[name].mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) + galaxy_models[name].mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) + +# Host halo: free mass_at_200, keep centre + redshifts fixed. +galaxy_models["host_halo"].dark.mass_at_200 = af.LogUniformPrior( + lower_limit=10**14.5, upper_limit=10**16.0 +) + +model = af.Collection(galaxies=af.Collection(**galaxy_models)) + +print("=" * 64) +print("af.Collection built from CSVs (modelling-ready):") +print("=" * 64) +print(model.info) + + +""" +__Scaling Galaxies CSV__ + +The scaling-tier CSV format predates the named-galaxy model CSV API and +keeps its narrow 3-column schema: ``y, x, luminosity, redshift?``. The +scaling tier is implicitly one profile class per member, so naming each +member and emitting an ``attr_name`` column would be more overhead than +signal — every row uses the same ``dPIEMassSph`` mass profile with +parameters derived from the shared ``scaling_factor`` and +``scaling_exponent`` modelling parameters. + +``al.galaxy_table_to_csv`` and ``al.galaxy_table_from_csv`` are the +schema-specific writers/readers. The simulator emits 10 scaling members +into ``dataset/cluster/simple/scaling_galaxies.csv``; we write an +illustrative 3-member version here. +""" +scaling_centres = [(5.5, -6.5), (-7.5, 3.0), (12.0, -5.0)] +scaling_luminosities = [0.40, 0.32, 0.25] + +scaling_csv = output_path / "scaling_galaxies.csv" + +al.galaxy_table_to_csv( + centres=scaling_centres, + luminosities=scaling_luminosities, + file_path=scaling_csv, +) + +scaling_table = al.galaxy_table_from_csv(scaling_csv) + +print("=" * 64) +print("scaling_galaxies.csv (legacy schema):") +print("=" * 64) +print(f" centres={[tuple(c) for c in scaling_table.centres.in_list]}") +print(f" luminosities={scaling_table.luminosities}") +print(f" redshifts={scaling_table.redshifts}") +print() + + +""" +__Wrap Up__ + +The four CSVs above are the canonical input format for cluster lens +modelling in PyAutoLens. Where they live in the workflow: + + - ``dataset/cluster//point_datasets.csv`` — produced by + ``simulator.py``; consumed by ``modeling.py`` and ``start_here.py`` + via ``al.list_from_csv``. + - ``dataset/cluster//mass.csv`` + ``light.csv`` + + ``point.csv`` — produced by ``simulator.py`` (truth values); consumed + by ``modeling.py`` and ``start_here.py`` via + ``al.galaxy_models_from_csv`` + ``al.galaxy_af_models_from_csv_tables``. + - ``dataset/cluster//scaling_galaxies.csv`` — produced by + ``simulator.py``; consumed by ``modeling.py`` and ``start_here.py`` + via ``al.galaxy_table_from_csv``. + +To start modelling your own cluster: + + 1. Edit (or generate from a light-only fit) the model CSVs and + ``scaling_galaxies.csv`` for your cluster's main + scaling tiers. + 2. Edit ``point_datasets.csv`` with your measured multiple-image + positions per source. + 3. Drop the CSVs into ``dataset/cluster//``. + 4. Set ``dataset_name = ""`` in ``modeling.py`` and run. + +For a deeper view of what ``simulator.py`` and ``modeling.py`` do with +these CSVs end-to-end, follow the next files in this folder. +""" diff --git a/scripts/cluster/modeling.py b/scripts/cluster/modeling.py index 6dbaf80c6..4f2329011 100644 --- a/scripts/cluster/modeling.py +++ b/scripts/cluster/modeling.py @@ -20,7 +20,7 @@ - **Example:** What this script fits and the underlying simulator. - **Simulation:** Overview of how the simulated dataset was generated. - **Dataset:** Load the CCD image and the per-source point datasets from the combined CSV. -- **Centres:** Load the main lens centres and the host halo centre written by the simulator. +- **Model CSVs:** Load the named-galaxy mass + point CSVs written by the simulator. - **Scaling Galaxies Table:** Load the scaling-tier centres + luminosities from the CSV. - **Point Solver:** Set up the image-plane multiple-image solver. - **Chi Squared:** Why this script uses an image-plane chi-squared. @@ -64,11 +64,12 @@ - ``data.fits`` / ``noise_map.fits`` / ``psf.fits`` — CCD imaging of the cluster (used for visualization). - ``point_datasets.csv`` — one row per observed multiple image, grouped by source ``name``, with a ``redshift`` column per group. Loaded here with ``al.list_from_csv``. + - ``mass.csv`` + ``light.csv`` + ``point.csv`` — named-galaxy CSVs carrying the full truth model + (main galaxies + host halo + sources). Loaded here with ``al.galaxy_models_from_csv``. See + ``scripts/cluster/csv_api.py`` for the schema walkthrough. - ``scaling_galaxies.csv`` — one row per scaling-tier member with columns ``y, x, luminosity``. Loaded here with ``al.galaxy_table_from_csv``. - - ``main_lens_centres.json`` — ``Grid2DIrregular`` of the 2 main lens galaxy centres. - - ``host_halo_centre.json`` — ``Grid2DIrregular`` of the 1 host halo centre. - - ``tracer.json`` / ``source_centres.json`` — true model (used by visualization, not modeling). + - ``tracer.json`` — true ``Tracer`` (used by visualization, not modeling). """ from autoconf import jax_wrapper # Sets JAX environment before other imports @@ -101,9 +102,11 @@ If the dataset does not already exist on your system, it will be created by running the corresponding simulator script. This ensures that all example scripts can be run without manually simulating data first. """ -if not (dataset_path / "data.fits").exists() or not ( - dataset_path / "scaling_galaxies.csv" -).exists(): +if ( + not (dataset_path / "data.fits").exists() + or not (dataset_path / "scaling_galaxies.csv").exists() + or not (dataset_path / "mass.csv").exists() +): import subprocess import sys @@ -154,18 +157,26 @@ ) """ -__Centres__ +__Model CSVs__ -The centres of the main lens galaxies and the host halo are loaded from the JSON files written by the -simulator. They are fixed in the model below — for cluster-scale point-source modeling we treat the -observed centres of light as ground truth, since they remove a large block of degenerate parameters that -the multiple-image positions alone cannot constrain. +The simulator writes the truth model into three family-level CSVs — ``mass.csv`` (lens + halo mass +profiles), ``light.csv`` (lens + source light profiles), ``point.csv`` (source point components) — keyed +by a ``galaxy`` column with ``profile_class`` dispatch (see ``scripts/cluster/csv_api.py`` for the full +schema walkthrough). We load the mass and point families here; the light family is not needed for +point-source modeling (light profiles do not affect the lensing). -In a real analysis, the centres come from light-profile fits to the imaging data or from external source -catalogues (e.g. Source Extractor). +In a real analysis these CSVs come from upstream measurement: light-profile fits to the imaging data +populate ``light.csv``; the lens-galaxy centres in ``mass.csv`` are typically pinned to the light +centres (with their values then taken as ground truth). For cluster-scale point-source modeling the +observed centres remove a large block of degenerate parameters that the multiple-image positions alone +cannot constrain. """ -main_lens_centres = al.from_json(file_path=dataset_path / "main_lens_centres.json") -host_halo_centre = al.from_json(file_path=dataset_path / "host_halo_centre.json")[0] +mass_table = al.galaxy_models_from_csv( + file_path=dataset_path / "mass.csv", family="mass" +) +point_table = al.galaxy_models_from_csv( + file_path=dataset_path / "point.csv", family="point" +) """ __Scaling Galaxies Table__ @@ -294,51 +305,40 @@ redshift_lens = 0.5 source_redshifts = [dataset.redshift for dataset in dataset_list] -# Main Lens Galaxies (dPIEMassSph, centre fixed) - -main_lens_dict = {} - -for i, centre in enumerate(main_lens_centres): - - mass = af.Model(al.mp.dPIEMassSph) - mass.centre = tuple(centre) - mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) - mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) - mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) +# Build af.Model[Galaxy] instances from the family CSVs. Concrete CSV values +# become fixed af.Model defaults; we then promote selected params to priors +# below. This dict is keyed by galaxy name (lens_0, lens_1, host_halo, +# source_0, source_1) — the same naming convention the simulator uses. - main_lens_dict[f"lens_{i}"] = af.Model( - al.Galaxy, redshift=redshift_lens, mass=mass - ) +galaxy_models = al.galaxy_af_models_from_csv_tables(mass_table, point_table) -# Host Dark Matter Halo (NFWMCRLudlowSph, centre fixed, mass_at_200 free) +# Main Lens Galaxies: free dPIE ra / rs / b0 on each; centre stays fixed at the CSV value. +for name in ("lens_0", "lens_1"): + galaxy_models[name].mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) + galaxy_models[name].mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) + galaxy_models[name].mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) -host_halo_mass = af.Model(al.mp.NFWMCRLudlowSph) -host_halo_mass.centre = tuple(host_halo_centre) -host_halo_mass.redshift_object = redshift_lens -host_halo_mass.redshift_source = max(source_redshifts) -host_halo_mass.mass_at_200 = af.LogUniformPrior( +# Host Halo: free mass_at_200; centre + redshift_object + redshift_source stay fixed. +galaxy_models["host_halo"].dark.mass_at_200 = af.LogUniformPrior( lower_limit=10**14.5, upper_limit=10**16.0 ) -host_halo = af.Model(al.Galaxy, redshift=redshift_lens, dark=host_halo_mass) - -# Source Galaxies (Point, redshift pinned to per-dataset redshift) - -source_dict = {} - +# Source Galaxies: free Point centres with GaussianPrior initialised from the +# mean of each source's observed multiple-image positions in its PointDataset. +# This deliberately ignores the truth centre stored in point.csv — in a real +# analysis you don't know the source's true source-plane position, you only +# have the image-plane positions of its multiple images. for i, dataset in enumerate(dataset_list): - positions = np.atleast_2d(dataset.positions) - - point = af.Model(al.ps.Point) - point.centre_0 = af.GaussianPrior(mean=float(np.mean(positions[:, 0])), sigma=3.0) - point.centre_1 = af.GaussianPrior(mean=float(np.mean(positions[:, 1])), sigma=3.0) - - source_dict[f"source_{i}"] = af.Model( - al.Galaxy, redshift=dataset.redshift, **{f"point_{i}": point} + point_attr = getattr(galaxy_models[f"source_{i}"], f"point_{i}") + point_attr.centre_0 = af.GaussianPrior( + mean=float(np.mean(positions[:, 0])), sigma=3.0 + ) + point_attr.centre_1 = af.GaussianPrior( + mean=float(np.mean(positions[:, 1])), sigma=3.0 ) -# Scaling Tier Members (dPIEMassSph, b0 derived from shared scaling relation) +# Scaling Tier Members (dPIEMassSph, b0 derived from shared scaling relation). # # scaling_factor and scaling_exponent are defined ONCE outside the loop. Every # member's b0 is a derived prior of these two shared parameters plus its own @@ -370,7 +370,7 @@ # Overall Lens Model: model = af.Collection( - galaxies=af.Collection(host_halo=host_halo, **main_lens_dict, **source_dict), + galaxies=af.Collection(**galaxy_models), scaling_galaxies=scaling_galaxies, ) diff --git a/scripts/cluster/simulator.py b/scripts/cluster/simulator.py index d7ab1ee2b..545bf617b 100644 --- a/scripts/cluster/simulator.py +++ b/scripts/cluster/simulator.py @@ -39,8 +39,8 @@ - **Combined CSV:** Write *all* point datasets to a single CSV so a user can hand-edit positions and noise in a spreadsheet. - **Manual CSV Editing:** Instructions for editing the combined CSV by hand, which is the preferred cluster workflow. - **Scaling Galaxies CSV:** Write the scaling-member centres and luminosities to ``scaling_galaxies.csv``. +- **Model CSVs:** Write the truth model to ``mass.csv`` + ``light.csv`` + ``point.csv`` (the named-galaxy CSV API). - **Tracer JSON:** Save the true `Tracer` for future inspection. -- **Centre JSON Files:** Save the main lens, host halo, and source centres as JSON. - **Imaging:** Simulate CCD imaging of the cluster (used to measure positions in real datasets and for visualization). - **Visualize:** Plot the point-source dataset, tracer, and imaging. @@ -617,24 +617,56 @@ def jitted_solve(tracer, source_plane_coordinate): ) """ -__Centre JSON Files__ +__Model CSVs__ -Save the main lens galaxy centres, the host halo centre, and the source centres as JSON so the modeling -scripts can load them directly (e.g. to fix positions, define priors). Scaling-tier centres are stored in -``scaling_galaxies.csv`` above — the CSV carries both centres and luminosities together, so no separate -JSON is written for that tier. +Write the truth model out as three family-level CSVs — ``mass.csv``, ``light.csv``, ``point.csv`` — +keyed by galaxy name. The modeling and start_here scripts load these directly with +``al.galaxy_models_from_csv`` and compose them into ``af.Model[Galaxy]`` instances ready for non-linear +search. See ``scripts/cluster/csv_api.py`` for the full schema walkthrough. + +The scaling tier keeps its narrow 3-column ``scaling_galaxies.csv`` schema written above — naming each +scaling member and emitting an ``attr_name`` column would be more overhead than signal. """ -al.output_to_json( - obj=al.Grid2DIrregular(main_lens_centres), - file_path=dataset_path / "main_lens_centres.json", +mass_profiles = { + **{f"lens_{i}": {"mass": g.mass} for i, g in enumerate(main_lens_galaxies)}, + "host_halo": {"dark": host_halo_galaxy.dark}, +} + +light_profiles = { + **{f"lens_{i}": {"bulge": g.bulge} for i, g in enumerate(main_lens_galaxies)}, + **{f"source_{i}": {"bulge": g.bulge} for i, g in enumerate(source_galaxies)}, +} + +point_profiles = { + f"source_{i}": {f"point_{i}": getattr(g, f"point_{i}")} + for i, g in enumerate(source_galaxies) +} + +redshifts_by_galaxy = { + **{f"lens_{i}": redshift_lens for i in range(len(main_lens_galaxies))}, + "host_halo": redshift_lens, + **{f"source_{i}": z for i, z in enumerate(source_redshifts)}, +} + +al.galaxy_models_to_csv( + profiles_by_galaxy=mass_profiles, + file_path=dataset_path / "mass.csv", + family="mass", + redshifts=redshifts_by_galaxy, ) -al.output_to_json( - obj=al.Grid2DIrregular([host_halo_centre]), - file_path=dataset_path / "host_halo_centre.json", + +al.galaxy_models_to_csv( + profiles_by_galaxy=light_profiles, + file_path=dataset_path / "light.csv", + family="light", + redshifts=redshifts_by_galaxy, ) -al.output_to_json( - obj=al.Grid2DIrregular(source_centres), - file_path=dataset_path / "source_centres.json", + +al.galaxy_models_to_csv( + profiles_by_galaxy=point_profiles, + file_path=dataset_path / "point.csv", + family="point", + redshifts=redshifts_by_galaxy, ) """ diff --git a/scripts/cluster/start_here.py b/scripts/cluster/start_here.py index 3eefa11c8..5e5edb57d 100644 --- a/scripts/cluster/start_here.py +++ b/scripts/cluster/start_here.py @@ -30,7 +30,7 @@ - **Google Colab Setup:** Bootstraps the environment when running on Colab. - **Imports:** The libraries we'll use. - **Dataset:** Load the CCD image and the per-source point datasets. -- **Centres:** Load the main lens and host halo centres. +- **Model CSVs:** Load the named-galaxy mass + point CSVs written by the simulator. - **Scaling Galaxies Table:** Load the 10 scaling-tier members' centres and luminosities from a CSV. - **Point Solver:** Set up the image-plane multiple-image solver. - **Cluster Components:** The four tiers of object that make up the model. @@ -118,9 +118,11 @@ dataset_name = "simple" dataset_path = Path("dataset") / "cluster" / dataset_name -if not (dataset_path / "data.fits").exists() or not ( - dataset_path / "scaling_galaxies.csv" -).exists(): +if ( + not (dataset_path / "data.fits").exists() + or not (dataset_path / "scaling_galaxies.csv").exists() + or not (dataset_path / "mass.csv").exists() +): subprocess.run( [sys.executable, "scripts/cluster/simulator.py"], check=True, @@ -155,15 +157,23 @@ ) """ -__Centres__ +__Model CSVs__ + +The simulator writes the truth model into three family-level CSVs — ``mass.csv``, ``light.csv``, +``point.csv`` — keyed by a ``galaxy`` column with ``profile_class`` dispatch. See +``scripts/cluster/csv_api.py`` for the schema walkthrough. -Centres of the 2 main lens galaxies and the host halo are loaded from JSON. In point-source cluster -modeling, observed galaxy-light centres are treated as ground truth (they remove a large block of -degenerate parameters that the multiple-image positions alone cannot constrain). In a real analysis, -these centres come from light-profile fits to the imaging data or from external source catalogues. +Point-source modeling only needs the mass and point families (light profiles don't affect lensing). +Observed galaxy-light centres are treated as ground truth — they remove a large block of degenerate +parameters that the multiple-image positions alone cannot constrain. In a real analysis these centres +come from light-profile fits to the imaging data or external source catalogues. """ -main_lens_centres = al.from_json(file_path=dataset_path / "main_lens_centres.json") -host_halo_centre = al.from_json(file_path=dataset_path / "host_halo_centre.json")[0] +mass_table = al.galaxy_models_from_csv( + file_path=dataset_path / "mass.csv", family="mass" +) +point_table = al.galaxy_models_from_csv( + file_path=dataset_path / "point.csv", family="point" +) """ __Scaling Galaxies Table__ @@ -240,46 +250,37 @@ redshift_lens = 0.5 source_redshifts = [dataset.redshift for dataset in dataset_list] -# Main Lens Galaxies (dPIEMassSph, centre fixed, ra/rs/b0 free) - -main_lens_dict = {} -for i, centre in enumerate(main_lens_centres): - mass = af.Model(al.mp.dPIEMassSph) - mass.centre = tuple(centre) - mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) - mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) - mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) +# Build af.Model[Galaxy] instances directly from the family CSVs. Concrete CSV +# values become fixed af.Model defaults; we then promote selected params to +# priors below. Keys: lens_0, lens_1, host_halo, source_0, source_1. - main_lens_dict[f"lens_{i}"] = af.Model( - al.Galaxy, redshift=redshift_lens, mass=mass - ) +galaxy_models = al.galaxy_af_models_from_csv_tables(mass_table, point_table) -# Host Dark Matter Halo (NFWMCRLudlowSph, centre fixed, mass_at_200 free) +# Main Lens Galaxies: free dPIE ra / rs / b0; centre stays fixed at the CSV value. +for name in ("lens_0", "lens_1"): + galaxy_models[name].mass.ra = af.UniformPrior(lower_limit=1.0, upper_limit=15.0) + galaxy_models[name].mass.rs = af.UniformPrior(lower_limit=5.0, upper_limit=40.0) + galaxy_models[name].mass.b0 = af.UniformPrior(lower_limit=0.1, upper_limit=10.0) -host_halo_mass = af.Model(al.mp.NFWMCRLudlowSph) -host_halo_mass.centre = tuple(host_halo_centre) -host_halo_mass.redshift_object = redshift_lens -host_halo_mass.redshift_source = max(source_redshifts) -host_halo_mass.mass_at_200 = af.LogUniformPrior( +# Host Halo: free mass_at_200; centre + redshift_object + redshift_source fixed. +galaxy_models["host_halo"].dark.mass_at_200 = af.LogUniformPrior( lower_limit=10**14.5, upper_limit=10**16.0 ) -host_halo = af.Model(al.Galaxy, redshift=redshift_lens, dark=host_halo_mass) - -# Source Galaxies (Point, redshift pinned to per-dataset redshift) -source_dict = {} +# Source Galaxies: free Point centres with GaussianPrior initialised from the +# mean of each source's observed multiple-image positions (NOT the truth from +# point.csv — in a real analysis the truth is unknown). for i, dataset in enumerate(dataset_list): positions = np.atleast_2d(dataset.positions) - - point = af.Model(al.ps.Point) - point.centre_0 = af.GaussianPrior(mean=float(np.mean(positions[:, 0])), sigma=3.0) - point.centre_1 = af.GaussianPrior(mean=float(np.mean(positions[:, 1])), sigma=3.0) - - source_dict[f"source_{i}"] = af.Model( - al.Galaxy, redshift=dataset.redshift, **{f"point_{i}": point} + point_attr = getattr(galaxy_models[f"source_{i}"], f"point_{i}") + point_attr.centre_0 = af.GaussianPrior( + mean=float(np.mean(positions[:, 0])), sigma=3.0 + ) + point_attr.centre_1 = af.GaussianPrior( + mean=float(np.mean(positions[:, 1])), sigma=3.0 ) -# Scaling Tier (shared scaling_factor + scaling_exponent; per-member b0 derived) +# Scaling Tier (shared scaling_factor + scaling_exponent; per-member b0 derived). scaling_factor = af.UniformPrior(lower_limit=0.0, upper_limit=1.0) scaling_exponent = af.UniformPrior(lower_limit=0.0, upper_limit=2.0) @@ -306,7 +307,7 @@ # Overall Model model = af.Collection( - galaxies=af.Collection(host_halo=host_halo, **main_lens_dict, **source_dict), + galaxies=af.Collection(**galaxy_models), scaling_galaxies=scaling_galaxies, )