Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions workflow/rules/hit.smk
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ rule build_tier_hit:
# not all of them will be used. room for improvement
hpge_dtmaps=lambda wc: aggregate.gen_list_of_merged_dtmaps(config, wc.simid),
hpge_currmods=lambda wc: aggregate.gen_list_of_merged_currmods(config, wc.simid),
hpge_eresmods=lambda wc: aggregate.gen_list_of_eresmods(config, wc.simid),
# NOTE: technically this rule only depends on one block in the
# partitioning file, but in practice the full file will always change
simstat_part_file=rules.make_simstat_partition_file.output[0],
Expand Down Expand Up @@ -321,3 +322,41 @@ rule merge_current_pulse_model_pars:
out_dict[hpges[i]] = dbetto.utils.load_dict(f)

dbetto.utils.write_dict(out_dict, output[0])


rule extract_energy_resolution_model:
"""Extract from the LEGEND-200 data and store on disk the HPGe energy resolution model.

Stores a YAML file with a mapping between HPGe detectors and respective
information to reconstruct the energy resolution function, as determined
during energy calibration. This is done in a separate rule because the
data production parameter database is large and we don't want to use a lot
of memory in the `build_tier_hit` rule.

Uses wildcard `runid`.
"""
message:
"Extracting HPGe energy resolution model for {wildcards.runid}"
# NOTE: we don't list the file dependencies here because they are
# dynamically generated, and that would slow down the DAG generation
output:
patterns.output_eresmod_filename(config),
run:
import dbetto
from legendsimflow import hpge_pars, utils

hit_tier_name = utils.get_hit_tier_name(config.paths.l200data)

pars_dict = hpge_pars.lookup_energy_res_metadata(
config.paths.l200data,
config.metadata,
wildcards.runid,
hit_tier_name=hit_tier_name,
)

out_dict = dbetto.AttrsDict({})
fields = ["expression", "parameters", "uncertainties"]
for hpge, meta in pars_dict.items():
out_dict[hpge] = {f: meta[f] for f in fields}

dbetto.utils.write_dict(out_dict.to_dict(), output[0])
8 changes: 8 additions & 0 deletions workflow/src/legendsimflow/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,14 @@ def gen_list_of_all_currmod_plots_outputs(config: SimflowConfig, **kwargs) -> se
return files


def gen_list_of_eresmods(config: SimflowConfig, simid: str) -> list[Path]:
r"""Generate the list of HPGe energy resolution model parameter files for all requested `runid`\ s."""
return [
patterns.output_eresmod_filename(config, runid=runid)
for runid in get_runlist(config, simid)
]


# tier cvt


Expand Down
3 changes: 2 additions & 1 deletion workflow/src/legendsimflow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def snakemake_nersc_cli():

config["metadata"] = metadata

simlist = list(config.get("simlist", None)) # make a copy for shuffling later
simlist = config.get("simlist", None)
make_tiers = config.make_tiers
if simlist is None:
# auto determine tier from config
Expand All @@ -88,6 +88,7 @@ def snakemake_nersc_cli():
# trick: there won't be anything to do for some simids (targets already
# done), this could result in a very inefficient partitioning. as a
# mitigation, we randomly shuffle the simlist first
simlist = list(simlist) # make a copy so we don't modify the input simlist in place
random.shuffle(simlist)

procs = []
Expand Down
20 changes: 11 additions & 9 deletions workflow/src/legendsimflow/hpge_pars.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ def lookup_energy_res_metadata(
# get the paths to generated parameters
if pars_db is None:
pars_db = utils.init_generated_pars_db(l200data, tier=hit_tier_name, lazy=True)
pars_db.scan() # need to use .on() later

msg = f"loading {hit_tier_name} pars of production {l200data}"
log.debug(msg)
Expand Down Expand Up @@ -386,7 +385,7 @@ def build_energy_res_func_dict(
runid: str,
*,
hit_tier_name: str = "hit",
pars_db: TextDB | None = None,
energy_res_pars: dict | AttrsDict | None = None,
) -> dict[str, Callable]:
r"""Build energy resolution functions for each HPGe detector in a LEGEND-200 run.

Expand All @@ -409,13 +408,16 @@ def build_energy_res_func_dict(
optional existing *non-lazy* instance of
``TextDB(".../path/to/prod/generated/par_{hit_tier_name}")``.
"""
energy_res_pars = lookup_energy_res_metadata(
l200data,
metadata,
runid,
hit_tier_name=hit_tier_name,
pars_db=pars_db,
)
if energy_res_pars is None:
energy_res_pars = lookup_energy_res_metadata(
l200data,
metadata,
runid,
hit_tier_name=hit_tier_name,
)

if not isinstance(energy_res_pars, AttrsDict):
energy_res_pars = AttrsDict(energy_res_pars)

_func_full = build_energy_res_func("FWHMLinear")

Expand Down
10 changes: 10 additions & 0 deletions workflow/src/legendsimflow/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,16 @@ def plot_currmod_filename(config: SimflowConfig, **kwargs) -> Path:
return _expand(pat, **kwargs)


# hpge energy resolution


def output_eresmod_filename(config: SimflowConfig, **kwargs) -> Path:
return _expand(
config.paths.genmeta / "hpge/eresmod/{runid}-model.yaml",
**kwargs,
)


# hit tier


Expand Down
23 changes: 13 additions & 10 deletions workflow/src/legendsimflow/scripts/tier/hit.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@
metadata = args.config.metadata
hpge_dtmap_files = args.input.hpge_dtmaps
hpge_currmods_files = args.input.hpge_currmods
hpge_eresmods_files = args.input.hpge_eresmods
simstat_part_file = args.input.simstat_part_file
l200data = args.config.paths.l200data
hit_tier_name = utils.get_hit_tier_name(l200data)

BUFFER_LEN = "500*MB"

Expand All @@ -74,13 +74,6 @@ def DEFAULT_ENERGY_RES_FUNC(energy):

partitions = dbetto.utils.load_dict(simstat_part_file)[f"job_{jobid}"]

# pre-load the l200 data pars to save time later
pars_db = utils.init_generated_pars_db(l200data, tier=hit_tier_name, lazy=True)

msg = "loading l200 pars database"
log.debug(msg)
pars_db.scan()

# load TCM, to be used to chunk the event statistics according to the run partitioning
msg = "loading TCM"
log.debug(msg)
Expand All @@ -96,12 +89,16 @@ def DEFAULT_ENERGY_RES_FUNC(energy):

msg = "loading energy resolution parameters"
log.debug(msg)
eresmod_pars_file = patterns.output_eresmod_filename(
snakemake.config, # noqa: F821
runid=runid,
)
eresmod_pars_all = dbetto.utils.load_dict(eresmod_pars_file)
energy_res_func = hpge_pars.build_energy_res_func_dict(
l200data,
metadata,
runid,
hit_tier_name=hit_tier_name,
pars_db=pars_db,
energy_res_pars=eresmod_pars_all,
) # FWHM

msg = "loading current pulse model parameters"
Expand Down Expand Up @@ -213,6 +210,12 @@ def DEFAULT_ENERGY_RES_FUNC(energy):
)
raise RuntimeError(msg)
else:
msg = (
f"{det_name} is marked as '{usability}' but no "
"resolution curves are available. using default "
"resolution of 2.5 keV FWHM at 2 MeV!"
)
log.warning(msg)
energy_res = DEFAULT_ENERGY_RES_FUNC(energy_true)

energy = reboost.math.stats.gaussian_sample(
Expand Down
Loading