diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 6786353b..236e8b99 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -97,6 +97,7 @@ rule build_tier_hit: # not all of them will be used. room for improvement hpge_dtmaps=lambda wc: aggregate.gen_list_of_merged_dtmaps(config, wc.simid), hpge_currmods=lambda wc: aggregate.gen_list_of_merged_currmods(config, wc.simid), + hpge_eresmods=lambda wc: aggregate.gen_list_of_eresmods(config, wc.simid), # NOTE: technically this rule only depends on one block in the # partitioning file, but in practice the full file will always change simstat_part_file=rules.make_simstat_partition_file.output[0], @@ -321,3 +322,41 @@ rule merge_current_pulse_model_pars: out_dict[hpges[i]] = dbetto.utils.load_dict(f) dbetto.utils.write_dict(out_dict, output[0]) + + +rule extract_energy_resolution_model: + """Extract from the LEGEND-200 data and store on disk the HPGe energy resolution model. + + Stores a YAML file with a mapping between HPGe detectors and respective + information to reconstruct the energy resolution function, as determined + during energy calibration. This is done in a separate rule because the + data production parameter database is large and we don't want to use a lot + of memory in the `build_tier_hit` rule. + + Uses wildcard `runid`. + """ + message: + "Extracting HPGe energy resolution model for {wildcards.runid}" + # NOTE: we don't list the file dependencies here because they are + # dynamically generated, and that would slow down the DAG generation + output: + patterns.output_eresmod_filename(config), + run: + import dbetto + from legendsimflow import hpge_pars, utils + + hit_tier_name = utils.get_hit_tier_name(config.paths.l200data) + + pars_dict = hpge_pars.lookup_energy_res_metadata( + config.paths.l200data, + config.metadata, + wildcards.runid, + hit_tier_name=hit_tier_name, + ) + + out_dict = dbetto.AttrsDict({}) + fields = ["expression", "parameters", "uncertainties"] + for hpge, meta in pars_dict.items(): + out_dict[hpge] = {f: meta[f] for f in fields} + + dbetto.utils.write_dict(out_dict.to_dict(), output[0]) diff --git a/workflow/src/legendsimflow/aggregate.py b/workflow/src/legendsimflow/aggregate.py index 277994ff..c03c9fc8 100644 --- a/workflow/src/legendsimflow/aggregate.py +++ b/workflow/src/legendsimflow/aggregate.py @@ -323,6 +323,14 @@ def gen_list_of_all_currmod_plots_outputs(config: SimflowConfig, **kwargs) -> se return files +def gen_list_of_eresmods(config: SimflowConfig, simid: str) -> list[Path]: + r"""Generate the list of HPGe energy resolution model parameter files for all requested `runid`\ s.""" + return [ + patterns.output_eresmod_filename(config, runid=runid) + for runid in get_runlist(config, simid) + ] + + # tier cvt diff --git a/workflow/src/legendsimflow/cli.py b/workflow/src/legendsimflow/cli.py index 4eb0514f..3017e11c 100644 --- a/workflow/src/legendsimflow/cli.py +++ b/workflow/src/legendsimflow/cli.py @@ -74,7 +74,7 @@ def snakemake_nersc_cli(): config["metadata"] = metadata - simlist = list(config.get("simlist", None)) # make a copy for shuffling later + simlist = config.get("simlist", None) make_tiers = config.make_tiers if simlist is None: # auto determine tier from config @@ -88,6 +88,7 @@ def snakemake_nersc_cli(): # trick: there won't be anything to do for some simids (targets already # done), this could result in a very inefficient partitioning. as a # mitigation, we randomly shuffle the simlist first + simlist = list(simlist) # make a copy so we don't modify the input simlist in place random.shuffle(simlist) procs = [] diff --git a/workflow/src/legendsimflow/hpge_pars.py b/workflow/src/legendsimflow/hpge_pars.py index d6f6ac14..c1099282 100644 --- a/workflow/src/legendsimflow/hpge_pars.py +++ b/workflow/src/legendsimflow/hpge_pars.py @@ -343,7 +343,6 @@ def lookup_energy_res_metadata( # get the paths to generated parameters if pars_db is None: pars_db = utils.init_generated_pars_db(l200data, tier=hit_tier_name, lazy=True) - pars_db.scan() # need to use .on() later msg = f"loading {hit_tier_name} pars of production {l200data}" log.debug(msg) @@ -386,7 +385,7 @@ def build_energy_res_func_dict( runid: str, *, hit_tier_name: str = "hit", - pars_db: TextDB | None = None, + energy_res_pars: dict | AttrsDict | None = None, ) -> dict[str, Callable]: r"""Build energy resolution functions for each HPGe detector in a LEGEND-200 run. @@ -409,13 +408,16 @@ def build_energy_res_func_dict( optional existing *non-lazy* instance of ``TextDB(".../path/to/prod/generated/par_{hit_tier_name}")``. """ - energy_res_pars = lookup_energy_res_metadata( - l200data, - metadata, - runid, - hit_tier_name=hit_tier_name, - pars_db=pars_db, - ) + if energy_res_pars is None: + energy_res_pars = lookup_energy_res_metadata( + l200data, + metadata, + runid, + hit_tier_name=hit_tier_name, + ) + + if not isinstance(energy_res_pars, AttrsDict): + energy_res_pars = AttrsDict(energy_res_pars) _func_full = build_energy_res_func("FWHMLinear") diff --git a/workflow/src/legendsimflow/patterns.py b/workflow/src/legendsimflow/patterns.py index dd86d3a2..1c444b85 100644 --- a/workflow/src/legendsimflow/patterns.py +++ b/workflow/src/legendsimflow/patterns.py @@ -267,6 +267,16 @@ def plot_currmod_filename(config: SimflowConfig, **kwargs) -> Path: return _expand(pat, **kwargs) +# hpge energy resolution + + +def output_eresmod_filename(config: SimflowConfig, **kwargs) -> Path: + return _expand( + config.paths.genmeta / "hpge/eresmod/{runid}-model.yaml", + **kwargs, + ) + + # hit tier diff --git a/workflow/src/legendsimflow/scripts/tier/hit.py b/workflow/src/legendsimflow/scripts/tier/hit.py index aa471e88..7b86b55b 100644 --- a/workflow/src/legendsimflow/scripts/tier/hit.py +++ b/workflow/src/legendsimflow/scripts/tier/hit.py @@ -47,9 +47,9 @@ metadata = args.config.metadata hpge_dtmap_files = args.input.hpge_dtmaps hpge_currmods_files = args.input.hpge_currmods +hpge_eresmods_files = args.input.hpge_eresmods simstat_part_file = args.input.simstat_part_file l200data = args.config.paths.l200data -hit_tier_name = utils.get_hit_tier_name(l200data) BUFFER_LEN = "500*MB" @@ -74,13 +74,6 @@ def DEFAULT_ENERGY_RES_FUNC(energy): partitions = dbetto.utils.load_dict(simstat_part_file)[f"job_{jobid}"] -# pre-load the l200 data pars to save time later -pars_db = utils.init_generated_pars_db(l200data, tier=hit_tier_name, lazy=True) - -msg = "loading l200 pars database" -log.debug(msg) -pars_db.scan() - # load TCM, to be used to chunk the event statistics according to the run partitioning msg = "loading TCM" log.debug(msg) @@ -96,12 +89,16 @@ def DEFAULT_ENERGY_RES_FUNC(energy): msg = "loading energy resolution parameters" log.debug(msg) + eresmod_pars_file = patterns.output_eresmod_filename( + snakemake.config, # noqa: F821 + runid=runid, + ) + eresmod_pars_all = dbetto.utils.load_dict(eresmod_pars_file) energy_res_func = hpge_pars.build_energy_res_func_dict( l200data, metadata, runid, - hit_tier_name=hit_tier_name, - pars_db=pars_db, + energy_res_pars=eresmod_pars_all, ) # FWHM msg = "loading current pulse model parameters" @@ -213,6 +210,12 @@ def DEFAULT_ENERGY_RES_FUNC(energy): ) raise RuntimeError(msg) else: + msg = ( + f"{det_name} is marked as '{usability}' but no " + "resolution curves are available. using default " + "resolution of 2.5 keV FWHM at 2 MeV!" + ) + log.warning(msg) energy_res = DEFAULT_ENERGY_RES_FUNC(energy_true) energy = reboost.math.stats.gaussian_sample(