From ef4b1de80fabe26f8b150e49c5545deba012d039 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Wed, 26 Nov 2025 12:25:55 +0100 Subject: [PATCH 01/10] added configuration for host "sator" --- dataflow-config.yaml | 12 ++++++++++++ workflow/profiles/sator/config.yaml | 8 ++++++++ 2 files changed, 20 insertions(+) create mode 100644 workflow/profiles/sator/config.yaml diff --git a/dataflow-config.yaml b/dataflow-config.yaml index 4a50f93f..f1240e8e 100644 --- a/dataflow-config.yaml +++ b/dataflow-config.yaml @@ -87,6 +87,18 @@ execenv: PYGAMA_FASTMATH: "false" TQDM_DISABLE: "true" + sator: + cmd: apptainer exec + arg: /mnt/atlas01/projects/scarf/software/containers/legendexp_legend-base_latest_20241110203225.sif + env: + PRODENV: $PRODENV + NUMBA_CACHE_DIR: $_/.snakemake/numba-cache + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + TQDM_DISABLE: "true" + nersc: cmd: shifter arg: --image legendexp/legend-base:latest diff --git a/workflow/profiles/sator/config.yaml b/workflow/profiles/sator/config.yaml new file mode 100644 index 00000000..2cbfe77e --- /dev/null +++ b/workflow/profiles/sator/config.yaml @@ -0,0 +1,8 @@ +cores: 50 +# restart-times: 2 +configfile: dataflow-config.yaml +snakefile: ./workflow/Snakefile +keep-going: true +rerun-incomplete: true +config: + - system=sator From 692b6205697ade595e2975121db35f32152e140e Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Mon, 24 Nov 2025 12:18:39 +0100 Subject: [PATCH 02/10] bump versions of dependencies --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 03e3d200..79894f06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,10 +52,10 @@ dynamic = ["version"] dependencies = [ "colorlog", "dbetto==1.2.4", - "pygama==2.2.4", - "dspeed==1.7.0", + "pygama>=2.3.0", + "dspeed==2.0.2", "pylegendmeta==1.3.1", - "legend-pydataobj==1.15.1", + "legend-pydataobj==1.16", 
"legend-daq2lh5==1.6.3", "legend-dataflow-scripts==0.1.8", "pip", From 737996874b216255c7452507c44d8151b55ffa64 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Mon, 24 Nov 2025 12:19:45 +0100 Subject: [PATCH 03/10] channel passed to scripts --- workflow/rules/dsp_pars_geds.smk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index d94befd5..d87a481c 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -125,6 +125,7 @@ rule build_pars_evtsel_geds: "raw", ), configs=config_path(config), + channel="{channel}", output: peak_file=temp( get_pattern_pars_tmp_channel(config, "dsp", "peaks", extension="lh5") @@ -141,6 +142,7 @@ rule build_pars_evtsel_geds: "--log-config {params.log_config} " "--config-file {params.config_file} " "--processing-chain {params.processing_chain} " + "--channel {params.channel} " "--raw-table-name {params.raw_table_name} " "--peak-file {output.peak_file} " "--pulser-file {input.pulser_file} " @@ -254,6 +256,7 @@ rule build_pars_dsp_dplms_geds: "raw", ), configs=config_path(config), + channel="{channel}", output: dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), lh5_path=temp(get_pattern_pars_tmp_channel(config, "dsp", extension="lh5")), @@ -272,6 +275,7 @@ rule build_pars_dsp_dplms_geds: "--log {log} " "--log-config {params.log_config} " "--config-file {params.config_file} " + "--channel {params.channel} " "--processing-chain {params.processing_chain} " "--raw-table-name {params.raw_table_name} " "--dsp-pars {output.dsp_pars} " From 29b81b690763c970689dd3bd852719cb4e327686 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Mon, 24 Nov 2025 12:20:30 +0100 Subject: [PATCH 04/10] update to build_dsp --- .../scripts/par/spms/dsp/trigger_threshold.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/workflow/src/legenddataflow/scripts/par/spms/dsp/trigger_threshold.py 
b/workflow/src/legenddataflow/scripts/par/spms/dsp/trigger_threshold.py index 01eadd16..e57c2625 100644 --- a/workflow/src/legenddataflow/scripts/par/spms/dsp/trigger_threshold.py +++ b/workflow/src/legenddataflow/scripts/par/spms/dsp/trigger_threshold.py @@ -6,7 +6,7 @@ import hist import numpy as np from dbetto import AttrsDict, Props, TextDB, utils -from dspeed import build_processing_chain +from dspeed import build_dsp from legenddataflowscripts.utils import build_log, cfgtools from lgdo import lh5 @@ -59,10 +59,9 @@ def get_channel_trg_thr(df_configs, sipm_name, dsp_db, raw_file, raw_table_name, else: # run the DSP with the provided configuration log.debug("running the DSP chain") - chain, _, dsp_output = build_processing_chain( - data, dsp_config, db_dict=_db_dict + dsp_output = build_dsp( + raw_in=data, dsp_config=dsp_config, database=_db_dict ) - chain.execute() log.debug("analyzing DSP outputs") # get output of the current processor From e45d488565505f7e88e454f23969b90636b06873 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Tue, 25 Nov 2025 10:02:49 +0100 Subject: [PATCH 05/10] force a two-file-creating database backend, since smk requires a .dir file being created --- .../src/legenddataflow/scripts/flow/merge_channels.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workflow/src/legenddataflow/scripts/flow/merge_channels.py b/workflow/src/legenddataflow/scripts/flow/merge_channels.py index 63838c82..e6485772 100644 --- a/workflow/src/legenddataflow/scripts/flow/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/flow/merge_channels.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import dbm.dumb import pickle as pkl import shelve from pathlib import Path @@ -87,7 +88,14 @@ def merge_channels() -> None: elif file_extension in (".dat", ".dir"): common_dict = {} - with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + # Open the 'dumb' database directly. 
+ # This forces a two-file backend, avoiding the automatic selection + # of a one-file backend (e.g. xxx.db) which breaks snakemake's output + # file detection + with ( + dbm.dumb.open(str(out_file), "c") as db_object, + shelve.Shelf(db_object, protocol=pkl.HIGHEST_PROTOCOL) as shelf, + ): for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) From ddcd5f9763d0c70769479fc8763f07ae8c536bf7 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Mon, 1 Dec 2025 15:38:35 +0100 Subject: [PATCH 06/10] Do not require crosstalk matrix; some periods miss one --- workflow/rules/evt.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index a0deca3d..b9813777 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -31,6 +31,7 @@ rule build_evt: tier="evt", wildcards=wildcards, name="xtc", + allow_none=True, overwrite=False, extension="lh5", ), From 172c4eb1ef915fd660bfee01c4a196f5f1f2e540 Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Mon, 1 Dec 2025 16:59:39 +0100 Subject: [PATCH 07/10] run build_pars_dsp_tau_spms also in cases where there are no dsp overrides --- workflow/rules/dsp_pars_spms.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/dsp_pars_spms.smk b/workflow/rules/dsp_pars_spms.smk index e5e83b43..5fc2f2ba 100644 --- a/workflow/rules/dsp_pars_spms.smk +++ b/workflow/rules/dsp_pars_spms.smk @@ -12,7 +12,7 @@ rule build_pars_dsp_tau_spms: input: raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pardb=lambda wildcards: get_input_par_file( - config, wildcards=wildcards, tier="dsp" + config, wildcards=wildcards, tier="dsp", allow_none=True ), params: timestamp="{timestamp}", From 30141119c557c8ae7f9dea60cef84249ffda4c52 Mon Sep 17 00:00:00 2001 From: Mario Schwarz Date: Tue, 2 Dec 2025 16:30:55 +0100 Subject: [PATCH 08/10] updated docu --- docs/source/index.rst | 37 +++++++++++++++++++++++++++++++++++-- 
docs/source/user_manual.rst | 25 ++++++++++++++++++------- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index fdf8cad8..d82ba239 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,8 +11,41 @@ takes as an input the metadata at `legend metadata Getting started --------------- -It is recommended to install and use the package through the `legend-prodenv -`_. +Clone the repository using git. + +.. code-block:: bash + + git clone https://github.com/legend-exp/legend-dataflow.git + cd legend-dataflow + +Then create a virtual environment to install *legend-dataflow* to. +Use e.g. ``uv`` for that: + +.. code-block:: bash + + uv venv --python 3.12 + source .venv/bin/activate + uv pip install -e ".[dev]" + +With ``[dev]`` you install the development dependencies. You might want to +use a different set of dependencies depending on your use case. + +Adapt the ``dataflow-config.yaml`` and add a workflow profile in +``workflow/profiles/`` if you want to set the dataflow up for a new host. +Otherwise, check if your host is already configured or if ``bare`` applies for you. + +Install the dataflow using + +.. code-block:: bash + + dataflow -v install -s <host> dataflow-config.yaml + +with ``<host>`` being the hostname as configured in ``dataflow-config.yaml``. +This command installs all the necessary software to run the dataflow to +``.snakemake/legend-dataflow/venv``. +Be sure to clear the numba cache (defined in the config) in case of software updates. + + Next steps ---------- diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst index 90f4557b..fe81a376 100644 --- a/docs/source/user_manual.rst +++ b/docs/source/user_manual.rst @@ -5,18 +5,29 @@ Configuration ============= Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, generally named ``config.json``.
+possibly user-dependent) configuration file, generally named ``dataflow-config.yaml``. Although you can choose any arbitrary name. - -A template for this file is located at ``templates/config.json`` -which can be copied to the working directory -the paths adjusted as necessary. Note that, when running Snakemake, -the default path to the config file is ``./config.json``. +Edit this file and adjust the paths as necessary. Note that, when running Snakemake, +the default path to the config file is ``./dataflow-config.yaml``. + +The following (non-exhaustive) table shows a list of options: + ++---------------------------+--------------------------------------------------------------------------+ +| Parameter | Description | ++===========================+==========================================================================+ +| legend_metadata_version | The version of legend_metadata to be used automatically | +| | (use custom legend_metadata: put one at the ``paths/metadata`` location) | ++---------------------------+--------------------------------------------------------------------------+ +| allow_none_par | if pargen should be run | ++---------------------------+--------------------------------------------------------------------------+ +| paths | Paths to legend_metadata, data input and output. | +| | Adapt e.g. ``paths/raw`` to point to existing raw input data. | ++---------------------------+--------------------------------------------------------------------------+ Profiles ======== -A number of profiles are also included in the ``profiles`` directory. If none +A number of profiles are also included in the ``workflow/profiles`` directory. If none are specified, the default profile is used. The profile can be specified by using the ``--profile`` option when running Snakemake.
These control how many jobs are run simultaneously, based on how many cores are specified and the From 6fbafcc7e42f5ff4ebf90dbbddb71c0c4df073ec Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Wed, 10 Dec 2025 09:30:16 +0100 Subject: [PATCH 09/10] update macos runners to 14 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b2dfcd96..bfb684a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: python-version: ["3.11", "3.12"] - os: [ubuntu-latest, macos-13] + os: [ubuntu-latest, macos-14] steps: - uses: actions/checkout@v5 From 817b7a7c18595df5250bc2901b8a2406ec5284de Mon Sep 17 00:00:00 2001 From: "Schwarz, Mario" Date: Wed, 10 Dec 2025 16:45:20 +0100 Subject: [PATCH 10/10] use latest legend-dataflow-scripts --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 79894f06..cfc034c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ dependencies = [ "pylegendmeta==1.3.1", "legend-pydataobj==1.16", "legend-daq2lh5==1.6.3", - "legend-dataflow-scripts==0.1.8", + "legend-dataflow-scripts==0.3.0a2", "pip", ]