2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -23,7 +23,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.11", "3.12"]
os: [ubuntu-latest, macos-14]

steps:
- uses: actions/checkout@v5
12 changes: 12 additions & 0 deletions dataflow-config.yaml
@@ -87,6 +87,18 @@ execenv:
PYGAMA_FASTMATH: "false"
TQDM_DISABLE: "true"

sator:
cmd: apptainer exec
arg: /mnt/atlas01/projects/scarf/software/containers/legendexp_legend-base_latest_20241110203225.sif
env:
PRODENV: $PRODENV
NUMBA_CACHE_DIR: $_/.snakemake/numba-cache
LGDO_BOUNDSCHECK: "false"
DSPEED_BOUNDSCHECK: "false"
PYGAMA_PARALLEL: "false"
PYGAMA_FASTMATH: "false"
TQDM_DISABLE: "true"

nersc:
cmd: shifter
arg: --image legendexp/legend-base:latest
37 changes: 35 additions & 2 deletions docs/source/index.rst
@@ -11,8 +11,41 @@ takes as an input the metadata at `legend metadata
Getting started
---------------

Clone the repository using git:

.. code-block:: bash

git clone https://github.com/legend-exp/legend-dataflow.git
cd legend-dataflow

Then create a virtual environment in which to install *legend-dataflow*,
e.g. with ``uv``:

.. code-block:: bash

uv venv --python 3.12
source .venv/bin/activate
uv pip install -e ".[dev]"

With ``[dev]`` you also install the development dependencies. You might want to
use a different set of dependencies depending on your use case.

Adapt ``dataflow-config.yaml`` and add a workflow profile in
``workflow/profiles/`` if you want to set up the dataflow for a new host.
Otherwise, check whether your host is already configured or whether ``bare`` applies to you.

Install the dataflow using

.. code-block:: bash

dataflow -v install -s <host> dataflow-config.yaml

with ``<host>`` being the hostname as configured in ``dataflow-config.yaml``.
This command installs all the software necessary to run the dataflow into
``.snakemake/legend-dataflow/venv``.
Be sure to clear the Numba cache (defined in the config) after software updates.
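
Clearing that cache can be done by hand; a minimal sketch, assuming the cache
lives at ``.snakemake/numba-cache`` as set via ``NUMBA_CACHE_DIR`` in
``dataflow-config.yaml`` (adjust the path if your config differs):

```shell
# Path as configured via NUMBA_CACHE_DIR in dataflow-config.yaml;
# adjust if your setup uses a different cache directory
CACHE_DIR=".snakemake/numba-cache"
rm -rf "$CACHE_DIR"
```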



Next steps
----------
25 changes: 18 additions & 7 deletions docs/source/user_manual.rst
@@ -5,18 +5,29 @@ Configuration
=============

Data processing resources are configured via a single site-dependent (and
possibly user-dependent) configuration file, generally named
``dataflow-config.json``, although any other name can be chosen.

A template for this file is located at ``templates/config.json`` and can be
copied to the working directory.
Edit this file and adjust the paths as necessary. Note that, when running Snakemake,
the default path to the config file is ``./dataflow-config.json``.

The following (non-exhaustive) table shows a list of options:

+---------------------------+--------------------------------------------------------------------------+
| Parameter                 | Description                                                              |
+===========================+==========================================================================+
| legend_metadata_version   | The version of legend_metadata to be used automatically                  |
|                           | (use custom legend_metadata: put one at the ``paths/metadata`` location) |
+---------------------------+--------------------------------------------------------------------------+
| allow_none_par            | Whether pargen should be run                                             |
+---------------------------+--------------------------------------------------------------------------+
| paths                     | Paths to legend_metadata, data input and output.                         |
|                           | Adapt e.g. ``paths/raw`` to point to existing raw input data.            |
+---------------------------+--------------------------------------------------------------------------+
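
For orientation, these options might appear in a config as follows (a minimal
sketch; the version tag and all paths are hypothetical placeholders, not
values from this repository):

```yaml
# Hypothetical values for illustration only
legend_metadata_version: v0.5.7      # placeholder version tag
allow_none_par: true
paths:
  metadata: /data/legend/metadata    # placeholder path
  raw: /data/legend/raw              # point at existing raw input data
```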

Profiles
========

A number of profiles are also included in the ``workflow/profiles`` directory. If none
are specified, the default profile is used. The profile can be specified by
using the ``--profile`` option when running Snakemake. These control how many
jobs are run simultaneously, based on how many cores are specified and the
8 changes: 4 additions & 4 deletions pyproject.toml
@@ -52,12 +52,12 @@ dynamic = ["version"]
dependencies = [
"colorlog",
"dbetto==1.2.4",
"pygama==2.2.4",
"dspeed==1.7.0",
"pygama>=2.3.0",
"dspeed==2.0.2",
"pylegendmeta==1.3.1",
"legend-pydataobj==1.15.1",
"legend-pydataobj==1.16",
"legend-daq2lh5==1.6.3",
"legend-dataflow-scripts==0.1.8",
"legend-dataflow-scripts==0.3.0a2",
"pip",
]

8 changes: 8 additions & 0 deletions workflow/profiles/sator/config.yaml
@@ -0,0 +1,8 @@
cores: 50
# restart-times: 2
configfile: dataflow-config.yaml
snakefile: ./workflow/Snakefile
keep-going: true
rerun-incomplete: true
config:
- system=sator
4 changes: 4 additions & 0 deletions workflow/rules/dsp_pars_geds.smk
@@ -125,6 +125,7 @@ rule build_pars_evtsel_geds:
"raw",
),
configs=config_path(config),
channel="{channel}",
output:
peak_file=temp(
get_pattern_pars_tmp_channel(config, "dsp", "peaks", extension="lh5")
@@ -141,6 +142,7 @@ rule build_pars_evtsel_geds:
"--log-config {params.log_config} "
"--config-file {params.config_file} "
"--processing-chain {params.processing_chain} "
"--channel {params.channel} "
"--raw-table-name {params.raw_table_name} "
"--peak-file {output.peak_file} "
"--pulser-file {input.pulser_file} "
@@ -254,6 +256,7 @@ rule build_pars_dsp_dplms_geds:
"raw",
),
configs=config_path(config),
channel="{channel}",
output:
dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")),
lh5_path=temp(get_pattern_pars_tmp_channel(config, "dsp", extension="lh5")),
@@ -272,6 +275,7 @@ rule build_pars_dsp_dplms_geds:
"--log {log} "
"--log-config {params.log_config} "
"--config-file {params.config_file} "
"--channel {params.channel} "
"--processing-chain {params.processing_chain} "
"--raw-table-name {params.raw_table_name} "
"--dsp-pars {output.dsp_pars} "
2 changes: 1 addition & 1 deletion workflow/rules/dsp_pars_spms.smk
@@ -12,7 +12,7 @@ rule build_pars_dsp_tau_spms:
input:
raw_file=get_pattern_tier(config, "raw", check_in_cycle=False),
pardb=lambda wildcards: get_input_par_file(
config, wildcards=wildcards, tier="dsp", allow_none=True
),
params:
timestamp="{timestamp}",
1 change: 1 addition & 0 deletions workflow/rules/evt.smk
@@ -31,6 +31,7 @@ rule build_evt:
tier="evt",
wildcards=wildcards,
name="xtc",
allow_none=True,
overwrite=False,
extension="lh5",
),
10 changes: 9 additions & 1 deletion workflow/src/legenddataflow/scripts/flow/merge_channels.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import argparse
import dbm.dumb
import pickle as pkl
import shelve
from pathlib import Path
@@ -87,7 +88,14 @@ def merge_channels() -> None:

elif file_extension in (".dat", ".dir"):
common_dict = {}
# Open the 'dumb' database directly.
# This forces a two-file backend, avoiding the automatic selection
# of a one-file backend (e.g. xxx.db) which breaks snakemake's output
# file detection
with (
dbm.dumb.open(str(out_file), "c") as db_object,
shelve.Shelf(db_object, protocol=pkl.HIGHEST_PROTOCOL) as shelf,
):
for channel in channel_files:
with Path(channel).open("rb") as r:
channel_dict = pkl.load(r)
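
The reason for the ``dbm.dumb`` backend can be demonstrated in isolation; the
following standalone sketch (file names are throwaway temporaries, the channel
entry is hypothetical) shows that it always produces the ``.dat``/``.dir``
file pair that Snakemake's output detection relies on:

```python
# Standalone demonstration: dbm.dumb always writes a two-file backend
# (<name>.dat + <name>.dir), whereas plain shelve.open() may select a
# one-file backend such as <name>.db depending on the platform.
import dbm.dumb
import pickle as pkl
import shelve
import tempfile
from pathlib import Path

tmpdir = Path(tempfile.mkdtemp())
out_file = tmpdir / "pars"

with (
    dbm.dumb.open(str(out_file), "c") as db_object,
    shelve.Shelf(db_object, protocol=pkl.HIGHEST_PROTOCOL) as shelf,
):
    shelf["ch000"] = {"tau": 180.0}  # hypothetical channel entry

suffixes = {p.suffix for p in tmpdir.iterdir()}
print(suffixes)  # contains '.dat' and '.dir', never a lone '.db'
```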
@@ -6,7 +6,7 @@
import hist
import numpy as np
from dbetto import AttrsDict, Props, TextDB, utils
from dspeed import build_dsp
from legenddataflowscripts.utils import build_log, cfgtools
from lgdo import lh5

@@ -59,10 +59,9 @@ def get_channel_trg_thr(df_configs, sipm_name, dsp_db, raw_file, raw_table_name,
else:
# run the DSP with the provided configuration
log.debug("running the DSP chain")
dsp_output = build_dsp(
    raw_in=data, dsp_config=dsp_config, database=_db_dict
)

log.debug("analyzing DSP outputs")
# get output of the current processor