v2: Update to new observable placeholder specification (#393)

dweindl · web-flow · commit ef795230ed71 · 2025-07-02T07:24:01.000+02:00
Adapt to the changes in PEtab-dev/PEtab#625. Placeholders are now listed explicitly. Closes #390.
diff --git a/petab/v2/C.py b/petab/v2/C.py
@@ -145,10 +145,14 @@
 OBSERVABLE_NAME = "observableName"
 #: Observable formula column in the observable table
 OBSERVABLE_FORMULA = "observableFormula"
+#: Observable placeholders column in the observable table
+OBSERVABLE_PLACEHOLDERS = "observablePlaceholders"
 #: Noise formula column in the observable table
 NOISE_FORMULA = "noiseFormula"
 #: Noise distribution column in the observable table
 NOISE_DISTRIBUTION = "noiseDistribution"
+#: Noise placeholders column in the observable table
+NOISE_PLACEHOLDERS = "noisePlaceholders"
 
 #: Mandatory columns of observable table
 OBSERVABLE_DF_REQUIRED_COLS = [
diff --git a/petab/v2/core.py b/petab/v2/core.py
@@ -2,12 +2,11 @@
 
 from __future__ import annotations
 
-import re
 from collections.abc import Sequence
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Annotated, Literal
+from typing import Annotated
 
 import numpy as np
 import pandas as pd
@@ -192,6 +191,14 @@ class Observable(BaseModel):
     noise_distribution: NoiseDistribution = Field(
         alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL
     )
+    #: Placeholder symbols for the observable formula.
+    observable_placeholders: list[sp.Symbol] = Field(
+        alias=C.OBSERVABLE_PLACEHOLDERS, default=[]
+    )
+    #: Placeholder symbols for the noise formula.
+    noise_placeholders: list[sp.Symbol] = Field(
+        alias=C.NOISE_PLACEHOLDERS, default=[]
+    )
 
     #: :meta private:
     model_config = ConfigDict(
@@ -221,37 +228,24 @@ def _sympify(cls, v):
 
         return sympify_petab(v)
 
-    def _placeholders(
-        self, type_: Literal["observable", "noise"]
-    ) -> set[sp.Symbol]:
-        formula = (
-            self.formula
-            if type_ == "observable"
-            else self.noise_formula
-            if type_ == "noise"
-            else None
-        )
-        if formula is None or formula.is_number:
-            return set()
-
-        if not (free_syms := formula.free_symbols):
-            return set()
+    @field_validator(
+        "observable_placeholders", "noise_placeholders", mode="before"
+    )
+    @classmethod
+    def _sympify_id_list(cls, v):
+        """Convert a delimited string or sequence of IDs to symbols."""
 
-        # TODO: add field validator to check for 1-based consecutive numbering
-        t = f"{re.escape(type_)}Parameter"
-        o = re.escape(self.id)
-        pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)")
-        return {s for s in free_syms if pattern.match(str(s))}
+        if v is None or (isinstance(v, float) and np.isnan(v)):
+            return []
 
-    @property
-    def observable_placeholders(self) -> set[sp.Symbol]:
-        """Placeholder symbols for the observable formula."""
-        return self._placeholders("observable")
+        if isinstance(v, str):
+            v = v.split(C.PARAMETER_SEPARATOR)
+        elif not isinstance(v, Sequence):
+            v = [v]
 
-    @property
-    def noise_placeholders(self) -> set[sp.Symbol]:
-        """Placeholder symbols for the noise formula."""
-        return self._placeholders("noise")
+        # Entries may be non-strings (e.g. symbols), which lack `.strip()`.
+        v = [str(pid).strip() for pid in v]
+        return [sympify_petab(_valid_petab_id(pid)) for pid in v if pid]
 
 
 class ObservableTable(BaseModel):
@@ -289,6 +283,12 @@ def to_df(self) -> pd.DataFrame:
             noise = record[C.NOISE_FORMULA]
             record[C.OBSERVABLE_FORMULA] = petab_math_str(obs)
             record[C.NOISE_FORMULA] = petab_math_str(noise)
+            record[C.OBSERVABLE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join(
+                map(str, record[C.OBSERVABLE_PLACEHOLDERS])
+            )
+            record[C.NOISE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join(
+                map(str, record[C.NOISE_PLACEHOLDERS])
+            )
         return pd.DataFrame(records).set_index([C.OBSERVABLE_ID])
 
     @classmethod
diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import re
 import shutil
 from contextlib import suppress
 from itertools import chain
@@ -14,6 +15,7 @@
 from pandas.io.common import get_handle, is_url
 
 from .. import v1, v2
+from ..v1.math import sympify_petab
 from ..v1.yaml import get_path_prefix, load_yaml, validate
 from ..versions import get_major_version
 from .models import MODEL_TYPE_SBML
@@ -351,6 +353,7 @@ def v1v2_observable_df(observable_df: pd.DataFrame) -> pd.DataFrame:
 
     Perform all updates that can be done solely on the observable table:
     * drop observableTransformation, update noiseDistribution
+    * list placeholder parameters in observablePlaceholders/noisePlaceholders
     """
     df = observable_df.copy().reset_index()
 
@@ -388,6 +391,43 @@ def update_noise_dist(row):
         df[v2.C.NOISE_DISTRIBUTION] = df.apply(update_noise_dist, axis=1)
         df.drop(columns=[v1.C.OBSERVABLE_TRANSFORMATION], inplace=True)
 
+    def extract_placeholders(row: pd.Series, type_: str) -> str:
+        """Extract the placeholder IDs of an observable or noise formula.
+
+        Returns the IDs ordered by their numeric index and joined by the
+        v2 parameter separator, or ``""`` if the formula is missing.
+        """
+        if type_ == "observable":
+            formula = row[v1.C.OBSERVABLE_FORMULA]
+        elif type_ == "noise":
+            formula = row[v1.C.NOISE_FORMULA]
+        else:
+            raise ValueError(f"Unknown placeholder type: {type_}")
+
+        if pd.isna(formula):
+            return ""
+
+        t = f"{re.escape(type_)}Parameter"
+        o = re.escape(row[v1.C.OBSERVABLE_ID])
+        pattern = re.compile(rf"(?:^|\W)({t}(\d+)_{o})(?=\W|$)")
+        # Order by the numeric index (not lexicographically), so that
+        #  `...Parameter10_*` follows `...Parameter2_*` and the order
+        #  aligns with the overrides in the measurement table.
+        matches = (
+            pattern.match(str(sym))
+            for sym in sympify_petab(formula).free_symbols
+            if sym.is_Symbol
+        )
+        found = sorted((int(m[2]), m.string) for m in matches if m)
+        return v2.C.PARAMETER_SEPARATOR.join(name for _, name in found)
+
+    df[v2.C.OBSERVABLE_PLACEHOLDERS] = df.apply(
+        extract_placeholders, args=("observable",), axis=1
+    )
+    df[v2.C.NOISE_PLACEHOLDERS] = df.apply(
+        extract_placeholders, args=("noise",), axis=1
+    )
+
     return df
 
 
diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py
@@ -160,28 +160,22 @@ def test_observable():
     assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1
 
     o = Observable(id="obs1", formula=x + y)
-    assert o.observable_placeholders == set()
-    assert o.noise_placeholders == set()
+    assert o.observable_placeholders == []
+    assert o.noise_placeholders == []
 
     o = Observable(
         id="obs1",
         formula="observableParameter1_obs1",
         noise_formula="noiseParameter1_obs1",
+        observable_placeholders="observableParameter1_obs1",
+        noise_placeholders="noiseParameter1_obs1",
     )
-    assert o.observable_placeholders == {
+    assert o.observable_placeholders == [
         sp.Symbol("observableParameter1_obs1", real=True),
-    }
-    assert o.noise_placeholders == {
+    ]
+    assert o.noise_placeholders == [
         sp.Symbol("noiseParameter1_obs1", real=True)
-    }
-
-    # TODO: this should raise an error
-    #   (numbering is not consecutive / not starting from 1)
-    # TODO: clarify if observableParameter0_obs1 would be allowed
-    #  as regular parameter
-    #
-    # with pytest.raises(ValidationError):
-    #  Observable(id="obs1", formula="observableParameter2_obs1")
+    ]
 
 
 def test_change():