v2: Basic support for multiple models (#418)

dweindl · web-flow · commit 3156df2f8932 · 2025-07-29T07:30:41.000+02:00
Related to #392.

* Let v2.Problem have a list of models
* Support constructing v2.Problem from files with multiple models
* Move some validators to Annotated
* Add some TODOs.
diff --git a/petab/v2/C.py b/petab/v2/C.py
@@ -38,6 +38,9 @@
 #: Replicate ID column in the measurement table
 REPLICATE_ID = "replicateId"
 
+#: The model ID column in the measurement table
+MODEL_ID = "modelId"
+
 #: Mandatory columns of measurement table
 MEASUREMENT_DF_REQUIRED_COLS = [
     OBSERVABLE_ID,
@@ -52,6 +55,7 @@
     NOISE_PARAMETERS,
     DATASET_ID,
     REPLICATE_ID,
+    MODEL_ID,
 ]
 
 #: Measurement table columns
diff --git a/petab/v2/converters.py b/petab/v2/converters.py
@@ -71,6 +71,11 @@ def __init__(self, problem: Problem, default_priority: float = None):
             To ensure that the PEtab condition-start-events are executed before
             any other events, all events should have a priority set.
         """
+        if len(problem.models) > 1:
+            #  https://github.com/PEtab-dev/libpetab-python/issues/392
+            raise NotImplementedError(
+                "Only single-model PEtab problems are supported."
+            )
         if not isinstance(problem.model, SbmlModel):
             raise ValueError("Only SBML models are supported.")
 
diff --git a/petab/v2/core.py b/petab/v2/core.py
@@ -110,6 +110,15 @@ def _valid_petab_id(v: str) -> str:
     return v
 
 
+def _valid_petab_id_or_none(v: str) -> str:
+    """Field validator for optional PEtab IDs."""
+    if not v:
+        return None
+    if not is_valid_identifier(v):
+        raise ValueError(f"Invalid ID: {v}")
+    return v
+
+
 class ParameterScale(str, Enum):
     """Parameter scales.
 
@@ -687,10 +696,18 @@ class Measurement(BaseModel):
     experiment.
     """
 
+    #: The model ID.
+    model_id: Annotated[
+        str | None, BeforeValidator(_valid_petab_id_or_none)
+    ] = Field(alias=C.MODEL_ID, default=None)
     #: The observable ID.
-    observable_id: str = Field(alias=C.OBSERVABLE_ID)
+    observable_id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field(
+        alias=C.OBSERVABLE_ID
+    )
     #: The experiment ID.
-    experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None)
+    experiment_id: Annotated[
+        str | None, BeforeValidator(_valid_petab_id_or_none)
+    ] = Field(alias=C.EXPERIMENT_ID, default=None)
     #: The time point of the measurement in time units as defined in the model.
     time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field(
         alias=C.TIME
@@ -728,17 +745,6 @@ def convert_nan_to_none(cls, v, info: ValidationInfo):
             return cls.model_fields[info.field_name].default
         return v
 
-    @field_validator("observable_id", "experiment_id")
-    @classmethod
-    def _validate_id(cls, v, info: ValidationInfo):
-        if not v:
-            if info.field_name == "experiment_id":
-                return None
-            raise ValueError("ID must not be empty.")
-        if not is_valid_identifier(v):
-            raise ValueError(f"Invalid ID: {v}")
-        return v
-
     @field_validator(
         "observable_parameters", "noise_parameters", mode="before"
     )
@@ -775,6 +781,9 @@ def from_df(
         if df is None:
             return cls()
 
+        if C.MODEL_ID in df.columns:
+            df[C.MODEL_ID] = df[C.MODEL_ID].apply(_convert_nan_to_none)
+
         measurements = [
             Measurement(
                 **row.to_dict(),
@@ -868,7 +877,9 @@ class Parameter(BaseModel):
     """Parameter definition."""
 
     #: Parameter ID.
-    id: str = Field(alias=C.PARAMETER_ID)
+    id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field(
+        alias=C.PARAMETER_ID
+    )
     #: Lower bound.
     lb: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field(
         alias=C.LOWER_BOUND, default=None
@@ -901,15 +912,6 @@ class Parameter(BaseModel):
         validate_assignment=True,
     )
 
-    @field_validator("id")
-    @classmethod
-    def _validate_id(cls, v):
-        if not v:
-            raise ValueError("ID must not be empty.")
-        if not is_valid_identifier(v):
-            raise ValueError(f"Invalid ID: {v}")
-        return v
-
     @field_validator("prior_parameters", mode="before")
     @classmethod
     def _validate_prior_parameters(
@@ -1067,20 +1069,20 @@ class Problem:
 
     A PEtab parameter estimation problem as defined by
 
-    - model
-    - condition table
-    - experiment table
-    - measurement table
-    - parameter table
-    - observable table
-    - mapping table
+    - models
+    - condition tables
+    - experiment tables
+    - measurement tables
+    - parameter tables
+    - observable tables
+    - mapping tables
 
     See also :doc:`petab:v2/documentation_data_format`.
     """
 
     def __init__(
         self,
-        model: Model = None,
+        models: list[Model] = None,
         condition_tables: list[ConditionTable] = None,
         experiment_tables: list[ExperimentTable] = None,
         observable_tables: list[ObservableTable] = None,
@@ -1092,7 +1094,7 @@ def __init__(
         from ..v2.lint import default_validation_tasks
 
         self.config = config
-        self.model: Model | None = model
+        self.models: list[Model] = models or []
         self.validation_tasks: list[ValidationTask] = (
             default_validation_tasks.copy()
         )
@@ -1210,13 +1212,6 @@ def get_path(filename):
                 f"{yaml_config[C.FORMAT_VERSION]}."
             )
 
-        if len(yaml_config[C.MODEL_FILES]) > 1:
-            raise ValueError(
-                "petab.v2.Problem.from_yaml() can only be used for "
-                "yaml files comprising a single model. "
-                "Consider using "
-                "petab.v2.CompositeProblem.from_yaml() instead."
-            )
         config = ProblemConfig(
             **yaml_config, base_path=base_path, filepath=yaml_file
         )
@@ -1225,19 +1220,14 @@ def get_path(filename):
             for f in config.parameter_files
         ]
 
-        if len(config.model_files or []) > 1:
-            # TODO https://github.com/PEtab-dev/libpetab-python/issues/6
-            raise NotImplementedError(
-                "Support for multiple models is not yet implemented."
-            )
-        model = None
-        if config.model_files:
-            model_id, model_info = next(iter(config.model_files.items()))
-            model = model_factory(
+        models = [
+            model_factory(
                 get_path(model_info.location),
                 model_info.language,
                 model_id=model_id,
             )
+            for model_id, model_info in (config.model_files or {}).items()
+        ]
 
         measurement_tables = (
             [
@@ -1283,7 +1273,7 @@ def get_path(filename):
 
         return Problem(
             config=config,
-            model=model,
+            models=models,
             condition_tables=condition_tables,
             experiment_tables=experiment_tables,
             observable_tables=observable_tables,
@@ -1316,6 +1306,7 @@ def from_dfs(
             model: The underlying model
             config: The PEtab problem configuration
         """
+        # TODO: do we really need this?
 
         observable_table = ObservableTable.from_df(observable_df)
         condition_table = ConditionTable.from_df(condition_df)
@@ -1325,7 +1316,7 @@ def from_dfs(
         parameter_table = ParameterTable.from_df(parameter_df)
 
         return Problem(
-            model=model,
+            models=[model],
             condition_tables=[condition_table],
             experiment_tables=[experiment_table],
             observable_tables=[observable_table],
@@ -1391,6 +1382,39 @@ def get_problem(problem: str | Path | Problem) -> Problem:
             "or a PEtab problem object."
         )
 
+    @property
+    def model(self) -> Model | None:
+        """The model of the problem.
+
+        This is a convenience property for `Problem`s with only one single
+        model.
+
+        :return:
+            The model of the problem, or None if no model is defined.
+        :raises:
+            ValueError: If the problem has more than one model defined.
+        """
+        if len(self.models) == 1:
+            return self.models[0]
+
+        if len(self.models) == 0:
+            return None
+
+        raise ValueError(
+            "Problem contains more than one model. "
+            "Use `Problem.models` to access all models."
+        )
+
+    @model.setter
+    def model(self, value: Model):
+        """Set the model of the problem.
+
+        This is a convenience setter for `Problem`s with only one single
+        model. This will replace any existing models in the problem with the
+        provided model.
+        """
+        self.models = [value]
+
     @property
     def condition_df(self) -> pd.DataFrame | None:
         """Combined condition tables as DataFrame."""
@@ -1745,6 +1769,7 @@ def validate(
         )
 
         validation_results = ValidationResultList()
+
         if self.config and self.config.extensions:
             extensions = ",".join(self.config.extensions.keys())
             validation_results.append(
@@ -1756,6 +1781,19 @@ def validate(
                 )
             )
 
+        if len(self.models) > 1:
+            # TODO https://github.com/PEtab-dev/libpetab-python/issues/392
+            #  We might just want to split the problem into multiple
+            #  problems, one for each model, and then validate each
+            #  problem separately.
+            validation_results.append(
+                ValidationIssue(
+                    ValidationIssueSeverity.WARNING,
+                    "Problem contains multiple models. "
+                    "Validation is not yet fully supported.",
+                )
+            )
+
         for task in validation_tasks or self.validation_tasks:
             try:
                 cur_result = task.run(self)
@@ -2043,7 +2081,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]:
         used for serialization. The output of this function may change
         without notice.
 
-        The output includes all PEtab tables, but not the model itself.
+        The output includes all PEtab tables, but not the models.
 
         See `pydantic.BaseModel.model_dump <https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_dump>`__
         for details.
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
@@ -769,6 +769,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         return None
 
 
+# TODO: check that Measurements model IDs match the available ones
+#  https://github.com/PEtab-dev/libpetab-python/issues/392
+
+
 def get_valid_parameters_for_parameter_table(
     problem: Problem,
 ) -> set[str]:
diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py
@@ -28,6 +28,7 @@
     UPPER_BOUND,
 )
 from petab.v2.core import *
+from petab.v2.models.sbml_model import SbmlModel
 from petab.v2.petab1to2 import petab1to2
 
 example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita"
@@ -335,10 +336,16 @@ def test_problem_from_yaml_multiple_files():
     yaml_config = """
     format_version: 2.0.0
     parameter_files: []
+    model_files:
+        model1:
+            location: model1.xml
+            language: sbml
+        model2:
+            location: model2.xml
+            language: sbml
     condition_files: [conditions1.tsv, conditions2.tsv]
     measurement_files: [measurements1.tsv, measurements2.tsv]
     observable_files: [observables1.tsv, observables2.tsv]
-    model_files: {}
     experiment_files: [experiments1.tsv, experiments2.tsv]
     """
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -347,6 +354,10 @@ def test_problem_from_yaml_multiple_files():
             f.write(yaml_config)
 
         for i in (1, 2):
+            SbmlModel.from_antimony("a = 1;").to_file(
+                Path(tmpdir, f"model{i}.xml")
+            )
+
             problem = Problem()
             problem.add_condition(f"condition{i}", parameter1=i)
             petab.write_condition_df(
@@ -375,6 +386,7 @@ def test_problem_from_yaml_multiple_files():
         petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir)
 
     for petab_problem in (petab_problem1, petab_problem2):
+        assert len(petab_problem.models) == 2
         assert petab_problem.measurement_df.shape[0] == 2
         assert petab_problem.observable_df.shape[0] == 2
         assert petab_problem.condition_df.shape[0] == 2