diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7d10fd310..fe396968f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -42,6 +42,7 @@ /pypesto/select/ @dilpath /pypesto/startpoint/ @PaulJonasJost /pypesto/store/ @PaulJonasJost +/pypesto/visualize/ @stephanmg # Tests /test/base/ @PaulJonasJost @vwiela diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4db56ab55..72d27d7f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,10 +55,10 @@ jobs: CXX: clang++ - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml mac: runs-on: macos-13 # TODO: change to macos-latest after the next release @@ -91,10 -91,10 @@ jobs: run: ulimit -n 65536 65536 && tox -e base - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml windows: runs-on: windows-latest @@ -157,16 +157,16 @@ jobs: - name: Run tests timeout-minutes: 35 - run: tox -e petab + run: tox -e petab && tox e -e petab -- pip uninstall -y amici env: CC: clang CXX: clang++ - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml julia: runs-on: ubuntu-latest @@ -214,10 +214,10 @@ jobs: run: tox -e julia - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml optimize: runs-on: ubuntu-latest @@ -250,10 +250,10 @@ jobs: run: tox -e optimize - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml hierarchical: runs-on: ubuntu-latest @@ -286,10 +286,10 @@ jobs: run: tox -e hierarchical - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml select: runs-on: ubuntu-latest @@ -322,10 +322,10 @@ jobs: run: tox -e select - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml quality: runs-on: ubuntu-latest diff --git a/.github/workflows/publish_dockerhub.yml b/.github/workflows/publish_dockerhub.yml new file mode 100644 index 000000000..a98ad41fd --- /dev/null +++ b/.github/workflows/publish_dockerhub.yml @@ -0,0 +1,32 @@ +name: Build and Push Docker Image + +on: + push: + branches: + - develop + paths: + - 'docker/**' + - '.github/workflows/publish_dockerhub.yml' + workflow_dispatch: + +jobs: + build-and-push: + runs-on: ubuntu-latest + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Build and tag the Docker image + run: | + docker build -t stephanmg/pypesto:latest -f docker/Dockerfile . + + - name: Push the Docker image to Docker Hub + run: | + docker push stephanmg/pypesto:latest diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c56d4b29e..a46a808e0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,56 @@ Release notes ..........
+0.5.5 (2025-01-10) +------------------- + +- **Breaking Changes** + - **PEtab Select**: There are some deprecated features that will show up as warnings. In addition: + + - The plotting methods now ignore some arguments. You will need to reimplement these with the newer approach, which uses + plotting methods from the PEtab Select library instead. See the model selection notebook for examples. + + - All objects containing multiple models (e.g., dictionaries or lists) are now replaced by `petab_select.Models`, + which supports dictionary and list methods. + + To convert your old list of models: + + ```python + petab_select.Models(list_of_Model) + ``` +- General + - Exclude nlopt==2.9.0 from setup (#1519) + - Improve CI (#1521, #1523, #1532, #1536, #1508, #1544, #1531) + - Update references/documentation (#1506, #1491, #1516, #1543) + - **Docker Image** (#1083, #1538) +- Hierarchical + - Fix no error if inner observable parameter in noise formula & vice versa (#1504) + - Remove inner datas from relative calculator (#1505) + - Fix not scaling inner pars when applying to rdatas (#1534) +- Optimization + - ESSOptimizer: Fix priority for local search startpoints (#1503) + - Fix NLoptOptimizer.__repr__ (#1518) + - Improve exception-handling in SacessOptimizer (#1517) + - Fix ESSOptimizer min of empty sequence (#1510) + - Don't modify sys.path for amici model imports (#1522) + - Set OptimizerResult.optimizer in Optimizer.minimize (#1525) + - SacessOptimizer: More efficient saving of intermediate results (#1529) +- Objective + - AmiciObjective/PEtab import: Fix plist (#1493) + - PEtab: Fix warning from fill_in_parameters with fixed parameters (#1509) + - Amici: Fix handling of PEtab fixed parameters (#1514) + - Fix get_parameter_prior_dict docstring (#1537) +- Select + - Support user-provided calibration results (#1338) + - Problem-specific minimize method for SaCeSS (#1339) + - Update for the latest PEtab Select version; see example notebook or the PEtab Select repo (#1530) +- Storage + - Enable writing Optimize(r)Result directly in Writer (#1528) + - Update parameter scale storage (#1542) +- Visualize + - Fix flatten of observable mapping with one observable (#1515) + + 0.5.4 (2024-10-19) ------------------- diff --git a/doc/example/model_selection.ipynb b/doc/example/model_selection.ipynb index f31338dc7..5d1ef6800 100644 --- a/doc/example/model_selection.ipynb +++ b/doc/example/model_selection.ipynb @@ -105,28 +105,6 @@ "metadata": {}, "outputs": [], "source": [ - "from petab_select import VIRTUAL_INITIAL_MODEL\n", - "\n", - "\n", - "# Helpers for plotting etc.\n", - "def get_labels(models):\n", - " labels = {\n", - " model.get_hash(): str(model.model_subspace_id)\n", - " for model in models\n", - " if model != VIRTUAL_INITIAL_MODEL\n", - " }\n", - " return labels\n", - "\n", - "\n", - "def get_digraph_labels(models, criterion):\n", - " zero = min(model.get_criterion(criterion) for model in models)\n", - " labels = {\n", - " model.get_hash(): f\"{model.model_subspace_id}\\n{model.get_criterion(criterion) - zero:.2f}\"\n", - " for model in models\n", - " }\n", - " return labels\n", - "\n", - "\n", "# Disable some logged messages that make the model selection log more\n", "# difficult to read.\n", "import tqdm\n", @@ -190,16 +168,18 @@ "outputs": [], "source": [ "# Reduce notebook runtime\n", - "minimize_options = {\n", - " \"n_starts\": 10,\n", - " \"filename\": None,\n", - " \"progress_bar\": False,\n", + "model_problem_options = {\n", + " \"minimize_options\": {\n", + " \"n_starts\": 10,\n", + " 
\"filename\": None,\n", + " \"progress_bar\": False,\n", + " },\n", "}\n", "\n", "best_model_1, _ = pypesto_select_problem_1.select(\n", " method=Method.FORWARD,\n", " criterion=Criterion.AIC,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", ")" ] }, @@ -221,7 +201,7 @@ "best_model_2, _ = pypesto_select_problem_1.select(\n", " method=Method.FORWARD,\n", " criterion=Criterion.AIC,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", " predecessor_model=best_model_1,\n", ")" ] @@ -239,21 +219,19 @@ "metadata": {}, "outputs": [], "source": [ - "import pypesto.visualize.select as pvs\n", + "import petab_select.plot as plot\n", "\n", "selected_models = [best_model_1, best_model_2]\n", - "ax = pvs.plot_selected_models(\n", - " [best_model_1, best_model_2],\n", - " criterion=Criterion.AIC,\n", - " relative=False,\n", - " labels=get_labels(selected_models),\n", - ")\n", - "ax = pvs.plot_selected_models(\n", - " [best_model_1, best_model_2],\n", + "\n", + "plot_data = plot.PlotData(\n", + " models=petab_select_problem.state.models,\n", " criterion=Criterion.AIC,\n", - " labels=get_labels(selected_models),\n", + " relative_criterion=False,\n", ")\n", - "ax.plot();" + "plot.line_best_by_iteration(plot_data=plot_data)\n", + "\n", + "plot_data.relative_criterion = True\n", + "plot.line_best_by_iteration(plot_data=plot_data);" ] }, { @@ -262,13 +240,12 @@ "metadata": {}, "outputs": [], "source": [ - "pvs.plot_calibrated_models_digraph(\n", - " problem=pypesto_select_problem_1,\n", - " labels=get_digraph_labels(\n", - " pypesto_select_problem_1.calibrated_models.values(),\n", - " criterion=Criterion.AIC,\n", - " ),\n", - ");" + "plot_data = plot.PlotData(\n", + " models=petab_select_problem.state.models,\n", + " criterion=Criterion.AIC,\n", + ")\n", + "plot_data.augment_labels(criterion=True)\n", + "plot.graph_history(plot_data=plot_data);" ] }, { @@ -290,15 +267,22 @@ "import numpy as np\n", "from petab_select import Model\n", "\n", - "petab_select_problem.model_space.reset_exclusions()\n", + "# The PEtab Select problem contains information about calibrated models,\n", + "# so that they are not recalibrated. 
In order to do a completely new\n", + "# model selection, one can reload the original PEtab Select problem.\n", + "petab_select_problem = petab_select.Problem.from_yaml(\n", + " \"model_selection/petab_select_problem.yaml\"\n", + ")\n", "pypesto_select_problem_2 = pypesto.select.Problem(\n", " petab_select_problem=petab_select_problem\n", ")\n", "\n", - "petab_yaml = \"model_selection/example_modelSelection.yaml\"\n", + "model_subspace_petab_yaml = \"model_selection/example_modelSelection.yaml\"\n", "initial_model = Model(\n", " model_id=\"myModel\",\n", - " petab_yaml=petab_yaml,\n", + " model_subspace_petab_yaml=model_subspace_petab_yaml,\n", + " model_subspace_id=\"dummy_myModel\",\n", + " model_subspace_indices=[0,0,0],\n", " parameters={\n", " \"k1\": 0.1,\n", " \"k2\": ESTIMATE,\n", @@ -321,7 +305,7 @@ " method=Method.BACKWARD,\n", " criterion=Criterion.AIC,\n", " predecessor_model=initial_model,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", ");" ] }, @@ -331,18 +315,12 @@ "metadata": {}, "outputs": [], "source": [ - "initial_model_label = {initial_model.get_hash(): initial_model.model_id}\n", - "\n", - "pvs.plot_calibrated_models_digraph(\n", - " problem=pypesto_select_problem_2,\n", - " labels={\n", - " **get_digraph_labels(\n", - " pypesto_select_problem_2.calibrated_models.values(),\n", - " criterion=Criterion.AIC,\n", - " ),\n", - " **initial_model_label,\n", - " },\n", - ");" + "plot_data = plot.PlotData(\n", + " models=petab_select_problem.state.models,\n", + " criterion=Criterion.AIC,\n", + ")\n", + "plot_data.augment_labels(criterion=True)\n", + "plot.graph_history(plot_data=plot_data);" ] }, { @@ -374,7 +352,9 @@ "metadata": {}, "outputs": [], "source": [ - "petab_select_problem.model_space.reset_exclusions()\n", + "petab_select_problem = petab_select.Problem.from_yaml(\n", + " \"model_selection/petab_select_problem.yaml\"\n", + ")\n", "pypesto_select_problem_3 = pypesto.select.Problem(\n", " petab_select_problem=petab_select_problem\n", ")\n", @@ -383,7 +363,7 @@ " criterion=Criterion.BIC,\n", " select_first_improvement=True,\n", " startpoint_latest_mle=True,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", ")" ] }, @@ -393,11 +373,11 @@ "metadata": {}, "outputs": [], "source": [ - "pvs.plot_selected_models(\n", - " selected_models=best_models,\n", + "plot_data = plot.PlotData(\n", + " petab_select_problem.state.models,\n", " criterion=Criterion.BIC,\n", - " labels=get_labels(best_models),\n", - ");" + ")\n", + "plot.line_best_by_iteration(plot_data=plot_data);" ] }, { @@ -406,15 +386,18 @@ "metadata": {}, "outputs": [], "source": [ - "pvs.plot_calibrated_models_digraph(\n", - " problem=pypesto_select_problem_3,\n", - " criterion=Criterion.BIC,\n", - " relative=False,\n", - " labels=get_digraph_labels(\n", - " pypesto_select_problem_3.calibrated_models.values(),\n", - " criterion=Criterion.BIC,\n", - " ),\n", - ");" + "plot_data.relative_criterion = False\n", + "plot_data.augment_labels(criterion=True)\n", + "plot.graph_history(plot_data=plot_data);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Postprocessors\n", + "\n", + "You can optionally provide postprocessing methods that operate on calibrated models. For example, there are postprocessors to save the pyPESTO result to disk, or save a summarized report of models to a TSV file. 
You can also write your own method, which should take only a single `pypesto.select.ModelProblem` object as input." ] }, { @@ -424,17 +407,42 @@ "outputs": [], "source": [ "# Repeat with AICc and criterion_threshold == 10\n", - "petab_select_problem.model_space.reset_exclusions()\n", + "petab_select_problem = petab_select.Problem.from_yaml(\n", + " \"model_selection/petab_select_problem.yaml\"\n", + ")\n", "pypesto_select_problem_4 = pypesto.select.Problem(\n", " petab_select_problem=petab_select_problem\n", - ")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup a postprocessor that saves the pyPESTO result objects to disk, as well as a summarized report.\n", + "from functools import partial\n", + "from pypesto.select.postprocessors import save_postprocessor, report_postprocessor, multi_postprocessor\n", + "from pathlib import Path\n", + "import shutil\n", + "output_path = Path() / \"output_select\"\n", + "if output_path.exists():\n", + " shutil.rmtree(str(output_path))\n", + "output_path.mkdir(exist_ok=True, parents=True)\n", + "model_postprocessor = partial(multi_postprocessor, postprocessors=[\n", + " partial(save_postprocessor, output_path=output_path),\n", + " partial(report_postprocessor, output_filepath=output_path / \"report.tsv\"),\n", + "])\n", + "\n", "best_models = pypesto_select_problem_4.select_to_completion(\n", " method=Method.FORWARD,\n", " criterion=Criterion.AICC,\n", " select_first_improvement=True,\n", " startpoint_latest_mle=True,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", " criterion_threshold=10,\n", + " model_postprocessor=model_postprocessor,\n", ")" ] }, @@ -444,11 +452,41 @@ "metadata": {}, "outputs": [], "source": [ - "pvs.plot_selected_models(\n", - " selected_models=best_models,\n", + "df = pd.read_csv(output_path / \"report.tsv\", sep=\"\\t\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_data = plot.PlotData(\n", + " models=petab_select_problem.state.models,\n", " criterion=Criterion.AICC,\n", - " labels=get_labels(best_models),\n", - ");" + ")\n", + "plot.line_best_by_iteration(plot_data=plot_data);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_data.relative_criterion = False\n", + "plot_data.augment_labels(criterion=True)\n", + "plot.graph_history(plot_data=plot_data);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving results\n", + "\n", + "Although pyPESTO results and summaries can be saved to disk via the postprocessors, most analysis methods will expect a `petab_select.Models` object, which is created during model selection and can also be saved to disk for later analysis." 
] }, { @@ -457,14 +495,42 @@ "metadata": {}, "outputs": [], "source": [ - "pvs.plot_calibrated_models_digraph(\n", - " problem=pypesto_select_problem_4,\n", + "petab_select_problem.state.models.to_yaml(output_path / \"my_models.yaml\")\n", + "loaded_models = petab_select.Models.from_yaml(output_path / \"my_models.yaml\")\n", + "\n", + "plot_data = plot.PlotData(\n", + " models=loaded_models,\n", " criterion=Criterion.AICC,\n", - " relative=False,\n", - " labels=get_digraph_labels(\n", - " pypesto_select_problem_4.calibrated_models.values(),\n", - " criterion=Criterion.AICC,\n", - " ),\n", + " relative_criterion=False\n", + ")\n", + "plot_data.augment_labels(criterion=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Additional plotting methods\n", + "\n", + "There are additional plotting methods provided by the PEtab Select library, which are demonstrated in another notebook: https://petab-select.readthedocs.io/en/stable/examples/visualization.html\n", + "\n", + "An example is this ordering of models according to the iteration in which they were calibrated. N.B. The predecessor of M1_7 is actually M1_1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot.graph_iteration_layers(\n", + " plot_data=plot_data,\n", + " draw_networkx_kwargs={\n", + " \"arrowstyle\": \"-|>\",\n", + " \"node_shape\": \"s\",\n", + " \"node_size\": 4000,\n", + " \"edgecolors\": \"k\",\n", + " },\n", ");" ] }, @@ -482,14 +548,18 @@ "metadata": {}, "outputs": [], "source": [ - "petab_select_problem.model_space.reset_exclusions()\n", + "petab_select_problem = petab_select.Problem.from_yaml(\n", + " \"model_selection/petab_select_problem.yaml\"\n", + ")\n", "pypesto_select_problem_5 = pypesto.select.Problem(\n", " petab_select_problem=petab_select_problem\n", ")\n", "\n", "initial_model_1 = Model(\n", " model_id=\"myModel1\",\n", - " petab_yaml=petab_yaml,\n", + " model_subspace_petab_yaml=model_subspace_petab_yaml,\n", + " model_subspace_id=\"dummy_myModel1\",\n", + " model_subspace_indices=[0,0,0],\n", " parameters={\n", " \"k1\": 0,\n", " \"k2\": 0,\n", @@ -500,7 +570,9 @@ "\n", "initial_model_2 = Model(\n", " model_id=\"myModel2\",\n", - " petab_yaml=petab_yaml,\n", + " model_subspace_petab_yaml=model_subspace_petab_yaml,\n", + " model_subspace_id=\"dummy_myModel2\",\n", + " model_subspace_indices=[0,0,0],\n", " parameters={\n", " \"k1\": ESTIMATE,\n", " \"k2\": ESTIMATE,\n", @@ -514,7 +586,7 @@ " method=Method.FORWARD,\n", " criterion=Criterion.AIC,\n", " predecessor_models=initial_models,\n", - " minimize_options=minimize_options,\n", + " model_problem_options=model_problem_options,\n", ")" ] }, @@ -524,23 +596,12 @@ "metadata": {}, "outputs": [], "source": [ - "initial_model_labels = {\n", - " initial_model.get_hash(): initial_model.model_id\n", - " for initial_model in initial_models\n", - "}\n", - "\n", - "pvs.plot_calibrated_models_digraph(\n", - " problem=pypesto_select_problem_5,\n", + "plot_data = plot.PlotData(\n", + " models=petab_select_problem.state.models,\n", " criterion=Criterion.AIC,\n", - " relative=False,\n", - " labels={\n", - " **get_digraph_labels(\n", - " pypesto_select_problem_5.calibrated_models.values(),\n", - " criterion=Criterion.AIC,\n", - " ),\n", - " **initial_model_labels,\n", - " },\n", - ");" + " relative_criterion=False,\n", + ")\n", + "plot.graph_history(plot_data);" ] } ], diff --git a/doc/example/model_selection/model_space.tsv b/doc/example/model_selection/model_space.tsv 
index 42e3f1f54..d4eb7ec42 100644 --- a/doc/example/model_selection/model_space.tsv +++ b/doc/example/model_selection/model_space.tsv @@ -1,4 +1,4 @@ -model_subspace_id petab_yaml k1 k2 k3 +model_subspace_id model_subspace_petab_yaml k1 k2 k3 M1_0 example_modelSelection.yaml 0 0 0 M1_1 example_modelSelection.yaml 0.2 0.1 estimate M1_2 example_modelSelection.yaml 0.2 estimate 0 diff --git a/doc/using_pypesto.bib b/doc/using_pypesto.bib index c4c85924d..40ca3247f 100644 --- a/doc/using_pypesto.bib +++ b/doc/using_pypesto.bib @@ -14,63 +14,67 @@ @Article{FalcoCoh2023 } @Article{LakrisenkoSta2023, - author = {Lakrisenko, Polina AND Stapor, Paul AND Grein, Stephan AND Paszkowski, Łukasz AND Pathirana, Dilan AND Fröhlich, Fabian AND Lines, Glenn Terje AND Weindl, Daniel AND Hasenauer, Jan}, - journal = {PLOS Computational Biology}, - title = {Efficient computation of adjoint sensitivities at steady-state in ODE models of biochemical reaction networks}, - year = {2023}, - month = {01}, - number = {1}, - pages = {1-19}, - volume = {19}, - abstract = {Dynamical models in the form of systems of ordinary differential equations have become a standard tool in systems biology. Many parameters of such models are usually unknown and have to be inferred from experimental data. Gradient-based optimization has proven to be effective for parameter estimation. However, computing gradients becomes increasingly costly for larger models, which are required for capturing the complex interactions of multiple biochemical pathways. Adjoint sensitivity analysis has been pivotal for working with such large models, but methods tailored for steady-state data are currently not available. We propose a new adjoint method for computing gradients, which is applicable if the experimental data include steady-state measurements. The method is based on a reformulation of the backward integration problem to a system of linear algebraic equations. The evaluation of the proposed method using real-world problems shows a speedup of total simulation time by a factor of up to 4.4. Our results demonstrate that the proposed approach can achieve a substantial improvement in computation time, in particular for large-scale models, where computational efficiency is critical.}, - creationdate = {2023-01-26T11:19:52}, - doi = {10.1371/journal.pcbi.1010783}, - publisher = {Public Library of Science}, + author = {Lakrisenko, Polina and Stapor, Paul and Grein, Stephan and Paszkowski, Łukasz and Pathirana, Dilan and Fröhlich, Fabian and Lines, Glenn Terje and Weindl, Daniel and Hasenauer, Jan}, + journal = {PLOS Computational Biology}, + title = {Efficient computation of adjoint sensitivities at steady-state in {ODE} models of biochemical reaction networks}, + year = {2023}, + month = {01}, + number = {1}, + pages = {1-19}, + volume = {19}, + abstract = {Dynamical models in the form of systems of ordinary differential equations have become a standard tool in systems biology. Many parameters of such models are usually unknown and have to be inferred from experimental data. Gradient-based optimization has proven to be effective for parameter estimation. However, computing gradients becomes increasingly costly for larger models, which are required for capturing the complex interactions of multiple biochemical pathways. Adjoint sensitivity analysis has been pivotal for working with such large models, but methods tailored for steady-state data are currently not available. 
We propose a new adjoint method for computing gradients, which is applicable if the experimental data include steady-state measurements. The method is based on a reformulation of the backward integration problem to a system of linear algebraic equations. The evaluation of the proposed method using real-world problems shows a speedup of total simulation time by a factor of up to 4.4. Our results demonstrate that the proposed approach can achieve a substantial improvement in computation time, in particular for large-scale models, where computational efficiency is critical.}, +  creationdate     = {2023-01-26T11:19:52}, +  doi              = {10.1371/journal.pcbi.1010783}, +  modificationdate = {2024-11-08T18:28:07}, +  publisher        = {Public Library of Science}, } @Article{SchmiesterSch2021, -  author    = {Schmiester, Leonard AND Schälte, Yannik AND Bergmann, Frank T. AND Camba, Tacio AND Dudkin, Erika AND Egert, Janine AND Fröhlich, Fabian AND Fuhrmann, Lara AND Hauber, Adrian L. AND Kemmer, Svenja AND Lakrisenko, Polina AND Loos, Carolin AND Merkt, Simon AND Müller, Wolfgang AND Pathirana, Dilan AND Raimúndez, Elba AND Refisch, Lukas AND Rosenblatt, Marcus AND Stapor, Paul L. AND Städter, Philipp AND Wang, Dantong AND Wieland, Franz-Georg AND Banga, Julio R. AND Timmer, Jens AND Villaverde, Alejandro F. AND Sahle, Sven AND Kreutz, Clemens AND Hasenauer, Jan AND Weindl, Daniel}, -  journal   = {PLOS Computational Biology}, -  title     = {PEtab—Interoperable specification of parameter estimation problems in systems biology}, -  year      = {2021}, -  month     = {01}, -  number    = {1}, -  pages     = {1-10}, -  volume    = {17}, -  abstract  = {Author summary Parameter estimation is a common and crucial task in modeling, as many models depend on unknown parameters which need to be inferred from data. There exist various tools for tasks like model development, model simulation, optimization, or uncertainty analysis, each with different capabilities and strengths. In order to be able to easily combine tools in an interoperable manner, but also to make results accessible and reusable for other researchers, it is valuable to define parameter estimation problems in a standardized form. Here, we introduce PEtab, a parameter estimation problem definition format which integrates with established systems biology standards for model and data specification. As the novel format is already supported by eight software tools with hundreds of users in total, we expect it to be of great use and impact in the community, both for modeling and algorithm development.}, -  creationdate = {2023-01-26T11:30:40}, -  doi       = {10.1371/journal.pcbi.1008646}, -  publisher = {Public Library of Science}, -  timestamp = {2021-01-30}, +  author           = {Schmiester, Leonard and Schälte, Yannik and Bergmann, Frank T. and Camba, Tacio and Dudkin, Erika and Egert, Janine and Fröhlich, Fabian and Fuhrmann, Lara and Hauber, Adrian L. and Kemmer, Svenja and Lakrisenko, Polina and Loos, Carolin and Merkt, Simon and Müller, Wolfgang and Pathirana, Dilan and Raimúndez, Elba and Refisch, Lukas and Rosenblatt, Marcus and Stapor, Paul L. and Städter, Philipp and Wang, Dantong and Wieland, Franz-Georg and Banga, Julio R. and Timmer, Jens and Villaverde, Alejandro F. 
and Sahle, Sven and Kreutz, Clemens and Hasenauer, Jan and Weindl, Daniel}, + journal = {PLOS Computational Biology}, + title = {{PEtab}—Interoperable specification of parameter estimation problems in systems biology}, + year = {2021}, + month = {01}, + number = {1}, + pages = {1-10}, + volume = {17}, + abstract = {Author summary Parameter estimation is a common and crucial task in modeling, as many models depend on unknown parameters which need to be inferred from data. There exist various tools for tasks like model development, model simulation, optimization, or uncertainty analysis, each with different capabilities and strengths. In order to be able to easily combine tools in an interoperable manner, but also to make results accessible and reusable for other researchers, it is valuable to define parameter estimation problems in a standardized form. Here, we introduce PEtab, a parameter estimation problem definition format which integrates with established systems biology standards for model and data specification. As the novel format is already supported by eight software tools with hundreds of users in total, we expect it to be of great use and impact in the community, both for modeling and algorithm development.}, + creationdate = {2023-01-26T11:30:40}, + doi = {10.1371/journal.pcbi.1008646}, + modificationdate = {2024-11-08T18:27:03}, + publisher = {Public Library of Science}, + timestamp = {2021-01-30}, } @Article{MishraWan2023, - author = {Shekhar Mishra and Ziyu Wang and Michael J. Volk and Huimin Zhao}, - journal = {Metabolic Engineering}, - title = {Design and application of a kinetic model of lipid metabolism in Saccharomyces cerevisiae}, - year = {2023}, - issn = {1096-7176}, - pages = {12-18}, - volume = {75}, - abstract = {Lipid biosynthesis plays a vital role in living cells and has been increasingly engineered to overproduce various lipid-based chemicals. However, owing to the tightly constrained and interconnected nature of lipid biosynthesis, both understanding and engineering of lipid metabolism remain challenging, even with the help of mathematical models. Here we report the development of a kinetic metabolic model of lipid metabolism in Saccharomyces cerevisiae that integrates fatty acid biosynthesis, glycerophospholipid metabolism, sphingolipid metabolism, storage lipids, lumped sterol synthesis, and the synthesis and transport of relevant target-chemicals, such as fatty acids and fatty alcohols. The model was trained on lipidomic data of a reference S. cerevisiae strain, single knockout mutants, and lipid overproduction strains reported in literature. The model was used to design mutants for fatty alcohol overproduction and the lipidomic analysis of the resultant mutant strains coupled with model-guided hypothesis led to discovery of a futile cycle in the triacylglycerol biosynthesis pathway. In addition, the model was used to explain successful and unsuccessful mutant designs in metabolic engineering literature. Thus, this kinetic model of lipid metabolism can not only enable the discovery of new phenomenon in lipid metabolism but also the engineering of mutant strains for overproduction of lipids.}, - creationdate = {2023-01-26T11:31:17}, - doi = {https://doi.org/10.1016/j.ymben.2022.11.003}, - keywords = {Lipid metabolism, Kinetic model, Free fatty acid, Fatty alcohol}, + author = {Shekhar Mishra and Ziyu Wang and Michael J. 
Volk and Huimin Zhao}, + journal = {Metabolic Engineering}, + title = {Design and application of a kinetic model of lipid metabolism in Saccharomyces cerevisiae}, + year = {2023}, + issn = {1096-7176}, + pages = {12-18}, + volume = {75}, + abstract = {Lipid biosynthesis plays a vital role in living cells and has been increasingly engineered to overproduce various lipid-based chemicals. However, owing to the tightly constrained and interconnected nature of lipid biosynthesis, both understanding and engineering of lipid metabolism remain challenging, even with the help of mathematical models. Here we report the development of a kinetic metabolic model of lipid metabolism in Saccharomyces cerevisiae that integrates fatty acid biosynthesis, glycerophospholipid metabolism, sphingolipid metabolism, storage lipids, lumped sterol synthesis, and the synthesis and transport of relevant target-chemicals, such as fatty acids and fatty alcohols. The model was trained on lipidomic data of a reference S. cerevisiae strain, single knockout mutants, and lipid overproduction strains reported in literature. The model was used to design mutants for fatty alcohol overproduction and the lipidomic analysis of the resultant mutant strains coupled with model-guided hypothesis led to discovery of a futile cycle in the triacylglycerol biosynthesis pathway. In addition, the model was used to explain successful and unsuccessful mutant designs in metabolic engineering literature. Thus, this kinetic model of lipid metabolism can not only enable the discovery of new phenomenon in lipid metabolism but also the engineering of mutant strains for overproduction of lipids.}, + creationdate = {2023-01-26T11:31:17}, + doi = {10.1016/j.ymben.2022.11.003}, + keywords = {Lipid metabolism, Kinetic model, Free fatty acid, Fatty alcohol}, + modificationdate = {2024-11-08T18:25:04}, } @Article{FroehlichSor2022, - author = {Fröhlich, Fabian AND Sorger, Peter K.}, - journal = {PLOS Computational Biology}, - title = {Fides: Reliable trust-region optimization for parameter estimation of ordinary differential equation models}, - year = {2022}, - month = {07}, - number = {7}, - pages = {1-28}, - volume = {18}, - abstract = {Ordinary differential equation (ODE) models are widely used to study biochemical reactions in cellular networks since they effectively describe the temporal evolution of these networks using mass action kinetics. The parameters of these models are rarely known a priori and must instead be estimated by calibration using experimental data. Optimization-based calibration of ODE models on is often challenging, even for low-dimensional problems. Multiple hypotheses have been advanced to explain why biochemical model calibration is challenging, including non-identifiability of model parameters, but there are few comprehensive studies that test these hypotheses, likely because tools for performing such studies are also lacking. Nonetheless, reliable model calibration is essential for uncertainty analysis, model comparison, and biological interpretation. We implemented an established trust-region method as a modular Python framework (fides) to enable systematic comparison of different approaches to ODE model calibration involving a variety of Hessian approximation schemes. We evaluated fides on a recently developed corpus of biologically realistic benchmark problems for which real experimental data are available. 
Unexpectedly, we observed high variability in optimizer performance among different implementations of the same mathematical instructions (algorithms). Analysis of possible sources of poor optimizer performance identified limitations in the widely used Gauss-Newton, BFGS and SR1 Hessian approximation schemes. We addressed these drawbacks with a novel hybrid Hessian approximation scheme that enhances optimizer performance and outperforms existing hybrid approaches. When applied to the corpus of test models, we found that fides was on average more reliable and efficient than existing methods using a variety of criteria. We expect fides to be broadly useful for ODE constrained optimization problems in biochemical models and to be a foundation for future methods development.}, -  creationdate = {2023-01-26T11:31:44}, -  doi       = {10.1371/journal.pcbi.1010322}, -  publisher = {Public Library of Science}, +  author           = {Fröhlich, Fabian and Sorger, Peter K.}, +  journal          = {PLOS Computational Biology}, +  title            = {Fides: Reliable trust-region optimization for parameter estimation of ordinary differential equation models}, +  year             = {2022}, +  month            = {07}, +  number           = {7}, +  pages            = {1-28}, +  volume           = {18}, +  abstract         = {Ordinary differential equation (ODE) models are widely used to study biochemical reactions in cellular networks since they effectively describe the temporal evolution of these networks using mass action kinetics. The parameters of these models are rarely known a priori and must instead be estimated by calibration using experimental data. Optimization-based calibration of ODE models is often challenging, even for low-dimensional problems. Multiple hypotheses have been advanced to explain why biochemical model calibration is challenging, including non-identifiability of model parameters, but there are few comprehensive studies that test these hypotheses, likely because tools for performing such studies are also lacking. Nonetheless, reliable model calibration is essential for uncertainty analysis, model comparison, and biological interpretation. We implemented an established trust-region method as a modular Python framework (fides) to enable systematic comparison of different approaches to ODE model calibration involving a variety of Hessian approximation schemes. We evaluated fides on a recently developed corpus of biologically realistic benchmark problems for which real experimental data are available. Unexpectedly, we observed high variability in optimizer performance among different implementations of the same mathematical instructions (algorithms). Analysis of possible sources of poor optimizer performance identified limitations in the widely used Gauss-Newton, BFGS and SR1 Hessian approximation schemes. We addressed these drawbacks with a novel hybrid Hessian approximation scheme that enhances optimizer performance and outperforms existing hybrid approaches. When applied to the corpus of test models, we found that fides was on average more reliable and efficient than existing methods using a variety of criteria. 
We expect fides to be broadly useful for ODE constrained optimization problems in biochemical models and to be a foundation for future methods development.}, + creationdate = {2023-01-26T11:31:44}, + doi = {10.1371/journal.pcbi.1010322}, + modificationdate = {2024-11-08T18:20:34}, + publisher = {Public Library of Science}, } @Article{FroehlichGer2022, @@ -242,7 +246,7 @@ @Article{ArrudaSch2023 @Article{MerktAli2024, author = {Merkt, Simon and Ali, Solomon and Gudina, Esayas Kebede and Adissu, Wondimagegn and Gize, Addisu and Muenchhoff, Maximilian and Graf, Alexander and Krebs, Stefan and Elsbernd, Kira and Kisch, Rebecca and Betizazu, Sisay Sirgu and Fantahun, Bereket and Bekele, Delayehu and Rubio-Acero, Raquel and Gashaw, Mulatu and Girma, Eyob and Yilma, Daniel and Zeynudin, Ahmed and Paunovic, Ivana and Hoelscher, Michael and Blum, Helmut and Hasenauer, Jan and Kroidl, Arne and Wieser, Andreas}, journal = {Nature Communications}, - title = {Long-term monitoring of SARS-CoV-2 seroprevalence and variants in Ethiopia provides prediction for immunity and cross-immunity}, + title = {Long-term monitoring of {SARS-CoV-2} seroprevalence and variants in {Ethiopia} provides prediction for immunity and cross-immunity}, year = {2024}, issn = {2041-1723}, month = apr, @@ -250,7 +254,7 @@ @Article{MerktAli2024 volume = {15}, creationdate = {2024-04-29T08:32:16}, doi = {10.1038/s41467-024-47556-2}, - modificationdate = {2024-04-29T08:32:16}, + modificationdate = {2024-11-08T18:27:48}, publisher = {Springer Science and Business Media LLC}, } @@ -282,29 +286,18 @@ @Article{HoepflAlb2024 modificationdate = {2024-05-16T07:58:55}, } -@Misc{LakrisenkoPat2024, - author = {Polina Lakrisenko and Dilan Pathirana and Daniel Weindl and Jan Hasenauer}, - title = {Exploration of methods for computing sensitivities in ODE models at dynamic and steady states}, - year = {2024}, - archiveprefix = {arXiv}, - creationdate = {2024-05-30T09:47:51}, - eprint = {2405.16524}, - modificationdate = {2024-05-30T09:47:51}, - primaryclass = {q-bio.QM}, -} - @Article{PhilippsKoe2024, author = {Maren Philipps and Antonia Körner and Jakob Vanhoefer and Dilan Pathirana and Jan Hasenauer}, + journal = {IFAC-PapersOnLine}, title = {Non-Negative Universal Differential Equations With Applications in Systems Biology}, year = {2024}, - journal = {IFAC-PapersOnLine}, - volume = {58}, + issn = {2405-8963}, number = {23}, pages = {25-30}, - issn = {2405-8963}, - doi = {https://doi.org/10.1016/j.ifacol.2024.10.005}, - url = {https://www.sciencedirect.com/science/article/pii/S2405896324017518}, - abstract = {Universal differential equations (UDEs) leverage the respective advantages of mechanistic models and artificial neural networks and combine them into one dynamic model. However, these hybrid models can suffer from unrealistic solutions, such as negative values for biochemical quantities. We present non-negative UDE (nUDEs), a constrained UDE variant that guarantees non-negative values. Furthermore, we explore regularisation techniques to improve generalisation and interpretability of UDEs.} + volume = {58}, + abstract = {Universal differential equations (UDEs) leverage the respective advantages of mechanistic models and artificial neural networks and combine them into one dynamic model. However, these hybrid models can suffer from unrealistic solutions, such as negative values for biochemical quantities. We present non-negative UDE (nUDEs), a constrained UDE variant that guarantees non-negative values. 
Furthermore, we explore regularisation techniques to improve generalisation and interpretability of UDEs.}, + doi = {10.1016/j.ifacol.2024.10.005}, + modificationdate = {2024-11-08T18:25:20}, } @Article{SchmiesterBra2024, @@ -319,8 +312,7 @@ @Article{SchmiesterBra2024 creationdate = {2024-08-01T09:44:04}, doi = {10.1158/1078-0432.CCR-24-0244}, eprint = {https://aacrjournals.org/clincancerres/article-pdf/doi/10.1158/1078-0432.CCR-24-0244/3478451/ccr-24-0244.pdf}, - modificationdate = {2024-08-01T09:44:04}, - url = {https://doi.org/10.1158/1078-0432.CCR-24-0244}, + modificationdate = {2024-11-08T18:17:14}, } @InProceedings{JacksonCha2023, @@ -339,4 +331,35 @@ @InProceedings{JacksonCha2023 modificationdate = {2024-09-06T15:49:47}, } +@Article{ArmisteadHoe2024, + author = {Armistead, Joy and Höpfl, Sebastian and Goldhausen, Pierre and Müller-Hartmann, Andrea and Fahle, Evelin and Hatzold, Julia and Franzen, Rainer and Brodesser, Susanne and Radde, Nicole E. and Hammerschmidt, Matthias}, + journal = {Cell Death & Disease}, + title = {A sphingolipid rheostat controls apoptosis versus apical cell extrusion as alternative tumour-suppressive mechanisms}, + year = {2024}, + issn = {2041-4889}, + month = oct, + number = {10}, + volume = {15}, + creationdate = {2024-10-17T16:30:11}, + doi = {10.1038/s41419-024-07134-2}, + modificationdate = {2024-10-17T16:30:11}, + publisher = {Springer Science and Business Media LLC}, +} + +@Article{LakrisenkoPat2024, + author = {Lakrisenko, Polina and Pathirana, Dilan and Weindl, Daniel and Hasenauer, Jan}, + journal = {PLOS ONE}, + title = {Benchmarking methods for computing local sensitivities in ordinary differential equation models at dynamic and steady states}, + year = {2024}, + month = {10}, + number = {10}, + pages = {1-19}, + volume = {19}, + abstract = {Estimating parameters of dynamic models from experimental data is a challenging, and often computationally-demanding task. It requires a large number of model simulations and objective function gradient computations, if gradient-based optimization is used. In many cases, steady-state computation is a part of model simulation, either due to steady-state data or an assumption that the system is at steady state at the initial time point. Various methods are available for steady-state and gradient computation. Yet, the most efficient pair of methods (one for steady states, one for gradients) for a particular model is often not clear. In order to facilitate the selection of methods, we explore six method pairs for computing the steady state and sensitivities at steady state using six real-world problems. The method pairs involve numerical integration or Newton’s method to compute the steady-state, and—for both forward and adjoint sensitivity analysis—numerical integration or a tailored method to compute the sensitivities at steady-state. Our evaluation shows that all method pairs provide accurate steady-state and gradient values, and that the two method pairs that combine numerical integration for the steady-state with a tailored method for the sensitivities at steady-state were the most robust, and amongst the most computationally-efficient. We also observed that while Newton’s method for steady-state computation yields a substantial speedup compared to numerical integration, it may lead to a large number of simulation failures. Overall, our study provides a concise overview across current methods for computing sensitivities at steady state. 
While our study shows that there is no universally-best method pair, it also provides guidance to modelers in choosing the right methods for a problem at hand.}, + creationdate = {2024-11-08T18:16:35}, + doi = {10.1371/journal.pone.0312148}, + modificationdate = {2024-11-08T18:17:06}, + publisher = {Public Library of Science}, +} + @Comment{jabref-meta: databaseType:bibtex;} diff --git a/pypesto/C.py b/pypesto/C.py index 5a0e7438a..e79c90717 100644 --- a/pypesto/C.py +++ b/pypesto/C.py @@ -93,7 +93,6 @@ class EnsembleType(Enum): # HIERARCHICAL SCALING + OFFSET INNER_PARAMETERS = "inner_parameters" -INNER_RDATAS = "inner_rdatas" PARAMETER_TYPE = "parameterType" RELATIVE = "relative" diff --git a/pypesto/hierarchical/petab.py b/pypesto/hierarchical/petab.py index c1b85b3be..dde1d0dad 100644 --- a/pypesto/hierarchical/petab.py +++ b/pypesto/hierarchical/petab.py @@ -76,63 +76,63 @@ def validate_hierarchical_petab_problem(petab_problem: petab.Problem) -> None: petab_problem: The PEtab problem. """ - if PARAMETER_TYPE not in petab_problem.parameter_df: - # not a hierarchical optimization problem - return - - # ensure we only have linear parameter scale - inner_parameter_table = petab_problem.parameter_df[ - petab_problem.parameter_df[PARAMETER_TYPE].isin( - [ - InnerParameterType.OFFSET, - InnerParameterType.SIGMA, - InnerParameterType.SCALING, - ] - ) - ] - if ( - petab.PARAMETER_SCALE in inner_parameter_table - and not ( - inner_parameter_table[petab.PARAMETER_SCALE].isna() - | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN) - | ( - inner_parameter_table[PARAMETER_TYPE] - != InnerParameterType.SIGMA + if PARAMETER_TYPE in petab_problem.parameter_df: + # ensure we only have linear parameter scale + inner_parameter_table = petab_problem.parameter_df[ + petab_problem.parameter_df[PARAMETER_TYPE].isin( + [ + InnerParameterType.OFFSET, + InnerParameterType.SIGMA, + InnerParameterType.SCALING, + ] ) - ).all() - ): - sub_df = inner_parameter_table.loc[ - :, [PARAMETER_TYPE, petab.PARAMETER_SCALE] ] - raise NotImplementedError( - "LOG and LOG10 parameter scale of inner parameters is not supported " - "for sigma parameters. Inner parameter table:\n" - f"{sub_df}" - ) - elif ( - petab.PARAMETER_SCALE in inner_parameter_table - and not ( - inner_parameter_table[petab.PARAMETER_SCALE].isna() - | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN) - ).all() - ): - sub_df = inner_parameter_table.loc[ - :, [PARAMETER_TYPE, petab.PARAMETER_SCALE] - ] - warnings.warn( - f"LOG and LOG10 parameter scale of inner parameters is used only " - f"for their visualization, and does not affect their optimization. " - f"Inner parameter table:\n{sub_df}", - stacklevel=1, - ) + if ( + petab.PARAMETER_SCALE in inner_parameter_table + and not ( + inner_parameter_table[petab.PARAMETER_SCALE].isna() + | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN) + | ( + inner_parameter_table[PARAMETER_TYPE] + != InnerParameterType.SIGMA + ) + ).all() + ): + sub_df = inner_parameter_table.loc[ + :, [PARAMETER_TYPE, petab.PARAMETER_SCALE] + ] + raise NotImplementedError( + "LOG and LOG10 parameter scale of inner parameters is not supported " + "for sigma parameters. 
Inner parameter table:\n" + f"{sub_df}" + ) + elif ( + petab.PARAMETER_SCALE in inner_parameter_table + and not ( + inner_parameter_table[petab.PARAMETER_SCALE].isna() + | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN) + ).all() + ): + sub_df = inner_parameter_table.loc[ + :, [PARAMETER_TYPE, petab.PARAMETER_SCALE] + ] + warnings.warn( + f"LOG and LOG10 parameter scale of inner parameters is used only " + f"for their visualization, and does not affect their optimization. " + f"Inner parameter table:\n{sub_df}", + stacklevel=1, + ) - inner_parameter_df = validate_measurement_formulae( - petab_problem=petab_problem - ) + inner_parameter_df = validate_measurement_formulae( + petab_problem=petab_problem + ) - validate_inner_parameter_pairings(inner_parameter_df=inner_parameter_df) + validate_inner_parameter_pairings( + inner_parameter_df=inner_parameter_df + ) - validate_observable_data_types(petab_problem=petab_problem) + if MEASUREMENT_TYPE in petab_problem.measurement_df: + validate_observable_data_types(petab_problem=petab_problem) def validate_inner_parameter_pairings( @@ -477,6 +477,18 @@ def _get_symbolic_formula_from_measurement( observable_id=observable_id, override_type=formula_type, ) + # Search for placeholders in the formula that are not allowed to be + # overridden by inner parameters -- noise inner parameters should not + # be in observable formulas, and vice versa. + disallowed_formula_type = ( + "noise" if formula_type == "observable" else "observable" + ) + disallowed_formula_placeholders = get_formula_placeholders( + formula_string=formula_string, + observable_id=observable_id, + override_type=disallowed_formula_type, + ) + if formula_placeholders: overrides = measurement[formula_type + "Parameters"] overrides = ( @@ -487,6 +499,20 @@ def _get_symbolic_formula_from_measurement( subs = dict(zip(formula_placeholders, overrides)) symbolic_formula = symbolic_formula.subs(subs) + if disallowed_formula_placeholders: + disallowed_overrides = measurement[ + disallowed_formula_type + "Parameters" + ] + disallowed_overrides = ( + disallowed_overrides.split(PARAMETER_SEPARATOR) + if isinstance(disallowed_overrides, str) + else [disallowed_overrides] + ) + disallowed_subs = dict( + zip(disallowed_formula_placeholders, disallowed_overrides) + ) + symbolic_formula = symbolic_formula.subs(disallowed_subs) + symbolic_formula_inner_parameters = { sp.Symbol(inner_parameter_id): inner_parameter_type for inner_parameter_id, inner_parameter_type in inner_parameters.items() @@ -614,7 +640,10 @@ def validate_observable_data_types(petab_problem: petab.Problem) -> None: other_data_type, other_observables, ) in observables_by_data_type.items(): - if data_type == other_data_type: + if data_type == other_data_type or ( + data_type in CENSORING_TYPES + and other_data_type in CENSORING_TYPES + ): continue if observables & other_observables: raise ValueError( diff --git a/pypesto/hierarchical/relative/calculator.py b/pypesto/hierarchical/relative/calculator.py index cfd74a42f..ceec8ef76 100644 --- a/pypesto/hierarchical/relative/calculator.py +++ b/pypesto/hierarchical/relative/calculator.py @@ -21,7 +21,6 @@ GRAD, HESS, INNER_PARAMETERS, - INNER_RDATAS, RDATAS, RES, SRES, @@ -365,6 +364,6 @@ def calculate_directly( rdatas=rdatas, inner_parameters=inner_parameters, ) - inner_result[INNER_RDATAS] = rdatas + inner_result[RDATAS] = rdatas return inner_result, inner_parameters diff --git a/pypesto/hierarchical/relative/solver.py b/pypesto/hierarchical/relative/solver.py index 930fdeba2..6ee1e2612 
100644 --- a/pypesto/hierarchical/relative/solver.py +++ b/pypesto/hierarchical/relative/solver.py @@ -238,6 +238,8 @@ def apply_inner_parameters_to_rdatas( """ sim = [rdata["y"] for rdata in rdatas] sigma = [rdata["sigmay"] for rdata in rdatas] + inner_parameters = copy.deepcopy(inner_parameters) + inner_parameters = scale_back_value_dict(inner_parameters, problem) # apply offsets, scalings and sigmas for x in problem.get_xs_for_type(InnerParameterType.SCALING): diff --git a/pypesto/objective/amici/amici.py b/pypesto/objective/amici/amici.py index 3af3f7023..d9fd371a3 100644 --- a/pypesto/objective/amici/amici.py +++ b/pypesto/objective/amici/amici.py @@ -178,7 +178,16 @@ def __init__( parameter_mapping = create_identity_parameter_mapping( amici_model, len(edatas) ) + # parameter mapping where IDs of the currently fixed parameters + # have been replaced by their respective values + # (relevant for setting ``plist`` in ExpData later on) self.parameter_mapping = parameter_mapping + # parameter mapping independent of fixed parameters + # (i.e., all objective parameters are included as parameter IDs, + # not as their values) + self._parameter_mapping_full = copy.deepcopy(parameter_mapping) + # IDs of fixed `Problem` parameters + self._fixed_parameter_ids = [] # If supported, enable `guess_steadystate` by default. If not # supported, disable by default. If requested but unsupported, raise. @@ -474,8 +483,16 @@ def call_unprocessed( edatas = self.edatas if parameter_mapping is None: parameter_mapping = self.parameter_mapping + # Some parameters may appear estimated in the original compiled model, + # but then are fixed during parameter estimation. These are removed + # from the parameter vector to avoid warnings about unused parameters. + x_dct_free = { + par_id: val + for par_id, val in x_dct.items() + if par_id not in self._fixed_parameter_ids + } ret = self.calculator( - x_dct=x_dct, + x_dct=x_dct_free, sensi_orders=sensi_orders, mode=mode, amici_model=self.amici_model, @@ -654,3 +671,51 @@ def check_gradients_match_finite_differences( return super().check_gradients_match_finite_differences( *args, x=x, x_free=x_free, **kwargs ) + + def update_from_problem( + self, + dim_full: int, + x_free_indices: Sequence[int], + x_fixed_indices: Sequence[int], + x_fixed_vals: Sequence[float], + ): + """Handle fixed parameters.""" + super().update_from_problem( + dim_full=dim_full, + x_free_indices=x_free_indices, + x_fixed_indices=x_fixed_indices, + x_fixed_vals=x_fixed_vals, + ) + + # To make amici aware of fixed parameters, and thus, avoid computing + # unnecessary sensitivities, we need to update the parameter mapping + # and replace the IDs of all fixed parameters by their respective + # values. 
+ self.parameter_mapping = copy.deepcopy(self._parameter_mapping_full) + self._fixed_parameter_ids = [self.x_ids[i] for i in x_fixed_indices] + if not len(x_fixed_indices): + return + + id_to_val = { + self.x_ids[x_idx]: x_val + for x_idx, x_val in zip(x_fixed_indices, x_fixed_vals) + } + for condition_mapping in self.parameter_mapping: + for ( + model_par, + mapped_to_par, + ) in condition_mapping.map_sim_var.items(): + if (val := id_to_val.get(mapped_to_par)) is not None: + condition_mapping.map_sim_var[model_par] = val + for ( + model_par, + mapped_to_par, + ) in condition_mapping.map_sim_fix.items(): + if (val := id_to_val.get(mapped_to_par)) is not None: + condition_mapping.map_sim_fix[model_par] = val + for ( + model_par, + mapped_to_par, + ) in condition_mapping.map_preeq_fix.items(): + if (val := id_to_val.get(mapped_to_par)) is not None: + condition_mapping.map_preeq_fix[model_par] = val diff --git a/pypesto/objective/priors.py b/pypesto/objective/priors.py index 4ffcdaf6a..9164c70b2 100644 --- a/pypesto/objective/priors.py +++ b/pypesto/objective/priors.py @@ -244,11 +244,13 @@ def get_parameter_prior_dict( "parameterScaleUniform", "parameterScaleNormal", "parameterScaleLaplace"} prior_parameters: - Parameters of the priors. Parameters are defined in linear scale. + Parameters of the priors. Parameters are defined in the parameter + scale if the `prior_type` starts with ``parameterScale``, otherwise in + the linear scale. parameter_scale: scale in which the parameter is defined (since a parameter can be log-transformed, while the prior is always defined in the linear - space, unless it starts with "parameterScale") + space, unless it starts with ``parameterScale``). """ log_f, d_log_f_dx, dd_log_f_ddx, res, d_res_dx = _prior_densities( prior_type, prior_parameters diff --git a/pypesto/optimize/ess/__init__.py b/pypesto/optimize/ess/__init__.py index c5f2d0df4..d4c53756a 100644 --- a/pypesto/optimize/ess/__init__.py +++ b/pypesto/optimize/ess/__init__.py @@ -1,6 +1,6 @@ """Enhanced Scatter Search.""" -from .ess import ESSOptimizer +from .ess import ESSExitFlag, ESSOptimizer from .function_evaluator import ( FunctionEvaluator, FunctionEvaluatorMP, diff --git a/pypesto/optimize/ess/ess.py b/pypesto/optimize/ess/ess.py index ca8ffa2f6..2d6f0775e 100644 --- a/pypesto/optimize/ess/ess.py +++ b/pypesto/optimize/ess/ess.py @@ -3,11 +3,12 @@ See papers on ESS :footcite:p:`EgeaBal2009,EgeaMar2010`, CESS :footcite:p:`VillaverdeEge2012`, and saCeSS :footcite:p:`PenasGon2017`. """ +from __future__ import annotations import enum import logging import time -from typing import Callable, Optional, Union +from typing import Protocol from warnings import warn import numpy as np @@ -26,27 +27,98 @@ class ESSExitFlag(int, enum.Enum): - """Exit flags used by :class:`ESSOptimizer`.""" + """Scatter search exit flags. + + Exit flags used by :class:`pypesto.ess.ESSOptimizer` and + :class:`pypesto.ess.SacessOptimizer`. + """ # ESS did not run/finish yet DID_NOT_RUN = 0 - # Exited after reaching maximum number of iterations + # Exited after reaching the maximum number of iterations MAX_ITER = -1 # Exited after exhausting function evaluation budget MAX_EVAL = -2 # Exited after exhausting wall-time budget MAX_TIME = -3 + # Termination because of other reasons than exit criteria + ERROR = -99 + + +class OptimizerFactory(Protocol): + def __call__( + self, max_eval: float, max_walltime_s: float + ) -> pypesto.optimize.Optimizer: + """Create a new optimizer instance. 
+ + Parameters + ---------- + max_eval: + Maximum number of objective function evaluations allowed. + max_walltime_s: + Maximum walltime in seconds. + """ + ... class ESSOptimizer: """Enhanced Scatter Search (ESS) global optimization. - See papers on ESS :footcite:p:`EgeaBal2009,EgeaMar2010`, - CESS :footcite:p:`VillaverdeEge2012`, and saCeSS :footcite:p:`PenasGon2017`. + Scatter search is a meta-heuristic for global optimization. A set of points + (the reference set, RefSet) is iteratively adapted to explore the parameter + space and to follow promising directions. - .. footbibliography:: + This implementation is based on :footcite:p:`EgeaBal2009,EgeaMar2010`, + but does not implement any constraint handling beyond box constraints. + + The basic steps of ESS are: + + * Initialization: Generate a diverse set of points (RefSet) in the + parameter space. + * Recombination: Generate new points by recombining the RefSet points. + * Improvement: Improve the RefSet by replacing points with better ones. + + The steps are repeated until a stopping criterion is met. - .. note: Does not implement any constraint handling beyond box constraints + ESS is gradient-free, unless a gradient-based local optimizer is used + (``local_optimizer``). + + Hyperparameters + --------------- + + Various hyperparameters control the behavior of ESS. + Initialization is controlled by ``dim_refset`` and ``n_diverse``. + Local optimizations are controlled by ``local_optimizer``, ``local_n1``, + ``local_n2``, and ``balance``. + + Exit criteria + ------------- + + The optimization stops if any of the following criteria are met: + + * The maximum number of iterations is reached (``max_iter``). + * The maximum number of objective function evaluations is reached + (``max_eval``). + * The maximum wall-time is reached (``max_walltime_s``). + + One of these criteria needs to be provided. + Note that the wall-time and function evaluation criteria are not checked + after every single function evaluation, and thus, the actual number of + function evaluations may slightly exceed the given value. + + Parallelization + --------------- + + Objective function evaluations inside :class:`ESSOptimizer` can be + parallelized using multiprocessing or multithreading by passing a value + >1 for ``n_procs`` or ``n_threads``, respectively. + + + .. seealso:: + + :class:`pypesto.optimize.ess.sacess.SacessOptimizer` + + .. footbibliography:: """ def __init__( @@ -57,10 +129,9 @@ def __init__( local_n1: int = 1, local_n2: int = 10, balance: float = 0.5, - local_optimizer: Union[ - "pypesto.optimize.Optimizer", - Callable[..., "pypesto.optimize.Optimizer"], - ] = None, + local_optimizer: pypesto.optimize.Optimizer + | OptimizerFactory + | None = None, max_eval=None, n_diverse: int = None, n_procs=None, n_threads=None, max_walltime_s=None, result_includes_refset: bool = False, ): - """Construct new ESS instance. + r"""Construct new ESS instance. For plausible values of hyperparameters, see :footcite:t:`VillaverdeEge2012`. dim_refset: Maximum number of ESS iterations. local_n1: Minimum number of iterations before first local search. + Ignored if ``local_optimizer=None``. local_n2: Minimum number of iterations between consecutive local searches. Maximally one local search per performed in each - iteration. + iteration. Ignored if ``local_optimizer=None``. local_optimizer: Local optimizer for refinement, or a callable that creates an :class:`pypesto.optimize.Optimizer` or ``None`` to skip local searches. 
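As orientation for the interface rewritten in the hunks above: a minimal usage sketch, assuming the pyPESTO version this diff targets. The toy Rosenbrock objective, the constants, and the choice of `ScipyOptimizer` are illustrative, not prescribed by the diff.

```python
import numpy as np
import scipy.optimize

import pypesto
import pypesto.optimize
from pypesto.optimize.ess import ESSOptimizer

# Toy box-constrained problem: 4-dimensional Rosenbrock function.
objective = pypesto.Objective(fun=scipy.optimize.rosen)
problem = pypesto.Problem(
    objective=objective, lb=-5 * np.ones(4), ub=5 * np.ones(4)
)

ess = ESSOptimizer(
    dim_refset=10,  # size of the reference set
    max_iter=20,    # at least one exit criterion must be provided
    # Either a ready optimizer instance, or a factory matching the
    # OptimizerFactory protocol above, e.g.
    # lambda max_eval, max_walltime_s: pypesto.optimize.ScipyOptimizer()
    local_optimizer=pypesto.optimize.ScipyOptimizer(),
)
# Startpoints come from the problem's startpoint method (uniform by default).
result = ess.minimize(problem=problem)
print(result.optimize_result[0].fval)  # the overall best solution is listed first
```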
@@ -104,8 +176,14 @@ def __init__( optimizations and other simulations, and thus, may be exceeded by the duration of a local search. balance: - Quality vs diversity balancing factor [0, 1]; - 0 = only quality; 1 = only diversity + Quality vs. diversity balancing factor with + :math:`0 \leq balance \leq 1`; ``0`` = only quality, + ``1`` = only diversity. + Affects the choice of starting points for local searches. I.e., + whether local optimization should focus on improving the best + solutions found so far (quality), or on exploring new regions of + the parameter space (diversity). + Ignored if ``local_optimizer=None``. n_procs: Number of parallel processes to use for parallel function evaluation. Mutually exclusive with `n_threads`. @@ -144,8 +222,8 @@ def __init__( raise ValueError( "`n_procs` and `n_threads` are mutually exclusive." ) - self.n_procs: Optional[int] = n_procs - self.n_threads: Optional[int] = n_threads + self.n_procs: int | None = n_procs + self.n_threads: int | None = n_threads self.balance: float = balance # After how many iterations a stagnated solution is to be replaced by # a random one. Default value taken from [EgeaMar2010]_ @@ -162,12 +240,14 @@ def __init__( def _initialize(self): """(Re-)Initialize.""" # RefSet - self.refset: Optional[RefSet] = None + self.refset: RefSet | None = None # Overall best parameters found so far - self.x_best: Optional[np.array] = None + self.x_best: np.ndarray | None = None # Overall best function value found so far self.fx_best: float = np.inf # Results from local searches (only those with finite fval) + # (there is potential to save memory here by only keeping the + # parameters in memory and not the full result) self.local_solutions: list[OptimizerResult] = [] # Index of current iteration self.n_iter: int = 0 @@ -177,15 +257,15 @@ def _initialize(self): # Whether self.x_best has changed in the current iteration self.x_best_has_changed: bool = False self.exit_flag: ESSExitFlag = ESSExitFlag.DID_NOT_RUN - self.evaluator: Optional[FunctionEvaluator] = None - self.starttime: Optional[float] = None + self.evaluator: FunctionEvaluator | None = None + self.starttime: float | None = None self.history: MemoryHistory = MemoryHistory() def _initialize_minimize( self, problem: Problem = None, startpoint_method: StartpointMethod = None, - refset: Optional[RefSet] = None, + refset: RefSet | None = None, ): """Initialize for optimizations. @@ -242,7 +322,7 @@ def minimize( self, problem: Problem = None, startpoint_method: StartpointMethod = None, - refset: Optional[RefSet] = None, + refset: RefSet | None = None, ) -> pypesto.Result: """Minimize the given objective. @@ -329,7 +409,6 @@ def _create_result(self) -> pypesto.Result: for i, optimizer_result in enumerate(self.local_solutions): i_result += 1 optimizer_result.id = f"Local solution {i}" - optimizer_result.optimizer = str(self.local_optimizer) result.optimize_result.append(optimizer_result) if self._result_includes_refset: @@ -384,7 +463,7 @@ def _get_remaining_eval(self): return np.inf return self.max_eval - self.evaluator.n_eval - def _combine_solutions(self) -> tuple[np.array, np.array]: + def _combine_solutions(self) -> tuple[np.ndarray, np.ndarray]: """Combine solutions and evaluate. Creates the next generation from the RefSet by pair-wise combination @@ -418,7 +497,7 @@ def _combine_solutions(self) -> tuple[np.array, np.array]: break return y, fy - def _combine(self, i, j) -> np.array: + def _combine(self, i, j) -> np.ndarray: """Combine RefSet members ``i`` and ``j``. 
Samples a new point from a biased hyper-rectangle derived from the @@ -463,7 +542,7 @@ def _combine(self, i, j) -> np.array: ) def _do_local_search( - self, x_best_children: np.array, fx_best_children: np.array + self, x_best_children: np.ndarray, fx_best_children: np.ndarray ) -> None: """ Perform a local search to refine the next generation. @@ -487,14 +566,25 @@ def _do_local_search( quality_order = np.argsort(fx_best_children) # compute minimal distance between the best children and all local # optima found so far - min_distances = np.array( - np.min( - np.linalg.norm( - y_i - optimizer_result.x[optimizer_result.free_indices] - ) - for optimizer_result in self.local_solutions + min_distances = ( + np.fromiter( + ( + min( + np.linalg.norm( + y_i + - optimizer_result.x[ + optimizer_result.free_indices + ] + ) + for optimizer_result in self.local_solutions + ) + for y_i in x_best_children + ), + dtype=np.float64, + count=len(x_best_children), ) - for y_i in x_best_children + if len(self.local_solutions) + else np.zeros(len(x_best_children)) ) # sort by furthest distance to existing local optima diversity_order = np.argsort(min_distances)[::-1] diff --git a/pypesto/optimize/ess/refset.py b/pypesto/optimize/ess/refset.py index 0e3cff403..dfd8a4d42 100644 --- a/pypesto/optimize/ess/refset.py +++ b/pypesto/optimize/ess/refset.py @@ -29,8 +29,8 @@ def __init__( self, dim: int, evaluator: FunctionEvaluator, - x: Optional[np.array] = None, - fx: Optional[np.array] = None, + x: Optional[np.ndarray] = None, + fx: Optional[np.ndarray] = None, ): """Construct. @@ -65,7 +65,7 @@ def __init__( self.fx = fx self.n_stuck = np.zeros(shape=[dim]) - self.attributes: dict[Any, np.array] = {} + self.attributes: dict[Any, np.ndarray] = {} def __repr__(self): fx = ( @@ -97,7 +97,9 @@ def initialize_random( x_diverse, fx_diverse = self.evaluator.multiple_random(n_diverse) self.initialize_from_array(x_diverse=x_diverse, fx_diverse=fx_diverse) - def initialize_from_array(self, x_diverse: np.array, fx_diverse: np.array): + def initialize_from_array( + self, x_diverse: np.ndarray, fx_diverse: np.ndarray + ): """Create an initial reference set using the provided points. Populate half of the RefSet using the best given solutions and fill the @@ -168,7 +170,7 @@ def normalize(x): x[j], self.fx[j] = self.evaluator.single_random() self.sort() - def update(self, i: int, x: np.array, fx: float): + def update(self, i: int, x: np.ndarray, fx: float): """Update a RefSet entry.""" self.x[i] = x self.fx[i] = fx @@ -179,7 +181,7 @@ def replace_by_random(self, i: int): self.x[i], self.fx[i] = self.evaluator.single_random() self.n_stuck[i] = 0 - def add_attribute(self, name: str, values: np.array): + def add_attribute(self, name: str, values: np.ndarray): """ Add an attribute array to the refset members. diff --git a/pypesto/optimize/ess/sacess.py b/pypesto/optimize/ess/sacess.py index c54ef0ad6..04428d9e1 100644 --- a/pypesto/optimize/ess/sacess.py +++ b/pypesto/optimize/ess/sacess.py @@ -7,6 +7,7 @@ import multiprocessing import os import time +from contextlib import suppress from dataclasses import dataclass from math import ceil, sqrt from multiprocessing import get_context @@ -20,9 +21,13 @@ import pypesto +from ... 
import MemoryHistory from ...startpoint import StartpointMethod from ...store.read_from_hdf5 import read_result -from ...store.save_to_hdf5 import write_result +from ...store.save_to_hdf5 import ( + OptimizationResultHDF5Writer, + ProblemHDF5Writer, +) from ..optimize import Problem from .ess import ESSExitFlag, ESSOptimizer from .function_evaluator import create_function_evaluator @@ -41,8 +46,10 @@ class SacessOptimizer: """SACESS optimizer. - A shared-memory-based implementation of the SaCeSS algorithm presented in - :footcite:t:`PenasGon2017`. Multiple processes (`workers`) run + A shared-memory-based implementation of the + `Self-Adaptive Cooperative Enhanced Scatter Search` (SaCeSS) algorithm + presented in :footcite:t:`PenasGon2017`. This is a meta-heuristic for + global optimization. Multiple processes (`workers`) run :class:`enhanced scatter searches (ESSs) ` in parallel. After each ESS iteration, depending on the outcome, there is a chance of exchanging good parameters, and changing ESS hyperparameters to those of @@ -51,6 +58,41 @@ class SacessOptimizer: :class:`SacessOptimizer` can be used with or without a local optimizer, but it is highly recommended to use one. + A basic example using :class:`SacessOptimizer` to minimize the Rosenbrock + function: + + >>> from pypesto.optimize import SacessOptimizer + >>> from pypesto.problem import Problem + >>> from pypesto.objective import Objective + >>> import scipy as sp + >>> import numpy as np + >>> import logging + >>> # Define some test Problem + >>> objective = Objective( + ... fun=sp.optimize.rosen, + ... grad=sp.optimize.rosen_der, + ... hess=sp.optimize.rosen_hess, + ... ) + >>> dim = 6 + >>> problem = Problem( + ... objective=objective, + ... lb=-5 * np.ones((dim, 1)), + ... ub=5 * np.ones((dim, 1)), + ... ) + >>> # Create and run the optimizer + >>> sacess = SacessOptimizer( + ... num_workers=2, + ... max_walltime_s=5, + ... sacess_loglevel=logging.WARNING + ... ) + >>> result = sacess.minimize(problem) + + .. seealso:: + + :class:`pypesto.optimize.ess.ess.ESSOptimizer` + + References + ---------- .. footbibliography:: Attributes @@ -83,11 +125,25 @@ def __init__( process. I.e., the length of this list is the number of ESSs. Ideally, this list contains some more conservative and some more aggressive configurations. - Resource limits such as ``max_eval`` apply to a single CESS + Resource limits such as ``max_eval`` apply to a single ESS iteration, not to the full search. Mutually exclusive with ``num_workers``. + Recommended default settings can be obtained from - :func:`get_default_ess_options`. + :func:`get_default_ess_options`. For example, to run + :class:`SacessOptimizer` without a local optimizer, use: + + >>> from pypesto.optimize.ess import get_default_ess_options + >>> ess_init_args = get_default_ess_options( + ... num_workers=12, + ... dim=10, # usually problem.dim + ... local_optimizer=False, + ... ) + >>> ess_init_args # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + [{'dim_refset': 5, 'balance': 0.0, 'local_n1': 1, 'local_n2': 1}, + ... + {'dim_refset': 7, 'balance': 1.0, 'local_n1': 4, 'local_n2': 4}] + num_workers: Number of workers to be used. If this argument is given, (different) default ESS settings will be used for each worker. @@ -109,12 +165,13 @@ def __init__( Directory for temporary files. This defaults to a directory in the current working directory named ``SacessOptimizerTemp-{random suffix}``. When setting this option, make sure any optimizers running in - parallel have a unique `tmpdir`. 
+ parallel have a unique `tmpdir`. Expected to be empty. mp_start_method: The start method for the multiprocessing context. - See :mod:`multiprocessing` for details. + See :mod:`multiprocessing` for details. Running `SacessOptimizer` + under Jupyter may require ``mp_start_method="fork"``. options: - Further optimizer hyperparameters. + Further optimizer hyperparameters, see :class:`SacessOptions`. """ if (num_workers is None and ess_init_args is None) or ( num_workers is not None and ess_init_args is not None @@ -178,11 +235,12 @@ def minimize( Returns ------- - Result object with optimized parameters in - :attr:`pypesto.Result.optimize_result`. - Results are sorted by objective. At least the best parameters are - included. Additional results may be included - this is subject to - change. + _: + Result object with optimized parameters in + :attr:`pypesto.Result.optimize_result`. + Results are sorted by objective. At least the best parameters are + included. Additional results may be included - this is subject to + change. """ if startpoint_method is not None: warn( @@ -271,19 +329,27 @@ def minimize( self.histories = [ worker_result.history for worker_result in self.worker_results ] - + self.exit_flag = min( + worker_result.exit_flag for worker_result in self.worker_results + ) result = self._create_result(problem) walltime = time.time() - start_time n_eval_total = sum( worker_result.n_eval for worker_result in self.worker_results ) - logger.info( - f"{self.__class__.__name__} stopped after {walltime:3g}s " - f"and {n_eval_total} objective evaluations " - f"with global best {result.optimize_result[0].fval}." - ) - + if len(result.optimize_result): + logger.info( + f"{self.__class__.__name__} stopped after {walltime:3g}s " + f"and {n_eval_total} objective evaluations " + f"with global best {result.optimize_result[0].fval}." + ) + else: + logger.error( + f"{self.__class__.__name__} stopped after {walltime:3g}s " + f"and {n_eval_total} objective evaluations without producing " + "a result." + ) return result def _create_result(self, problem: Problem) -> pypesto.Result: @@ -292,25 +358,40 @@ def _create_result(self, problem: Problem) -> pypesto.Result: Creates an overall Result object from the results saved by the workers. """ # gather results from workers and delete temporary result files - result = None + result = pypesto.Result() + retry_after_sleep = True for worker_idx in range(self.num_workers): tmp_result_filename = SacessWorker.get_temp_result_filename( worker_idx, self._tmpdir ) + tmp_result = None try: tmp_result = read_result( tmp_result_filename, problem=False, optimize=True ) except FileNotFoundError: # wait and retry, maybe the file wasn't found due to some filesystem latency issues - time.sleep(5) - tmp_result = read_result( - tmp_result_filename, problem=False, optimize=True - ) + if retry_after_sleep: + time.sleep(5) + # waiting once is enough - don't wait for every worker + retry_after_sleep = False + + try: + tmp_result = read_result( + tmp_result_filename, problem=False, optimize=True + ) + except FileNotFoundError: + logger.error( + f"Worker {worker_idx} did not produce a result." + ) + continue + else: + logger.error( + f"Worker {worker_idx} did not produce a result." 
+ ) + continue - if result is None: - result = tmp_result - else: + if tmp_result: result.optimize_result.append( tmp_result.optimize_result, sort=False, @@ -322,7 +403,8 @@ def _create_result(self, problem: Problem) -> pypesto.Result: filename = SacessWorker.get_temp_result_filename( worker_idx, self._tmpdir ) - os.remove(filename) + with suppress(FileNotFoundError): + os.remove(filename) # delete tmpdir if empty try: self._tmpdir.rmdir() @@ -344,6 +426,7 @@ class SacessManager: Attributes ---------- + _dim: Dimension of the optimization problem _num_workers: Number of workers _ess_options: ESS options for each worker _best_known_fx: Best objective value encountered so far @@ -357,6 +440,7 @@ class SacessManager: _rejection_threshold: Threshold for relative objective improvements that incoming solutions have to pass to be accepted _lock: Lock for accessing shared state. + _terminate: Flag to signal termination of the SACESS run to workers _logger: A logger instance _options: Further optimizer hyperparameters. """ @@ -368,6 +452,7 @@ def __init__( dim: int, options: SacessOptions = None, ): + self._dim = dim self._options = options or SacessOptions() self._num_workers = len(ess_options) self._ess_options = [shmem_manager.dict(o) for o in ess_options] @@ -387,12 +472,13 @@ def __init__( self._worker_scores = shmem_manager.Array( "d", range(self._num_workers) ) + self._terminate = shmem_manager.Value("b", False) self._worker_comms = shmem_manager.Array("i", [0] * self._num_workers) self._lock = shmem_manager.RLock() self._logger = logging.getLogger() self._result_queue = shmem_manager.Queue() - def get_best_solution(self) -> tuple[np.array, float]: + def get_best_solution(self) -> tuple[np.ndarray, float]: """Get the best objective value and corresponding parameters.""" with self._lock: return np.array(self._best_known_x), self._best_known_fx.value @@ -416,7 +502,7 @@ def reconfigure_worker(self, worker_idx: int) -> dict: def submit_solution( self, - x: np.array, + x: np.ndarray, fx: float, sender_idx: int, elapsed_time_s: float, @@ -497,6 +583,16 @@ def submit_solution( ) self._rejections.value = 0 + def abort(self): + """Abort the SACESS run.""" + with self._lock: + self._terminate.value = True + + def aborted(self) -> bool: + """Whether this run was aborted.""" + with self._lock: + return self._terminate.value + class SacessWorker: """A SACESS worker. @@ -558,6 +654,10 @@ def run( ): self._start_time = time.time() + # index of the local solution in ESSOptimizer.local_solutions + # that was most recently saved by _autosave + last_saved_local_solution = -1 + self._logger.setLevel(self._loglevel) # Set the manager logger to one created within the current process self._manager._logger = self._logger @@ -588,24 +688,18 @@ def run( ess = self._setup_ess(startpoint_method) # run ESS until exit criteria are met, but start at least one iteration - while self._keep_going() or ess.n_iter == 0: + while self._keep_going(ess) or ess.n_iter == 0: # perform one ESS iteration ess._do_iteration() - if self._tmp_result_file: - # TODO maybe not in every iteration? 
- ess_results = ess._create_result() - write_result( - ess_results, - self._tmp_result_file, - overwrite=True, - optimize=True, - ) # check if the best solution of the last local ESS is sufficiently # better than the sacess-wide best solution self.maybe_update_best(ess.x_best, ess.fx_best) self._best_known_fx = min(ess.fx_best, self._best_known_fx) + self._autosave(ess, last_saved_local_solution) + last_saved_local_solution = len(ess.local_solutions) - 1 + self._cooperate() self._maybe_adapt(problem) @@ -614,19 +708,42 @@ def run( f"(best: {self._best_known_fx}, " f"n_eval: {ess.evaluator.n_eval})." ) - - ess.history.finalize(exitflag=ess.exit_flag.name) - worker_result = SacessWorkerResult( - x=ess.x_best, - fx=ess.fx_best, - history=ess.history, - n_eval=ess.evaluator.n_eval, - n_iter=ess.n_iter, - exit_flag=ess.exit_flag, - ) + self._finalize(ess) + + def _finalize(self, ess: ESSOptimizer = None): + """Finalize the worker.""" + # Whatever happens here, we need to put something to the queue before + # returning to avoid deadlocks. + worker_result = None + if ess is not None: + try: + ess.history.finalize(exitflag=ess.exit_flag.name) + ess._report_final() + worker_result = SacessWorkerResult( + x=ess.x_best, + fx=ess.fx_best, + history=ess.history, + n_eval=ess.evaluator.n_eval, + n_iter=ess.n_iter, + exit_flag=ess.exit_flag, + ) + except Exception as e: + self._logger.exception( + f"Worker {self._worker_idx} failed to finalize: {e}" + ) + if worker_result is None: + # Create some dummy result + worker_result = SacessWorkerResult( + x=np.full(self._manager._dim, np.nan), + fx=np.nan, + history=MemoryHistory(), + n_eval=0, + n_iter=0, + exit_flag=ESSExitFlag.ERROR, + ) self._manager._result_queue.put(worker_result) + self._logger.debug(f"Final configuration: {self._ess_kwargs}") - ess._report_final() def _setup_ess(self, startpoint_method: StartpointMethod) -> ESSOptimizer: """Run ESS.""" @@ -703,7 +820,7 @@ def _maybe_adapt(self, problem: Problem): f"neval: {self._neval} <= {problem.dim * self._options.adaptation_min_evals}." ) - def maybe_update_best(self, x: np.array, fx: float): + def maybe_update_best(self, x: np.ndarray, fx: float): """Maybe update the best known solution and send it to the manager.""" rel_change = ( abs((fx - self._best_known_fx) / fx) if fx != 0 else np.nan @@ -743,7 +860,7 @@ def maybe_update_best(self, x: np.array, fx: float): ) @staticmethod - def replace_solution(refset: RefSet, x: np.array, fx: float): + def replace_solution(refset: RefSet, x: np.ndarray, fx: float): """Replace the global refset member by the given solution.""" # [PenasGon2017]_ page 8, top if "cooperative_solution" not in refset.attributes: @@ -768,7 +885,7 @@ def replace_solution(refset: RefSet, x: np.array, fx: float): fx=fx, ) - def _keep_going(self): + def _keep_going(self, ess: ESSOptimizer) -> bool: """Check exit criteria. Returns @@ -777,14 +894,69 @@ def _keep_going(self): """ # elapsed time if time.time() - self._start_time >= self._max_walltime_s: - self.exit_flag = ESSExitFlag.MAX_TIME + ess.exit_flag = ESSExitFlag.MAX_TIME self._logger.debug( f"Max walltime ({self._max_walltime_s}s) exceeded." ) return False - + # other reasons for termination (some worker failed, ...) 
+        if self._manager.aborted():
+            ess.exit_flag = ESSExitFlag.ERROR
+            self._logger.debug("Manager requested termination.")
+            return False
         return True
 
+    def abort(self):
+        """Send signal to abort."""
+        self._logger.error(f"Worker {self._worker_idx} aborting.")
+        # signal to manager
+        self._manager.abort()
+
+        self._finalize(None)
+
+    def _autosave(self, ess: ESSOptimizer, last_saved_local_solution: int):
+        """Save intermediate results.
+
+        If a temporary result file is set, save (part of) the current state
+        of the ESS to that file.
+
+        We save the current best solution and the local optimizer results.
+        """
+        if not self._tmp_result_file:
+            return
+
+        # save problem in first iteration
+        if ess.n_iter == 1:
+            pypesto_problem_writer = ProblemHDF5Writer(self._tmp_result_file)
+            pypesto_problem_writer.write(
+                ess.evaluator.problem, overwrite=False
+            )
+
+        opt_res_writer = OptimizationResultHDF5Writer(self._tmp_result_file)
+        for i in range(
+            last_saved_local_solution + 1, len(ess.local_solutions)
+        ):
+            optimizer_result = ess.local_solutions[i]
+            optimizer_result.id = str(i + ess.n_iter)
+            opt_res_writer.write_optimizer_result(
+                optimizer_result, overwrite=False
+            )
+
+        # save the current best solution
+        optimizer_result = pypesto.OptimizerResult(
+            id=str(len(ess.local_solutions) + ess.n_iter),
+            x=ess.x_best,
+            fval=ess.fx_best,
+            message=f"Global best (iteration {ess.n_iter})",
+            time=time.time() - ess.starttime,
+            n_fval=ess.evaluator.n_eval,
+            optimizer=str(ess),
+        )
+        optimizer_result.update_to_full(ess.evaluator.problem)
+        opt_res_writer.write_optimizer_result(
+            optimizer_result, overwrite=False
+        )
+
     @staticmethod
     def get_temp_result_filename(worker_idx: int, tmpdir: str | Path) -> str:
         return str(Path(tmpdir, f"sacess-{worker_idx:02d}_tmp.h5").absolute())
@@ -800,15 +972,24 @@ def _run_worker(
 
     Helper function as entrypoint for sacess worker processes.
     """
-    # different random seeds per process
-    np.random.seed((os.getpid() * int(time.time() * 1000)) % 2**32)
-
-    # Forward log messages to the logging process
-    h = logging.handlers.QueueHandler(log_process_queue)
-    worker._logger = logging.getLogger(multiprocessing.current_process().name)
-    worker._logger.addHandler(h)
+    try:
+        # different random seeds per process
+        np.random.seed((os.getpid() * int(time.time() * 1000)) % 2**32)
+
+        # Forward log messages to the logging process
+        h = logging.handlers.QueueHandler(log_process_queue)
+        worker._logger = logging.getLogger(
+            multiprocessing.current_process().name
+        )
+        worker._logger.addHandler(h)
 
-    return worker.run(problem=problem, startpoint_method=startpoint_method)
+        return worker.run(problem=problem, startpoint_method=startpoint_method)
+    except Exception as e:
+        with suppress(Exception):
+            worker._logger.exception(
+                f"Worker {worker._worker_idx} failed: {e}"
+            )
+        worker.abort()
 
 
 def get_default_ess_options(
@@ -841,6 +1022,11 @@
         or a :obj:`Callable` returning an optimizer instance. The latter can be
         used to propagate walltime limits to the local optimizers. See
         :meth:`SacessFidesFactory.__call__` for an example.
+        The current default optimizer assumes that the optimized objective
+        function can provide its gradient. If this is not the case, the user
+        should provide a different local optimizer or consider using
+        :class:`pypesto.objective.finite_difference.FD` to approximate the
+        gradient using finite differences.
     """
     min_dimrefset = 5
 
@@ -1086,7 +1272,7 @@ class SacessWorkerResult:
         Exit flag of the optimization process.
""" - x: np.array + x: np.ndarray fx: float n_eval: int n_iter: int diff --git a/pypesto/optimize/optimizer.py b/pypesto/optimize/optimizer.py index b05570e73..7a6d83488 100644 --- a/pypesto/optimize/optimizer.py +++ b/pypesto/optimize/optimizer.py @@ -39,7 +39,7 @@ def __init__(self, optimizer: str): def hierarchical_decorator(minimize): """Add inner parameters to the optimizer result. - Default decorator for the minimize() method. + Default decorator for the :meth:`Optimizer.minimize` method. """ @wraps(minimize) @@ -81,7 +81,7 @@ def wrapped_minimize( def history_decorator(minimize): """Initialize and extract information stored in the history. - Default decorator for the minimize() method. + Default decorator for the :meth:`Optimizer.minimize` method. """ @wraps(minimize) @@ -140,7 +140,11 @@ def wrapped_minimize( logger.error(f"start {id} failed:\n{trace}") result = OptimizerResult( - x0=x0, exitflag=-1, message=str(err), id=id + x0=x0, + exitflag=-1, + message=str(err), + id=id, + optimizer=str(self), ) else: raise @@ -163,7 +167,7 @@ def wrapped_minimize( def time_decorator(minimize): """Measure time of optimization. - Default decorator for the minimize() method to take time. + Default decorator for the :meth:`Optimizer.minimize` method to take time. Currently, the method time.time() is used, which measures the wall-clock time. """ @@ -196,8 +200,8 @@ def wrapped_minimize( def fix_decorator(minimize): """Include also fixed parameters in the result arrays of minimize(). - Default decorator for the minimize() method (nans will be inserted in the - derivatives). + Default decorator for the :meth:`Optimizer.minimize` method (nans will be + inserted in the derivatives). """ @wraps(minimize) @@ -523,6 +527,7 @@ def fun(x): hess=getattr(res, "hess", None), exitflag=res.status, message=res.message, + optimizer=str(self), ) return optimizer_result @@ -544,7 +549,7 @@ def get_default_options(self): class IpoptOptimizer(Optimizer): - """Use IpOpt (https://pypi.org/project/ipopt/) for optimization.""" + """Use Ipopt (https://pypi.org/project/cyipopt/) for optimization.""" def __init__(self, options: dict = None): """ @@ -555,6 +560,9 @@ def __init__(self, options: dict = None): options: Options are directly passed on to `cyipopt.minimize_ipopt`, except for the `approx_grad` option, which is handled separately. + + For a list of available options, see the Ipopt documentation + (https://coin-or.github.io/Ipopt/OPTIONS.html). """ super().__init__() self.approx_grad = False @@ -594,7 +602,7 @@ def minimize( jac = objective.get_grad else: raise ValueError( - "For IPOPT, the objective must either be able to return " + "For Ipopt, the objective must either be able to return " "gradients or the `approx_grad` must be set to True." 
) @@ -612,7 +620,10 @@ def minimize( # the ipopt return object is a scipy.optimize.OptimizeResult return OptimizerResult( - x=ret.x, exitflag=ret.status, message=ret.message + x=ret.x, + exitflag=ret.status, + message=ret.message, + optimizer=str(self), ) def is_least_squares(self): @@ -630,7 +641,7 @@ def __init__(self, options: dict = None): if self.options is None: self.options = DlibOptimizer.get_default_options(self) elif "maxiter" not in self.options: - raise KeyError("Dlib options are missing the key word " "maxiter.") + raise KeyError("Dlib options are missing the keyword maxiter.") def __repr__(self) -> str: rep = f"<{self.__class__.__name__}" @@ -677,7 +688,7 @@ def get_fval_vararg(*x): 0.002, ) - optimizer_result = OptimizerResult() + optimizer_result = OptimizerResult(optimizer=str(self)) return optimizer_result @@ -737,7 +748,9 @@ def minimize( problem.objective.get_fval, lb, ub, **self.options ) - optimizer_result = OptimizerResult(x=np.array(xopt), fval=fopt) + optimizer_result = OptimizerResult( + x=np.array(xopt), fval=fopt, optimizer=str(self) + ) return optimizer_result @@ -821,7 +834,7 @@ def minimize( ) optimizer_result = OptimizerResult( - x=np.array(result[0]), fval=result[1] + x=np.array(result[0]), fval=result[1], optimizer=str(self) ) return optimizer_result @@ -901,7 +914,7 @@ def minimize( ) optimizer_result = OptimizerResult( - x=np.array(result.x), fval=result.fun + x=np.array(result.x), fval=result.fun, optimizer=str(self) ) return optimizer_result @@ -1019,6 +1032,7 @@ def successively_working_fval(swarm: np.ndarray) -> np.ndarray: optimizer_result = OptimizerResult( x=pos, fval=float(cost), + optimizer=str(self), ) return optimizer_result @@ -1169,7 +1183,7 @@ def __repr__(self) -> str: if self.options is not None: rep += f" options={self.options}" if self.local_options is not None: - rep += f" local_options={self.local_methods}" + rep += f" local_options={self.local_options}" return rep + ">" @minimize_decorator_collection @@ -1249,6 +1263,7 @@ def nlopt_objective(x, grad): fval=opt.last_optimum_value(), message=msg, exitflag=opt.last_optimize_result(), + optimizer=str(self), ) return optimizer_result @@ -1433,6 +1448,7 @@ def minimize( hess=opt.hess, message=msg, exitflag=opt.exitflag, + optimizer=str(self), ) return optimizer_result diff --git a/pypesto/optimize/task.py b/pypesto/optimize/task.py index 10ae83dd8..7482097e1 100644 --- a/pypesto/optimize/task.py +++ b/pypesto/optimize/task.py @@ -63,7 +63,6 @@ def execute(self) -> OptimizerResult: history_options=self.history_options, optimize_options=self.optimize_options, ) - optimizer_result.optimizer = str(self.optimizer) if not self.optimize_options.report_hess: optimizer_result.hess = None diff --git a/pypesto/petab/objective_creator.py b/pypesto/petab/objective_creator.py index 72f98cf03..697c0f9a3 100644 --- a/pypesto/petab/objective_creator.py +++ b/pypesto/petab/objective_creator.py @@ -7,7 +7,6 @@ import os import re import shutil -import sys import warnings from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence @@ -145,10 +144,6 @@ def create_model( f"compilation: Not a folder." ) - # add module to path - if self.output_folder not in sys.path: - sys.path.insert(0, self.output_folder) - # compile if self._must_compile(force_compile): logger.info( diff --git a/pypesto/select/__init__.py b/pypesto/select/__init__.py index eaff283d7..aa927bc39 100644 --- a/pypesto/select/__init__.py +++ b/pypesto/select/__init__.py @@ -7,7 +7,8 @@ """ from . 
import postprocessors
-from .misc import model_to_pypesto_problem
+from .misc import SacessMinimizeMethod, model_to_pypesto_problem
+from .model_problem import ModelProblem
 from .problem import Problem
 
 try:
diff --git a/pypesto/select/method.py b/pypesto/select/method.py
index 36a726be6..ec30eea97 100644
--- a/pypesto/select/method.py
+++ b/pypesto/select/method.py
@@ -3,16 +3,21 @@
 import logging
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Callable, Optional
+from typing import Any, Callable
 
 import numpy as np
 import petab_select
 from petab_select import (
+    CANDIDATE_SPACE,
+    MODELS,
+    PREDECESSOR_MODEL,
+    UNCALIBRATED_MODELS,
     VIRTUAL_INITIAL_MODEL,
     CandidateSpace,
     Criterion,
     Method,
     Model,
+    Models,
 )
 
 from ..problem import Problem
@@ -202,8 +207,7 @@ class MethodCaller:
         example, in `ForwardSelector`, test models are compared to the
         previously selected model.
     calibrated_models:
-        The calibrated models of the model selection, as a `dict` where keys
-        are model hashes and values are models.
+        All calibrated models of the model selection.
     limit:
         Limit the number of calibrated models. NB: the number of accepted
         models may (likely) be fewer.
@@ -213,6 +217,11 @@ class MethodCaller:
         Specify the predecessor (initial) model for the model selection
         algorithm. If ``None``, then the algorithm will generate an initial
         predecessor model if required.
+    user_calibrated_models:
+        Supply calibration results for models yourself, as a list of models.
+        If a model with the same hash is encountered in the current model
+        selection run, and the user-supplied calibrated model has the
+        `criterion` value set, the model will not be calibrated again.
     select_first_improvement:
         If ``True``, model selection will terminate as soon as a better model
         is found. If `False`, all candidate models will be tested.
@@ -224,7 +233,7 @@ class MethodCaller:
     def __init__(
         self,
         petab_select_problem: petab_select.Problem,
-        calibrated_models: dict[str, Model],
+        calibrated_models: Models,
         # Arguments/attributes that can simply take the default value here.
         criterion_threshold: float = 0.0,
         limit: int = np.inf,
@@ -245,6 +254,7 @@ def __init__(
         # TODO deprecated
         model_to_pypesto_problem_method: Callable[[Any], Problem] = None,
         model_problem_options: dict = None,
+        user_calibrated_models: list[Model] = None,
     ):
         """Arguments are used in every `__call__`, unless overridden."""
         self.petab_select_problem = petab_select_problem
@@ -256,6 +266,10 @@ def __init__(
         self.select_first_improvement = select_first_improvement
         self.startpoint_latest_mle = startpoint_latest_mle
 
+        self.user_calibrated_models = Models()
+        if user_calibrated_models is not None:
+            self.user_calibrated_models = user_calibrated_models
+
         self.logger = MethodLogger()
 
         # TODO deprecated
@@ -335,10 +349,7 @@ def __init__(
         # May have changed from `None` to `petab_select.VIRTUAL_INITIAL_MODEL`
         self.predecessor_model = self.candidate_space.get_predecessor_model()
 
-    def __call__(
-        self,
-        newly_calibrated_models: Optional[dict[str, Model]] = None,
-    ) -> tuple[list[Model], dict[str, Model]]:
+    def __call__(self) -> Models:
         """Run a single iteration of the model selection method.
 
         A single iteration here refers to calibration of all candidate models.
@@ -347,63 +358,72 @@ def __call__(
         of all models that have both: the same 3 estimated parameters; and 1
         additional estimated parameter.
 
-        The input `newly_calibrated_models` is from the previous iteration. The
-        output `newly_calibrated_models` is from the current iteration.
-
-        Parameters
-        ----------
-        newly_calibrated_models:
-            The newly calibrated models from the previous iteration.
-
         Returns
         -------
-        A 2-tuple, with the following values:
-
-           1. the predecessor model for the newly calibrated models; and
-           2. the newly calibrated models, as a `dict` where keys are model
-              hashes and values are models.
+        The newly calibrated models.
         """
         # All models calibrated in this iteration (the return value).
         self.logger.new_selection()
 
-        candidate_space = petab_select.ui.candidates(
+        iteration = petab_select.ui.start_iteration(
             problem=self.petab_select_problem,
             candidate_space=self.candidate_space,
             limit=self.limit,
-            calibrated_models=self.calibrated_models,
-            newly_calibrated_models=newly_calibrated_models,
-            excluded_model_hashes=self.calibrated_models.keys(),
             criterion=self.criterion,
+            user_calibrated_models=self.user_calibrated_models,
         )
-        predecessor_model = self.candidate_space.predecessor_model
 
-        if not candidate_space.models:
+        if not iteration[UNCALIBRATED_MODELS]:
             raise StopIteration("No valid models found.")
 
         # TODO parallelize calibration (maybe not sensible if
         #  `self.select_first_improvement`)
-        newly_calibrated_models = {}
-        for candidate_model in candidate_space.models:
-            # autoruns calibration
-            self.new_model_problem(model=candidate_model)
-            newly_calibrated_models[
-                candidate_model.get_hash()
-            ] = candidate_model
+        calibrated_models = Models()
+        for model in iteration[UNCALIBRATED_MODELS]:
+            if (
+                model.get_criterion(
+                    criterion=self.criterion,
+                    compute=True,
+                    raise_on_failure=False,
+                )
+                is not None
+            ):
+                self.logger.log(
+                    message=(
+                        "Unexpected calibration result already available for "
+                        f"model: `{model.get_hash()}`. Skipping "
+                        "calibration."
+                    ),
+                    level="warning",
+                )
+            else:
+                self.new_model_problem(model=model)
+
+            calibrated_models.append(model)
 
             method_signal = self.handle_calibrated_model(
-                model=candidate_model,
-                predecessor_model=predecessor_model,
+                model=model,
+                predecessor_model=iteration[PREDECESSOR_MODEL],
             )
             if method_signal.proceed == MethodSignalProceed.STOP:
                 break
 
-        self.calibrated_models.update(newly_calibrated_models)
+        iteration_results = petab_select.ui.end_iteration(
+            problem=self.petab_select_problem,
+            candidate_space=iteration[CANDIDATE_SPACE],
+            calibrated_models=calibrated_models,
+        )
+
+        self.calibrated_models += iteration_results[MODELS]
 
-        return predecessor_model, newly_calibrated_models
+        return iteration_results[MODELS]
 
     def handle_calibrated_model(
         self,
         model: Model,
-        predecessor_model: Optional[Model],
+        predecessor_model: Model,
     ) -> MethodSignal:
         """Handle the model selection method, given a new calibrated model.
 
@@ -432,8 +452,7 @@ def handle_calibrated_model(
         # Reject the model if it doesn't improve on the predecessor model.
         if (
-            predecessor_model is not None
-            and predecessor_model != VIRTUAL_INITIAL_MODEL
+            predecessor_model.hash != VIRTUAL_INITIAL_MODEL.hash
             and not self.model1_gt_model0(
                 model1=model, model0=predecessor_model
             )
@@ -527,7 +546,9 @@ def new_model_problem(
             predecessor_model = self.calibrated_models[
                 model.predecessor_model_hash
             ]
-            if str(model.petab_yaml) != str(predecessor_model.petab_yaml):
+            if str(model.model_subspace_petab_yaml) != str(
+                predecessor_model.model_subspace_petab_yaml
+            ):
                 raise NotImplementedError(
                     "The PEtab YAML files differ between the model and its "
                     "predecessor model.
This may imply different (fixed union " diff --git a/pypesto/select/misc.py b/pypesto/select/misc.py index 99fb5fae5..870e3339c 100644 --- a/pypesto/select/misc.py +++ b/pypesto/select/misc.py @@ -2,6 +2,7 @@ import logging from collections.abc import Iterable +from pathlib import Path import pandas as pd import petab.v1 as petab @@ -10,7 +11,13 @@ from petab_select import Model, parameter_string_to_value from petab_select.constants import PETAB_PROBLEM +from ..history import Hdf5History from ..objective import Objective +from ..optimize import Optimizer +from ..optimize.ess import ( + SacessOptimizer, + get_default_ess_options, +) from ..petab import PetabImporter from ..problem import Problem @@ -164,3 +171,78 @@ def correct_x_guesses( corrected_x_guess.append(corrected_value) corrected_x_guesses.append(corrected_x_guess) return corrected_x_guesses + + +class SacessMinimizeMethod: + """Create a minimize method for SaCeSS that adapts to each problem. + + When a pyPESTO SaCeSS optimizer is created, it takes the problem + dimension as input. Hence, an optimizer needs to be constructed for + each problem. Objects of this class act like a minimize method for model + selection, but a new problem-specific SaCeSS optimizer will be created + every time a model is minimized. + + Instance attributes correspond to pyPESTO's SaCeSS optimizer, and are + documented there. Extra keyword arguments supplied to the constructor + will be passed on to the constructor of the SaCeSS optimizer, for example, + `max_walltime_s` can be specified in this way. If specified, `tmpdir` will + be treated as a parent directory. + """ + + def __init__( + self, + num_workers: int, + local_optimizer: Optimizer = None, + tmpdir: str | Path | None = None, + save_history: bool = False, + **optimizer_kwargs, + ): + """Construct a minimize-like object.""" + self.num_workers = num_workers + self.local_optimizer = local_optimizer + self.optimizer_kwargs = optimizer_kwargs + self.save_history = save_history + + self.tmpdir = tmpdir + if self.tmpdir is not None: + self.tmpdir = Path(self.tmpdir) + + if self.save_history and self.tmpdir is None: + self.tmpdir = Path.cwd() / "sacess_tmpdir" + + def __call__(self, problem: Problem, model_hash: str, **minimize_options): + """Create then run a problem-specific sacess optimizer.""" + # create optimizer + ess_init_args = get_default_ess_options( + num_workers=self.num_workers, + dim=problem.dim, + ) + for x in ess_init_args: + x["local_optimizer"] = self.local_optimizer + model_tmpdir = None + if self.tmpdir is not None: + model_tmpdir = self.tmpdir / model_hash + model_tmpdir.mkdir(exist_ok=False, parents=True) + + ess = SacessOptimizer( + ess_init_args=ess_init_args, + tmpdir=model_tmpdir, + **self.optimizer_kwargs, + ) + + # optimize + result = ess.minimize( + problem=problem, + **minimize_options, + ) + + if self.save_history: + history_dir = model_tmpdir / "history" + history_dir.mkdir(exist_ok=False, parents=True) + for history_index, history in enumerate(ess.histories): + Hdf5History.from_history( + other=history, + file=history_dir / (str(history_index) + ".hdf5"), + id_=history_index, + ) + return result diff --git a/pypesto/select/model_problem.py b/pypesto/select/model_problem.py index cef94c0e3..3872cd9d5 100644 --- a/pypesto/select/model_problem.py +++ b/pypesto/select/model_problem.py @@ -9,12 +9,15 @@ from ..optimize import minimize from ..problem import Problem from ..result import OptimizerResult, Result -from .misc import model_to_pypesto_problem +from .misc import 
SacessMinimizeMethod, model_to_pypesto_problem OBJECTIVE_CUSTOMIZER_TYPE = Callable[[ObjectiveBase], None] TYPE_POSTPROCESSOR = Callable[["ModelProblem"], None] # noqa: F821 +__all__ = ["ModelProblem"] + + class ModelProblem: """Handles all required calibration tasks on a model. @@ -142,9 +145,16 @@ def __init__( def minimize(self) -> Result: """Optimize the model. - Returns: + Returns + ------- The optimization result. """ + if isinstance(self.minimize_method, SacessMinimizeMethod): + return self.minimize_method( + self.pypesto_problem, + model_hash=self.model.hash, + **self.minimize_options, + ) return self.minimize_method( self.pypesto_problem, **self.minimize_options, @@ -195,6 +205,12 @@ def create_fake_pypesto_result_from_fval( ---------- fval: The objective function value. + evaluation_time: + CPU time taken to compute the objective function value. + + Returns + ------- + The dummy result. """ result = Result() diff --git a/pypesto/select/postprocessors.py b/pypesto/select/postprocessors.py index ae8a9d587..b975065a5 100644 --- a/pypesto/select/postprocessors.py +++ b/pypesto/select/postprocessors.py @@ -1,10 +1,11 @@ """Process a model selection :class:`ModelProblem` after calibration.""" +import warnings from pathlib import Path import matplotlib.pyplot as plt import numpy as np -from petab_select.constants import ESTIMATE, TYPE_PATH, Criterion +from petab_select.constants import TYPE_PATH, Criterion from .. import store, visualize from .model_problem import TYPE_POSTPROCESSOR, ModelProblem @@ -48,7 +49,7 @@ def waterfall_plot_postprocessor( See :meth:`save_postprocessor` for usage hints and argument documentation. """ visualize.waterfall(problem.minimize_result) - plot_output_path = Path(output_path) / (problem.model.model_hash + ".png") + plot_output_path = Path(output_path) / (str(problem.model.hash) + ".png") plt.savefig(str(plot_output_path)) @@ -85,7 +86,7 @@ def save_postprocessor( """ stem = problem.model.model_id if use_model_hash: - stem = problem.model.get_hash() + stem = str(problem.model.hash) store.write_result( problem.minimize_result, Path(output_path) / (stem + ".hdf5"), @@ -109,10 +110,15 @@ def model_id_binary_postprocessor(problem: ModelProblem): problem: A model selection :class:`ModelProblem` that has been optimized. """ - model_id = "M_" - for parameter_value in problem.model.parameters.values(): - model_id += "1" if parameter_value == ESTIMATE else "0" - problem.model.model_id = model_id + warnings.warn( + ( + "This is obsolete. Model IDs are by default the model hash, which " + "is now similar to the binary string." + ), + DeprecationWarning, + stacklevel=2, + ) + problem.model.model_id = str(problem.model.hash) def report_postprocessor( diff --git a/pypesto/select/problem.py b/pypesto/select/problem.py index 9692890ef..a0034e218 100644 --- a/pypesto/select/problem.py +++ b/pypesto/select/problem.py @@ -1,11 +1,10 @@ """Manage all components of a pyPESTO model selection problem.""" import warnings -from collections.abc import Iterable from typing import Any, Optional import petab_select -from petab_select import Model +from petab_select import Model, Models from .method import MethodCaller from .model_problem import TYPE_POSTPROCESSOR, ModelProblem # noqa: F401 @@ -20,18 +19,18 @@ class Problem: Attributes ---------- - calibrated_models: - Storage for all calibrated models. A dictionary, where keys are - model hashes, and values are :class:`petab_select.Model` objects. 
- newly_calibrated_models: - Storage for models that were calibrated in the previous iteration of - model selection. Same type as ``calibrated_models``. - method_caller: - A :class:`MethodCaller`, used to run a single iteration of a model + calibrated_models : petab_select.Models + All calibrated models. + newly_calibrated_models : petab_select.Models + All models that were calibrated in the latest iteration of model + selection. + method_caller : pypesto.select.method.MethodCaller + Used to run a single iteration of a model selection method. - model_problem_options: - Passed to the constructor of :class:``ModelProblem``. - petab_select_problem: + model_problem_options : dict[str, typing.Any] + Keyword arguments, passed to the constructor of + :class:`ModelProblem`. + petab_select_problem : petab_select.Problem A PEtab Select problem. """ @@ -60,8 +59,8 @@ def __init__( self.model_problem_options["postprocessor"] = model_postprocessor self.set_state( - calibrated_models={}, - newly_calibrated_models={}, + calibrated_models=Models(), + newly_calibrated_models=Models(), ) # TODO default caller, based on petab_select.Problem @@ -90,8 +89,8 @@ def create_method_caller(self, **kwargs) -> MethodCaller: def set_state( self, - calibrated_models: dict[str, Model], - newly_calibrated_models: dict[str, Model], + calibrated_models: Models, + newly_calibrated_models: Models, ) -> None: """Set the state of the problem. @@ -102,7 +101,7 @@ def set_state( def update_with_newly_calibrated_models( self, - newly_calibrated_models: Optional[dict[str, Model]] = None, + newly_calibrated_models: Optional[Models] = None, ) -> None: """Update the state of the problem with newly calibrated models. @@ -111,7 +110,7 @@ def update_with_newly_calibrated_models( See attributes of :class:`Problem`. """ self.newly_calibrated_models = newly_calibrated_models - self.calibrated_models.update(self.newly_calibrated_models) + self.calibrated_models += self.newly_calibrated_models def handle_select_kwargs( self, @@ -132,7 +131,7 @@ def handle_select_kwargs( def select( self, **kwargs, - ) -> tuple[Model, dict[str, Model], dict[str, Model]]: + ) -> tuple[Model, Models]: """Run a single iteration of a model selection algorithm. The result is the selected model for the current run, independent of @@ -142,83 +141,60 @@ def select( Returns ------- - A 3-tuple, with the following values: + A 2-tuple, with the following values from this iteration: - 1. the best model; - 2. all candidate models in this iteration, as a `dict` with - model hashes as keys and models as values; and - 3. all candidate models from all iterations, as a `dict` with - model hashes as keys and models as values. + 1. the best model; and + 2. all models. """ - # TODO move some options to PEtab Select? e.g.: - # - startpoint_latest_mle - # - select_first_improvement self.handle_select_kwargs(kwargs) - # TODO handle bidirectional method_caller = self.create_method_caller(**kwargs) - previous_best_model, newly_calibrated_models = method_caller( - # TODO add predecessor model to state - newly_calibrated_models=self.newly_calibrated_models, - ) + newly_calibrated_models = method_caller() self.update_with_newly_calibrated_models( newly_calibrated_models=newly_calibrated_models, ) - best_model = petab_select.ui.best( + best_model = petab_select.ui.get_best( problem=self.petab_select_problem, - models=self.newly_calibrated_models.values(), + models=self.newly_calibrated_models, criterion=method_caller.criterion, ) - # TODO: Reconsider return value. 
`result` could be stored in attribute, - # then no values need to be returned, and users can request values - # manually. return best_model, newly_calibrated_models def select_to_completion( self, **kwargs, - ) -> list[Model]: - """Run an algorithm until an exception `StopIteration` is raised. + ) -> Models: + """Perform model selection until the method terminates. ``kwargs`` are passed to the :class:`MethodCaller` constructor. - An exception ``StopIteration`` is raised by - :meth:`pypesto.select.method.MethodCaller.__call__` when no candidate models - are found. - Returns ------- - The best models (the best model at each iteration). + All models. """ - best_models = [] + calibrated_models = Models(problem=self.petab_select_problem) self.handle_select_kwargs(kwargs) method_caller = self.create_method_caller(**kwargs) while True: try: - previous_best_model, newly_calibrated_models = method_caller( - newly_calibrated_models=self.newly_calibrated_models, - ) + iteration_calibrated_models = method_caller() self.update_with_newly_calibrated_models( - newly_calibrated_models=newly_calibrated_models, + newly_calibrated_models=iteration_calibrated_models, ) - best_models.append(previous_best_model) + calibrated_models += iteration_calibrated_models except StopIteration: - previous_best_model = ( - method_caller.candidate_space.predecessor_model - ) - best_models.append(previous_best_model) break - return best_models + return calibrated_models # TODO method that automatically generates initial models, for a specific # number of starts. TODO parallelise? def multistart_select( self, - predecessor_models: Iterable[Model] = None, + predecessor_models: Models = None, **kwargs, ) -> tuple[Model, list[Model]]: """Run an algorithm multiple times, with different predecessor models. 
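With this change, ``select_to_completion`` returns a single ``petab_select.Models`` collection of all calibrated models, rather than a list of per-iteration best models. A usage sketch under the new API; the YAML file name is a placeholder, and the method/criterion are just examples:

```python
import petab_select
import pypesto.select

# Placeholder path; any PEtab Select problem definition works here.
petab_select_problem = petab_select.Problem.from_yaml(
    "petab_select_problem.yaml"
)
pypesto_select_problem = pypesto.select.Problem(
    petab_select_problem=petab_select_problem
)

# Iterate until the method terminates (StopIteration is handled
# internally); returns a `petab_select.Models` collection.
calibrated_models = pypesto_select_problem.select_to_completion(
    method=petab_select.Method.FORWARD,
    criterion=petab_select.Criterion.AIC,
)

# Pick the overall best model from all calibrated models.
best_model = petab_select.ui.get_best(
    problem=petab_select_problem,
    models=calibrated_models,
    criterion=petab_select.Criterion.AIC,
)
```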
@@ -247,33 +223,18 @@ def multistart_select( """ self.handle_select_kwargs(kwargs) model_lists = [] - newly_calibrated_models_list = [ - self.newly_calibrated_models for _ in predecessor_models - ] - method_caller = self.create_method_caller(**kwargs) - for start_index, predecessor_model in enumerate(predecessor_models): - method_caller.candidate_space.previous_predecessor_model = ( - predecessor_model - ) - ( - best_model, - newly_calibrated_models_list[start_index], - ) = method_caller( - newly_calibrated_models=newly_calibrated_models_list[ - start_index - ], - ) - self.calibrated_models.update( - newly_calibrated_models_list[start_index] + for predecessor_model in predecessor_models: + method_caller = self.create_method_caller( + **(kwargs | {"predecessor_model": predecessor_model}) ) + models = method_caller() + self.calibrated_models += models - model_lists.append( - newly_calibrated_models_list[start_index].values() - ) + model_lists.append(models) method_caller.candidate_space.reset() - best_model = petab_select.ui.best( + best_model = petab_select.ui.get_best( problem=method_caller.petab_select_problem, models=[model for models in model_lists for model in models], criterion=method_caller.criterion, diff --git a/pypesto/store/read_from_hdf5.py b/pypesto/store/read_from_hdf5.py index 8f011d114..6c1603bfe 100644 --- a/pypesto/store/read_from_hdf5.py +++ b/pypesto/store/read_from_hdf5.py @@ -144,6 +144,7 @@ def read(self, objective: ObjectiveBase = None) -> Problem: problem.x_fixed_vals = [float(val) for val in problem.x_fixed_vals] problem.x_fixed_indices = [int(ix) for ix in problem.x_fixed_indices] problem.x_names = [name.decode() for name in problem.x_names] + problem.x_scales = [scale.decode() for scale in problem.x_scales] return problem diff --git a/pypesto/store/save_to_hdf5.py b/pypesto/store/save_to_hdf5.py index 3804bfbbb..a4ac4e703 100644 --- a/pypesto/store/save_to_hdf5.py +++ b/pypesto/store/save_to_hdf5.py @@ -1,23 +1,22 @@ """Include functions for saving various results to hdf5.""" +from __future__ import annotations import logging import os from numbers import Integral from pathlib import Path -from typing import Union import h5py import numpy as np +from .. import OptimizeResult, OptimizerResult from ..result import ProfilerResult, Result, SampleResult from .hdf5 import write_array, write_float_array logger = logging.getLogger(__name__) -def check_overwrite( - f: Union[h5py.File, h5py.Group], overwrite: bool, target: str -): +def check_overwrite(f: h5py.File | h5py.Group, overwrite: bool, target: str): """ Check whether target already exists. @@ -36,7 +35,7 @@ def check_overwrite( del f[target] else: raise RuntimeError( - f"File `{f.filename}` already exists and contains " + f"File `{f.file.filename}` already exists and contains " f"information about {target} result. " f"If you wish to overwrite the file, set " f"`overwrite=True`." @@ -53,7 +52,7 @@ class ProblemHDF5Writer: HDF5 result file name """ - def __init__(self, storage_filename: Union[str, Path]): + def __init__(self, storage_filename: str | Path): """ Initialize writer. @@ -106,7 +105,7 @@ class OptimizationResultHDF5Writer: HDF5 result file name """ - def __init__(self, storage_filename: Union[str, Path]): + def __init__(self, storage_filename: str | Path): """ Initialize Writer. 
@@ -117,32 +116,76 @@ def __init__(self, storage_filename: Union[str, Path]):
         """
         self.storage_filename = str(storage_filename)
 
-    def write(self, result: Result, overwrite=False):
-        """Write HDF5 result file from pyPESTO result object."""
-        # Create destination directory
-        if isinstance(self.storage_filename, str):
-            basedir = os.path.dirname(self.storage_filename)
-            if basedir:
-                os.makedirs(basedir, exist_ok=True)
+    def write(
+        self,
+        result: Result
+        | OptimizeResult
+        | OptimizerResult
+        | list[OptimizerResult],
+        overwrite=False,
+    ):
+        """Write HDF5 result file from pyPESTO result object.
+
+        Parameters
+        ----------
+        result: Result to be saved.
+        overwrite: Boolean, whether already existing results should be
+            overwritten. This applies to the whole list of results, not only to
+            individual results. See :meth:`write_optimizer_result` for
+            incrementally writing a sequence of `OptimizerResult`.
+        """
+        Path(self.storage_filename).parent.mkdir(parents=True, exist_ok=True)
+
+        if isinstance(result, Result):
+            results = result.optimize_result.list
+        elif isinstance(result, OptimizeResult):
+            results = result.list
+        elif isinstance(result, list):
+            results = result
+        elif isinstance(result, OptimizerResult):
+            results = [result]
+        else:
+            raise ValueError(f"Unsupported type for `result`: {type(result)}.")
 
         with h5py.File(self.storage_filename, "a") as f:
             check_overwrite(f, overwrite, "optimization")
 
             optimization_grp = f.require_group("optimization")
-            # settings =
-            # optimization_grp.create_dataset("settings", settings, dtype=)
             results_grp = optimization_grp.require_group("results")
 
-            for start in result.optimize_result.list:
-                start_id = start["id"]
-                start_grp = results_grp.require_group(start_id)
-                for key in start.keys():
-                    if key == "history":
-                        continue
-                    if isinstance(start[key], np.ndarray):
-                        write_array(start_grp, key, start[key])
-                    elif start[key] is not None:
-                        start_grp.attrs[key] = start[key]
-                f.flush()
+            for start in results:
+                self._do_write_optimizer_result(start, results_grp, overwrite)
+
+    def write_optimizer_result(
+        self, result: OptimizerResult, overwrite: bool = False
+    ):
+        """Write a single optimizer result to the HDF5 result file.
+
+        Parameters
+        ----------
+        result: Result to be saved.
+        overwrite: Boolean, whether an already existing result with the same
+            ID should be overwritten.
+        """
+        Path(self.storage_filename).parent.mkdir(parents=True, exist_ok=True)
+
+        with h5py.File(self.storage_filename, "a") as f:
+            results_grp = f.require_group("optimization/results")
+            self._do_write_optimizer_result(result, results_grp, overwrite)
+
+    def _do_write_optimizer_result(
+        self, result: OptimizerResult, g: h5py.Group = None, overwrite=False
+    ):
+        """Write an OptimizerResult to the given group."""
+        sub_group_id = result["id"]
+        check_overwrite(g, overwrite, sub_group_id)
+        start_grp = g.require_group(sub_group_id)
+        for key in result.keys():
+            if key == "history":
+                continue
+            if isinstance(result[key], np.ndarray):
+                write_array(start_grp, key, result[key])
+            elif result[key] is not None:
+                start_grp.attrs[key] = result[key]
 
 
 class SamplingResultHDF5Writer:
@@ -155,7 +198,7 @@ class SamplingResultHDF5Writer:
         HDF5 result file name
     """
 
-    def __init__(self, storage_filename: Union[str, Path]):
+    def __init__(self, storage_filename: str | Path):
         """
         Initialize Writer.
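The extended writer now also accepts single ``OptimizerResult`` objects, and ``write_optimizer_result`` allows appending results one at a time, which is what the SacessWorker autosave above relies on. A small sketch; the file name and dummy results are placeholders:

```python
import numpy as np

from pypesto import OptimizerResult
from pypesto.store.save_to_hdf5 import OptimizationResultHDF5Writer

writer = OptimizationResultHDF5Writer("results.h5")  # placeholder file name

# Append results as they become available; each is stored under its `id`.
# Reusing an existing ID raises unless overwrite=True.
for i in range(3):
    res = OptimizerResult(id=str(i), x=np.zeros(2), fval=float(i))
    writer.write_optimizer_result(res, overwrite=False)
```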
@@ -208,7 +251,7 @@ class ProfileResultHDF5Writer: HDF5 result file name """ - def __init__(self, storage_filename: Union[str, Path]): + def __init__(self, storage_filename: str | Path): """ Initialize Writer. @@ -241,7 +284,7 @@ def write(self, result: Result, overwrite: bool = False): @staticmethod def _write_profiler_result( - parameter_profile: Union[ProfilerResult, None], result_grp: h5py.Group + parameter_profile: ProfilerResult | None, result_grp: h5py.Group ) -> None: """Write a single ProfilerResult to hdf5. @@ -267,7 +310,7 @@ def _write_profiler_result( def write_result( result: Result, - filename: Union[str, Path], + filename: str | Path, overwrite: bool = False, problem: bool = True, optimize: bool = False, diff --git a/pypesto/version.py b/pypesto/version.py index 6b27eeebf..86716a713 100644 --- a/pypesto/version.py +++ b/pypesto/version.py @@ -1 +1 @@ -__version__ = "0.5.4" +__version__ = "0.5.5" diff --git a/pypesto/visualize/observable_mapping.py b/pypesto/visualize/observable_mapping.py index ec378fd1c..250a82dd5 100644 --- a/pypesto/visualize/observable_mapping.py +++ b/pypesto/visualize/observable_mapping.py @@ -129,7 +129,7 @@ def visualize_estimated_observable_mapping( n_axes = n_relative_observables + n_semiquant_observables n_rows = int(np.ceil(np.sqrt(n_axes))) n_cols = int(np.ceil(n_axes / n_rows)) - _, axes = plt.subplots(n_rows, n_cols, **kwargs) + _, axes = plt.subplots(n_rows, n_cols, squeeze=False, **kwargs) axes = axes.flatten() # Plot the estimated observable mapping for relative observables. @@ -246,8 +246,7 @@ def plot_linear_observable_mappings_from_pypesto_result( n_cols = int(np.ceil(n_relative_observables / n_rows)) # Make as many subplots as there are relative observables - _, axes = plt.subplots(n_rows, n_cols, **kwargs) - + _, axes = plt.subplots(n_rows, n_cols, squeeze=False, **kwargs) # Flatten the axes array axes = axes.flatten() @@ -590,8 +589,7 @@ def plot_splines_from_inner_result( n_cols = int(np.ceil(n_groups / n_rows)) # Make as many subplots as there are groups - _, axes = plt.subplots(n_rows, n_cols, **kwargs) - + _, axes = plt.subplots(n_rows, n_cols, squeeze=False, **kwargs) # Flatten the axes array axes = axes.flatten() diff --git a/pypesto/visualize/optimizer_history.py b/pypesto/visualize/optimizer_history.py index 474c030ff..dba5b0200 100644 --- a/pypesto/visualize/optimizer_history.py +++ b/pypesto/visualize/optimizer_history.py @@ -1,4 +1,5 @@ import logging +import warnings from collections.abc import Iterable from typing import Optional, Union @@ -501,6 +502,8 @@ def sacess_history( The plot axes. `ax` or a new axes if `ax` was `None`. 
""" ax = ax or plt.subplot() + if len(histories) == 0: + warnings.warn("No histories to plot.", stacklevel=2) # plot overall minimum # merge results @@ -530,6 +533,9 @@ def sacess_history( # plot steps of individual workers for worker_idx, history in enumerate(histories): x, y = history.get_time_trace(), history.get_fval_trace() + if len(x) == 0: + warnings.warn(f"No trace for worker #{worker_idx}.", stacklevel=2) + continue # extend from last decrease to last timepoint x = np.append(x, [np.max(t)]) y = np.append(y, [np.min(y)]) diff --git a/pypesto/visualize/ordinal_categories.py b/pypesto/visualize/ordinal_categories.py index b93e97f22..c8e81def5 100644 --- a/pypesto/visualize/ordinal_categories.py +++ b/pypesto/visualize/ordinal_categories.py @@ -612,26 +612,18 @@ def _get_data_for_plotting( def _get_default_axes(n_groups, **kwargs): """Return a list of axes with the default layout.""" - # If there is only one group, make a figure with only one plot - if n_groups == 1: - # Make figure with only one plot - fig, ax = plt.subplots(1, 1, **kwargs) + # Choose number of rows and columns to be used for the subplots + n_rows = int(np.ceil(np.sqrt(n_groups))) + n_cols = int(np.ceil(n_groups / n_rows)) - axes = [ax] - # If there are multiple groups, make a figure with multiple plots - else: - # Choose number of rows and columns to be used for the subplots - n_rows = int(np.ceil(np.sqrt(n_groups))) - n_cols = int(np.ceil(n_groups / n_rows)) - - # Make as many subplots as there are groups - fig, axes = plt.subplots(n_rows, n_cols, **kwargs) + # Make as many subplots as there are groups + fig, axes = plt.subplots(n_rows, n_cols, squeeze=False, **kwargs) - # Increase the spacing between the subplots - fig.subplots_adjust(hspace=0.35, wspace=0.25) + # Increase the spacing between the subplots + fig.subplots_adjust(hspace=0.35, wspace=0.25) - # Flatten the axes array - axes = axes.flatten() + # Flatten the axes array + axes = axes.flatten() return axes diff --git a/pypesto/visualize/select.py b/pypesto/visualize/select.py index 0a8efde34..e77126184 100644 --- a/pypesto/visualize/select.py +++ b/pypesto/visualize/select.py @@ -1,20 +1,18 @@ -"""Visualization routines for model selection with pyPESTO.""" +"""Deprecated. Use ``petab_select.plot`` methods instead.""" +import warnings + import matplotlib.pyplot as plt -import networkx as nx -from petab_select import Model -from petab_select.constants import VIRTUAL_INITIAL_MODEL, Criterion +import petab_select.plot +from petab_select import Criterion, Model, Models from .. import select as pypesto_select -# TODO move methods to petab_select -RELATIVE_LABEL_FONTSIZE = -2 - def default_label_maker(model: Model) -> str: """Create a model label, for plotting.""" - return model.model_hash[:4] + return str(model.hash)[:4] # FIXME supply the problem to automatically detect the correct criterion? @@ -27,89 +25,19 @@ def plot_selected_models( labels: dict[str, str] = None, ax: plt.Axes = None, ) -> plt.Axes: - """Plot criterion for calibrated models. - - Parameters - ---------- - selected_models: - First result in tuple returned by `ModelSelector.select()`. - criterion: - Key of criterion value in `selected_models` to be plotted. - relative: - If `True`, criterion values are plotted relative to the lowest - criterion value. TODO is the lowest value, always the best? May not - be for different criterion. - fz: - fontsize - size: - Figure size in inches. - labels: - A dictionary of model labels, where keys are model hashes, and - values are model labels, for plotting. 
If a model label is not
-        provided, it will be generated from its model ID.
-    ax:
-        The axis to use for plotting.
-
-    Returns
-    -------
-    matplotlib.pyplot.Axes
-        The plot axis.
-    """
-    zero = 0
-    if relative:
-        zero = selected_models[-1].get_criterion(criterion)
-
-    if labels is None:
-        labels = {}
-
-    # FIGURE
-    if ax is None:
-        _, ax = plt.subplots(figsize=size)
-    linewidth = 3
-
-    selected_models = [
-        model for model in selected_models if model != VIRTUAL_INITIAL_MODEL
-    ]
-
-    criterion_values = {
-        labels.get(
-            model.get_hash(), default_label_maker(model)
-        ): model.get_criterion(criterion) - zero
-        for model in selected_models
-    }
-
-    ax.plot(
-        criterion_values.keys(),
-        criterion_values.values(),
-        linewidth=linewidth,
-        color="lightgrey",
-        # edgecolor='k'
-    )
-
-    ax.get_xticks()
-    ax.set_xticks(list(range(len(criterion_values))))
-    ax.set_ylabel(
-        criterion + ("(relative)" if relative else "(absolute)"), fontsize=fz
-    )
-    # could change to compared_model_ids, if all models are plotted
-    ax.set_xticklabels(
-        criterion_values.keys(),
-        fontsize=fz + RELATIVE_LABEL_FONTSIZE,
+    """Deprecated. Use ``petab_select.plot.line_best_by_iteration`` instead."""
+    warnings.warn(
+        "Use `petab_select.plot.line_best_by_iteration` instead.",
+        DeprecationWarning,
+        stacklevel=2,
     )
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label1.set_fontsize(fz + RELATIVE_LABEL_FONTSIZE)
-    ytl = ax.get_yticks()
-    ax.set_ylim([min(ytl), max(ytl)])
-    # removing top and right borders
-    ax.spines["top"].set_visible(False)
-    ax.spines["right"].set_visible(False)
-    return ax
-
-
-def plot_history_digraph(*args, **kwargs):
-    """Ignore me. Renamed to `plot_calibrated_models_digraph`."""
-    raise NotImplementedError(
-        "This method is now `plot_calibrated_models_digraph`."
+    return petab_select.plot.line_best_by_iteration(
+        models=Models(selected_models),
+        criterion=criterion,
+        relative=relative,
+        fz=fz,
+        labels=labels,
+        ax=ax,
     )


@@ -123,82 +51,17 @@ def plot_calibrated_models_digraph(
     labels: dict[str, str] = None,
     ax: plt.Axes = None,
 ) -> plt.Axes:
-    """Plot all calibrated models in the model space, as a directed graph.
-
-    TODO replace magic numbers with options/constants
-
-    Parameters
-    ----------
-    problem:
-        The pyPESTO Select problem.
-    calibrated_models:
-        The models calibrated during model selection, in the format of
-        `pypesto.select.Problem.calibrated_models`.
-    criterion:
-        The criterion.
-    optimal_distance:
-        See docs for argument `k` in `networkx.spring_layout`.
-    relative:
-        If `True`, criterion values are offset by the minimum criterion
-        value.
-    options:
-        Additional keyword arguments for `networkx.draw_networkx`.
-    labels:
-        A dictionary of model labels, where keys are model hashes, and
-        values are model labels, for plotting. If a model label is not
-        provided, it will be generated from its model ID.
-    ax:
-        The axis to use for plotting.
-
-    Returns
-    -------
-    matplotlib.pyplot.Axes
-        The plot axis.
- """ - if criterion is None: - criterion = problem.petab_select_problem.criterion - if calibrated_models is None: - calibrated_models = problem.calibrated_models - if labels is None: - labels = {} - - G = nx.DiGraph() - edges = [] - for model in calibrated_models.values(): - predecessor_model_hash = model.predecessor_model_hash - if predecessor_model_hash is not None: - from_ = labels.get(predecessor_model_hash, predecessor_model_hash) - # may only not be the case for - # COMPARED_MODEL_ID == INITIAL_VIRTUAL_MODEL - if predecessor_model_hash in calibrated_models: - predecessor_model = calibrated_models[predecessor_model_hash] - from_ = labels.get( - predecessor_model.get_hash(), - default_label_maker(predecessor_model), - ) - else: - raise NotImplementedError( - "Plots for models with `None` as their predecessor model are " - "not yet implemented." - ) - from_ = "None" - to = labels.get(model.get_hash(), default_label_maker(model)) - edges.append((from_, to)) - - G.add_edges_from(edges) - default_options = { - "node_color": "lightgrey", - "arrowstyle": "-|>", - "node_shape": "s", - "node_size": 2500, - } - if options is not None: - default_options.update(options) - - if ax is None: - _, ax = plt.subplots(figsize=(12, 12)) - - pos = nx.spring_layout(G, k=optimal_distance, iterations=20) - nx.draw_networkx(G, pos, ax=ax, **default_options) - - return ax + """Use `petab_select.plot.graph_history`` instead. Deprecated.""" + warnings.warn( + "Use `petab_select.plot.graph_history` instead.", + DeprecationWarning, + stacklevel=2, + ) + return petab_select.plot.graph_history( + models=calibrated_models.values(), + criterion=criterion, + draw_networkx_kwargs=options, + relative=relative, + labels=labels, + ax=ax, + ) diff --git a/pytest.ini b/pytest.ini index 7571948eb..82ae58578 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,5 @@ [pytest] +addopts = "--doctest-modules" filterwarnings = ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning +norecursedirs = amici_models diff --git a/setup.cfg b/setup.cfg index 4fc1a4e1b..071c44248 100644 --- a/setup.cfg +++ b/setup.cfg @@ -105,7 +105,8 @@ ipopt = dlib = dlib >= 19.19.0 nlopt = - nlopt >= 2.6.2 + # != 2.9.0: https://github.com/stevengj/nlopt/issues/575 + nlopt >= 2.6.2, != 2.9.0 pyswarm = pyswarm >= 0.6 cma = @@ -163,10 +164,7 @@ example = ipywidgets >= 8.1.5 benchmark_models_petab @ git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master#subdirectory=src/python select = - # Remove when vis is moved to PEtab Select version - networkx >= 2.5.1 - # End remove - petab-select >= 0.1.12 + petab-select[plot] >= 0.3.3 test = pytest >= 5.4.3 pytest-cov >= 2.10.0 diff --git a/test/base/test_engine.py b/test/base/test_engine.py index 6db8e79c3..1481c4ca1 100644 --- a/test/base/test_engine.py +++ b/test/base/test_engine.py @@ -86,7 +86,14 @@ def test_deepcopy_objective(): ) ) factory = petab_importer.create_objective_creator() - objective = factory.create_objective() + amici_model = factory.create_model() + amici_model.setSteadyStateSensitivityMode( + amici.SteadyStateSensitivityMode.integrateIfNewtonFails + ) + amici_model.setSteadyStateComputationMode( + amici.SteadyStateComputationMode.integrateIfNewtonFails + ) + objective = factory.create_objective(model=amici_model) objective.amici_solver.setSensitivityMethod( amici.SensitivityMethod_adjoint diff --git a/test/base/test_store.py b/test/base/test_store.py index d90a8030d..840440c70 100644 --- a/test/base/test_store.py +++ b/test/base/test_store.py @@ -1,9 +1,11 @@ 
"""Test the `pypesto.store` module.""" import os -import tempfile +from pathlib import Path +from tempfile import TemporaryDirectory import numpy as np +import pytest import scipy.optimize as so import pypesto @@ -52,7 +54,7 @@ def test_storage_opt_result(): minimize_result = create_optimization_result() - with tempfile.TemporaryDirectory(dir=".") as tmpdirname: + with TemporaryDirectory(dir=".") as tmpdirname: result_file_name = os.path.join(tmpdirname, "a", "b", "result.h5") opt_result_writer = OptimizationResultHDF5Writer(result_file_name) opt_result_writer.write(minimize_result) @@ -89,6 +91,27 @@ def test_storage_opt_result_update(hdf5_file): assert opt_res[key] == read_result.optimize_result[i][key] +def test_write_optimizer_results_incrementally(): + """Test writing optimizer results incrementally to the same file.""" + res = create_optimization_result() + res1, res2 = res.optimize_result.list[:2] + + with TemporaryDirectory() as tmp_dir: + result_path = Path(tmp_dir, "result.h5") + writer = OptimizationResultHDF5Writer(result_path) + writer.write_optimizer_result(res1) + writer.write_optimizer_result(res2) + reader = OptimizationResultHDF5Reader(result_path) + read_res = reader.read() + assert len(read_res.optimize_result) == 2 + + # overwriting works + writer.write_optimizer_result(res1, overwrite=True) + # overwriting attempt fails without overwrite=True + with pytest.raises(RuntimeError): + writer.write_optimizer_result(res1) + + def test_storage_problem(hdf5_file): problem = create_problem() problem_writer = ProblemHDF5Writer(hdf5_file) diff --git a/test/optimize/test_optimize.py b/test/optimize/test_optimize.py index 48ebdea55..ce83bc069 100644 --- a/test/optimize/test_optimize.py +++ b/test/optimize/test_optimize.py @@ -18,7 +18,9 @@ import pypesto import pypesto.optimize as optimize +from pypesto import Objective from pypesto.optimize.ess import ( + ESSExitFlag, ESSOptimizer, FunctionEvaluatorMP, RefSet, @@ -308,6 +310,7 @@ def check_minimize(problem, library, solver, allow_failed_starts=False): ]: assert np.isfinite(result.optimize_result.list[0]["fval"]) assert result.optimize_result.list[0]["x"] is not None + assert result.optimize_result.list[0]["optimizer"] is not None def test_trim_results(problem): @@ -499,12 +502,14 @@ def test_ess(problem, local_optimizer, ess_type, request): adaptation_sent_coeff=5, ), ) + else: raise ValueError(f"Unsupported ESS type {ess_type}.") res = ess.minimize( problem=problem, ) + assert ess.exit_flag in (ESSExitFlag.MAX_TIME, ESSExitFlag.MAX_ITER) print("ESS result: ", res.summary()) # best values roughly: cr: 4.701; rosen 7.592e-10 @@ -577,6 +582,40 @@ def test_ess_refset_repr(): ) +class FunctionOrError: + """Callable that raises an error every nth invocation.""" + + def __init__(self, fun, error_frequency=100): + self.counter = 0 + self.error_frequency = error_frequency + self.fun = fun + + def __call__(self, *args, **kwargs): + self.counter += 1 + if self.counter % self.error_frequency == 0: + raise RuntimeError("Intentional error.") + return self.fun(*args, **kwargs) + + +def test_sacess_worker_error(capsys): + """Check that SacessOptimizer does not hang if an error occurs on a worker.""" + objective = Objective( + fun=FunctionOrError(sp.optimize.rosen), grad=sp.optimize.rosen_der + ) + problem = pypesto.Problem( + objective=objective, lb=0 * np.ones((1, 2)), ub=1 * np.ones((1, 2)) + ) + sacess = SacessOptimizer( + num_workers=2, + max_walltime_s=2, + sacess_loglevel=logging.DEBUG, + ess_loglevel=logging.DEBUG, + ) + res = 
sacess.minimize(problem) + assert isinstance(res, pypesto.Result) + assert "Intentional error." in capsys.readouterr().err + + def test_scipy_integrated_grad(): integrated = True obj = rosen_for_sensi(max_sensi_order=2, integrated=integrated)["obj"] diff --git a/test/petab/test_amici_objective.py b/test/petab/test_amici_objective.py index 274962fc1..5ff4512b4 100644 --- a/test/petab/test_amici_objective.py +++ b/test/petab/test_amici_objective.py @@ -86,12 +86,21 @@ def test_preeq_guesses(): importer = pypesto.petab.PetabImporter.from_yaml( os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml") ) - problem = importer.create_problem() - obj = problem.objective - obj.amici_solver.setNewtonMaxSteps(0) + obj_creator = importer.create_objective_creator() + amici_model = obj_creator.create_model() + amici_model.setSteadyStateComputationMode( + amici.SteadyStateComputationMode.integrateIfNewtonFails + ) + amici_model.setSteadyStateSensitivityMode( + amici.SteadyStateSensitivityMode.integrateIfNewtonFails + ) + obj = obj_creator.create_objective(model=amici_model) + problem = importer.create_problem(objective=obj) obj.amici_model.setSteadyStateSensitivityMode( amici.SteadyStateSensitivityMode.integrationOnly ) + obj = problem.objective + obj.amici_solver.setNewtonMaxSteps(0) obj.amici_solver.setAbsoluteTolerance(1e-12) obj.amici_solver.setRelativeTolerance(1e-12) diff --git a/test/petab/test_amici_predictor.py b/test/petab/test_amici_predictor.py index 2d23620a4..d99fa7d52 100644 --- a/test/petab/test_amici_predictor.py +++ b/test/petab/test_amici_predictor.py @@ -2,7 +2,6 @@ import os import shutil -import sys import amici import libsbml @@ -34,13 +33,10 @@ def conversion_reaction_model(): # try to import the exisiting model, if possible try: - sys.path.insert(0, os.path.abspath(model_output_dir)) model_module = amici.import_model_module(model_name, model_output_dir) model = model_module.getModel() except ValueError: # read in and adapt the sbml slightly - if os.path.abspath(model_output_dir) in sys.path: - sys.path.remove(os.path.abspath(model_output_dir)) sbml_importer = amici.SbmlImporter(sbml_file) # add observables to sbml model @@ -95,7 +91,6 @@ def create_intial_assignment(sbml_model, spec_id): ) # Importing the module and loading the model - sys.path.insert(0, os.path.abspath(model_output_dir)) model_module = amici.import_model_module(model_name, model_output_dir) model = model_module.getModel() except RuntimeError as err: @@ -107,7 +102,6 @@ def create_intial_assignment(sbml_model, spec_id): "Delete the conversion_reaction_enhanced model from your python " "path and retry. 
Your python path is currently:" ) - print(sys.path) print("Original error message:") raise err diff --git a/test/petab/test_petab_import.py b/test/petab/test_petab_import.py index aa4eb0067..d4a39f6d5 100644 --- a/test/petab/test_petab_import.py +++ b/test/petab/test_petab_import.py @@ -5,6 +5,7 @@ import logging import os import unittest +from itertools import chain import amici import benchmark_models_petab as models @@ -18,8 +19,6 @@ import pypesto.petab from pypesto.petab import PetabImporter -from .test_sbml_conversion import ATOL, RTOL - # In CI, bionetgen is installed here BNGPATH = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "..", "BioNetGen-2.8.5") @@ -37,12 +36,13 @@ def setUpClass(cls): cls.obj_edatas = [] def test_0_import(self): - for model_name in ["Zheng_PNAS2012", "Boehm_JProteomeRes2014"]: + for model_name in [ + "Zheng_PNAS2012", + "Boehm_JProteomeRes2014", + "Weber_BMC2015", + ]: # test yaml import for one model: - yaml_config = os.path.join( - models.MODELS_DIR, model_name, model_name + ".yaml" - ) - petab_problem = petab.Problem.from_yaml(yaml_config) + petab_problem = models.get_problem(model_name) self.petab_problems.append(petab_problem) def test_1_compile(self): @@ -119,7 +119,7 @@ def test_check_gradients(self): # Check gradients of simple model (should always be a true positive) model_name = "Boehm_JProteomeRes2014" importer = pypesto.petab.PetabImporter.from_yaml( - os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml") + models.get_problem_yaml_path(model_name) ) objective = importer.create_problem().objective @@ -137,35 +137,76 @@ def test_check_gradients(self): def test_plist_mapping(): - """Test that the AMICI objective created via PEtab correctly maps - gradient entries when some parameters are not estimated (realized via - edata.plist).""" - model_name = "Boehm_JProteomeRes2014" - petab_problem = pypesto.petab.PetabImporter.from_yaml( + """Test that the AMICI objective created via PEtab correctly uses + ExpData.plist. + + That means, ensure that + 1) it only computes gradient entries that are really required, and + 2) correctly maps model-gradient entries to the objective gradient + 3) with or without pypesto-fixed parameters. + + In Bruno_JExpBot2016, different parameters are estimated in different + conditions. 
+ """ + model_name = "Bruno_JExpBot2016" + petab_importer = pypesto.petab.PetabImporter.from_yaml( os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml") ) - - # define test parameter - par = np.asarray(petab_problem.petab_problem.x_nominal_scaled) - - problem = petab_problem.create_problem() + objective_creator = petab_importer.create_objective_creator() + problem = petab_importer.create_problem( + objective_creator.create_objective() + ) objective = problem.objective - # check that x_names are correctly subsetted - assert objective.x_names == [ - problem.x_names[ix] for ix in problem.x_free_indices - ] objective.amici_solver.setSensitivityMethod( - amici.SensitivityMethod_forward + amici.SensitivityMethod.forward ) - objective.amici_solver.setAbsoluteTolerance(1e-10) - objective.amici_solver.setRelativeTolerance(1e-12) + objective.amici_solver.setAbsoluteTolerance(1e-16) + objective.amici_solver.setRelativeTolerance(1e-15) + + # slightly perturb the parameters to avoid vanishing gradients + par = np.asarray(petab_importer.petab_problem.x_nominal_free_scaled) * 1.01 + + # call once to make sure ExpDatas are populated + fx1, grad1 = objective(par, sensi_orders=(0, 1)) + + plists1 = {edata.plist for edata in objective.edatas} + assert len(plists1) == 6, plists1 df = objective.check_grad_multi_eps( - par[problem.x_free_indices], multi_eps=[1e-3, 1e-4, 1e-5] + par[problem.x_free_indices], multi_eps=[1e-5, 1e-6, 1e-7, 1e-8] ) print("relative errors gradient: ", df.rel_err.values) print("absolute errors gradient: ", df.abs_err.values) - assert np.all((df.rel_err.values < RTOL) | (df.abs_err.values < ATOL)) + assert np.all((df.rel_err.values < 1e-8) | (df.abs_err.values < 1e-10)) + + # do the same after fixing some parameters + # we fix them to the previous values, so we can compare the results + fixed_ids = ["init_b10_1", "init_bcry_1"] + # the corresponding amici parameters (they are only ever mapped to the + # respective parameters in fixed_ids, and thus, should not occur in any + # `plist` later on) + fixed_model_par_ids = ["init_b10", "init_bcry"] + fixed_model_par_idxs = [ + objective.amici_model.getParameterIds().index(id) + for id in fixed_model_par_ids + ] + fixed_idxs = [problem.x_names.index(id) for id in fixed_ids] + problem.fix_parameters(fixed_idxs, par[fixed_idxs]) + assert objective is problem.objective + assert problem.x_fixed_indices == fixed_idxs + fx2, grad2 = objective(par[problem.x_free_indices], sensi_orders=(0, 1)) + assert np.isclose(fx1, fx2, rtol=1e-10, atol=1e-14) + assert np.allclose( + grad1[problem.x_free_indices], grad2, rtol=1e-10, atol=1e-14 + ) + plists2 = {edata.plist for edata in objective.edatas} + # the fixed parameters should have been in plist1, but not in plist2 + assert ( + set(fixed_model_par_idxs) - set(chain.from_iterable(plists1)) == set() + ) + assert ( + set(fixed_model_par_idxs) & set(chain.from_iterable(plists2)) == set() + ) def test_max_sensi_order(): diff --git a/test/petab/test_sbml_conversion.py b/test/petab/test_sbml_conversion.py index 22653a0a6..7284fb8a7 100644 --- a/test/petab/test_sbml_conversion.py +++ b/test/petab/test_sbml_conversion.py @@ -1,6 +1,5 @@ import os import re -import sys import unittest import warnings @@ -11,8 +10,6 @@ from ..util import load_amici_objective -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - optimizers = { "scipy": [ "Nelder-Mead", diff --git a/test/select/get_test_cases.sh b/test/select/get_test_cases.sh new file mode 100755 index 000000000..639f8ed94 --- 
/dev/null +++ b/test/select/get_test_cases.sh @@ -0,0 +1,8 @@ +#!/bin/bash +if [ ! -d petab_select ] +then + git clone -n --depth=1 --filter=tree:0 https://github.com/PEtab-dev/petab_select.git + cd petab_select + git sparse-checkout set --no-cone /test_cases + git checkout +fi diff --git a/test/select/test_select.py b/test/select/test_select.py index b471365d6..40553f7ed 100644 --- a/test/select/test_select.py +++ b/test/select/test_select.py @@ -15,12 +15,13 @@ Criterion, Method, Model, + get_best_by_iteration, ) import pypesto.engine import pypesto.select import pypesto.visualize.select -from pypesto.select import model_problem +from pypesto.select import SacessMinimizeMethod, model_problem from pypesto.select.misc import correct_x_guesses model_problem_options = { @@ -80,7 +81,9 @@ def initial_models(petab_problem_yaml) -> list[Model]: """Models that can be used to initialize a search.""" initial_model_1 = Model( model_id="myModel1", - petab_yaml=petab_problem_yaml, + model_subspace_id="dummy", + model_subspace_indices=[], + model_subspace_petab_yaml=petab_problem_yaml, parameters={ "k1": 0, "k2": 0, @@ -90,7 +93,9 @@ def initial_models(petab_problem_yaml) -> list[Model]: ) initial_model_2 = Model( model_id="myModel2", - petab_yaml=petab_problem_yaml, + model_subspace_id="dummy", + model_subspace_indices=[], + model_subspace_petab_yaml=petab_problem_yaml, parameters={ "k1": ESTIMATE, "k2": ESTIMATE, @@ -183,7 +188,7 @@ def test_problem_select_to_completion(pypesto_select_problem): method=Method.FORWARD ) ) - best_models = pypesto_select_problem.select_to_completion( + models = pypesto_select_problem.select_to_completion( criterion=Criterion.BIC, select_first_improvement=True, startpoint_latest_mle=True, @@ -191,63 +196,53 @@ def test_problem_select_to_completion(pypesto_select_problem): candidate_space=candidate_space, ) - expected_calibrated_models_subspace_ids = { + expected_calibrated_ids = { "M1_0", "M1_1", "M1_4", "M1_5", "M1_7", } - test_calibrated_models_subspace_ids = { - model.model_subspace_id - for model in pypesto_select_problem.calibrated_models.values() - } + test_calibrated_ids = {model.model_subspace_id for model in models} # Expected models were calibrated during the search. - assert ( - test_calibrated_models_subspace_ids - == expected_calibrated_models_subspace_ids + assert test_calibrated_ids == expected_calibrated_ids + + best_by_iteration = get_best_by_iteration( + models=models, criterion=Criterion.BIC ) - expected_best_model_subspace_ids = [ - # The first iteration is from a virtual model, which will appear - # as the best model for that iteration. - VIRTUAL_INITIAL_MODEL, + expected_best_by_iteration = [ "M1_0", "M1_1", - # This iteration with models `{'M1_4', 'M1_5'}` didn't have a better - # model than the previous iteration. - "M1_1", + "M1_5", "M1_7", ] - test_best_model_subspace_ids = [ - (model.model_subspace_id if model != VIRTUAL_INITIAL_MODEL else model) - for model in best_models + test_best_by_iteration = [ + model.model_subspace_id for model in best_by_iteration.values() ] # Expected best models were found. - assert test_best_model_subspace_ids == expected_best_model_subspace_ids + assert test_best_by_iteration == expected_best_by_iteration - expected_best_model_criterion_values = [ - # VIRTUAL_INITIAL_MODEL - np.inf, + expected_best_values = [ 36.767, -4.592, # This iteration with models `{'M1_4', 'M1_5'}` didn't have a better # model than the previous iteration. 
- -4.592, + -3.330, -4.889, ] - test_best_model_criterion_values = [ + test_best_values = [ ( model.get_criterion(Criterion.BIC) if model != VIRTUAL_INITIAL_MODEL else np.inf ) - for model in best_models + for model in best_by_iteration.values() ] # The best models have the expected criterion values. assert np.isclose( - test_best_model_criterion_values, - expected_best_model_criterion_values, + test_best_values, + expected_best_values, **tolerances, ).all() @@ -268,7 +263,8 @@ def test_problem_multistart_select(pypesto_select_problem, initial_models): assert test_best_model_subspace_id == expected_best_model_subspace_id best_models = [ - pypesto_select_problem.petab_select_problem.get_best( + petab_select.ui.get_best( + problem=pypesto_select_problem.petab_select_problem, models=models, criterion=criterion, ) @@ -279,6 +275,8 @@ def test_problem_multistart_select(pypesto_select_problem, initial_models): "M1_3": -4.705, # 'M1_7': -4.056, # skipped -- reproducibility requires many starts } + # As M1_7 criterion comparison is skipped, at least ensure it is present + assert {m.model_subspace_id for m in best_models} == {"M1_3", "M1_7"} test_best_models_criterion_values = { model.model_subspace_id: model.get_criterion(Criterion.AIC) for model in best_models @@ -292,7 +290,7 @@ def test_problem_multistart_select(pypesto_select_problem, initial_models): ) initial_model_id_hash_map = { - initial_model.model_id: initial_model.get_hash() + initial_model.model_id: initial_model.hash for initial_model in initial_models } expected_predecessor_model_hashes = { @@ -303,7 +301,7 @@ def test_problem_multistart_select(pypesto_select_problem, initial_models): } test_predecessor_model_hashes = { model.model_subspace_id: model.predecessor_model_hash - for model in pypesto_select_problem.calibrated_models.values() + for model in pypesto_select_problem.calibrated_models } # All calibrated models have the expected predecessor model. assert test_predecessor_model_hashes == expected_predecessor_model_hashes @@ -342,7 +340,7 @@ def test_postprocessors(petab_select_problem): expected_newly_calibrated_models_subspace_ids = ["M1_0"] test_newly_calibrated_models_subspace_ids = [ - model.model_subspace_id for model in newly_calibrated_models_1.values() + model.model_subspace_id for model in newly_calibrated_models_1 ] # The expected "forward" models were found. assert ( @@ -361,8 +359,8 @@ def test_postprocessors(petab_select_problem): ) # End Iteration 1 ######################################################### - expected_png_file = output_path / (best_model_1.model_hash + ".png") - expected_hdf5_file = output_path / (best_model_1.model_hash + ".hdf5") + expected_png_file = output_path / f"{best_model_1.hash}.png" + expected_hdf5_file = output_path / f"{best_model_1.hash}.hdf5" # The expected files exist. 
assert expected_png_file.is_file() @@ -398,33 +396,6 @@ def test_model_problem_fake_result(): assert test_fval == expected_fval -def test_vis(pypesto_select_problem): - """Test plotting routines.""" - criterion = Criterion.AIC - best_models = pypesto_select_problem.select_to_completion( - method=Method.FORWARD, - criterion=criterion, - model_problem_options=model_problem_options, - ) - labels = { - model.get_hash(): model.model_subspace_id - for model in pypesto_select_problem.calibrated_models.values() - } - pypesto.visualize.select.plot_selected_models( - selected_models=best_models, - criterion=criterion, - labels=labels, - ) - # import matplotlib.pyplot as plt - # plt.savefig('output/selected.png') - pypesto.visualize.select.plot_calibrated_models_digraph( - problem=pypesto_select_problem, - criterion=criterion, - labels=labels, - ) - # plt.savefig('output/digraph.png') - - def test_custom_objective(petab_problem_yaml): parameters = { "k2": 0.333, @@ -443,7 +414,12 @@ def fun_grad(x, fun=expected_fun, grad=expected_grad): grad=True, ) - model = Model(petab_yaml=petab_problem_yaml) + model = Model( + model_subspace_id="dummy", + model_subspace_indices=[], + model_subspace_petab_yaml=petab_problem_yaml, + parameters={}, + ) def get_pypesto_problem(model, objective, petab_problem, x_guesses): corrected_x_guesses = correct_x_guesses( @@ -494,3 +470,26 @@ def model_to_pypesto_problem( # The custom objective and gradient were returned. assert test_fun == expected_fun assert np.isclose(test_grad, expected_grad).all() + + +def test_sacess_minimize_method(pypesto_select_problem, initial_models): + """Test `SacessMinimizeMethod`. + + Only ensures that the pipeline runs. + """ + predecessor_model = initial_models[1] + + minimize_method = SacessMinimizeMethod( + num_workers=2, + max_walltime_s=1, + ) + + model_problem_options = { + "minimize_method": minimize_method, + } + + pypesto_select_problem.select_to_completion( + model_problem_options=model_problem_options, + method=petab_select.Method.FORWARD, + predecessor_model=predecessor_model, + ) diff --git a/test/select/test_test_cases.py b/test/select/test_test_cases.py new file mode 100644 index 000000000..8bafdfd87 --- /dev/null +++ b/test/select/test_test_cases.py @@ -0,0 +1,218 @@ +import os +import shlex +import shutil +import subprocess +from pathlib import Path + +import numpy as np +import pandas as pd +import petab_select +import pytest +import yaml +from petab_select import Model +from petab_select.constants import ( + CRITERIA, + ESTIMATED_PARAMETERS, + TERMINATE, +) + +import pypesto.engine +import pypesto.optimize +import pypesto.select + +# Set to `[]` to test all +test_cases = [ + # "0009", +] + +# Do not use computationally-expensive test cases in CI +skip_test_cases = [ + "0009", +] + +# Download test cases from GitHub +test_cases_path = Path("petab_select") / "test_cases" +if not test_cases_path.exists(): + subprocess.run([Path(__file__).parent / "get_test_cases.sh"]) # noqa: S603 + +# Reduce runtime but with high reproducibility +minimize_options = { + "n_starts": 10, + "engine": pypesto.engine.MultiProcessEngine(), + "filename": None, + "progress_bar": False, +} + + +def objective_customizer(obj): + obj.amici_solver.setRelativeTolerance(1e-12) + + +model_problem_options = { + "minimize_options": minimize_options, + "objective_customizer": objective_customizer, +} + + +@pytest.mark.parametrize( + "test_case_path_stem", + sorted( + [test_case_path.stem for test_case_path in test_cases_path.glob("*")] + ), +) +def 
test_pypesto(test_case_path_stem): + """Run all test cases with pyPESTO.""" + if test_cases and test_case_path_stem not in test_cases: + pytest.skip("Test excluded from subset selected for debugging.") + return + + if test_case_path_stem in skip_test_cases: + pytest.skip("Test marked to be skipped.") + return + + test_case_path = test_cases_path / test_case_path_stem + expected_model_yaml = test_case_path / "expected.yaml" + # Setup the pyPESTO model selector instance. + petab_select_problem = petab_select.Problem.from_yaml( + test_case_path / "petab_select_problem.yaml", + ) + pypesto_select_problem = pypesto.select.Problem( + petab_select_problem=petab_select_problem + ) + + # Run the selection process until "exhausted". + pypesto_select_problem.select_to_completion( + model_problem_options=model_problem_options, + ) + + # Get the best model + best_model = petab_select.analyze.get_best( + models=pypesto_select_problem.calibrated_models, + criterion=petab_select_problem.criterion, + compare=petab_select_problem.compare, + ) + + # Load the expected model. + expected_model = Model.from_yaml(expected_model_yaml) + + def get_series(model, dict_attribute) -> pd.Series: + return pd.Series( + getattr(model, dict_attribute), + dtype=np.float64, + ).sort_index() + + # The estimated parameters and criteria values are as expected. + for dict_attribute in [CRITERIA, ESTIMATED_PARAMETERS]: + pd.testing.assert_series_equal( + get_series(expected_model, dict_attribute), + get_series(best_model, dict_attribute), + rtol=1e-2, + ) + # FIXME ensure `current model criterion` trajectory also matches, in summary.tsv file, + # for test case 0009, after summary format is revised + + +@pytest.mark.skipif( + os.getenv("GITHUB_ACTIONS") == "true", + reason="Too CPU heavy for CI.", +) +def test_famos_cli(): + """Run test case 0009 with pyPESTO and the CLI interface.""" + test_case_path = test_cases_path / "0009" + expected_model_yaml = test_case_path / "expected.yaml" + problem_yaml = test_case_path / "petab_select_problem.yaml" + + problem = petab_select.Problem.from_yaml(problem_yaml) + + # Setup working directory for intermediate files + work_dir = Path("output_famos_cli") + work_dir_str = str(work_dir) + if work_dir.exists(): + shutil.rmtree(work_dir_str) + work_dir.mkdir(exist_ok=True, parents=True) + + models_yamls = [] + metadata_yaml = work_dir / "metadata.yaml" + state_dill = work_dir / "state.dill" + iteration = 0 + while True: + iteration += 1 + uncalibrated_models_yaml = ( + work_dir / f"uncalibrated_models_{iteration}.yaml" + ) + calibrated_models_yaml = ( + work_dir / f"calibrated_models_{iteration}.yaml" + ) + models_yaml = work_dir / f"models_{iteration}.yaml" + models_yamls.append(models_yaml) + # Start iteration + subprocess.run( + shlex.split( # noqa: S603 + f"""petab_select start_iteration + --problem {problem_yaml} + --state {state_dill} + --output-uncalibrated-models {uncalibrated_models_yaml} + --relative-paths + """ + ) + ) + # Calibrate models + models = petab_select.Models.from_yaml(uncalibrated_models_yaml) + for model in models: + pypesto.select.ModelProblem( + model=model, + criterion=problem.criterion, + **model_problem_options, + ) + models.to_yaml(filename=calibrated_models_yaml) + # End iteration + subprocess.run( + shlex.split( # noqa: S603 + f"""petab_select end_iteration + --output-models {models_yaml} + --output-metadata {metadata_yaml} + --state {state_dill} + --calibrated-models {calibrated_models_yaml} + --relative-paths + """ + ) + ) + with open(metadata_yaml) as f: + 
metadata = yaml.safe_load(f) + if metadata[TERMINATE]: + break + + # Get the best model + models_yamls_arg = " ".join( + f"--models {models_yaml}" for models_yaml in models_yamls + ) + subprocess.run( + shlex.split( # noqa: S603 + f"""petab_select get_best + --problem {problem_yaml} + {models_yamls_arg} + --output {work_dir / "best_model.yaml"} + --relative-paths + """ + ) + ) + best_model = petab_select.Model.from_yaml(work_dir / "best_model.yaml") + + # Load the expected model. + expected_model = Model.from_yaml(expected_model_yaml) + + def get_series(model, dict_attribute) -> pd.Series: + return pd.Series( + getattr(model, dict_attribute), + dtype=np.float64, + ).sort_index() + + # The estimated parameters and criteria values are as expected. + for dict_attribute in [CRITERIA, ESTIMATED_PARAMETERS]: + pd.testing.assert_series_equal( + get_series(expected_model, dict_attribute), + get_series(best_model, dict_attribute), + rtol=1e-2, + ) + # FIXME ensure `current model criterion` trajectory also matches, in summary.tsv file, + # after summary format is revised diff --git a/test/util.py b/test/util.py index 696db87e4..10bbe4371 100644 --- a/test/util.py +++ b/test/util.py @@ -40,6 +40,8 @@ def obj_for_sensi(fun, grad, hess, max_sensi_order, integrated, x): ret: dict With fields obj, max_sensi_order, x, fval, grad, hess. """ + x = np.asarray(x) + if integrated: if max_sensi_order == 2: diff --git a/test/visualize/test_visualize.py b/test/visualize/test_visualize.py index 149c65719..57a4d8ddd 100644 --- a/test/visualize/test_visualize.py +++ b/test/visualize/test_visualize.py @@ -3,6 +3,7 @@ import os from collections.abc import Sequence from functools import wraps +from pathlib import Path import matplotlib.pyplot as plt import numpy as np @@ -1219,7 +1220,7 @@ def test_time_trajectory_model(): @close_fig def test_sacess_history(): """Test pypesto.visualize.optimizer_history.sacess_history""" - from pypesto.optimize.ess.sacess import SacessOptimizer + from pypesto.optimize.ess.sacess import ESSExitFlag, SacessOptimizer from pypesto.visualize.optimizer_history import sacess_history problem = create_problem() @@ -1227,6 +1228,7 @@ def test_sacess_history(): max_walltime_s=1, num_workers=2, sacess_loglevel=logging.WARNING ) sacess.minimize(problem) + assert sacess.exit_flag == ESSExitFlag.MAX_TIME sacess_history(sacess.histories) @@ -1240,3 +1242,59 @@ def test_parameters_correlation_matrix(result_creation): result = result_creation() visualize.parameters_correlation_matrix(result) + + +@close_fig +def test_plot_ordinal_categories(): + example_ordinal_yaml = ( + Path(__file__).parent + / ".." + / ".." + / "doc" + / "example" + / "example_ordinal" + / "example_ordinal.yaml" + ) + petab_problem = petab.Problem.from_yaml(example_ordinal_yaml) + # Set seed for reproducibility. + np.random.seed(0) + optimizer = pypesto.optimize.ScipyOptimizer( + method="L-BFGS-B", options={"maxiter": 1} + ) + importer = pypesto.petab.PetabImporter(petab_problem, hierarchical=True) + problem = importer.create_problem() + result = pypesto.optimize.minimize( + problem=problem, n_starts=1, optimizer=optimizer + ) + visualize.plot_categories_from_pypesto_result(result) + + +@close_fig +def test_visualize_estimated_observable_mapping(): + example_semiquantitative_yaml = ( + Path(__file__).parent + / ".." + / ".." + / "doc" + / "example" + / "example_semiquantitative" + / "example_semiquantitative_linear.yaml" + ) + petab_problem = petab.Problem.from_yaml(example_semiquantitative_yaml) + # Set seed for reproducibility. 
+ np.random.seed(0) + optimizer = pypesto.optimize.ScipyOptimizer( + method="L-BFGS-B", + options={ + "disp": None, + "ftol": 2.220446049250313e-09, + "gtol": 1e-5, + "maxiter": 1, + }, + ) + importer = pypesto.petab.PetabImporter(petab_problem, hierarchical=True) + problem = importer.create_problem() + result = pypesto.optimize.minimize( + problem=problem, n_starts=1, optimizer=optimizer + ) + visualize.visualize_estimated_observable_mapping(result, problem) diff --git a/tox.ini b/tox.ini index 6ad9f8719..98f96b7bf 100644 --- a/tox.ini +++ b/tox.ini @@ -29,7 +29,7 @@ envlist = # Base-environment [testenv] -passenv = AMICI_PARALLEL_COMPILE,CC,CXX,MPLBACKEND +passenv = AMICI_PARALLEL_COMPILE,BNGPATH,CC,CXX,GITHUB_ACTIONS,MPLBACKEND # Sub-environments # inherit settings defined in the base @@ -75,10 +75,14 @@ description = Test basic functionality on Windows [testenv:petab] -extras = test,amici,petab,pyswarm,roadrunner +extras = test,petab,pyswarm,roadrunner deps = git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master\#subdirectory=src/python - git+https://github.com/AMICI-dev/amici.git@develop\#egg=amici&subdirectory=python/sdist +# always install amici from develop branch, avoid caching +# to skip re-installation, run `tox -e petab --override testenv:petab.commands_pre=` +commands_pre = + python3 -m pip uninstall -y amici + python3 -m pip install git+https://github.com/AMICI-dev/amici.git@develop\#egg=amici&subdirectory=python/sdist commands = python3 -m pip install git+https://github.com/PEtab-dev/petab_test_suite@main python3 -m pip install git+https://github.com/pysb/pysb@master @@ -120,9 +124,9 @@ description = Test hierarchical optimization module [testenv:select] -extras = test,amici,petab,select +extras = test,amici,petab,select,fides commands = - pytest --cov=pypesto --cov-report=xml --cov-append \ + pytest -svvv --cov=pypesto --cov-report=xml --cov-append \ test/select description = Test model selection functionality
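Since `pypesto.visualize.select` is now only a deprecation shim (see `pypesto/visualize/select.py` above), plots should be created directly with PEtab Select. A sketch of the replacement calls, assuming `models` is an existing `petab_select.Models` collection of calibrated models (e.g. the return value of `select_to_completion`):

```python
import petab_select.plot
from petab_select import Criterion

# `models`: a petab_select.Models of calibrated models (assumed to exist,
# e.g. returned by pypesto.select.Problem.select_to_completion()).

# Replaces pypesto.visualize.select.plot_selected_models:
ax = petab_select.plot.line_best_by_iteration(
    models=models, criterion=Criterion.AIC, relative=True
)

# Replaces pypesto.visualize.select.plot_calibrated_models_digraph:
ax = petab_select.plot.graph_history(models=models, criterion=Criterion.AIC)
```

The keyword arguments mirror those forwarded by the deprecated wrappers above (`relative`, `labels`, `fz`, `ax`, and `draw_networkx_kwargs` in place of `options`).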